FluidInference · Alex-Wengg · Jun 27, 2025 · Jun 25, 2025 · Jun 25, 2025 · Jun 25, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,26 @@
+name: CoreML Build Compile
+
+on:
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  verify-coreml:
+    name: Verify CoreMLDiarizerManager Builds
+    runs-on: macos-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Setup Swift 6.1
+      uses: swift-actions/setup-swift@v2
+      with:
+        swift-version: '6.1'
+
+    - name: Build package
+      run: swift build
+
+    - name: Verify DiarizerManager runs
+      run: swift test --filter testManagerBasicValidation
+      timeout-minutes: 5
diff --git a/Package.swift b/Package.swift
@@ -1,60 +1,33 @@
 // swift-tools-version: 6.1
 import PackageDescription
-import Foundation
-
-// Use SwiftPM's built-in package directory resolution
-let packageDir = Context.packageDirectory
 
 let package = Package(
-    name: "SeamlessAudioSwift",
+    name: "FluidAudioSwift",
     platforms: [
         .macOS(.v13),
         .iOS(.v16)
     ],
     products: [
         .library(
-            name: "SeamlessAudioSwift",
-            targets: ["SeamlessAudioSwift"]
+            name: "FluidAudioSwift",
+            targets: ["FluidAudioSwift"]
         ),
     ],
     dependencies: [
         // Add any external dependencies here if needed
     ],
     targets: [
         .target(
-            name: "SeamlessAudioSwift",
-            dependencies: ["SherpaOnnxWrapper"],
-            path: "Sources/SeamlessAudioSwift",
-            linkerSettings: [
-                .unsafeFlags(["-L\(packageDir)/Sources/SherpaOnnxWrapperC/lib"]),
-                .linkedLibrary("onnxruntime"),
-                .linkedLibrary("piper_phonemize"),
-                .linkedLibrary("sherpa-onnx"),
-                .linkedLibrary("sherpa-onnx-c-api"),
-                .linkedLibrary("sherpa-onnx-core"),
-                .linkedLibrary("sherpa-onnx-cxx-api"),
-                .linkedLibrary("sherpa-onnx-fst"),
-                .linkedLibrary("sherpa-onnx-fstfar"),
-                .linkedLibrary("sherpa-onnx-kaldifst-core"),
-                .linkedLibrary("sherpa-onnx-portaudio_static"),
-                .linkedLibrary("ssentencepiece_core"),
-                .linkedLibrary("ucd"),
-                .linkedLibrary("c++")
-            ]
-        ),
-        .target(
-            name: "SherpaOnnxWrapper",
-            dependencies: ["SherpaOnnxWrapperC"],
-            path: "Sources/SherpaOnnxWrapper",
-            exclude: ["lib/"]
-        ),
-        .systemLibrary(
-            name: "SherpaOnnxWrapperC",
-            path: "Sources/SherpaOnnxWrapperC"
+            name: "FluidAudioSwift",
+            dependencies: [],
+            path: "Sources/FluidAudioSwift"
         ),
         .testTarget(
-            name: "SeamlessAudioSwiftTests",
-            dependencies: ["SeamlessAudioSwift"]
+            name: "FluidAudioSwiftTests",
+            dependencies: ["FluidAudioSwift"],
+            resources: [
+                .copy("README_BENCHMARKS.md")
+            ]
         ),
     ]
 )
diff --git a/README.md b/README.md
@@ -1,131 +1,76 @@
-# SeamlessAudioSwift
+# FluidAudioSwift
 
-A Swift package for seamless audio processing, speech recognition, and speaker diarization using SherpaOnnx.
+[![Swift](https://img.shields.io/badge/Swift-6.1+-orange.svg)](https://swift.org)
+[![Platform](https://img.shields.io/badge/Platform-macOS%20%7C%20iOS-blue.svg)](https://developer.apple.com)
+[![License](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
+
+FluidAudioSwift is a Swift framework for on-device speaker diarization and audio processing.
 
 ## Features
 
-- 🎤 **Speech Recognition**: Real-time and offline speech-to-text
-- 👥 **Speaker Diarization**: Identify and separate different speakers in audio
-- 🔊 **Speaker Embedding**: Extract speaker embeddings for identification
-- 🎯 **Voice Activity Detection**: Detect speech segments in audio
-- 📱 **Cross-Platform**: Works on macOS and iOS
-- ⚡ **High Performance**: Optimized with native C++ libraries
+- **Speaker Diarization**: Automatically identify and separate different speakers in audio recordings
+- **Speaker Embedding Extraction**: Generate speaker embeddings for voice comparison and clustering
+- **CoreML Integration**: Native Apple CoreML backend for optimal performance on Apple Silicon, with full iOS support
+- **Real-time Processing**: Support for streaming audio processing with minimal latency
+- **Cross-platform**: Full support for macOS 13.0+ and iOS 16.0+
 
 ## Installation
 
-### Swift Package Manager
-
-Add SeamlessAudioSwift to your project using Xcode:
-
-1. In Xcode, go to **File → Add Package Dependencies**
-2. Enter the repository URL: `https://github.com/SeamlessCompute/SeamlessAudioSwift.git`
-3. Choose the version and add to your target
-
-Or add it to your `Package.swift`:
+Add FluidAudioSwift to your project using Swift Package Manager:
 
 ```swift
 dependencies: [
-    .package(url: "https://github.com/SeamlessCompute/SeamlessAudioSwift.git", from: "1.0.0")
-]
+    .package(url: "https://github.com/FluidInference/FluidAudioSwift.git", from: "1.0.0"),
+],
 ```
 
 ## Quick Start
 
-### Basic Speech Recognition
-
 ```swift
-import SeamlessAudioSwift
-
-// Initialize the speech recognition manager
-let manager = SpeakerDiarizationManager()
+import FluidAudioSwift
 
-// Process audio samples
-let audioSamples: [Float] = // your audio data
-let result = try await manager.extractEmbedding(from: audioSamples)
-```
+// Initialize and process audio
+Task {
+    let diarizer = DiarizerManager()
+    try await diarizer.initialize()
 
-### Speaker Diarization
+    let audioSamples: [Float] = [] // replace with your 16 kHz audio samples
+    let result = try await diarizer.performCompleteDiarization(audioSamples, sampleRate: 16000)
 
-```swift
-import SeamlessAudioSwift
-
-// Initialize with models
-let manager = SpeakerDiarizationManager()
-try await manager.initialize()
-
-// Process audio for speaker separation
-let segments = try await manager.performDiarization(on: audioSamples)
-
-for segment in segments {
-    print("Speaker \(segment.speaker): \(segment.start)s - \(segment.end)s")
+    for segment in result.segments {
+        print("\(segment.speakerId): \(segment.startTimeSeconds)s - \(segment.endTimeSeconds)s")
+    }
 }
 ```
 
-## Requirements
-
-- **iOS**: 16.0+
-- **macOS**: 13.0+
-- **Xcode**: 16.0+
-- **Swift**: 6.1+
-
-## Models and Attribution
+## Configuration
 
-This package uses models and libraries from the excellent [**SherpaOnnx**](https://github.com/k2-fsa/sherpa-onnx) project by the K2-FSA team.
+Customize behavior with `DiarizerConfig`:
 
-### SherpaOnnx Models
-
-SherpaOnnx provides state-of-the-art speech recognition and audio processing models. You can find pre-trained models at:
-
-- **Main Repository**: [https://github.com/k2-fsa/sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)
-- **Pre-trained Models**: [https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html)
-- **Documentation**: [https://k2-fsa.github.io/sherpa/onnx/](https://k2-fsa.github.io/sherpa/onnx/)
-
-### Supported Model Types
-
-- **Speech Recognition**: Transducer, Paraformer, Whisper, CTC models
-- **Speaker Diarization**: Pyannote-based segmentation models
-- **Speaker Embedding**: Speaker verification and identification models
-- **Voice Activity Detection**: Silero VAD models
-
-## Architecture
+```swift
+let config = DiarizerConfig(
+    clusteringThreshold: 0.7,      // Speaker similarity (0.0-1.0, higher = stricter)
+    minActivityThreshold: 10.0,    // Minimum activity frames for speaker detection
+    minDurationOn: 1.0,           // Minimum speech duration (seconds)
+    minDurationOff: 0.5,          // Minimum silence between speakers (seconds)
+    numClusters: -1,              // Number of speakers (-1 = auto-detect)
+    debugMode: false
+)
+```
 
-SeamlessAudioSwift is built on top of:
+## API Reference
 
-- **SherpaOnnx C++ Libraries**: High-performance audio processing
-- **ONNX Runtime**: Optimized neural network inference
-- **Swift Package Manager**: Modern dependency management
-- **Git LFS**: Efficient handling of large model files
+- **`DiarizerManager`**: Main diarization class
+- **`performCompleteDiarization(_:sampleRate:)`**: Process audio and return speaker segments
+- **`compareSpeakers(audio1:audio2:)`**: Compare similarity between two audio samples
+- **`validateAudio(_:)`**: Validate audio quality and characteristics
 
 ## License
 
-This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+MIT License - see [LICENSE](LICENSE) for details.
 
 ## Acknowledgments
 
-- **[SherpaOnnx](https://github.com/k2-fsa/sherpa-onnx)** by the K2-FSA team for the underlying speech processing libraries
-- **[ONNX Runtime](https://onnxruntime.ai/)** for neural network inference
-- **[Pyannote](https://github.com/pyannote/pyannote-audio)** for speaker diarization models
-- **[Silero](https://github.com/snakers4/silero-vad)** for voice activity detection models
-
-## Contributing
-
-Contributions are welcome! Please feel free to submit a Pull Request.
-
-## Support
-
-For questions and support:
-- 📧 Contact: [SeamlessCompute](https://github.com/SeamlessCompute)
-- 🐛 Issues: [GitHub Issues](https://github.com/SeamlessCompute/SeamlessAudioSwift/issues)
-
----
+This project builds upon the excellent work of the [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) project for speaker diarization algorithms and techniques. We extend our gratitude to the sherpa-onnx contributors for their foundational work in on-device speech processing.
 
-**Note**: This package includes pre-compiled libraries and models. The first build may take longer due to the size of the dependencies.
 
-# macOS
-.DS_Store
-.DS_Store?
-._*
-.Spotlight-V100
-.Trashes
-ehthumbs.db
-Thumbs.db