narcotic-sh
diff --git a/‎.gitignore‎
Lines changed: 0 additions & 1 deletion b/‎.gitignore‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎CMakeLists.txt‎
Lines changed: 6 additions & 2 deletions b/‎CMakeLists.txt‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎DOCS.md‎
Lines changed: 2 additions & 2 deletions b/‎DOCS.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 2 deletions b/‎README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎THIRD_PARTY_LICENSES‎
Lines changed: 4 additions & 0 deletions b/‎THIRD_PARTY_LICENSES‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎evaluation/README.md‎
Lines changed: 4 additions & 4 deletions b/‎evaluation/README.md‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎evaluation/eval.py‎
Lines changed: 1 addition & 1 deletion b/‎evaluation/eval.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎models/README.md‎
Lines changed: 1 addition & 0 deletions b/‎models/README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎models/camplusplus_batch16.mlpackage/Manifest.json‎
Lines changed: 18 additions & 0 deletions b/‎models/camplusplus_batch16.mlpackage/Manifest.json‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎models/pyannote_segmentation.mlmodelc/Manifest.json‎
Lines changed: 1 addition & 0 deletions b/‎models/pyannote_segmentation.mlmodelc/Manifest.json‎
Lines changed: 1 addition & 0 deletions
@@ -17,7 +17,6 @@ result/
 
 **/CLAUDE.md
 
-*.json
 *.rttm
 
 audio/
@@ -8,11 +8,15 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 # Find Python
 find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
 
-# Add the fbank_extractor subdirectory. This is where the C++ library is defined, built, and its installation is configured
+# libfbank_extractor
 add_subdirectory(senko/fbank_extractor)
 
+# libvad_coreml
+if(APPLE)
+    add_subdirectory(senko/vad_coreml)
+endif()
+
 # Only install in wheel builds (skip editable and sdist)
 if(SKBUILD_STATE STREQUAL "wheel")
-    install(TARGETS fbank_extractor DESTINATION senko)
     install(DIRECTORY models DESTINATION senko)
 endif()
@@ -8,9 +8,9 @@ diarizer = senko.Diarizer(device='auto', vad='auto', clustering='auto', warmup=T
 - `device`: Device to use for PyTorch operations (`auto`, `cuda`, `coreml`, `cpu`)
     - `auto` selects `coreml` on macOS; otherwise `cuda` if available, falling back to `cpu`
 - `vad`: Voice Activity Detection model to use (`auto`, `pyannote`, `silero`)
-    - `auto` automatically selects `pyannote` for `cuda`, `silero` for everything else
+    - `auto` automatically selects `pyannote` for `cuda` & `coreml`, `silero` for `cpu`
     - `pyannote` uses Pyannote VAD (fastest on `cuda` and `coreml`)
-    - `silero` uses Silero VAD (works on all devices, runs on CPU)
+    - `silero` uses Silero VAD (runs on CPU; not available on macOS)
 - `clustering`: Clustering location when `device` == `cuda` (`auto`, `gpu`, `cpu`)
     - Only applies to CUDA devices; non-CUDA devices always use CPU clustering
     - `auto` uses GPU clustering for CUDA devices with compute capability >= 7.0, CPU clustering otherwise
 
@@ -5,7 +5,7 @@ A very fast and accurate speaker diarization pipeline.
 
 1 hour of audio processed in 5 seconds (RTX 4090, Ryzen 9 7950X). ~17x faster than [Pyannote 3.1](https://huggingface.co/pyannote/speaker-diarization-3.1).
 
-On Apple M3, 1 hour in 15 seconds (~22x faster).
+On Apple M3, 1 hour in 7.7 seconds (~42x faster).
 
 The pipeline achieves a best score of 10.5% DER on VoxConverse, 9.3% on AISHELL-4, and 24.9% on AMI (IHM/SDM).
 
@@ -111,7 +111,7 @@ During the embeddings generation phase, for example, while the actual model infe
 <br><br>
 Therefore, for optimal performance, pair a fast GPU with a fast CPU. The CPU bottleneck becomes more noticeable with very fast GPUs (e.g., RTX 4090), where the GPU can execute the batch preparation and inference faster than the CPU can orchestrate/dispatch these operations.
 <br><br>
-As for Mac, by default, the only part of the pipeline that doesn't run on the CPU is the embeddings gen phase, which runs on the ANE (Apple Neural Engine) through CoreML. All other parts run on the CPU. You <i>can</i> get VAD running on the GPU by setting <code>vad="pyannote"</code> in the <code>Diarizer</code> object instantiation. However, Pyannote VAD only runs fast on <code>cuda</code>, not on Mac GPUs. Therefore it is best to leave <code>vad="silero"</code> when on Mac, which is the default.
+On Mac, both the VAD and embedding generation phases run through CoreML on the ANE (Apple Neural Engine) and CPU. The fbank extraction and clustering stages run purely on the CPU.
 </details>
 <details>
 <summary>Known limitations?</summary>
 
@@ -26,6 +26,10 @@ The following components are licensed under the Apache 2.0 License:
     - See individual source files for specific copyright holders
   Source: https://github.com/kaldi-asr/kaldi
 
+- FluidAudio
+  Copyright 2025 Fluid Inference
+  Source: https://github.com/FluidInference/FluidAudio
+
 License text:
 
                                  Apache License
 
@@ -11,7 +11,7 @@ A dataset of conversational speech from YouTube videos. Primarily English, with
 | Device | VAD | Clustering Location | Global DER | Global RTF | System |
 |:--------:|:-----:|:-------------------:|:------------:|:------------:|:------------:|
 | `cuda` | pyannote | CPU | 10.5% | 0.0021401 | RTX 5090 + Ryzen 9 9950X |
-| `coreml` | silero | CPU | 11.0% | 0.0041023 | Apple M3 |
+| `coreml` | pyannote | CPU | 10.8% | 0.0020203 | Apple M3 |
 | `cuda` | pyannote | GPU | 14.5% | 0.0015595 | RTX 5090 + Ryzen 9 9950X |
 
 </center>
@@ -25,7 +25,7 @@ A dataset of meeting recordings in Mandarin Chinese.
 |:--------:|:-----:|:-------------------:|:------------:|:------------:|:------------:|
 | `cuda` | pyannote | GPU | 9.3% | 0.0015444 | RTX 5090 + Ryzen 9 9950X |
 | `cuda` | pyannote | CPU | 9.4% | 0.0034435 | RTX 5090 + Ryzen 9 9950X |
-| `coreml` | silero | CPU | 10.7% | 0.0043948 | Apple M3 |
+| `coreml` | pyannote | CPU | 9.5% | 0.0036052 | Apple M3 |
 
 </center>
 
@@ -40,7 +40,7 @@ A dataset of meeting recordings in English, with participants recorded using hea
 |:--------:|:-----:|:-------------------:|:------------:|:------------:|:------------:|
 | `cuda` | pyannote | GPU | 24.9% | 0.0014214 | RTX 5090 + Ryzen 9 9950X |
 | `cuda` | pyannote | CPU | 24.9% | 0.0028280 | RTX 5090 + Ryzen 9 9950X |
-| `coreml` | silero | CPU | 26.2% | 0.0042680 | Apple M3 |
+| `coreml` | pyannote | CPU | 25.2% | 0.0030760 | Apple M3 |
 
 </center>
 
@@ -52,6 +52,6 @@ A dataset of meeting recordings in English, with participants recorded using hea
 |:--------:|:-----:|:-------------------:|:------------:|:------------:|:------------:|
 | `cuda` | pyannote | GPU | 24.9% | 0.0014103 | RTX 5090 + Ryzen 9 9950X |
 | `cuda` | pyannote | CPU | 24.9% | 0.0028629 | RTX 5090 + Ryzen 9 9950X |
-| `coreml` | silero | CPU | 33.3% | 0.0040706 | Apple M3 |
+| `coreml` | pyannote | CPU | 30.7% | 0.0029834 | Apple M3 |
 
 </center>
@@ -320,7 +320,7 @@ def main():
     parser.add_argument('--device', choices=['auto', 'cuda', 'coreml', 'cpu'], default='auto',
                        help='Device for Senko processing')
     parser.add_argument('--vad', choices=['auto', 'pyannote', 'silero'], default='auto',
-                       help='VAD system to use (auto=pyannote for CUDA, silero otherwise)')
+                       help='VAD to use')
     parser.add_argument('--clustering', choices=['auto', 'gpu', 'cpu'], default='auto',
                        help='Clustering location (auto=gpu for CUDA compute >=7.0, cpu otherwise)')
     parser.add_argument('--results_dir', type=Path, default='./senko_evaluation_results',
 
@@ -1,5 +1,6 @@
 # Model Links
 - [Pyannote segmentation-3.0](https://huggingface.co/pyannote/segmentation-3.0)
+- [Pyannote segmentation-3.0 CoreML version](https://huggingface.co/FluidInference/speaker-diarization-coreml)
 - [CAM++](https://modelscope.cn/models/iic/speech_campplus_sv_zh_en_16k-common_advanced)
 - CAM++ CoreML version: see `../tracing/coreml`
 - CAM++ CUDA TorchScript JIT-traced version: see `../tracing/cuda`
@@ -0,0 +1,18 @@
+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "10905DEA-14C4-4986-A29B-BF63B7ABE1C1": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "167F767A-9FC7-4CE9-B70F-6E5D91BE2245": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "10905DEA-14C4-4986-A29B-BF63B7ABE1C1"
+}
@@ -0,0 +1 @@
+{"fileFormatVersion": "1.0.0", "itemInfoEntries": {"model.mil": {"author": "pyannote", "description": "Segmentation model"}}, "rootModelIdentifier": "model.mil"}
-Original file line number
+Diff line change
 **/CLAUDE.md
 -*.json
 *.rttm
 audio/
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+{"fileFormatVersion": "1.0.0", "itemInfoEntries": {"model.mil": {"author": "pyannote", "description": "Segmentation model"}}, "rootModelIdentifier": "model.mil"}`