From 81cdff2ad0c733d33b129902b20bbfcb2c0ec2c1 Mon Sep 17 00:00:00 2001
From: Felix <139258421+integrITsolutions@users.noreply.github.com>
Date: Sat, 4 Apr 2026 18:24:13 +0200
Subject: [PATCH 1/2] feat(tts): add configurable computeUnits for Kokoro TTS
 models

Adds a `computeUnits` parameter (default: `.all`) to `TtsModels.download()`,
`KokoroTtsManager.init()`, and `KokoroModelCache.init()`, allowing callers
to override CoreML compute units for Kokoro model loading.

This is needed because iOS 26 introduces ANE compiler regressions that cause
Kokoro models to fail with "Cannot retrieve vector from IRValue format int32"
when loaded with `.all` (which includes the Neural Engine). Using `.cpuAndGPU`
bypasses the ANE and resolves the issue, matching the approach already used
by PocketTTS to avoid ANE float16 precision artifacts.

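The default remains `.all`, so existing behavior (including the ANE path
used on iOS 17-18) is unchanged and the workaround is strictly opt-in:
callers on iOS 26+ must pass `.cpuAndGPU` explicitly to avoid the ANE
regression. Doing so gives up the ANE speedup noted in the inline comment
this patch removes (1.67x for the fp16 v2 models).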

Example:
```swift
let manager = KokoroTtsManager(computeUnits: .cpuAndGPU)
try await manager.initialize()
```
---
 .../TTS/Kokoro/KokoroTtsManager.swift         | 21 ++++++++++++++++---
 .../Preprocess/KokoroModelCache.swift         | 15 ++++++++-----
 Sources/FluidAudio/TTS/TtsModels.swift        | 13 ++++++++++--
 3 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
index 5e932a248..2b49c50be 100644
--- a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
+++ b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
@@ -1,5 +1,6 @@
 import Foundation
 import OSLog
+@preconcurrency import CoreML
 
 /// Manages text-to-speech synthesis using Kokoro CoreML models.
 ///
@@ -12,6 +13,12 @@ import OSLog
 /// try await manager.initialize()
 /// let audioData = try await manager.synthesize(text: "Hello, world!")
 /// ```
+///
+/// On iOS 26+, use `.cpuAndGPU` to work around ANE compiler regressions:
+/// ```swift
+/// let manager = KokoroTtsManager(computeUnits: .cpuAndGPU)
+/// try await manager.initialize()
+/// ```
 public final class KokoroTtsManager {
 
     private let logger = AppLogger(category: "KokoroTtsManager")
@@ -22,6 +29,7 @@ public final class KokoroTtsManager {
     private var isInitialized = false
     private var assetsReady = false
     private let directory: URL?
+    private let computeUnits: MLComputeUnits
     private var defaultVoice: String
     private var defaultSpeakerId: Int
     private var ensuredVoices: Set<String> = []
@@ -36,6 +44,9 @@ public final class KokoroTtsManager {
     ///   - defaultSpeakerId: Default speaker ID for multi-speaker voices.
     ///   - directory: Optional override for the base cache directory.
     ///     When `nil`, uses the default platform cache location.
+    ///   - computeUnits: CoreML compute units for model compilation. Defaults to `.all`.
+    ///     Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions
+    ///     ("Cannot retrieve vector from IRValue format int32").
     ///   - modelCache: Cache for loaded CoreML models.
     ///   - customLexicon: Optional custom pronunciation dictionary. Entries in this dictionary
     ///     take precedence over all built-in dictionaries and grapheme-to-phoneme conversion.
@@ -43,11 +54,13 @@ public final class KokoroTtsManager {
         defaultVoice: String = TtsConstants.recommendedVoice,
         defaultSpeakerId: Int = 0,
         directory: URL? = nil,
+        computeUnits: MLComputeUnits = .all,
         modelCache: KokoroModelCache = KokoroModelCache(),
         customLexicon: TtsCustomLexicon? = nil
     ) {
         self.directory = directory
-        self.modelCache = directory != nil ? KokoroModelCache(directory: directory) : modelCache
+        self.computeUnits = computeUnits
+        self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache
         self.lexiconAssets = LexiconAssetManager()
         self.defaultVoice = Self.normalizeVoice(defaultVoice)
         self.defaultSpeakerId = defaultSpeakerId
@@ -58,12 +71,14 @@ public final class KokoroTtsManager {
         defaultVoice: String = TtsConstants.recommendedVoice,
         defaultSpeakerId: Int = 0,
         directory: URL? = nil,
+        computeUnits: MLComputeUnits = .all,
         modelCache: KokoroModelCache = KokoroModelCache(),
         lexiconAssets: LexiconAssetManager,
         customLexicon: TtsCustomLexicon? = nil
     ) {
         self.directory = directory
-        self.modelCache = directory != nil ? KokoroModelCache(directory: directory) : modelCache
+        self.computeUnits = computeUnits
+        self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache
         self.lexiconAssets = lexiconAssets
         self.defaultVoice = Self.normalizeVoice(defaultVoice)
         self.defaultSpeakerId = defaultSpeakerId
@@ -90,7 +105,7 @@ public final class KokoroTtsManager {
     }
 
     public func initialize(preloadVoices: Set<String>? = nil) async throws {
-        let models = try await TtsModels.download(directory: directory)
+        let models = try await TtsModels.download(directory: directory, computeUnits: computeUnits)
         try await initialize(models: models, preloadVoices: preloadVoices)
     }
 
diff --git a/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift b/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift
index ceae7eca3..a3454b1fa 100644
--- a/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift
+++ b/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift
@@ -10,11 +10,16 @@ public actor KokoroModelCache {
     private var downloadedModels: [ModelNames.TTS.Variant: MLModel] = [:]
     private var referenceDimension: Int?
     private let directory: URL?
-
-    /// - Parameter directory: Optional override for the base cache directory.
-    ///   When `nil`, uses the default platform cache location.
-    public init(directory: URL? = nil) {
+    private let computeUnits: MLComputeUnits
+
+    /// - Parameters:
+    ///   - directory: Optional override for the base cache directory.
+    ///     When `nil`, uses the default platform cache location.
+    ///   - computeUnits: CoreML compute units for model compilation. Defaults to `.all`.
+    ///     Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions.
+    public init(directory: URL? = nil, computeUnits: MLComputeUnits = .all) {
         self.directory = directory
+        self.computeUnits = computeUnits
     }
 
     public func loadModelsIfNeeded(variants: Set<ModelNames.TTS.Variant>? = nil) async throws {
@@ -32,7 +37,7 @@ public actor KokoroModelCache {
 
         if !variantsNeedingDownload.isEmpty {
             let newlyDownloaded = try await TtsModels.download(
-                variants: Set(variantsNeedingDownload), directory: directory)
+                variants: Set(variantsNeedingDownload), directory: directory, computeUnits: computeUnits)
             for (variant, model) in newlyDownloaded.modelsByVariant {
                 downloadedModels[variant] = model
             }
diff --git a/Sources/FluidAudio/TTS/TtsModels.swift b/Sources/FluidAudio/TTS/TtsModels.swift
index 3d192d111..a808d04c2 100644
--- a/Sources/FluidAudio/TTS/TtsModels.swift
+++ b/Sources/FluidAudio/TTS/TtsModels.swift
@@ -24,10 +24,20 @@ public struct TtsModels: Sendable {
         kokoroModels[variant]
     }
 
+    /// Downloads and compiles Kokoro CoreML models.
+    ///
+    /// - Parameters:
+    ///   - requestedVariants: Which model variants to download. Pass `nil` for all.
+    ///   - repo: HuggingFace repository to download from.
+    ///   - directory: Optional override for the cache directory.
+    ///   - computeUnits: CoreML compute units for model compilation. Defaults to `.all`.
+    ///     Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions.
+    ///   - progressHandler: Optional download progress callback.
     public static func download(
         variants requestedVariants: Set<ModelNames.TTS.Variant>? = nil,
         from repo: String = TtsConstants.defaultRepository,
         directory: URL? = nil,
+        computeUnits: MLComputeUnits = .all,
         progressHandler: DownloadUtils.ProgressHandler? = nil
     ) async throws -> TtsModels {
         let targetDir = try directory ?? getCacheDirectory()
@@ -46,8 +56,7 @@ public struct TtsModels: Sendable {
             .kokoro,
             modelNames: modelNames,
             directory: modelsDirectory,
-            // v2 models converted with fp16 precision schedule BERT + generator ops to ANE (1.67x speedup)
-            computeUnits: .all,
+            computeUnits: computeUnits,
             variant: variantFilter,
             progressHandler: progressHandler
         )

From 5edcba9bfdaf31053589c5372ef085b14f1e4665 Mon Sep 17 00:00:00 2001
From: Felix IntegrIT Solutions <development@integritsol.de>
Date: Sat, 4 Apr 2026 18:54:04 +0200
Subject: [PATCH 2/2] fix(tts): propagate computeUnits to modelCache when
 directory is nil

When KokoroTtsManager was initialized with a custom computeUnits but no
directory (the common case), the modelCache default parameter was used
as-is with .all compute units, silently ignoring the caller's setting.
This meant on-demand model loading could still hit the ANE, defeating
the iOS 26 workaround.

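Make modelCache optional (nil = not user-provided) so we always create
a cache with the correct directory and computeUnits when the caller
doesn't supply their own.

Behavior change: a caller-supplied modelCache is now always used as-is.
Previously it was silently replaced by a fresh cache whenever `directory`
was non-nil. A supplied cache keeps the directory and computeUnits it was
constructed with, so callers passing their own cache must configure it to
match the manager.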

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
index 2b49c50be..b70f160c6 100644
--- a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
+++ b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
@@ -47,7 +47,8 @@ public final class KokoroTtsManager {
     ///   - computeUnits: CoreML compute units for model compilation. Defaults to `.all`.
     ///     Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions
     ///     ("Cannot retrieve vector from IRValue format int32").
-    ///   - modelCache: Cache for loaded CoreML models.
+    ///   - modelCache: Cache for loaded CoreML models. When `nil` (default),
+    ///     a cache is created using the provided `directory` and `computeUnits`.
     ///   - customLexicon: Optional custom pronunciation dictionary. Entries in this dictionary
     ///     take precedence over all built-in dictionaries and grapheme-to-phoneme conversion.
     public init(
@@ -55,12 +56,12 @@ public final class KokoroTtsManager {
         defaultSpeakerId: Int = 0,
         directory: URL? = nil,
         computeUnits: MLComputeUnits = .all,
-        modelCache: KokoroModelCache = KokoroModelCache(),
+        modelCache: KokoroModelCache? = nil,
         customLexicon: TtsCustomLexicon? = nil
     ) {
         self.directory = directory
         self.computeUnits = computeUnits
-        self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache
+        self.modelCache = modelCache ?? KokoroModelCache(directory: directory, computeUnits: computeUnits)
         self.lexiconAssets = LexiconAssetManager()
         self.defaultVoice = Self.normalizeVoice(defaultVoice)
         self.defaultSpeakerId = defaultSpeakerId
@@ -72,13 +73,13 @@ public final class KokoroTtsManager {
         defaultSpeakerId: Int = 0,
         directory: URL? = nil,
         computeUnits: MLComputeUnits = .all,
-        modelCache: KokoroModelCache = KokoroModelCache(),
+        modelCache: KokoroModelCache? = nil,
         lexiconAssets: LexiconAssetManager,
         customLexicon: TtsCustomLexicon? = nil
     ) {
         self.directory = directory
         self.computeUnits = computeUnits
-        self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache
+        self.modelCache = modelCache ?? KokoroModelCache(directory: directory, computeUnits: computeUnits)
         self.lexiconAssets = lexiconAssets
         self.defaultVoice = Self.normalizeVoice(defaultVoice)
         self.defaultSpeakerId = defaultSpeakerId