From 81cdff2ad0c733d33b129902b20bbfcb2c0ec2c1 Mon Sep 17 00:00:00 2001 From: Felix <139258421+integrITsolutions@users.noreply.github.com> Date: Sat, 4 Apr 2026 18:24:13 +0200 Subject: [PATCH 1/2] feat(tts): add configurable computeUnits for Kokoro TTS models Adds a `computeUnits` parameter (default: `.all`) to `TtsModels.download()`, `KokoroTtsManager.init()`, and `KokoroModelCache.init()`, allowing callers to override CoreML compute units for Kokoro model loading. This is needed because iOS 26 introduces ANE compiler regressions that cause Kokoro models to fail with "Cannot retrieve vector from IRValue format int32" when loaded with `.all` (which includes the Neural Engine). Using `.cpuAndGPU` bypasses the ANE and resolves the issue, matching the approach already used by PocketTTS to avoid ANE float16 precision artifacts. The default `.all` preserves existing behavior on iOS 17-18. Callers on iOS 26+ can pass `.cpuAndGPU` to work around the ANE regression. Example: ```swift let manager = KokoroTtsManager(computeUnits: .cpuAndGPU) try await manager.initialize() ``` --- .../TTS/Kokoro/KokoroTtsManager.swift | 21 ++++++++++++++++--- .../Preprocess/KokoroModelCache.swift | 15 ++++++++----- Sources/FluidAudio/TTS/TtsModels.swift | 13 ++++++++++-- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift index 5e932a248..2b49c50be 100644 --- a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift +++ b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift @@ -1,5 +1,6 @@ import Foundation import OSLog +@preconcurrency import CoreML /// Manages text-to-speech synthesis using Kokoro CoreML models. /// @@ -12,6 +13,12 @@ import OSLog /// try await manager.initialize() /// let audioData = try await manager.synthesize(text: "Hello, world!") /// ``` +/// +/// On iOS 26+, use `.cpuAndGPU` to work around ANE compiler regressions: +/// ```swift +/// let manager = KokoroTtsManager(computeUnits: .cpuAndGPU) +/// try await manager.initialize() +/// ``` public final class KokoroTtsManager { private let logger = AppLogger(category: "KokoroTtsManager") @@ -22,6 +29,7 @@ public final class KokoroTtsManager { private var isInitialized = false private var assetsReady = false private let directory: URL? + private let computeUnits: MLComputeUnits private var defaultVoice: String private var defaultSpeakerId: Int private var ensuredVoices: Set = [] @@ -36,6 +44,9 @@ public final class KokoroTtsManager { /// - defaultSpeakerId: Default speaker ID for multi-speaker voices. /// - directory: Optional override for the base cache directory. /// When `nil`, uses the default platform cache location. + /// - computeUnits: CoreML compute units for model compilation. Defaults to `.all`. + /// Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions + /// ("Cannot retrieve vector from IRValue format int32"). /// - modelCache: Cache for loaded CoreML models. /// - customLexicon: Optional custom pronunciation dictionary. Entries in this dictionary /// take precedence over all built-in dictionaries and grapheme-to-phoneme conversion. @@ -43,11 +54,13 @@ public final class KokoroTtsManager { defaultVoice: String = TtsConstants.recommendedVoice, defaultSpeakerId: Int = 0, directory: URL? = nil, + computeUnits: MLComputeUnits = .all, modelCache: KokoroModelCache = KokoroModelCache(), customLexicon: TtsCustomLexicon? = nil ) { self.directory = directory - self.modelCache = directory != nil ? KokoroModelCache(directory: directory) : modelCache + self.computeUnits = computeUnits + self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache self.lexiconAssets = LexiconAssetManager() self.defaultVoice = Self.normalizeVoice(defaultVoice) self.defaultSpeakerId = defaultSpeakerId @@ -58,12 +71,14 @@ public final class KokoroTtsManager { defaultVoice: String = TtsConstants.recommendedVoice, defaultSpeakerId: Int = 0, directory: URL? = nil, + computeUnits: MLComputeUnits = .all, modelCache: KokoroModelCache = KokoroModelCache(), lexiconAssets: LexiconAssetManager, customLexicon: TtsCustomLexicon? = nil ) { self.directory = directory - self.modelCache = directory != nil ? KokoroModelCache(directory: directory) : modelCache + self.computeUnits = computeUnits + self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache self.lexiconAssets = lexiconAssets self.defaultVoice = Self.normalizeVoice(defaultVoice) self.defaultSpeakerId = defaultSpeakerId @@ -90,7 +105,7 @@ public final class KokoroTtsManager { } public func initialize(preloadVoices: Set? = nil) async throws { - let models = try await TtsModels.download(directory: directory) + let models = try await TtsModels.download(directory: directory, computeUnits: computeUnits) try await initialize(models: models, preloadVoices: preloadVoices) } diff --git a/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift b/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift index ceae7eca3..a3454b1fa 100644 --- a/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift +++ b/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift @@ -10,11 +10,16 @@ public actor KokoroModelCache { private var downloadedModels: [ModelNames.TTS.Variant: MLModel] = [:] private var referenceDimension: Int? private let directory: URL? - - /// - Parameter directory: Optional override for the base cache directory. - /// When `nil`, uses the default platform cache location. - public init(directory: URL? = nil) { + private let computeUnits: MLComputeUnits + + /// - Parameters: + /// - directory: Optional override for the base cache directory. + /// When `nil`, uses the default platform cache location. + /// - computeUnits: CoreML compute units for model compilation. Defaults to `.all`. + /// Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions. + public init(directory: URL? = nil, computeUnits: MLComputeUnits = .all) { self.directory = directory + self.computeUnits = computeUnits } public func loadModelsIfNeeded(variants: Set? = nil) async throws { @@ -32,7 +37,7 @@ public actor KokoroModelCache { if !variantsNeedingDownload.isEmpty { let newlyDownloaded = try await TtsModels.download( - variants: Set(variantsNeedingDownload), directory: directory) + variants: Set(variantsNeedingDownload), directory: directory, computeUnits: computeUnits) for (variant, model) in newlyDownloaded.modelsByVariant { downloadedModels[variant] = model } diff --git a/Sources/FluidAudio/TTS/TtsModels.swift b/Sources/FluidAudio/TTS/TtsModels.swift index 3d192d111..a808d04c2 100644 --- a/Sources/FluidAudio/TTS/TtsModels.swift +++ b/Sources/FluidAudio/TTS/TtsModels.swift @@ -24,10 +24,20 @@ public struct TtsModels: Sendable { kokoroModels[variant] } + /// Downloads and compiles Kokoro CoreML models. + /// + /// - Parameters: + /// - requestedVariants: Which model variants to download. Pass `nil` for all. + /// - repo: HuggingFace repository to download from. + /// - directory: Optional override for the cache directory. + /// - computeUnits: CoreML compute units for model compilation. Defaults to `.all`. + /// Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions. + /// - progressHandler: Optional download progress callback. public static func download( variants requestedVariants: Set? = nil, from repo: String = TtsConstants.defaultRepository, directory: URL? = nil, + computeUnits: MLComputeUnits = .all, progressHandler: DownloadUtils.ProgressHandler? = nil ) async throws -> TtsModels { let targetDir = try directory ?? getCacheDirectory() @@ -46,8 +56,7 @@ public struct TtsModels: Sendable { .kokoro, modelNames: modelNames, directory: modelsDirectory, - // v2 models converted with fp16 precision schedule BERT + generator ops to ANE (1.67x speedup) - computeUnits: .all, + computeUnits: computeUnits, variant: variantFilter, progressHandler: progressHandler ) From 5edcba9bfdaf31053589c5372ef085b14f1e4665 Mon Sep 17 00:00:00 2001 From: Felix IntegrIT Solutions Date: Sat, 4 Apr 2026 18:54:04 +0200 Subject: [PATCH 2/2] fix(tts): propagate computeUnits to modelCache when directory is nil When KokoroTtsManager was initialized with a custom computeUnits but no directory (the common case), the modelCache default parameter was used as-is with .all compute units, silently ignoring the caller's setting. This meant on-demand model loading could still hit the ANE, defeating the iOS 26 workaround. Make modelCache optional (nil = not user-provided) so we always create a cache with the correct computeUnits when the caller doesn't supply their own. Co-Authored-By: Claude Opus 4.6 (1M context) --- Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift index 2b49c50be..b70f160c6 100644 --- a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift +++ b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift @@ -47,7 +47,8 @@ public final class KokoroTtsManager { /// - computeUnits: CoreML compute units for model compilation. Defaults to `.all`. /// Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions /// ("Cannot retrieve vector from IRValue format int32"). - /// - modelCache: Cache for loaded CoreML models. + /// - modelCache: Cache for loaded CoreML models. When `nil` (default), + /// a cache is created using the provided `directory` and `computeUnits`. /// - customLexicon: Optional custom pronunciation dictionary. Entries in this dictionary /// take precedence over all built-in dictionaries and grapheme-to-phoneme conversion. public init( @@ -55,12 +56,12 @@ public final class KokoroTtsManager { defaultSpeakerId: Int = 0, directory: URL? = nil, computeUnits: MLComputeUnits = .all, - modelCache: KokoroModelCache = KokoroModelCache(), + modelCache: KokoroModelCache? = nil, customLexicon: TtsCustomLexicon? = nil ) { self.directory = directory self.computeUnits = computeUnits - self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache + self.modelCache = modelCache ?? KokoroModelCache(directory: directory, computeUnits: computeUnits) self.lexiconAssets = LexiconAssetManager() self.defaultVoice = Self.normalizeVoice(defaultVoice) self.defaultSpeakerId = defaultSpeakerId @@ -72,13 +73,13 @@ public final class KokoroTtsManager { defaultSpeakerId: Int = 0, directory: URL? = nil, computeUnits: MLComputeUnits = .all, - modelCache: KokoroModelCache = KokoroModelCache(), + modelCache: KokoroModelCache? = nil, lexiconAssets: LexiconAssetManager, customLexicon: TtsCustomLexicon? = nil ) { self.directory = directory self.computeUnits = computeUnits - self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache + self.modelCache = modelCache ?? KokoroModelCache(directory: directory, computeUnits: computeUnits) self.lexiconAssets = lexiconAssets self.defaultVoice = Self.normalizeVoice(defaultVoice) self.defaultSpeakerId = defaultSpeakerId