diff --git a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
index 5e932a248..b70f160c6 100644
--- a/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
+++ b/Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
@@ -1,5 +1,6 @@
 import Foundation
 import OSLog
+@preconcurrency import CoreML
 
 /// Manages text-to-speech synthesis using Kokoro CoreML models.
 ///
@@ -12,6 +13,12 @@ import OSLog
 /// try await manager.initialize()
 /// let audioData = try await manager.synthesize(text: "Hello, world!")
 /// ```
+///
+/// On iOS 26+, use `.cpuAndGPU` to work around ANE compiler regressions:
+/// ```swift
+/// let manager = KokoroTtsManager(computeUnits: .cpuAndGPU)
+/// try await manager.initialize()
+/// ```
 public final class KokoroTtsManager {
 
     private let logger = AppLogger(category: "KokoroTtsManager")
@@ -22,6 +29,7 @@ public final class KokoroTtsManager {
     private var isInitialized = false
     private var assetsReady = false
     private let directory: URL?
+    private let computeUnits: MLComputeUnits
     private var defaultVoice: String
     private var defaultSpeakerId: Int
     private var ensuredVoices: Set<String> = []
@@ -36,18 +44,24 @@ public final class KokoroTtsManager {
     ///   - defaultSpeakerId: Default speaker ID for multi-speaker voices.
     ///   - directory: Optional override for the base cache directory.
     ///     When `nil`, uses the default platform cache location.
-    ///   - modelCache: Cache for loaded CoreML models.
+    ///   - computeUnits: CoreML compute units for model compilation. Defaults to `.all`.
+    ///     Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions
+    ///     ("Cannot retrieve vector from IRValue format int32").
+    ///   - modelCache: Cache for loaded CoreML models. When `nil` (default),
+    ///     a cache is created using the provided `directory` and `computeUnits`.
     ///   - customLexicon: Optional custom pronunciation dictionary. Entries in this dictionary
     ///     take precedence over all built-in dictionaries and grapheme-to-phoneme conversion.
     public init(
         defaultVoice: String = TtsConstants.recommendedVoice,
         defaultSpeakerId: Int = 0,
         directory: URL? = nil,
-        modelCache: KokoroModelCache = KokoroModelCache(),
+        computeUnits: MLComputeUnits = .all,
+        modelCache: KokoroModelCache? = nil,
         customLexicon: TtsCustomLexicon? = nil
     ) {
         self.directory = directory
-        self.modelCache = directory != nil ? KokoroModelCache(directory: directory) : modelCache
+        self.computeUnits = computeUnits
+        self.modelCache = modelCache ?? KokoroModelCache(directory: directory, computeUnits: computeUnits)
         self.lexiconAssets = LexiconAssetManager()
         self.defaultVoice = Self.normalizeVoice(defaultVoice)
         self.defaultSpeakerId = defaultSpeakerId
@@ -58,12 +72,14 @@ public final class KokoroTtsManager {
         defaultVoice: String = TtsConstants.recommendedVoice,
         defaultSpeakerId: Int = 0,
         directory: URL? = nil,
-        modelCache: KokoroModelCache = KokoroModelCache(),
+        computeUnits: MLComputeUnits = .all,
+        modelCache: KokoroModelCache? = nil,
         lexiconAssets: LexiconAssetManager,
         customLexicon: TtsCustomLexicon? = nil
     ) {
         self.directory = directory
-        self.modelCache = directory != nil ? KokoroModelCache(directory: directory) : modelCache
+        self.computeUnits = computeUnits
+        self.modelCache = modelCache ?? KokoroModelCache(directory: directory, computeUnits: computeUnits)
         self.lexiconAssets = lexiconAssets
         self.defaultVoice = Self.normalizeVoice(defaultVoice)
         self.defaultSpeakerId = defaultSpeakerId
@@ -90,7 +106,7 @@ public final class KokoroTtsManager {
     }
 
     public func initialize(preloadVoices: Set<String>? = nil) async throws {
-        let models = try await TtsModels.download(directory: directory)
+        let models = try await TtsModels.download(directory: directory, computeUnits: computeUnits)
         try await initialize(models: models, preloadVoices: preloadVoices)
     }
 
diff --git a/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift b/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift
index ceae7eca3..a3454b1fa 100644
--- a/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift
+++ b/Sources/FluidAudio/TTS/Kokoro/Pipeline/Preprocess/KokoroModelCache.swift
@@ -10,11 +10,16 @@ public actor KokoroModelCache {
     private var downloadedModels: [ModelNames.TTS.Variant: MLModel] = [:]
     private var referenceDimension: Int?
     private let directory: URL?
-
-    /// - Parameter directory: Optional override for the base cache directory.
-    ///   When `nil`, uses the default platform cache location.
-    public init(directory: URL? = nil) {
+    private let computeUnits: MLComputeUnits
+
+    /// - Parameters:
+    ///   - directory: Optional override for the base cache directory.
+    ///     When `nil`, uses the default platform cache location.
+    ///   - computeUnits: CoreML compute units for model compilation. Defaults to `.all`.
+    ///     Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions.
+    public init(directory: URL? = nil, computeUnits: MLComputeUnits = .all) {
         self.directory = directory
+        self.computeUnits = computeUnits
     }
 
     public func loadModelsIfNeeded(variants: Set<ModelNames.TTS.Variant>? = nil) async throws {
@@ -32,7 +37,7 @@ public actor KokoroModelCache {
 
         if !variantsNeedingDownload.isEmpty {
             let newlyDownloaded = try await TtsModels.download(
-                variants: Set(variantsNeedingDownload), directory: directory)
+                variants: Set(variantsNeedingDownload), directory: directory, computeUnits: computeUnits)
             for (variant, model) in newlyDownloaded.modelsByVariant {
                 downloadedModels[variant] = model
             }
diff --git a/Sources/FluidAudio/TTS/TtsModels.swift b/Sources/FluidAudio/TTS/TtsModels.swift
index 3d192d111..a808d04c2 100644
--- a/Sources/FluidAudio/TTS/TtsModels.swift
+++ b/Sources/FluidAudio/TTS/TtsModels.swift
@@ -24,10 +24,20 @@ public struct TtsModels: Sendable {
         kokoroModels[variant]
     }
 
+    /// Downloads and compiles Kokoro CoreML models.
+    ///
+    /// - Parameters:
+    ///   - requestedVariants: Which model variants to download. Pass `nil` for all.
+    ///   - repo: HuggingFace repository to download from.
+    ///   - directory: Optional override for the cache directory.
+    ///   - computeUnits: CoreML compute units for model compilation. Defaults to `.all`.
+    ///     Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions.
+    ///   - progressHandler: Optional download progress callback.
     public static func download(
         variants requestedVariants: Set<ModelNames.TTS.Variant>? = nil,
         from repo: String = TtsConstants.defaultRepository,
         directory: URL? = nil,
+        computeUnits: MLComputeUnits = .all,
         progressHandler: DownloadUtils.ProgressHandler? = nil
     ) async throws -> TtsModels {
         let targetDir = try directory ?? getCacheDirectory()
@@ -46,8 +56,7 @@ public struct TtsModels: Sendable {
             .kokoro,
             modelNames: modelNames,
             directory: modelsDirectory,
-            // v2 models converted with fp16 precision schedule BERT + generator ops to ANE (1.67x speedup)
-            computeUnits: .all,
+            computeUnits: computeUnits,
             variant: variantFilter,
             progressHandler: progressHandler
         )