Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat(tts): add configurable computeUnits for Kokoro TTS models
Adds a `computeUnits` parameter (default: `.all`) to `TtsModels.download()`,
`KokoroTtsManager.init()`, and `KokoroModelCache.init()`, allowing callers
to override CoreML compute units for Kokoro model loading.

This is needed because iOS 26 introduces ANE compiler regressions that cause
Kokoro models to fail with "Cannot retrieve vector from IRValue format int32"
when loaded with `.all` (which includes the Neural Engine). Using `.cpuAndGPU`
bypasses the ANE and resolves the issue, matching the approach already used
by PocketTTS to avoid ANE float16 precision artifacts.

The default `.all` preserves existing behavior on iOS 17-18. Callers on
iOS 26+ can pass `.cpuAndGPU` to work around the ANE regression.

Example:
```swift
let manager = KokoroTtsManager(computeUnits: .cpuAndGPU)
try await manager.initialize()
```
  • Loading branch information
integrITsolutions committed Apr 4, 2026
commit 81cdff2ad0c733d33b129902b20bbfcb2c0ec2c1
21 changes: 18 additions & 3 deletions Sources/FluidAudio/TTS/Kokoro/KokoroTtsManager.swift
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import Foundation
import OSLog
@preconcurrency import CoreML

/// Manages text-to-speech synthesis using Kokoro CoreML models.
///
Expand All @@ -12,6 +13,12 @@ import OSLog
/// try await manager.initialize()
/// let audioData = try await manager.synthesize(text: "Hello, world!")
/// ```
///
/// On iOS 26+, use `.cpuAndGPU` to work around ANE compiler regressions:
/// ```swift
/// let manager = KokoroTtsManager(computeUnits: .cpuAndGPU)
/// try await manager.initialize()
/// ```
public final class KokoroTtsManager {

private let logger = AppLogger(category: "KokoroTtsManager")
Expand All @@ -22,6 +29,7 @@ public final class KokoroTtsManager {
private var isInitialized = false
private var assetsReady = false
private let directory: URL?
private let computeUnits: MLComputeUnits
private var defaultVoice: String
private var defaultSpeakerId: Int
private var ensuredVoices: Set<String> = []
Expand All @@ -36,18 +44,23 @@ public final class KokoroTtsManager {
/// - defaultSpeakerId: Default speaker ID for multi-speaker voices.
/// - directory: Optional override for the base cache directory.
/// When `nil`, uses the default platform cache location.
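/// - computeUnits: CoreML compute units used when loading the Kokoro models. Defaults to `.all`.
/// Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions
/// ("Cannot retrieve vector from IRValue format int32").
/// Note: this value is forwarded to the model cache only when `directory` is non-nil;
/// otherwise the supplied `modelCache` keeps the compute units it was created with
/// (`.all` for the default `KokoroModelCache()`).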
/// - modelCache: Cache for loaded CoreML models.
/// - customLexicon: Optional custom pronunciation dictionary. Entries in this dictionary
/// take precedence over all built-in dictionaries and grapheme-to-phoneme conversion.
public init(
defaultVoice: String = TtsConstants.recommendedVoice,
defaultSpeakerId: Int = 0,
directory: URL? = nil,
computeUnits: MLComputeUnits = .all,
modelCache: KokoroModelCache = KokoroModelCache(),
customLexicon: TtsCustomLexicon? = nil
) {
self.directory = directory
self.modelCache = directory != nil ? KokoroModelCache(directory: directory) : modelCache
self.computeUnits = computeUnits
self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache
self.lexiconAssets = LexiconAssetManager()
self.defaultVoice = Self.normalizeVoice(defaultVoice)
self.defaultSpeakerId = defaultSpeakerId
Expand All @@ -58,12 +71,14 @@ public final class KokoroTtsManager {
defaultVoice: String = TtsConstants.recommendedVoice,
defaultSpeakerId: Int = 0,
directory: URL? = nil,
computeUnits: MLComputeUnits = .all,
modelCache: KokoroModelCache = KokoroModelCache(),
lexiconAssets: LexiconAssetManager,
customLexicon: TtsCustomLexicon? = nil
) {
self.directory = directory
self.modelCache = directory != nil ? KokoroModelCache(directory: directory) : modelCache
self.computeUnits = computeUnits
self.modelCache = directory != nil ? KokoroModelCache(directory: directory, computeUnits: computeUnits) : modelCache
self.lexiconAssets = lexiconAssets
self.defaultVoice = Self.normalizeVoice(defaultVoice)
self.defaultSpeakerId = defaultSpeakerId
Expand All @@ -90,7 +105,7 @@ public final class KokoroTtsManager {
}

public func initialize(preloadVoices: Set<String>? = nil) async throws {
let models = try await TtsModels.download(directory: directory)
let models = try await TtsModels.download(directory: directory, computeUnits: computeUnits)
try await initialize(models: models, preloadVoices: preloadVoices)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@ public actor KokoroModelCache {
private var downloadedModels: [ModelNames.TTS.Variant: MLModel] = [:]
private var referenceDimension: Int?
private let directory: URL?

/// - Parameter directory: Optional override for the base cache directory.
/// When `nil`, uses the default platform cache location.
public init(directory: URL? = nil) {
private let computeUnits: MLComputeUnits

/// - Parameters:
/// - directory: Optional override for the base cache directory.
/// When `nil`, uses the default platform cache location.
/// - computeUnits: CoreML compute units forwarded to `TtsModels.download` when this cache
/// loads models. Defaults to `.all`.
/// Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions.
public init(directory: URL? = nil, computeUnits: MLComputeUnits = .all) {
self.directory = directory
self.computeUnits = computeUnits
}

public func loadModelsIfNeeded(variants: Set<ModelNames.TTS.Variant>? = nil) async throws {
Expand All @@ -32,7 +37,7 @@ public actor KokoroModelCache {

if !variantsNeedingDownload.isEmpty {
let newlyDownloaded = try await TtsModels.download(
variants: Set(variantsNeedingDownload), directory: directory)
variants: Set(variantsNeedingDownload), directory: directory, computeUnits: computeUnits)
for (variant, model) in newlyDownloaded.modelsByVariant {
downloadedModels[variant] = model
}
Expand Down
13 changes: 11 additions & 2 deletions Sources/FluidAudio/TTS/TtsModels.swift
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,20 @@ public struct TtsModels: Sendable {
kokoroModels[variant]
}

/// Downloads and compiles Kokoro CoreML models.
///
/// - Parameters:
/// - requestedVariants: Which model variants to download. Pass `nil` for all.
/// - repo: HuggingFace repository to download from.
/// - directory: Optional override for the cache directory.
/// - computeUnits: CoreML compute units used when loading the models. Defaults to `.all`,
/// which lets the fp16 v2 models schedule BERT + generator ops on the ANE (1.67x speedup).
/// Use `.cpuAndGPU` on iOS 26+ to work around ANE compiler regressions.
/// - progressHandler: Optional download progress callback.
public static func download(
variants requestedVariants: Set<ModelNames.TTS.Variant>? = nil,
from repo: String = TtsConstants.defaultRepository,
directory: URL? = nil,
computeUnits: MLComputeUnits = .all,
progressHandler: DownloadUtils.ProgressHandler? = nil
) async throws -> TtsModels {
let targetDir = try directory ?? getCacheDirectory()
Expand All @@ -46,8 +56,7 @@ public struct TtsModels: Sendable {
.kokoro,
modelNames: modelNames,
directory: modelsDirectory,
// v2 models converted with fp16 precision schedule BERT + generator ops to ANE (1.67x speedup)
computeUnits: .all,
computeUnits: computeUnits,
variant: variantFilter,
progressHandler: progressHandler
)
Expand Down
Loading