Skip to content

Commit bec2e3e

Browse files
committed
feat(parakeet): add version-aware v2/v3 download/load/delete with FluidAudio 0.6.1; fix false-success download for v2 by verifying loaded model and cache; autoload selected Parakeet model on startup
Why: ensure English-only v2 and multilingual v3 are selectable and managed precisely, and avoid misleading success toasts when the wrong version is loaded.
1 parent 2f54a53 commit bec2e3e

File tree

9 files changed

+417
-173
lines changed

9 files changed

+417
-173
lines changed

sidecar/parakeet-swift/Package.resolved

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sidecar/parakeet-swift/Package.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ let package = Package(
99
.macOS(.v13) // FluidAudio requires macOS 13.0+
1010
],
1111
dependencies: [
12-
.package(url: "https://github.com/FluidInference/FluidAudio.git", from: "0.5.2")
12+
.package(url: "https://github.com/FluidInference/FluidAudio.git", from: "0.6.1")
1313
],
1414
targets: [
1515
.executableTarget(

sidecar/parakeet-swift/Sources/main.swift

Lines changed: 119 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,6 @@ func log(_ message: String) {
88
}
99

1010
// JSON message structures for communication with Tauri
11-
struct TranscribeRequest: Codable {
12-
let type: String
13-
let audio_path: String
14-
}
15-
1611
struct TranscriptionResponse: Codable {
1712
let type: String = "transcription"
1813
let text: String
@@ -34,12 +29,11 @@ struct Segment: Codable {
3429

3530
struct StatusResponse: Codable {
3631
let type: String = "status"
37-
let loadedModel: String? // camelCase to match Rust's expectation
32+
let loadedModel: String?
33+
let modelVersion: String?
3834
let modelPath: String? = nil
3935
let precision: String? = nil
4036
let attention: String? = nil
41-
42-
// No CodingKeys needed - Swift's camelCase matches Rust's camelCase
4337
}
4438

4539
struct ErrorResponse: Codable {
@@ -49,10 +43,34 @@ struct ErrorResponse: Codable {
4943
let details: [String: String]? = nil // Optional details field to match Rust
5044
}
5145

52-
// Global ASR manager
46+
/// Parakeet model generations the sidecar can download, load and delete.
///
/// The raw value (`"v2"` / `"v3"`) is the string reported as `modelVersion`
/// in status responses.
enum SupportedModelVersion: String, CaseIterable {
    case v2
    case v3

    /// Version value handed to `AsrModels.downloadAndLoad(version:)` and
    /// `AsrModels.loadFromCache(version:)`.
    var asrVersion: AsrModelVersion {
        switch self {
        case .v2:
            return .v2
        case .v3:
            return .v3
        }
    }

    /// Model name reported as `loadedModel` in status responses.
    var modelIdentifier: String {
        switch self {
        case .v2:
            return "parakeet-tdt-0.6b-v2"
        case .v3:
            return "parakeet-tdt-0.6b-v3"
        }
    }

    /// Folder name looked for on disk when deleting this version's model files.
    var repoFolderName: String {
        return modelIdentifier + "-coreml"
    }
}
68+
69+
// Global ASR manager state
5370
var asrManager: AsrManager?
5471
var isModelLoaded = false
55-
var isModelDownloaded = false
72+
var loadedModelVersion: SupportedModelVersion?
73+
var downloadedVersions = Set<SupportedModelVersion>()
5674

5775
@main
5876
struct ParakeetSidecar {
@@ -66,6 +84,7 @@ struct ParakeetSidecar {
6684
if CommandLine.arguments.count > 1 {
6785
// Direct file mode for testing
6886
let audioPath = CommandLine.arguments[1]
87+
await loadModel(version: .v3, forceDownload: true, emitStatus: false, encoder: encoder)
6988
await transcribeFile(audioPath, language: nil, translateToEnglish: false, encoder: encoder)
7089
} else {
7190
// JSON communication mode for Tauri
@@ -91,20 +110,26 @@ struct ParakeetSidecar {
91110

92111
switch json["type"] as? String {
93112
case "load_model", "download_model":
94-
// Handle both load_model and download_model commands
95-
// This allows the UI's Download button to trigger model download
96-
let modelId = json["model_id"] as? String
97-
await loadModel(modelId: modelId, encoder: encoder)
113+
let version = parseModelVersion(json["model_version"], fallbackModelId: json["model_id"] as? String)
114+
let forceDownload: Bool
115+
if let explicit = json["force_download"] as? Bool {
116+
forceDownload = explicit
117+
} else {
118+
forceDownload = (json["type"] as? String) == "download_model"
119+
}
120+
await loadModel(version: version, forceDownload: forceDownload, encoder: encoder)
98121

99122
case "unload_model":
100123
unloadModel()
101-
sendResponse(StatusResponse(loadedModel: nil), encoder: encoder)
124+
sendResponse(StatusResponse(loadedModel: nil, modelVersion: nil), encoder: encoder)
102125

103126
case "delete_model":
104-
// Delete the actual model files from FluidAudio cache
105-
deleteModelFiles()
106-
unloadModel() // Also unload from memory
107-
sendResponse(StatusResponse(loadedModel: nil), encoder: encoder)
127+
let version = parseModelVersion(json["model_version"], fallbackModelId: json["model_id"] as? String)
128+
deleteModelFiles(for: version)
129+
if loadedModelVersion == version {
130+
unloadModel()
131+
}
132+
sendResponse(StatusResponse(loadedModel: loadedModelVersion?.modelIdentifier, modelVersion: loadedModelVersion?.rawValue), encoder: encoder)
108133

109134
case "transcribe":
110135
if let audioPath = json["audio_path"] as? String {
@@ -117,9 +142,13 @@ struct ParakeetSidecar {
117142
}
118143

119144
case "status":
120-
sendResponse(StatusResponse(
121-
loadedModel: isModelLoaded ? "parakeet-tdt-0.6b-v3" : nil
122-
), encoder: encoder)
145+
sendResponse(
146+
StatusResponse(
147+
loadedModel: loadedModelVersion?.modelIdentifier,
148+
modelVersion: loadedModelVersion?.rawValue
149+
),
150+
encoder: encoder
151+
)
123152

124153
case "shutdown":
125154
unloadModel()
@@ -134,39 +163,45 @@ struct ParakeetSidecar {
134163
}
135164
}
136165

137-
static func loadModel(modelId: String? = nil, encoder: JSONEncoder) async {
138-
// Use provided model_id or default to parakeet-tdt-0.6b-v3
139-
let actualModelId = modelId ?? "parakeet-tdt-0.6b-v3"
140-
141-
// If already loaded with same model, just return success
142-
if isModelLoaded {
143-
log("⚡ Model already loaded: \(actualModelId)")
144-
sendResponse(StatusResponse(loadedModel: actualModelId), encoder: encoder)
166+
static func loadModel(version: SupportedModelVersion = .v3, forceDownload: Bool = false, emitStatus: Bool = true, encoder: JSONEncoder) async {
167+
if isModelLoaded, let loadedVersion = loadedModelVersion, loadedVersion == version {
168+
log("⚡ Model already loaded: \(loadedVersion.modelIdentifier)")
169+
if emitStatus {
170+
sendResponse(StatusResponse(loadedModel: loadedVersion.modelIdentifier, modelVersion: loadedVersion.rawValue), encoder: encoder)
171+
}
145172
return
146173
}
147174

148175
do {
149-
// Download models NOW when user clicks Download button
150-
// This ensures user has control over when download happens
151-
log("📥 Starting Parakeet model download via FluidAudio...")
152-
log("📦 Model: \(actualModelId)")
153-
log("🌐 This will download ~500MB from FluidAudio servers...")
154-
log("⏳ Please wait, this may take 2-5 minutes depending on your connection...")
155-
156-
let models = try await AsrModels.downloadAndLoad()
157-
158-
log("✅ Model downloaded successfully!")
159-
log("🔧 Initializing ASR manager...")
160-
isModelDownloaded = true
161-
162-
// Initialize ASR manager with downloaded models
176+
let models: AsrModels
177+
178+
if forceDownload {
179+
log("📥 Force-downloading Parakeet \(version.rawValue.uppercased()) via FluidAudio...")
180+
log("🌐 This will download ~500MB. Please wait...")
181+
models = try await AsrModels.downloadAndLoad(version: version.asrVersion)
182+
downloadedVersions.insert(version)
183+
} else {
184+
log("🔍 Attempting to load Parakeet \(version.rawValue.uppercased()) from cache...")
185+
do {
186+
models = try await AsrModels.loadFromCache(version: version.asrVersion)
187+
downloadedVersions.insert(version)
188+
log("✅ Loaded Parakeet \(version.rawValue.uppercased()) from cache")
189+
} catch {
190+
log("❌ Failed to load from cache: \(error)")
191+
sendError("model_not_downloaded", message: "Parakeet \(version.rawValue.uppercased()) is not downloaded. Please download it first.", encoder: encoder)
192+
return
193+
}
194+
}
195+
163196
let manager = AsrManager(config: .default)
164197
try await manager.initialize(models: models)
165198
asrManager = manager
166199

167-
log("✅ ASR manager initialized, model ready for use!")
168200
isModelLoaded = true
169-
sendResponse(StatusResponse(loadedModel: actualModelId), encoder: encoder)
201+
loadedModelVersion = version
202+
if emitStatus {
203+
sendResponse(StatusResponse(loadedModel: version.modelIdentifier, modelVersion: version.rawValue), encoder: encoder)
204+
}
170205
} catch {
171206
log("❌ Failed to load model: \(error)")
172207
sendError("model_load_error", message: "Failed to load model: \(error)", encoder: encoder)
@@ -177,24 +212,28 @@ struct ParakeetSidecar {
177212
asrManager?.cleanup()
178213
asrManager = nil
179214
isModelLoaded = false
215+
loadedModelVersion = nil
180216
}
181217

182-
static func deleteModelFiles() {
183-
// FluidAudio stores models in ~/Library/Application Support/
184-
// We need to delete the actual model files
218+
static func deleteModelFiles(for version: SupportedModelVersion) {
185219
let fileManager = FileManager.default
186220

187-
// Possible locations where FluidAudio might store models
188-
let appSupportPaths = [
189-
fileManager.homeDirectoryForCurrentUser
190-
.appendingPathComponent("Library/Application Support/FluidAudio"),
191-
fileManager.homeDirectoryForCurrentUser
192-
.appendingPathComponent("Library/Application Support/parakeet-tdt-0.6b-v3-coreml"),
193-
fileManager.homeDirectoryForCurrentUser
194-
.appendingPathComponent("Library/Caches/FluidAudio")
221+
let home = fileManager.homeDirectoryForCurrentUser
222+
let repoFolder = version.repoFolderName
223+
224+
let targets: [URL] = [
225+
home
226+
.appendingPathComponent("Library/Application Support/FluidAudio/Models", isDirectory: true)
227+
.appendingPathComponent(repoFolder, isDirectory: true),
228+
home
229+
.appendingPathComponent("Library/Application Support", isDirectory: true)
230+
.appendingPathComponent(repoFolder, isDirectory: true),
231+
home
232+
.appendingPathComponent("Library/Caches/FluidAudio", isDirectory: true)
233+
.appendingPathComponent(repoFolder, isDirectory: true)
195234
]
196235

197-
for path in appSupportPaths {
236+
for path in targets {
198237
if fileManager.fileExists(atPath: path.path) {
199238
do {
200239
try fileManager.removeItem(at: path)
@@ -205,8 +244,7 @@ struct ParakeetSidecar {
205244
}
206245
}
207246

208-
// Mark as not downloaded after deletion
209-
isModelDownloaded = false
247+
downloadedVersions.remove(version)
210248
}
211249

212250
static func transcribeFile(_ audioPath: String, language: String? = nil, translateToEnglish: Bool = false, encoder: JSONEncoder) async {
@@ -236,8 +274,8 @@ struct ParakeetSidecar {
236274
// Send transcription response
237275
let response = TranscriptionResponse(
238276
text: result.text,
239-
segments: [], // FluidAudio doesn't provide segments
240-
language: language, // Pass through the language if provided
277+
segments: [],
278+
language: language,
241279
duration: Float(result.duration)
242280
)
243281
sendResponse(response, encoder: encoder)
@@ -263,5 +301,24 @@ struct ParakeetSidecar {
263301
/// Wraps `code` and `message` in an `ErrorResponse` and emits it through `sendResponse`.
///
/// - Parameters:
///   - code: Machine-readable error code (for example `"model_load_error"`).
///   - message: Human-readable description of the failure.
///   - encoder: Encoder forwarded to `sendResponse`.
static func sendError(_ code: String, message: String, encoder: JSONEncoder) {
    let payload = ErrorResponse(code: code, message: message)
    sendResponse(payload, encoder: encoder)
}
304+
305+
/// Maps a raw `model_version` value onto a supported version.
///
/// - Parameter value: Raw value taken from the request dictionary; may be absent
///   or not a string.
/// - Returns: `.v2` only when `value` is a string equal to `"v2"` ignoring case;
///   `.v3` for everything else.
static func parseModelVersion(_ value: Any?) -> SupportedModelVersion {
    guard let text = value as? String else {
        return .v3
    }
    return text.lowercased() == "v2" ? .v2 : .v3
}
311+
312+
/// Resolves the model version for a request, preferring an explicit `model_version`
/// string and falling back to the version embedded in `model_id`.
///
/// - Parameters:
///   - value: Raw `model_version` entry from the request dictionary, if any.
///   - fallbackModelId: Raw `model_id` string, consulted when `value` is not a string.
/// - Returns: The version parsed from `value` when it is a string; otherwise `.v2`
///   when `fallbackModelId` contains `"-v2"` (case-insensitive); otherwise `.v3`.
static func parseModelVersion(_ value: Any?, fallbackModelId: String?) -> SupportedModelVersion {
    // Only a real string counts as an explicit choice. A present-but-non-string value
    // (e.g. `NSNull`, which is what a JSON `null` typically decodes to) previously
    // short-circuited to `.v3` and hid a `model_id` that asked for v2.
    if let explicit = value as? String {
        return parseModelVersion(explicit)
    }

    if let modelId = fallbackModelId?.lowercased(), modelId.contains("-v2") {
        return .v2
    }

    return .v3
}
266323
}
267324

0 commit comments

Comments
 (0)