@@ -8,11 +8,6 @@ func log(_ message: String) {
88}
99
1010// JSON message structures for communication with Tauri
11- struct TranscribeRequest : Codable {
12- let type : String
13- let audio_path : String
14- }
15-
1611struct TranscriptionResponse : Codable {
1712 let type : String = " transcription "
1813 let text : String
@@ -34,12 +29,11 @@ struct Segment: Codable {
3429
/// Status message sent to the Tauri host as JSON.
///
/// Only `loadedModel` and `modelVersion` are supplied by callers; every other
/// field has a fixed initial value and is therefore not part of the memberwise
/// initializer.
struct StatusResponse: Codable {
    /// Message discriminator, always "status".
    let type: String = "status"
    /// Identifier of the currently loaded model, or `nil` when nothing is loaded.
    let loadedModel: String?
    /// Short version tag of the loaded model, or `nil` when nothing is loaded.
    let modelVersion: String?
    /// Always `nil` in this sidecar.
    let modelPath: String? = nil
    /// Always `nil` in this sidecar.
    let precision: String? = nil
    /// Always `nil` in this sidecar.
    let attention: String? = nil
}
4438
4539struct ErrorResponse : Codable {
@@ -49,10 +43,34 @@ struct ErrorResponse: Codable {
4943 let details : [ String : String ] ? = nil // Optional details field to match Rust
5044}
5145
52- // Global ASR manager
/// Parakeet model versions this sidecar knows how to handle.
///
/// The raw value ("v2" / "v3") is the short tag reported as `modelVersion`
/// in status responses.
enum SupportedModelVersion: String, CaseIterable {
    case v2, v3

    /// The corresponding `AsrModelVersion` value passed to `AsrModels`.
    var asrVersion: AsrModelVersion {
        switch self {
        case .v2:
            return AsrModelVersion.v2
        case .v3:
            return AsrModelVersion.v3
        }
    }

    /// Full model identifier reported as `loadedModel` in status responses.
    var modelIdentifier: String {
        switch self {
        case .v2:
            return "parakeet-tdt-0.6b-v2"
        case .v3:
            return "parakeet-tdt-0.6b-v3"
        }
    }

    /// Name of the per-version model folder: the identifier plus a "-coreml" suffix.
    var repoFolderName: String {
        return modelIdentifier + "-coreml"
    }
}
68+
// Process-wide ASR state shared by the sidecar's command handlers.

/// The initialized ASR manager, or `nil` while no model is loaded.
var asrManager: AsrManager? = nil
/// `true` once a manager has been initialized with a model.
var isModelLoaded: Bool = false
/// Version of the model currently held by `asrManager`, if any.
var loadedModelVersion: SupportedModelVersion? = nil
/// Versions that were downloaded or loaded from cache during this process.
var downloadedVersions: Set<SupportedModelVersion> = []
5674
5775@main
5876struct ParakeetSidecar {
@@ -66,6 +84,7 @@ struct ParakeetSidecar {
6684 if CommandLine . arguments. count > 1 {
6785 // Direct file mode for testing
6886 let audioPath = CommandLine . arguments [ 1 ]
87+ await loadModel ( version: . v3, forceDownload: true , emitStatus: false , encoder: encoder)
6988 await transcribeFile ( audioPath, language: nil , translateToEnglish: false , encoder: encoder)
7089 } else {
7190 // JSON communication mode for Tauri
@@ -91,20 +110,26 @@ struct ParakeetSidecar {
91110
92111 switch json [ " type " ] as? String {
93112 case " load_model " , " download_model " :
94- // Handle both load_model and download_model commands
95- // This allows the UI's Download button to trigger model download
96- let modelId = json [ " model_id " ] as? String
97- await loadModel ( modelId: modelId, encoder: encoder)
113+ let version = parseModelVersion ( json [ " model_version " ] , fallbackModelId: json [ " model_id " ] as? String )
114+ let forceDownload : Bool
115+ if let explicit = json [ " force_download " ] as? Bool {
116+ forceDownload = explicit
117+ } else {
118+ forceDownload = ( json [ " type " ] as? String ) == " download_model "
119+ }
120+ await loadModel ( version: version, forceDownload: forceDownload, encoder: encoder)
98121
99122 case " unload_model " :
100123 unloadModel ( )
101- sendResponse ( StatusResponse ( loadedModel: nil ) , encoder: encoder)
124+ sendResponse ( StatusResponse ( loadedModel: nil , modelVersion : nil ) , encoder: encoder)
102125
103126 case " delete_model " :
104- // Delete the actual model files from FluidAudio cache
105- deleteModelFiles ( )
106- unloadModel ( ) // Also unload from memory
107- sendResponse ( StatusResponse ( loadedModel: nil ) , encoder: encoder)
127+ let version = parseModelVersion ( json [ " model_version " ] , fallbackModelId: json [ " model_id " ] as? String )
128+ deleteModelFiles ( for: version)
129+ if loadedModelVersion == version {
130+ unloadModel ( )
131+ }
132+ sendResponse ( StatusResponse ( loadedModel: loadedModelVersion? . modelIdentifier, modelVersion: loadedModelVersion? . rawValue) , encoder: encoder)
108133
109134 case " transcribe " :
110135 if let audioPath = json [ " audio_path " ] as? String {
@@ -117,9 +142,13 @@ struct ParakeetSidecar {
117142 }
118143
119144 case " status " :
120- sendResponse ( StatusResponse (
121- loadedModel: isModelLoaded ? " parakeet-tdt-0.6b-v3 " : nil
122- ) , encoder: encoder)
145+ sendResponse (
146+ StatusResponse (
147+ loadedModel: loadedModelVersion? . modelIdentifier,
148+ modelVersion: loadedModelVersion? . rawValue
149+ ) ,
150+ encoder: encoder
151+ )
123152
124153 case " shutdown " :
125154 unloadModel ( )
@@ -134,39 +163,45 @@ struct ParakeetSidecar {
134163 }
135164 }
136165
137- static func loadModel( modelId: String ? = nil , encoder: JSONEncoder ) async {
138- // Use provided model_id or default to parakeet-tdt-0.6b-v3
139- let actualModelId = modelId ?? " parakeet-tdt-0.6b-v3 "
140-
141- // If already loaded with same model, just return success
142- if isModelLoaded {
143- log ( " ⚡ Model already loaded: \( actualModelId) " )
144- sendResponse ( StatusResponse ( loadedModel: actualModelId) , encoder: encoder)
166+ static func loadModel( version: SupportedModelVersion = . v3, forceDownload: Bool = false , emitStatus: Bool = true , encoder: JSONEncoder ) async {
167+ if isModelLoaded, let loadedVersion = loadedModelVersion, loadedVersion == version {
168+ log ( " ⚡ Model already loaded: \( loadedVersion. modelIdentifier) " )
169+ if emitStatus {
170+ sendResponse ( StatusResponse ( loadedModel: loadedVersion. modelIdentifier, modelVersion: loadedVersion. rawValue) , encoder: encoder)
171+ }
145172 return
146173 }
147174
148175 do {
149- // Download models NOW when user clicks Download button
150- // This ensures user has control over when download happens
151- log ( " 📥 Starting Parakeet model download via FluidAudio... " )
152- log ( " 📦 Model: \( actualModelId) " )
153- log ( " 🌐 This will download ~500MB from FluidAudio servers... " )
154- log ( " ⏳ Please wait, this may take 2-5 minutes depending on your connection... " )
155-
156- let models = try await AsrModels . downloadAndLoad ( )
157-
158- log ( " ✅ Model downloaded successfully! " )
159- log ( " 🔧 Initializing ASR manager... " )
160- isModelDownloaded = true
161-
162- // Initialize ASR manager with downloaded models
176+ let models : AsrModels
177+
178+ if forceDownload {
179+ log ( " 📥 Force-downloading Parakeet \( version. rawValue. uppercased ( ) ) via FluidAudio... " )
180+ log ( " 🌐 This will download ~500MB. Please wait... " )
181+ models = try await AsrModels . downloadAndLoad ( version: version. asrVersion)
182+ downloadedVersions. insert ( version)
183+ } else {
184+ log ( " 🔍 Attempting to load Parakeet \( version. rawValue. uppercased ( ) ) from cache... " )
185+ do {
186+ models = try await AsrModels . loadFromCache ( version: version. asrVersion)
187+ downloadedVersions. insert ( version)
188+ log ( " ✅ Loaded Parakeet \( version. rawValue. uppercased ( ) ) from cache " )
189+ } catch {
190+ log ( " ❌ Failed to load from cache: \( error) " )
191+ sendError ( " model_not_downloaded " , message: " Parakeet \( version. rawValue. uppercased ( ) ) is not downloaded. Please download it first. " , encoder: encoder)
192+ return
193+ }
194+ }
195+
163196 let manager = AsrManager ( config: . default)
164197 try await manager. initialize ( models: models)
165198 asrManager = manager
166199
167- log ( " ✅ ASR manager initialized, model ready for use! " )
168200 isModelLoaded = true
169- sendResponse ( StatusResponse ( loadedModel: actualModelId) , encoder: encoder)
201+ loadedModelVersion = version
202+ if emitStatus {
203+ sendResponse ( StatusResponse ( loadedModel: version. modelIdentifier, modelVersion: version. rawValue) , encoder: encoder)
204+ }
170205 } catch {
171206 log ( " ❌ Failed to load model: \( error) " )
172207 sendError ( " model_load_error " , message: " Failed to load model: \( error) " , encoder: encoder)
@@ -177,24 +212,28 @@ struct ParakeetSidecar {
177212 asrManager? . cleanup ( )
178213 asrManager = nil
179214 isModelLoaded = false
215+ loadedModelVersion = nil
180216 }
181217
182- static func deleteModelFiles( ) {
183- // FluidAudio stores models in ~/Library/Application Support/
184- // We need to delete the actual model files
218+ static func deleteModelFiles( for version: SupportedModelVersion ) {
185219 let fileManager = FileManager . default
186220
187- // Possible locations where FluidAudio might store models
188- let appSupportPaths = [
189- fileManager. homeDirectoryForCurrentUser
190- . appendingPathComponent ( " Library/Application Support/FluidAudio " ) ,
191- fileManager. homeDirectoryForCurrentUser
192- . appendingPathComponent ( " Library/Application Support/parakeet-tdt-0.6b-v3-coreml " ) ,
193- fileManager. homeDirectoryForCurrentUser
194- . appendingPathComponent ( " Library/Caches/FluidAudio " )
221+ let home = fileManager. homeDirectoryForCurrentUser
222+ let repoFolder = version. repoFolderName
223+
224+ let targets : [ URL ] = [
225+ home
226+ . appendingPathComponent ( " Library/Application Support/FluidAudio/Models " , isDirectory: true )
227+ . appendingPathComponent ( repoFolder, isDirectory: true ) ,
228+ home
229+ . appendingPathComponent ( " Library/Application Support " , isDirectory: true )
230+ . appendingPathComponent ( repoFolder, isDirectory: true ) ,
231+ home
232+ . appendingPathComponent ( " Library/Caches/FluidAudio " , isDirectory: true )
233+ . appendingPathComponent ( repoFolder, isDirectory: true )
195234 ]
196235
197- for path in appSupportPaths {
236+ for path in targets {
198237 if fileManager. fileExists ( atPath: path. path) {
199238 do {
200239 try fileManager. removeItem ( at: path)
@@ -205,8 +244,7 @@ struct ParakeetSidecar {
205244 }
206245 }
207246
208- // Mark as not downloaded after deletion
209- isModelDownloaded = false
247+ downloadedVersions. remove ( version)
210248 }
211249
212250 static func transcribeFile( _ audioPath: String , language: String ? = nil , translateToEnglish: Bool = false , encoder: JSONEncoder ) async {
@@ -236,8 +274,8 @@ struct ParakeetSidecar {
236274 // Send transcription response
237275 let response = TranscriptionResponse (
238276 text: result. text,
239- segments: [ ] , // FluidAudio doesn't provide segments
240- language: language, // Pass through the language if provided
277+ segments: [ ] ,
278+ language: language,
241279 duration: Float ( result. duration)
242280 )
243281 sendResponse ( response, encoder: encoder)
@@ -263,5 +301,24 @@ struct ParakeetSidecar {
/// Wraps `code` and `message` in an `ErrorResponse` and emits it through `sendResponse`.
///
/// - Parameters:
///   - code: Machine-readable error code.
///   - message: Human-readable description of the failure.
///   - encoder: Encoder forwarded to `sendResponse`.
static func sendError(_ code: String, message: String, encoder: JSONEncoder) {
    let payload = ErrorResponse(code: code, message: message)
    sendResponse(payload, encoder: encoder)
}
304+
/// Resolves a model version from a raw JSON value.
///
/// - Parameter value: The decoded `model_version` field; may be absent, `NSNull`,
///   or any non-string JSON value.
/// - Returns: The version whose raw value matches `value` case-insensitively,
///   or `.v3` when `value` does not name a supported version.
static func parseModelVersion(_ value: Any?) -> SupportedModelVersion {
    return explicitModelVersion(from: value) ?? .v3
}

/// Resolves a model version from `model_version`, falling back to `model_id`.
///
/// The fallback is consulted whenever the explicit value does not resolve to a
/// supported version, not only when the key is missing. A JSON `null` or an
/// unrecognised string therefore no longer hides a usable `model_id`.
///
/// - Parameters:
///   - value: The decoded `model_version` field, if any.
///   - fallbackModelId: The decoded `model_id` field, searched for a "-<version>" marker.
/// - Returns: The explicit version if valid, else the version inferred from
///   `fallbackModelId`, else `.v3`.
static func parseModelVersion(_ value: Any?, fallbackModelId: String?) -> SupportedModelVersion {
    if let explicit = explicitModelVersion(from: value) {
        return explicit
    }

    if let modelId = fallbackModelId?.lowercased() {
        // Derive the marker from the enum so new versions are picked up automatically.
        let inferred = SupportedModelVersion.allCases.first { candidate in
            modelId.contains("-\(candidate.rawValue)")
        }
        if let inferred = inferred {
            return inferred
        }
    }

    return .v3
}

/// Returns the version named by `value`, or `nil` when it is not a string
/// matching a supported version's raw value (case-insensitive).
private static func explicitModelVersion(from value: Any?) -> SupportedModelVersion? {
    guard let raw = (value as? String)?.lowercased() else {
        return nil
    }
    return SupportedModelVersion(rawValue: raw)
}
266323}
267324
0 commit comments