Skip to content

Commit 97e9fad

Browse files
committed
fix(audio): prevent empty or ultra-short recordings from causing errors; add mode-specific min durations (PTT≥1s, Toggle≥3s); auto-recover from Error→Idle and gate start transitions
- Rust: add early empty-WAV guard, Error→Idle recovery, and state gating
- Rust: compute PTT/Toggle thresholds once and use accurate pill messages
- Swift: make response types Encodable-only to silence decoding warnings
- Cleanups: remove unused imports/vars; minor normalizer refactor
1 parent f1e5c02 commit 97e9fad

File tree

5 files changed

+58
-14
lines changed

5 files changed

+58
-14
lines changed

sidecar/parakeet-swift/Sources/main.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ func log(_ message: String) {
88
}
99

1010
// JSON message structures for communication with Tauri
11-
struct TranscriptionResponse: Codable {
11+
struct TranscriptionResponse: Encodable {
1212
let type: String = "transcription"
1313
let text: String
1414
let segments: [Segment]
@@ -23,11 +23,11 @@ struct TranscriptionResponse: Codable {
2323
}
2424
}
2525

26-
struct Segment: Codable {
26+
struct Segment: Encodable {
2727
let text: String
2828
}
2929

30-
struct StatusResponse: Codable {
30+
struct StatusResponse: Encodable {
3131
let type: String = "status"
3232
let loadedModel: String?
3333
let modelVersion: String?
@@ -36,7 +36,7 @@ struct StatusResponse: Codable {
3636
let attention: String? = nil
3737
}
3838

39-
struct ErrorResponse: Codable {
39+
struct ErrorResponse: Encodable {
4040
let type: String = "error"
4141
let code: String
4242
let message: String
@@ -285,7 +285,7 @@ struct ParakeetSidecar {
285285
}
286286
}
287287

288-
static func sendResponse<T: Codable>(_ response: T, encoder: JSONEncoder) {
288+
static func sendResponse<T: Encodable>(_ response: T, encoder: JSONEncoder) {
289289
do {
290290
let data = try encoder.encode(response)
291291
if let jsonString = String(data: data, encoding: .utf8) {

src-tauri/src/audio/normalizer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ pub fn normalize_to_whisper_wav(input_wav: &Path, out_dir: &Path) -> Result<Path
4545
return Err("WAV contains no samples".to_string());
4646
}
4747

48-
let mut samples_f32: Vec<f32> = samples_i16
48+
let samples_f32: Vec<f32> = samples_i16
4949
.iter()
5050
.map(|&s| s as f32 / i16::MAX as f32)
5151
.collect();
@@ -69,7 +69,7 @@ pub fn normalize_to_whisper_wav(input_wav: &Path, out_dir: &Path) -> Result<Path
6969
.iter()
7070
.fold(0.0f32, |m, &x| m.max(x.abs()));
7171
let gain = if peak > 0.0 { (TARGET_PEAK / peak).min(10.0) } else { 1.0 };
72-
let mut normalized: Vec<f32> = if (gain - 1.0).abs() > 1e-3 {
72+
let normalized: Vec<f32> = if (gain - 1.0).abs() > 1e-3 {
7373
resampled.iter().map(|&x| (x * gain).clamp(-1.0, 1.0)).collect()
7474
} else {
7575
resampled

src-tauri/src/commands/audio.rs

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use crate::utils::system_monitor;
1212
use crate::whisper::cache::TranscriberCache;
1313
use crate::whisper::languages::validate_language;
1414
use crate::whisper::manager::WhisperManager;
15-
use crate::{emit_to_window, update_recording_state, AppState, RecordingState};
15+
use crate::{emit_to_window, update_recording_state, AppState, RecordingMode, RecordingState};
1616
use cpal::traits::{DeviceTrait, HostTrait};
1717
use serde_json;
1818
use std::panic::{RefUnwindSafe, UnwindSafe};
@@ -434,6 +434,12 @@ pub async fn start_recording(
434434
],
435435
);
436436

437+
// If we're stuck in Error, recover to Idle before attempting a new start
438+
let current_state = crate::get_recording_state(&app);
439+
if matches!(current_state, crate::RecordingState::Error) {
440+
crate::update_recording_state(&app, crate::RecordingState::Idle, Some("recover".to_string()));
441+
}
442+
437443
// Validate all requirements upfront
438444
let validation_start = Instant::now();
439445
match validate_recording_requirements(&app).await {
@@ -464,6 +470,10 @@ pub async fn start_recording(
464470
// All validation passed, update state to starting
465471
log_state_transition("RECORDING", "idle", "starting", true, None);
466472
update_recording_state(&app, RecordingState::Starting, None);
473+
// Ensure transition actually happened; if blocked, abort early
474+
if !matches!(crate::get_recording_state(&app), crate::RecordingState::Starting) {
475+
return Err("Cannot start recording in current state".to_string());
476+
}
467477

468478
// Load recording config once to avoid repeated store access
469479
let config = get_recording_config(&app).await.map_err(|e| {
@@ -923,6 +933,22 @@ pub async fn stop_recording(
923933
}
924934
};
925935

936+
// Fast-path: handle header-only/empty WAV files before normalization
937+
if let Ok(meta) = std::fs::metadata(&audio_path) {
938+
// A valid WAV header is typically 44 bytes; <= 44 implies no audio samples were written
939+
if meta.len() <= 44 {
940+
let _ = emit_to_window(&app, "pill", "recording-too-short", "No audio captured");
941+
if let Err(e) = std::fs::remove_file(&audio_path) {
942+
log::debug!("Failed to remove empty audio file: {}", e);
943+
}
944+
if let Err(e) = crate::commands::window::hide_pill_widget(app.clone()).await {
945+
log::error!("Failed to hide pill window: {}", e);
946+
}
947+
update_recording_state(&app, RecordingState::Idle, None);
948+
return Ok("".to_string());
949+
}
950+
}
951+
926952

927953
// Normalize captured audio to Whisper contract (WAV PCM s16, mono, 16k)
928954
let parent_dir = audio_path
@@ -946,7 +972,22 @@ pub async fn stop_recording(
946972
log::debug!("Failed to remove raw audio: {}", e);
947973
}
948974

949-
// Duration gate (min 5s) using normalized file
975+
// Determine min duration based on recording mode (PTT vs Toggle) once
976+
let (min_duration_s_f32, min_duration_s_i32) = {
977+
let app_state = app.state::<AppState>();
978+
let mode = app_state
979+
.recording_mode
980+
.lock()
981+
.ok()
982+
.map(|g| *g)
983+
.unwrap_or(RecordingMode::Toggle);
984+
match mode {
985+
RecordingMode::PushToTalk => (1.0f32, 1i32),
986+
RecordingMode::Toggle => (3.0f32, 3i32),
987+
}
988+
};
989+
990+
// Duration gate (mode-specific) using normalized file
950991
let too_short = (|| -> Result<bool, String> {
951992
let reader = hound::WavReader::open(&normalized_path)
952993
.map_err(|e| format!("Failed to open normalized wav: {}", e))?;
@@ -960,12 +1001,17 @@ pub async fn stop_recording(
9601001
("bits", &spec.bits_per_sample.to_string().as_str()),
9611002
("duration_s", &format!("{:.2}", duration).as_str()),
9621003
]);
963-
Ok(duration < 5.0)
1004+
Ok(duration < min_duration_s_f32)
9641005
})();
9651006

9661007
if let Ok(true) = too_short {
9671008
// Emit friendly feedback and stop here
968-
let _ = emit_to_window(&app, "pill", "recording-too-short", "Recording shorter than 5 seconds");
1009+
let _ = emit_to_window(
1010+
&app,
1011+
"pill",
1012+
"recording-too-short",
1013+
format!("Recording shorter than {} seconds", min_duration_s_i32),
1014+
);
9691015
if let Err(e) = std::fs::remove_file(&normalized_path) {
9701016
log::debug!("Failed to remove short normalized audio: {}", e);
9711017
}

src-tauri/src/commands/license.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@ use crate::license::{api_client::LicenseApiClient, device, keychain, LicenseStat
22
use crate::AppState;
33
use chrono::{DateTime, Duration, Utc};
44
use serde::{Deserialize, Serialize};
5-
use std::collections::HashMap;
65
use std::panic::{RefUnwindSafe, UnwindSafe};
7-
use std::sync::Arc;
86
use std::time::Instant;
97
use tauri::{AppHandle, Manager};
108
use tauri_plugin_cache::{CacheExt, SetItemOptions};

src-tauri/src/parakeet/manager.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ impl ParakeetManager {
137137
&self,
138138
app: &AppHandle,
139139
model_name: &str,
140-
cancel_flag: Option<Arc<AtomicBool>>,
140+
_cancel_flag: Option<Arc<AtomicBool>>,
141141
progress_callback: impl Fn(u64, u64) + Send + 'static,
142142
) -> Result<(), String> {
143143
let Some(definition) = self.get_model_definition(model_name) else {

0 commit comments

Comments
 (0)