Skip to content

Commit 54e2a0e

Browse files
committed
fix: Add support for multi-channel audio recording (4+ channels)
- Convert 4+ channel audio to mono by averaging all channels
- Fixes recording with professional audio devices (Realtek arrays, etc)
- Maintains compatibility with existing mono/stereo recordings
- Uses same averaging algorithm as FFmpeg/industry standard
- Fixes: "Unsupported channel count: 4" error

Users can now record with any audio device regardless of channel count.
1 parent 7f7bef8 commit 54e2a0e

File tree

7 files changed

+159
-35
lines changed

7 files changed

+159
-35
lines changed

.claude/settings.local.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
"WebSearch",
1717
"Bash(gh pr diff:*)",
1818
"Read(//Users/moinulmoin/Developer/oss/voicetypr-streaming/**)",
19-
"mcp__sequential-thinking__sequentialthinking"
19+
"mcp__sequential-thinking__sequentialthinking",
20+
"Bash(pnpm audit:*)"
2021
],
2122
"deny": [],
2223
"defaultMode": "acceptEdits"

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
"release-it": "^19.0.3",
6969
"tw-animate-css": "^1.3.5",
7070
"typescript": "~5.6.2",
71-
"vite": "^6.0.3",
71+
"vite": "^6.3.6",
7272
"vitest": "^3.2.4"
7373
}
7474
}

pnpm-lock.yaml

Lines changed: 23 additions & 17 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src-tauri/Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src-tauri/src/whisper/transcriber.rs

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,12 +312,17 @@ impl Transcriber {
312312
}
313313

314314
/* ----------------------------------------------
315-
3) stereo → mono (Whisper needs mono)
315+
3) multi-channel → mono (Whisper needs mono)
316316
---------------------------------------------- */
317317
if spec.channels == 2 {
318+
// Use the built-in stereo to mono conversion
318319
audio = convert_stereo_to_mono_audio(&audio).map_err(|e| e.to_string())?;
320+
} else if spec.channels > 2 {
321+
// Handle multi-channel audio (3, 4, 5.1, 7.1, etc.)
322+
log::info!("[TRANSCRIPTION_DEBUG] Converting {}-channel audio to mono", spec.channels);
323+
audio = convert_multichannel_to_mono(&audio, spec.channels as usize)?;
319324
} else if spec.channels != 1 {
320-
return Err(format!("Unsupported channel count: {}", spec.channels));
325+
return Err(format!("Invalid channel count: {}", spec.channels));
321326
}
322327

323328
// Store original audio length before the move
@@ -559,3 +564,93 @@ impl Transcriber {
559564
Ok(result)
560565
}
561566
}
567+
568+
/// Convert multi-channel audio to mono by averaging all channels
569+
///
570+
/// # Arguments
571+
/// * `audio` - Interleaved audio samples (ch1, ch2, ch3, ch4, ch1, ch2, ...)
572+
/// * `channels` - Number of channels in the audio
573+
///
574+
/// # Returns
575+
/// Mono audio with averaged samples from all channels
576+
fn convert_multichannel_to_mono(audio: &[f32], channels: usize) -> Result<Vec<f32>, String> {
577+
if channels == 0 {
578+
return Err("Channel count cannot be zero".to_string());
579+
}
580+
581+
if channels == 1 {
582+
// Already mono, just return a copy
583+
return Ok(audio.to_vec());
584+
}
585+
586+
let samples_per_channel = audio.len() / channels;
587+
let mut mono_audio = Vec::with_capacity(samples_per_channel);
588+
589+
// Process each frame (set of samples across all channels)
590+
for i in 0..samples_per_channel {
591+
let mut sum = 0.0f32;
592+
593+
// Sum all channels for this sample position
594+
for ch in 0..channels {
595+
let idx = i * channels + ch;
596+
if idx < audio.len() {
597+
sum += audio[idx];
598+
}
599+
}
600+
601+
// Average the channels
602+
mono_audio.push(sum / channels as f32);
603+
}
604+
605+
log::info!(
606+
"[AUDIO] Downmixed {}-channel audio to mono: {} samples -> {} samples",
607+
channels,
608+
audio.len(),
609+
mono_audio.len()
610+
);
611+
612+
Ok(mono_audio)
613+
}
614+
615+
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing downmixed samples.
    const EPSILON: f32 = 0.001;

    /// Assert that two sample buffers have the same length and that every
    /// pair of samples differs by less than `EPSILON`.
    fn assert_samples_close(actual: &[f32], expected: &[f32]) {
        assert_eq!(actual.len(), expected.len());
        for (index, (got, want)) in actual.iter().zip(expected).enumerate() {
            assert!(
                (got - want).abs() < EPSILON,
                "sample {}: got {}, expected {}",
                index,
                got,
                want
            );
        }
    }

    #[test]
    fn test_convert_multichannel_to_mono() {
        // Interleaved 4-channel audio: [ch1, ch2, ch3, ch4, ch1, ch2, ...]
        let four_channel_audio = vec![
            1.0, 2.0, 3.0, 4.0, // Frame 1: channels 1-4
            5.0, 6.0, 7.0, 8.0, // Frame 2: channels 1-4
            -1.0, -2.0, -3.0, -4.0, // Frame 3: channels 1-4
        ];

        let result = convert_multichannel_to_mono(&four_channel_audio, 4).unwrap();

        // Expected: average of each frame's channels
        // Frame 1: (1+2+3+4)/4 = 2.5
        // Frame 2: (5+6+7+8)/4 = 6.5
        // Frame 3: (-1-2-3-4)/4 = -2.5
        assert_samples_close(&result, &[2.5, 6.5, -2.5]);
    }

    #[test]
    fn test_convert_mono_passthrough() {
        // A channel count of 1 must return the samples unchanged
        let mono_audio = vec![1.0, 2.0, 3.0, 4.0];
        let result = convert_multichannel_to_mono(&mono_audio, 1).unwrap();
        assert_eq!(result, mono_audio);
    }

    #[test]
    fn test_convert_invalid_channels() {
        // Zero channels is rejected with an error
        let audio = vec![1.0, 2.0];
        let result = convert_multichannel_to_mono(&audio, 0);
        assert_eq!(result.unwrap_err(), "Channel count cannot be zero");
    }

    #[test]
    fn test_convert_drops_trailing_partial_frame() {
        // One complete 3-channel frame followed by two stray samples:
        // only the complete frame contributes to the output.
        let audio = vec![3.0, 6.0, 9.0, 1.0, 2.0];
        let result = convert_multichannel_to_mono(&audio, 3).unwrap();
        assert_samples_close(&result, &[6.0]);
    }

    #[test]
    fn test_convert_empty_input() {
        // No samples in, no samples out
        let result = convert_multichannel_to_mono(&[], 4).unwrap();
        assert!(result.is_empty());
    }
}

src/components/AppContainer.test.tsx

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,22 @@ vi.mock('@/contexts/ReadinessContext', () => ({
3434
})
3535
}));
3636

37+
// Mock ModelManagementContext that AppContainer actually uses
38+
vi.mock('@/contexts/ModelManagementContext', () => ({
39+
useModelManagementContext: () => ({
40+
models: {},
41+
downloadProgress: {},
42+
verifyingModels: new Set(),
43+
downloadModel: vi.fn(),
44+
retryDownload: vi.fn(),
45+
cancelDownload: vi.fn(),
46+
deleteModel: vi.fn(),
47+
refreshModels: vi.fn(),
48+
preloadModel: vi.fn(),
49+
verifyModel: vi.fn()
50+
})
51+
}));
52+
3753
// Mock services
3854
vi.mock('@/services/updateService', () => ({
3955
updateService: {

src/components/tabs/ModelsTab.test.tsx

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ import { ModelsTab } from './ModelsTab';
66
vi.mock('sonner', () => ({
77
toast: {
88
info: vi.fn(),
9-
warning: vi.fn()
9+
warning: vi.fn(),
10+
error: vi.fn(),
11+
success: vi.fn()
1012
}
1113
}));
1214

@@ -25,16 +27,20 @@ let mockModels = {
2527
'small.en': { id: 'small.en', name: 'Small English', size: 244, downloaded: false }
2628
};
2729

28-
vi.mock('@/hooks/useModelManagement', () => ({
29-
useModelManagement: () => ({
30+
// Mock the ModelManagementContext that ModelsTab actually imports
31+
vi.mock('@/contexts/ModelManagementContext', () => ({
32+
useModelManagementContext: () => ({
3033
models: mockModels,
3134
downloadProgress: {},
3235
verifyingModels: new Set(),
33-
sortedModels: Object.values(mockModels),
36+
sortedModels: Object.entries(mockModels),
3437
downloadModel: vi.fn(),
3538
deleteModel: vi.fn(),
36-
selectModel: vi.fn(),
37-
retryDownload: vi.fn()
39+
cancelDownload: vi.fn(),
40+
retryDownload: vi.fn(),
41+
refreshModels: vi.fn(),
42+
preloadModel: vi.fn(),
43+
verifyModel: vi.fn()
3844
})
3945
}));
4046

@@ -70,15 +76,15 @@ describe('ModelsTab', () => {
7076
expect(screen.getByText('Models Count: 2')).toBeInTheDocument();
7177
});
7278

73-
it('shows toast on download retry', async () => {
79+
it('shows error toast on download failure', async () => {
7480
const { toast } = await import('sonner');
7581
render(<ModelsTab />);
76-
77-
const callback = (window as any).__testEventCallbacks['download-retry'];
78-
callback({ model: 'small.en', attempt: 1, max_attempts: 3 });
7982

80-
expect(toast.warning).toHaveBeenCalledWith(
81-
'Download Retry',
83+
const callback = (window as any).__testEventCallbacks['download-error'];
84+
callback({ model: 'small.en', error: 'Network error' });
85+
86+
expect(toast.error).toHaveBeenCalledWith(
87+
'Download Failed',
8288
expect.objectContaining({
8389
description: expect.stringContaining('small.en')
8490
})

0 commit comments

Comments
 (0)