Skip to content

Commit c59cc6f

Browse files
committed
interim results on transformers.js implementation
1 parent 88f5381 commit c59cc6f

File tree

3 files changed

+85
-11
lines changed

3 files changed

+85
-11
lines changed

app/src/utils/whisper/WhisperModelManager.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ export class WhisperModelManager {
1919
private currentConfig: WhisperModelConfig | null = null;
2020
private stateChangeListeners: Array<(state: WhisperModelState) => void> = [];
2121
private pendingTranscriptions = new Map<number, PendingTranscription>();
22+
private interimResultListeners: Array<(text: string, chunkId: number) => void> = [];
2223

2324
private constructor() {}
2425

@@ -40,6 +41,17 @@ export class WhisperModelManager {
4041
};
4142
}
4243

44+
/**
 * Registers a listener for interim transcription results.
 *
 * @param listener Callback invoked with the interim text and the id of the chunk it belongs to.
 * @returns A function that removes this listener again when called.
 */
public onInterimResult(listener: (text: string, chunkId: number) => void): () => void {
  this.interimResultListeners.push(listener);

  // Unsubscribing swaps in a filtered copy rather than mutating the array in place.
  const unsubscribe = (): void => {
    this.interimResultListeners = this.interimResultListeners.filter(
      registered => registered !== listener
    );
  };
  return unsubscribe;
}
50+
51+
/**
 * Passes an interim transcription result to every registered interim-result listener.
 *
 * @param text Interim text to deliver.
 * @param chunkId Id of the chunk the text belongs to.
 */
private notifyInterimResult(text: string, chunkId: number): void {
  // Iterate over a snapshot so listeners registered during notification are not visited.
  const recipients = [...this.interimResultListeners];
  for (const recipient of recipients) {
    recipient(text, chunkId);
  }
}
54+
4355
private setState(updates: Partial<WhisperModelState>): void {
4456
this.state = { ...this.state, ...updates };
4557
this.stateChangeListeners.forEach(listener => listener(this.getState()));
@@ -189,6 +201,10 @@ export class WhisperModelManager {
189201
});
190202
break;
191203

204+
case 'transcription-interim':
205+
this.handleInterimResult(data);
206+
break;
207+
192208
case 'transcription-complete':
193209
this.handleTranscriptionComplete(data);
194210
break;
@@ -202,6 +218,12 @@ export class WhisperModelManager {
202218
}
203219
}
204220

221+
/**
 * Handles the payload of a 'transcription-interim' message: logs a short
 * preview of the text and forwards the result to the interim-result listeners.
 *
 * @param data Interim text together with the id of the chunk being transcribed.
 */
private handleInterimResult(data: { text: string; chunkId: number }): void {
  const { text, chunkId } = data;
  // Only append an ellipsis when the preview was actually cut off; otherwise
  // the log would suggest that short interim results were truncated.
  const preview = text.length > 50 ? `${text.slice(0, 50)}...` : text;
  Logger.debug('WhisperModelManager', `Interim result for chunk ${chunkId}: "${preview}"`);
  this.notifyInterimResult(text, chunkId);
}
226+
205227
private handleProgressMessage(progress: ProgressItem): void {
206228
this.setState({
207229
progress: this.updateProgressItems(this.state.progress, progress)

app/src/utils/whisper/WhisperTranscriptionService.ts

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ export class WhisperTranscriptionService {
1919
// Subscriber management - services push to all subscribers
2020
private subscribers = new Set<TranscriptionSubscriber>();
2121

22+
// Cleanup function for interim result listener
23+
private unsubscribeInterimResults: (() => void) | null = null;
24+
2225
public async start(stream: MediaStream, streamType?: AudioStreamType): Promise<void> {
2326
if (this.isRunning) {
2427
Logger.warn('WhisperTranscriptionService', 'Service already running');
@@ -39,6 +42,14 @@ export class WhisperTranscriptionService {
3942
const settings = SensorSettings.getWhisperSettings();
4043
this.chunkDurationMs = settings.chunkDurationMs;
4144

45+
// Subscribe to interim results from the model manager
46+
this.unsubscribeInterimResults = modelManager.onInterimResult((text, _chunkId) => {
47+
if (this.isRunning) {
48+
this.setInterimToSubscribers(text);
49+
TranscriptionStateManager.setInterimText(this.streamType, text);
50+
}
51+
});
52+
4253
Logger.info('WhisperTranscriptionService', `Starting transcription for ${this.streamType} with ${this.chunkDurationMs}ms chunks`);
4354

4455
this.transcribeLoop();
@@ -51,6 +62,12 @@ export class WhisperTranscriptionService {
5162
this.isRunning = false;
5263
this.currentStream = null;
5364

65+
// Unsubscribe from interim results
66+
if (this.unsubscribeInterimResults) {
67+
this.unsubscribeInterimResults();
68+
this.unsubscribeInterimResults = null;
69+
}
70+
5471
// Note: We don't clear subscribers here - they are managed by StreamManager
5572
// and may persist across service restarts
5673

@@ -71,11 +88,20 @@ export class WhisperTranscriptionService {
7188
}
7289

7390
/**
74-
* Push transcribed text to all registered subscribers
91+
* Set interim text on all registered subscribers (replaces previous interim)
92+
*/
93+
private setInterimToSubscribers(text: string): void {
  // Hand the interim text to each registered subscriber via setInterimText.
  this.subscribers.forEach(target => {
    target.setInterimText(text);
  });
}
98+
99+
/**
100+
* Commit final text to all registered subscribers (clears interim, adds to committed)
75101
*/
76-
private pushToSubscribers(text: string): void {
102+
private commitToSubscribers(text: string): void {
  // Hand the final text to each registered subscriber via commitText.
  this.subscribers.forEach(target => {
    target.commitText(text);
  });
}
81107

@@ -121,8 +147,8 @@ export class WhisperTranscriptionService {
121147
.transcribe(await audioBlob.arrayBuffer(), chunkId);
122148

123149
if (result?.text && this.isRunning) {
124-
// Push to all registered subscribers (agent-specific accumulators)
125-
this.pushToSubscribers(result.text);
150+
// Commit final text to all registered subscribers (clears interim, adds to committed)
151+
this.commitToSubscribers(result.text);
126152

127153
// Notify state manager for UI updates (maintains its own rolling window)
128154
TranscriptionStateManager.chunkTranscriptionEnded(

app/src/utils/whisper/whisper.worker.ts

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ import { WhisperModelConfig } from './types';
22

33
const TASK = 'automatic-speech-recognition';
44

5+
// WhisperTextStreamer will be loaded dynamically with transformers module
6+
let WhisperTextStreamer: any = null;
7+
58
class WhisperPipelineFactory {
69
static task = TASK;
710
static model: string | null = null;
@@ -20,6 +23,8 @@ class WhisperPipelineFactory {
2023
// Dynamic import - only loads when actually needed!
2124
this.transformersModule = await import('@huggingface/transformers');
2225
this.transformersModule.env.allowLocalModels = false;
26+
// Load WhisperTextStreamer for streaming interim results
27+
WhisperTextStreamer = this.transformersModule.WhisperTextStreamer;
2328
}
2429
return this.transformersModule;
2530
}
@@ -79,35 +84,56 @@ self.onmessage = async (event) => {
7984

8085
case 'transcribe':
8186
const { audio, chunkId } = data;
82-
87+
8388
const instance = await WhisperPipelineFactory.getInstance();
8489
const currentConfig = WhisperPipelineFactory.config;
85-
90+
8691
if (!instance || !currentConfig) {
8792
throw new Error('Pipeline not initialized');
8893
}
8994

9095
// Build transcription options - only for multilingual models
9196
const isEnglishOnlyModel = currentConfig.modelId.endsWith('.en');
9297
const transcribeOptions: any = {};
93-
98+
9499
if (!isEnglishOnlyModel) {
95100
// Only add parameters for multilingual models
96101
if (currentConfig.task) {
97102
transcribeOptions.task = currentConfig.task;
98103
}
99-
104+
100105
if (currentConfig.language && currentConfig.language !== 'auto') {
101106
transcribeOptions.language = currentConfig.language;
102107
}
103-
108+
104109
// Set default chunking for different model types
105110
const isDistilWhisper = currentConfig.modelId.startsWith('distil-whisper/');
106111
transcribeOptions.chunk_length_s = isDistilWhisper ? 20 : 30;
107112
transcribeOptions.stride_length_s = isDistilWhisper ? 3 : 5;
108113
}
109114

110-
const output = Object.keys(transcribeOptions).length > 0
115+
// Create streamer for interim results if WhisperTextStreamer is available
116+
if (WhisperTextStreamer && instance.tokenizer) {
  // Raw (untrimmed) transcript built from every fragment streamed so far.
  // Per the transformers.js TextStreamer contract, callback_function receives
  // only the newly decoded fragment, not the whole transcript, and fragments
  // carry their own separating whitespace, so they are concatenated untrimmed.
  let accumulatedText = '';
  // Last text actually posted; used to avoid sending identical messages.
  let lastInterimText = '';
  const streamer = new WhisperTextStreamer(instance.tokenizer, {
    skip_prompt: true,
    skip_special_tokens: true,
    callback_function: (text: string) => {
      accumulatedText += text;
      // Post the cumulative text so a consumer that replaces its interim
      // text on every message shows the full partial transcript.
      const interimText = accumulatedText.trim();
      if (interimText && interimText !== lastInterimText) {
        lastInterimText = interimText;
        self.postMessage({
          type: 'transcription-interim',
          data: { text: interimText, chunkId }
        });
      }
    }
  });
  transcribeOptions.streamer = streamer;
}
135+
136+
const output = Object.keys(transcribeOptions).length > 0
111137
? await instance(audio, transcribeOptions)
112138
: await instance(audio);
113139
const newText = (output.text as string).trim();

0 commit comments

Comments
 (0)