@@ -54,8 +54,8 @@ public class SpeechRecAugmentos extends SpeechRecFramework {
5454 private SherpaOnnxTranscriber sherpaTranscriber ;
5555
5656 // Backend data sending control flags
57- private volatile boolean shouldSendPcmToBackend = true ;
58- private volatile boolean shouldSendTranscriptionToBackend = false ;
57+ private volatile boolean sendPcmToBackend = true ;
58+ private volatile boolean sendTranscriptionToBackend = false ;
5959
6060 private SpeechRecAugmentos (Context context ) {
6161 this .mContext = context ;
@@ -128,8 +128,11 @@ private void sendFormattedTranscriptionToBackend(String text, boolean isFinal, l
128128 long currentTime = System .currentTimeMillis ();
129129 long relativeTime = currentTime - sessionStartTime ;
130130
131- // Adjust start time based on result type
132- // Final results use longer timespan than partial results
131+
132+	// TODO: This fixed start-time offset (1000 ms partial / 2000 ms final) is wrong.
133+ // Instead we should have some timer inside sherpa that is reset on every final result.
134+ // Also I don't think we need speaker id as there is no diarization.
135+
133136 int timeOffset = isFinal ? 2000 : 1000 ;
134137 transcription .put ("startTime" , relativeTime - timeOffset );
135138 transcription .put ("endTime" , relativeTime );
@@ -254,7 +257,6 @@ private void startVadProcessingThread() {
254257 }
255258 }
256259 // If poll times out, just continue the loop
257-
258260 } catch (InterruptedException e ) {
259261 Thread .currentThread ().interrupt ();
260262 break ;
@@ -270,10 +272,6 @@ private void sendVadStatus(boolean isNowSpeaking) {
270272 ServerComms .getInstance ().sendVadStatus (isNowSpeaking );
271273 }
272274
273- public boolean sendPcmToBackend = true ;
274-
275- public boolean sendTranscriptionToBackend = false ;
276-
277275 /**
278276 * Called by external code to feed raw PCM chunks (16-bit, 16kHz).
279277 * runs VAD on decoded data to tell whether or not we should send the encoded data to the backend
@@ -310,8 +308,6 @@ public void ingestAudioChunk(byte[] audioChunk) {
310308 lc3RollingBuffer .remove (0 ); // Remove oldest chunks to maintain rolling window
311309 }
312310 }
313-
314-
315311 //SENDING STUFF
316312 // If bypassing VAD for debugging or currently speaking, send data live
317313 if (bypassVadForDebugging || isSpeaking ) {
@@ -356,16 +352,14 @@ public void ingestLC3AudioChunk(byte[] LC3audioChunk) {
356352 }
357353 }
358354
359- if (sendTranscriptionToBackend ) {
360- if (bypassVadForDebugging || isSpeaking ) {
361- if (sherpaTranscriber != null ) {
362- // TODO: Verify whether this would work properly
363- // Invoking this because same sendAudioChunk handling in servercomms
364- // So assuming this would also work
365- sherpaTranscriber .acceptAudio (LC3audioChunk );
366- }
367- }
368- }
355+ // TODO: Should we use this?
356+ // if (sendTranscriptionToBackend) {
357+ // if (bypassVadForDebugging || isSpeaking) {
358+ // if (sherpaTranscriber != null) {
359+ // sherpaTranscriber.acceptAudio(LC3audioChunk);
360+ // }
361+ // }
362+ // }
369363 }
370364
371365 /**
@@ -487,22 +481,22 @@ public void microphoneStateChanged(boolean state, List<SpeechRequiredDataType> r
487481 sherpaTranscriber .microphoneStateChanged (state );
488482 }
489483
490- // Set shouldSendPcmToBackend and shouldSendTranscriptionToBackend based on required data
484+ // Set sendPcmToBackend and sendTranscriptionToBackend based on required data
491485	    // If state requires PCM_OR_TRANSCRIPTION, decide between sending PCM or transcription based on whether the measured network bandwidth falls below a threshold
492- if (requiredData .contains (SpeechRequiredDataType .PCM_OR_TRANS )) {
486+ if (requiredData .contains (SpeechRequiredDataType .PCM_OR_TRANSCRIPTION )) {
493487 // TODO: Implement bandwidth detection logic
494488	        // Transcription would be more bandwidth efficient, but we currently default to PCM (see below)
495489 // In the future, check network quality and decide:
496490 // - If high bandwidth: send PCM for better quality
497491 // - If low bandwidth: send transcription for efficiency
498492 // For now default to pcm
499- shouldSendPcmToBackend = state ;
493+ sendPcmToBackend = state ;
500494 }
501- if (requiredData .contains (SpeechRequiredDataType .PCM_AUDIO )) {
502- shouldSendPcmToBackend = state ;
503- }
504- if (requiredData .contains (SpeechRequiredDataType .TRANSCRIPTION )) {
505- shouldSendTranscriptionToBackend = state ;
495+ if (requiredData .contains (SpeechRequiredDataType .PCM )) {
496+ sendPcmToBackend = state ;
497+ }
498+ if (requiredData .contains (SpeechRequiredDataType .TRANSCRIPTION )) {
499+ sendTranscriptionToBackend = state ;
506500 }
507501
508502
0 commit comments