Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion wavefront/client/src/config/voice-providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = {
},
},
stt: {
providers: ['deepgram', 'sarvam'] as const,
providers: ['deepgram', 'sarvam', 'elevenlabs'] as const,
configs: {
deepgram: {
name: 'Deepgram',
Expand Down Expand Up @@ -325,6 +325,33 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = {
},
},
},
elevenlabs: {
name: 'ElevenLabs',
badge: {
bg: 'bg-purple-100',
text: 'text-purple-800',
},
parameters: {
model: {
type: 'string',
default: 'scribe_v2_realtime',
description: 'ElevenLabs STT model',
options: ['scribe_v2_realtime'],
},
language: {
type: 'string',
default: '',
description: 'Language code (ISO-639-1, e.g., en, hi)',
placeholder: 'en',
},
sample_rate: {
type: 'number',
default: 8000,
description: 'Audio sample rate in Hz',
placeholder: '8000',
},
Comment on lines +347 to +352
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

sample_rate default of 8000 is suboptimal — recommend 16000.

The ElevenLabs realtime STT API supports PCM at 8–48 kHz, so 8000 is technically within range. However, the ElevenLabs API reference examples show "sample_rate": 16000 as the session default, and the default in reference SDK integrations is also 16000. Defaulting to 8 kHz (telephone-grade quality) will silently degrade transcription accuracy for users who don't explicitly override the value.

🐛 Proposed fix
          sample_rate: {
            type: 'number',
-           default: 8000,
+           default: 16000,
            description: 'Audio sample rate in Hz',
-           placeholder: '8000',
+           placeholder: '16000',
          },
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@wavefront/client/src/config/voice-providers.ts` around lines 347 - 352, The
schema field sample_rate in voice-providers.ts has a suboptimal default of 8000
Hz; update the sample_rate property's default value from 8000 to 16000 to match
ElevenLabs' reference examples and SDK integrations so transcription quality
isn't degraded for users who don't override it (ensure you change the default in
the sample_rate object where it's defined).

},
},
},
},
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { Input } from '@app/components/ui/input';
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select';
import { Textarea } from '@app/components/ui/textarea';
import { VOICE_PROVIDERS_CONFIG, getProviderConfig } from '@app/config/voice-providers';
import { SttProvider } from '@app/types/stt-config';
import { extractErrorMessage } from '@app/lib/utils';
import { useNotifyStore } from '@app/store';
import { zodResolver } from '@hookform/resolvers/zod';
Expand All @@ -32,7 +33,7 @@ import { z } from 'zod';
const createSttConfigSchema = z.object({
display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
description: z.string().max(500, 'Description must be 500 characters or less').optional(),
provider: z.enum(['deepgram', 'sarvam'] as [string, ...string[]]),
provider: z.enum(['deepgram', 'sarvam', 'elevenlabs'] as [string, ...string[]]),
api_key: z.string().min(1, 'API key is required'),
});

Expand Down Expand Up @@ -76,7 +77,7 @@ const CreateSttConfigDialog: React.FC<CreateSttConfigDialogProps> = ({ isOpen, o
await floConsoleService.sttConfigService.createSttConfig({
display_name: data.display_name.trim(),
description: data.description?.trim() || null,
provider: data.provider as 'deepgram',
provider: data.provider as SttProvider,
api_key: data.api_key.trim(),
});
notifySuccess('STT configuration created successfully');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import { z } from 'zod';
const updateSttConfigSchema = z.object({
display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
description: z.string().max(500, 'Description must be 500 characters or less').optional(),
provider: z.enum(['deepgram', 'sarvam'] as [string, ...string[]]),
provider: z.enum(['deepgram', 'sarvam', 'elevenlabs'] as [string, ...string[]]),
api_key: z.string().optional(),
});

Expand Down
9 changes: 8 additions & 1 deletion wavefront/client/src/types/stt-config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { IApiResponse } from '@app/lib/axios';

export type SttProvider = 'deepgram' | 'sarvam';
export type SttProvider = 'deepgram' | 'sarvam' | 'elevenlabs';

export interface SttConfig {
id: string;
Expand Down Expand Up @@ -60,3 +60,10 @@ export interface SarvamSttParameters {
vad_signals?: boolean;
high_vad_sensitivity?: boolean;
}

/**
 * Provider-specific parameters for an ElevenLabs STT configuration.
 *
 * Every field is optional; the documented defaults apply when a field is omitted.
 */
export interface ElevenLabsSttParameters {
  /** STT model identifier. Default: 'scribe_v2_realtime'. */
  model?: string;
  /** Language as an ISO-639-1 code, e.g. 'en' or 'hi'. */
  language?: string;
  /** Audio sample rate. Default: 8000. */
  sample_rate?: number;
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
STT (Speech-to-Text) service factory

Supports multiple providers: Deepgram, AssemblyAI, Whisper, Google, Azure
Supports multiple providers: Deepgram, Sarvam, ElevenLabs
"""

from typing import Dict, Any
Expand All @@ -10,6 +10,7 @@
# Pipecat STT services
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.sarvam.stt import SarvamSTTService
from pipecat.services.elevenlabs.stt import ElevenLabsRealtimeSTTService

# Pipecat language enum
from pipecat.transcriptions.language import Language
Expand Down Expand Up @@ -57,6 +58,8 @@ def create_stt_service(stt_config: Dict[str, Any]):
return STTServiceFactory._create_deepgram_stt(api_key, parameters)
elif provider == 'sarvam':
return STTServiceFactory._create_sarvam_stt(api_key, parameters)
elif provider == 'elevenlabs':
return STTServiceFactory._create_elevenlabs_stt(api_key, parameters)
elif provider == 'assemblyai':
return STTServiceFactory._create_assemblyai_stt(api_key, parameters)
elif provider == 'whisper':
Expand Down Expand Up @@ -162,6 +165,55 @@ def _create_sarvam_stt(api_key: str, parameters: Dict[str, Any]):
params=input_params,
)

# Short (two-letter) language codes keyed to the ISO-639-3 codes used for
# ElevenLabs. Codes missing from this table are not translated.
ELEVENLABS_LANGUAGE_MAP = dict(
    (
        ('en', 'eng'),  # English
        ('hi', 'hin'),  # Hindi
        ('ta', 'tam'),  # Tamil
        ('te', 'tel'),  # Telugu
        ('kn', 'kan'),  # Kannada
        ('ml', 'mal'),  # Malayalam
        ('gu', 'guj'),  # Gujarati
        ('bn', 'ben'),  # Bengali
        ('mr', 'mar'),  # Marathi
        ('pa', 'pan'),  # Punjabi
        ('or', 'ori'),  # Odia
    )
)

@staticmethod
def _create_elevenlabs_stt(api_key: str, parameters: Dict[str, Any]):
    """Create ElevenLabs Realtime STT service (WebSocket streaming, scribe_v2_realtime).

    Args:
        api_key: ElevenLabs API key, forwarded to the service unchanged.
        parameters: Provider parameters. Recognised keys:
            language: Short code found in ``ELEVENLABS_LANGUAGE_MAP`` or one
                of that map's ISO-639-3 values. Matching ignores case and
                surrounding whitespace. An unrecognised code is logged and
                dropped, so no ``language_code`` is sent.
            model: Model name; a missing, ``None`` or empty value falls back
                to ``'scribe_v2_realtime'``.
            sample_rate: Forwarded as given; 8000 when the key is absent.

    Returns:
        The constructed ``ElevenLabsRealtimeSTTService`` instance.
    """
    params_dict = {}

    # Map language code to ElevenLabs ISO-639-3 code
    raw_language = parameters.get('language')
    if raw_language:
        language_map = STTServiceFactory.ELEVENLABS_LANGUAGE_MAP
        # Normalise so 'EN' or ' en ' resolves the same way as 'en'.
        lang_code = str(raw_language).strip().lower()
        if lang_code in language_map.values():
            # Already an ISO-639-3 code this factory knows about (e.g. 'eng').
            elevenlabs_lang = lang_code
        else:
            elevenlabs_lang = language_map.get(lang_code)

        if elevenlabs_lang:
            params_dict['language_code'] = elevenlabs_lang
        else:
            logger.warning(
                f"Unknown ElevenLabs language '{raw_language}', skipping (auto-detect will be used)"
            )

    # `or` rather than a .get() default: a stored None/'' must not be sent as the model.
    model = parameters.get('model') or 'scribe_v2_realtime'
    # Only a missing key falls back to 8000; an explicit value (including None)
    # is passed through untouched.
    sample_rate = parameters.get('sample_rate', 8000)

    # InputParams is only built when there is something to put in it.
    input_params = (
        ElevenLabsRealtimeSTTService.InputParams(**params_dict)
        if params_dict
        else None
    )

    logger.info(f'ElevenLabs STT config: model={model}, sample_rate={sample_rate}')

    return ElevenLabsRealtimeSTTService(
        api_key=api_key,
        model=model,
        sample_rate=sample_rate,
        params=input_params,
    )

@staticmethod
def _create_assemblyai_stt(api_key: str, parameters: Dict[str, Any]):
"""Create AssemblyAI STT service"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class SttProvider(str, Enum):
GOOGLE = 'google'
AZURE = 'azure'
SARVAM = 'sarvam'
ELEVENLABS = 'elevenlabs'


class CreateSttConfigPayload(BaseModel):
Expand Down