-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathspeech-server.yaml.example
More file actions
44 lines (37 loc) · 2.09 KB
/
speech-server.yaml.example
File metadata and controls
44 lines (37 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# speech-server configuration
# All fields are optional; omitted fields use the built-in defaults shown here.
# Config discovery order:
# 1. SPEECH_SERVER_CONFIG env var (path to a config file in this format)
# 2. ./speech-server.yaml in the working directory (your copy of this example, if present)
# 3. Built-in defaults
#
# speech-server.yaml is gitignored so local IPs stay out of version control.
# Copy this file to speech-server.yaml and edit as needed.
# NOTE(review): nesting below is reconstructed from the key grouping and the
# "only used when engine: ..." comments; confirm against the config loader.

# Log verbosity: trace | debug | info | notice | warning | error | critical
log_level: notice

servers:
  http:
    # Bind address; override with HTTP_HOST env var or Vapor's --hostname flag.
    host: 127.0.0.1
    # Listening port; override with HTTP_PORT env var or Vapor's --port flag.
    port: 8080
    # Maximum multipart upload size for /audio/transcriptions
    upload_limit_mb: 500
  wyoming:
    # Bind address for Wyoming TCP server; override with WYOMING_HOST env var.
    host: 127.0.0.1
    # TCP port for Wyoming protocol (Home Assistant). 0 = disabled;
    # override with WYOMING_PORT env var.
    port: 10300

stt:
  # Speech-to-text engine. Currently only: parakeet
  engine: parakeet
  parakeet:
    # v3 = Parakeet TDT 0.6B v3, multilingual (25 langs, default)
    # v2 = Parakeet TDT 0.6B v2, English-only (higher recall)
    model_version: v3

tts:
  # Text-to-speech engine: pocket_tts | avspeech | kokoro
  engine: pocket_tts

  # PocketTTS settings (only used when engine: pocket_tts)
  pocket_tts:
    # Strip emoji before synthesis (default true)
    sanitize_emoji: true

  # AVSpeech settings (only used when engine: avspeech)
  # Uses macOS's built-in AVSpeechSynthesizer — no model downloads, 150+ voices.
  # avspeech:
  #   default_voice: Samantha  # Short name or full identifier; nil = system locale default
  #   sample_rate: 22050       # Native AVSpeech output rate (Hz); change only if needed

  # Kokoro TTS settings (only used when engine: kokoro)
  # Uses FluidAudio's Kokoro model — 50 voices across 8 languages, 24 kHz, high quality.
  # kokoro:
  #   default_voice: af_heart  # Any Kokoro voice ID (e.g. af_heart, am_adam); default af_heart