Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion examples/avatar_agents/audio_wave/agent_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ async def entrypoint(ctx: JobContext):
stt=inference.STT("deepgram/nova-3"),
llm=inference.LLM("google/gemini-2.5-flash"),
tts=inference.TTS("cartesia/sonic-3"),
vad=inference.VAD(model="silero"),
resume_false_interruption=False,
)

Expand Down
1 change: 0 additions & 1 deletion examples/avatar_agents/keyframe/agent_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ async def entrypoint(ctx: JobContext):
llm=inference.LLM("google/gemini-2.5-flash"),
tts=inference.TTS("cartesia/sonic-3"),
resume_false_interruption=False,
vad=inference.VAD(model="silero"),
turn_detection=AudioTurnDetector(),
)

Expand Down
1 change: 0 additions & 1 deletion examples/drive-thru/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,6 @@ async def drive_thru_agent(ctx: JobContext) -> None:
llm=inference.LLM("openai/gpt-5-mini"),
tts=inference.TTS("cartesia/sonic-3", voice="f786b574-daa5-4673-aa0c-cbe3e8534c02"),
turn_detection=AudioTurnDetector(),
vad=inference.VAD(model="silero"),
max_tool_steps=10,
)

Expand Down
1 change: 0 additions & 1 deletion examples/frontdesk/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,6 @@ async def frontdesk_agent(ctx: JobContext):
llm=inference.LLM("google/gemini-2.5-flash"),
tts=inference.TTS("cartesia/sonic-3", voice="39b376fc-488e-4d0c-8b37-e00b72059fdd"),
turn_detection=AudioTurnDetector(),
vad=inference.VAD(model="silero"),
max_tool_steps=1,
)

Expand Down
1 change: 0 additions & 1 deletion examples/healthcare/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,6 @@ async def entrypoint(ctx: JobContext):
stt=inference.STT("deepgram/nova-3", language="multi"),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("inworld/inworld-tts-1"),
vad=inference.VAD(model="silero"),
preemptive_generation=True,
)

Expand Down
1 change: 0 additions & 1 deletion examples/inference/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ async def entrypoint(ctx: JobContext) -> None:
stt=inference.STT(model=DEFAULT_STT),
llm=inference.LLM(model=DEFAULT_LLM),
tts=inference.TTS(model=DEFAULT_TTS),
vad=inference.VAD(model="silero"),
)

def parse_value(payload: str, fallback: str) -> str:
Expand Down
1 change: 0 additions & 1 deletion examples/other/elevenlab_scribe_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ async def entrypoint(ctx: JobContext) -> None:

session: AgentSession = AgentSession(
allow_interruptions=True,
vad=inference.VAD(model="silero"),
stt=stt,
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3"),
Expand Down
2 changes: 0 additions & 2 deletions examples/other/kokoro_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
AgentSession,
JobContext,
cli,
inference,
metrics,
)
from livekit.agents.voice import MetricsCollectedEvent
Expand Down Expand Up @@ -43,7 +42,6 @@ async def entrypoint(ctx: JobContext):
"user_id": "your user_id",
}
session = AgentSession(
vad=inference.VAD(model="silero"),
# any combination of STT, LLM, TTS, or realtime API can be used
llm=openai.LLM(model="gpt-4.1-mini"),
stt=deepgram.STT(model="nova-3", language="multi"),
Expand Down
4 changes: 1 addition & 3 deletions examples/other/transcription/multi-user-transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,7 @@ async def _start_session(self, participant: rtc.RemoteParticipant) -> AgentSessi
if participant.identity in self._sessions:
return self._sessions[participant.identity]

session = AgentSession(
vad=inference.VAD(model="silero"),
)
session = AgentSession()
await session.start(
agent=Transcriber(
participant_identity=participant.identity,
Expand Down
2 changes: 0 additions & 2 deletions examples/other/transcription/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
MetricsCollectedEvent,
StopResponse,
cli,
inference,
llm,
metrics,
room_io,
Expand Down Expand Up @@ -77,7 +76,6 @@ async def entrypoint(ctx: JobContext):

session = AgentSession(
# vad is only needed for non-streaming STT implementations
vad=inference.VAD(model="silero"),
)

@session.on("metrics_collected")
Expand Down
1 change: 0 additions & 1 deletion examples/survey/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,6 @@ async def entrypoint(ctx: JobContext):
llm=inference.LLM("google/gemini-2.5-flash"),
stt=inference.STT("deepgram/nova-3", language="multi"),
tts=inference.TTS("inworld/inworld-tts-1"),
vad=inference.VAD(model="silero"),
turn_detection=AudioTurnDetector(),
preemptive_generation=True,
)
Expand Down
1 change: 0 additions & 1 deletion examples/telephony/amd.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ async def entrypoint(ctx: JobContext):
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"),
turn_detection=AudioTurnDetector(),
vad=inference.VAD(model="silero"),
preemptive_generation=True,
)

Expand Down
1 change: 0 additions & 1 deletion examples/telephony/bank-ivr/ivr_navigator_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ async def dtmf_session(ctx: JobContext) -> None:
}

session: AgentSession = AgentSession(
vad=inference.VAD(model="silero"),
llm=inference.LLM("openai/gpt-4.1"),
stt=inference.STT("deepgram/nova-3"),
tts=inference.TTS("rime/arcana"),
Expand Down
1 change: 0 additions & 1 deletion examples/telephony/bank-ivr/ivr_system_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,6 @@ async def bank_ivr_session(ctx: JobContext) -> None:
state = SessionState()

session: AgentSession[SessionState] = AgentSession(
vad=inference.VAD(model="silero"),
llm=inference.LLM("openai/gpt-4.1"),
stt=inference.STT("deepgram/nova-3"),
tts=inference.TTS("cartesia/sonic-3"),
Expand Down
1 change: 0 additions & 1 deletion examples/telephony/basic_dtmf_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ async def entrypoint(ctx: JobContext) -> None:
}

session: AgentSession = AgentSession(
vad=inference.VAD(model="silero"),
llm=inference.LLM("openai/gpt-4.1-mini"),
stt=inference.STT("deepgram/nova-3"),
tts=inference.TTS("inworld/inworld-tts-1"),
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/annotated_tool_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ async def get_number(
@server.rtc_session()
async def entrypoint(ctx: JobContext):
agent = AgentSession(
vad=inference.VAD(model="silero"),
stt=inference.STT("deepgram/nova-3"),
llm=inference.LLM("google/gemini-2.5-flash"),
tts=inference.TTS("rime/arcana"),
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/async_tool_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@ async def entrypoint(ctx: JobContext):
llm=inference.LLM("openai/gpt-5.3-chat-latest"),
tts=inference.TTS("cartesia/sonic-3", voice="e07c00bc-4134-4eae-9ea4-1a55fb45746b"),
# llm=google.realtime.RealtimeModel(),
vad=inference.VAD(model="silero"),
turn_handling={"interruption": {"mode": "vad"}},
)

Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/basic_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ async def entrypoint(ctx: JobContext) -> None:
# Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
# See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
tts=inference.TTS("cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"),
vad=inference.VAD(model="silero"),
turn_handling=TurnHandlingOptions(
# VAD and turn detection are used to determine when the user is speaking and when the agent should respond
# See more at https://docs.livekit.io/agents/build/turns
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/dynamic_tool_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ async def _random_number() -> int:
)

session = AgentSession(
vad=inference.VAD(model="silero"),
stt=inference.STT("deepgram/nova-3"),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3"),
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/email_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ async def register_for_event(self, context: RunContext):
@server.rtc_session()
async def entrypoint(ctx: JobContext):
session = AgentSession(
vad=inference.VAD(model="silero"),
llm=inference.LLM("openai/gpt-4.1-mini"),
stt=inference.STT("deepgram/nova-3"),
tts=inference.TTS("cartesia/sonic-3"),
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/error_callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ async def entrypoint(ctx: JobContext):
stt=inference.STT("deepgram/nova-3"),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3"),
vad=inference.VAD(model="silero"),
)

custom_error_audio = os.path.join(pathlib.Path(__file__).parent.absolute(), "error_message.ogg")
Expand Down
2 changes: 0 additions & 2 deletions examples/voice_agents/fast-preresponse.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
AgentSession,
JobContext,
cli,
inference,
llm,
)
from livekit.agents.llm.chat_context import ChatContext, ChatMessage
Expand Down Expand Up @@ -83,7 +82,6 @@ async def entrypoint(ctx: JobContext):
session = AgentSession(
stt=deepgram.STT(),
tts=openai.TTS(),
vad=inference.VAD(model="silero"),
)
await session.start(PreResponseAgent(), room=ctx.room)

Expand Down
2 changes: 0 additions & 2 deletions examples/voice_agents/flush_llm_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
ModelSettings,
cli,
function_tool,
inference,
llm,
metrics,
)
Expand Down Expand Up @@ -111,7 +110,6 @@ async def llm_node(
@server.rtc_session()
async def entrypoint(ctx: JobContext):
session = AgentSession(
vad=inference.VAD(model="silero"),
llm="openai/gpt-4.1-mini",
stt="deepgram/nova-3:en",
tts="cartesia/sonic-3:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
Expand Down
2 changes: 0 additions & 2 deletions examples/voice_agents/grok/grok_voice_agent_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
AgentSession,
JobContext,
cli,
inference,
room_io,
)
from livekit.agents.inference import AudioTurnDetector
Expand Down Expand Up @@ -45,7 +44,6 @@ async def my_agent(ctx: JobContext):
llm=xai.realtime.RealtimeModel(voice="ara"),
turn_detection=AudioTurnDetector(),
tools=[xai.realtime.XSearch(), xai.realtime.WebSearch()],
vad=inference.VAD(model="silero"),
preemptive_generation=True,
)

Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/inactive_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
@server.rtc_session()
async def entrypoint(ctx: JobContext):
session = AgentSession(
vad=inference.VAD(model="silero"),
llm=inference.LLM("openai/gpt-4.1-mini"),
stt=inference.STT("deepgram/nova-3"),
tts=inference.TTS("cartesia/sonic-3"),
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/instructions_per_modality.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ async def entrypoint(ctx: JobContext) -> None:
stt=inference.STT("deepgram/nova-3"),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3"),
vad=inference.VAD(model="silero"),
)

await session.start(agent=SchedulingAgent(), room=ctx.room)
Expand Down
2 changes: 1 addition & 1 deletion examples/voice_agents/langfuse_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ async def flush_trace():

ctx.add_shutdown_callback(flush_trace)

session = AgentSession(vad=inference.VAD(model="silero"))
session = AgentSession()

@session.on("metrics_collected")
def _on_metrics_collected(ev: MetricsCollectedEvent):
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/langgraph_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ async def entrypoint(ctx: JobContext):
)

session = AgentSession(
vad=inference.VAD(model="silero"),
# any combination of STT, LLM, TTS, or realtime API can be used
stt=inference.STT("deepgram/nova-3", language="multi"),
tts=inference.TTS("cartesia/sonic-3"),
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/llamaindex-rag/chat_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def __init__(self, index: VectorStoreIndex):
"with users will be voice. You should use short and concise "
"responses, and avoiding usage of unpronouncable punctuation."
),
vad=inference.VAD(model="silero"),
stt=inference.STT("deepgram/nova-3"),
llm=DummyLLM(), # use a dummy LLM to enable the pipeline reply
tts=inference.TTS("cartesia/sonic-3"),
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/llamaindex-rag/query_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ async def entrypoint(ctx: JobContext):
"with users will be voice. You should use short and concise "
"responses, and avoiding usage of unpronouncable punctuation."
),
vad=inference.VAD(model="silero"),
stt=inference.STT("deepgram/nova-3"),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3"),
Expand Down
2 changes: 0 additions & 2 deletions examples/voice_agents/llamaindex-rag/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
AutoSubscribe,
JobContext,
cli,
inference,
llm,
)
from livekit.agents.voice.agent import ModelSettings
Expand Down Expand Up @@ -47,7 +46,6 @@ def __init__(self, index: VectorStoreIndex):
"with users will be voice. You should use short and concise "
"responses, and avoiding usage of unpronouncable punctuation."
),
vad=inference.VAD(model="silero"),
stt=deepgram.STT(),
llm=openai.LLM(),
tts=openai.TTS(),
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/long_running_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ async def entrypoint(ctx: JobContext):
stt=inference.STT("deepgram/nova-3"),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3"),
vad=inference.VAD(model="silero"),
)

await session.start(agent=MyAgent(), room=ctx.room)
Expand Down
1 change: 0 additions & 1 deletion examples/voice_agents/mcp/mcp-agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ async def on_enter(self):
@server.rtc_session()
async def entrypoint(ctx: JobContext):
session = AgentSession(
vad=inference.VAD(model="silero"),
stt=inference.STT("deepgram/nova-3", language="multi"),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3"),
Expand Down
2 changes: 0 additions & 2 deletions examples/voice_agents/multi_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
JobContext,
RunContext,
cli,
inference,
metrics,
)
from livekit.agents.job import get_job_context
Expand Down Expand Up @@ -135,7 +134,6 @@ async def story_finished(self, context: RunContext[StoryData]):
@server.rtc_session()
async def entrypoint(ctx: JobContext):
session = AgentSession[StoryData](
vad=inference.VAD(model="silero"),
# any combination of STT, LLM, TTS, or realtime API can be used
llm=openai.LLM(model="gpt-4.1-mini"),
stt=deepgram.STT(model="nova-3"),
Expand Down
2 changes: 0 additions & 2 deletions examples/voice_agents/nvidia_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
JobContext,
WorkerOptions,
cli,
inference,
)
from livekit.agents.inference import AudioTurnDetector
from livekit.plugins import nvidia, openai
Expand All @@ -20,7 +19,6 @@

async def entrypoint(ctx: JobContext):
session = AgentSession(
vad=inference.VAD(model="silero"),
llm=openai.LLM(model="gpt-4.1-mini"),
stt=nvidia.STT(),
tts=nvidia.TTS(),
Expand Down
2 changes: 0 additions & 2 deletions examples/voice_agents/realtime_joke_teller.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
AgentSession,
AutoSubscribe,
ToolError,
inference,
room_io,
)
from livekit.agents.llm import function_tool
Expand Down Expand Up @@ -222,7 +221,6 @@ async def _on_shutdown(_reason: str) -> None:
stt=aws.STT(),
llm=aws.LLM(),
tts=aws.TTS(),
vad=inference.VAD(model="silero"),
)
else:
print("⚡ Using REALTIME mode: Nova Sonic 2.0")
Expand Down
3 changes: 1 addition & 2 deletions examples/voice_agents/realtime_turn_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from dotenv import load_dotenv
from google.genai import types # noqa: F401

from livekit.agents import Agent, AgentServer, AgentSession, JobContext, cli, inference
from livekit.agents import Agent, AgentServer, AgentSession, JobContext, cli
from livekit.agents.inference import AudioTurnDetector
from livekit.plugins import deepgram, google, openai # noqa: F401

Expand All @@ -25,7 +25,6 @@ async def entrypoint(ctx: JobContext):
session = AgentSession(
allow_interruptions=True,
turn_detection=AudioTurnDetector(),
vad=inference.VAD(model="silero"),
stt=deepgram.STT(),
# To use OpenAI Realtime API
llm=openai.realtime.RealtimeModel(
Expand Down
2 changes: 0 additions & 2 deletions examples/voice_agents/realtime_video_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
AgentSession,
JobContext,
cli,
inference,
room_io,
voice, # noqa: F401
)
Expand All @@ -24,7 +23,6 @@
@server.rtc_session()
async def entrypoint(ctx: JobContext):
session = AgentSession(
vad=inference.VAD(model="silero"),
# both Gemini and OpenAI Realtime API support streaming video input
llm=google.realtime.RealtimeModel(),
# customize how video frames are sampled
Expand Down
Loading
Loading