livekit · chenghao-mou · May 22, 2026
diff --git a/examples/avatar_agents/audio_wave/agent_worker.py b/examples/avatar_agents/audio_wave/agent_worker.py
@@ -74,7 +74,6 @@ async def entrypoint(ctx: JobContext):
         stt=inference.STT("deepgram/nova-3"),
         llm=inference.LLM("google/gemini-2.5-flash"),
         tts=inference.TTS("cartesia/sonic-3"),
-        vad=inference.VAD(model="silero"),
         resume_false_interruption=False,
     )
 

diff --git a/examples/avatar_agents/keyframe/agent_worker.py b/examples/avatar_agents/keyframe/agent_worker.py
@@ -51,7 +51,6 @@ async def entrypoint(ctx: JobContext):
         llm=inference.LLM("google/gemini-2.5-flash"),
         tts=inference.TTS("cartesia/sonic-3"),
         resume_false_interruption=False,
-        vad=inference.VAD(model="silero"),
         turn_detection=AudioTurnDetector(),
     )
 

diff --git a/examples/drive-thru/agent.py b/examples/drive-thru/agent.py
@@ -487,7 +487,6 @@ async def drive_thru_agent(ctx: JobContext) -> None:
         llm=inference.LLM("openai/gpt-5-mini"),
         tts=inference.TTS("cartesia/sonic-3", voice="f786b574-daa5-4673-aa0c-cbe3e8534c02"),
         turn_detection=AudioTurnDetector(),
-        vad=inference.VAD(model="silero"),
         max_tool_steps=10,
     )
 

diff --git a/examples/frontdesk/agent.py b/examples/frontdesk/agent.py
@@ -266,7 +266,6 @@ async def frontdesk_agent(ctx: JobContext):
         llm=inference.LLM("google/gemini-2.5-flash"),
         tts=inference.TTS("cartesia/sonic-3", voice="39b376fc-488e-4d0c-8b37-e00b72059fdd"),
         turn_detection=AudioTurnDetector(),
-        vad=inference.VAD(model="silero"),
         max_tool_steps=1,
     )
 

diff --git a/examples/healthcare/agent.py b/examples/healthcare/agent.py
@@ -753,7 +753,6 @@ async def entrypoint(ctx: JobContext):
         stt=inference.STT("deepgram/nova-3", language="multi"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         tts=inference.TTS("inworld/inworld-tts-1"),
-        vad=inference.VAD(model="silero"),
         preemptive_generation=True,
     )
 

diff --git a/examples/inference/agent.py b/examples/inference/agent.py
@@ -58,7 +58,6 @@ async def entrypoint(ctx: JobContext) -> None:
         stt=inference.STT(model=DEFAULT_STT),
         llm=inference.LLM(model=DEFAULT_LLM),
         tts=inference.TTS(model=DEFAULT_TTS),
-        vad=inference.VAD(model="silero"),
     )
 
     def parse_value(payload: str, fallback: str) -> str:

diff --git a/examples/other/elevenlab_scribe_v2.py b/examples/other/elevenlab_scribe_v2.py
@@ -30,7 +30,6 @@ async def entrypoint(ctx: JobContext) -> None:
 
     session: AgentSession = AgentSession(
         allow_interruptions=True,
-        vad=inference.VAD(model="silero"),
         stt=stt,
         llm=inference.LLM("openai/gpt-4.1-mini"),
         tts=inference.TTS("cartesia/sonic-3"),

diff --git a/examples/other/kokoro_tts.py b/examples/other/kokoro_tts.py
@@ -8,7 +8,6 @@
     AgentSession,
     JobContext,
     cli,
-    inference,
     metrics,
 )
 from livekit.agents.voice import MetricsCollectedEvent
@@ -43,7 +42,6 @@ async def entrypoint(ctx: JobContext):
         "user_id": "your user_id",
     }
     session = AgentSession(
-        vad=inference.VAD(model="silero"),
         # any combination of STT, LLM, TTS, or realtime API can be used
         llm=openai.LLM(model="gpt-4.1-mini"),
         stt=deepgram.STT(model="nova-3", language="multi"),

diff --git a/examples/other/transcription/multi-user-transcriber.py b/examples/other/transcription/multi-user-transcriber.py
@@ -89,9 +89,7 @@ async def _start_session(self, participant: rtc.RemoteParticipant) -> AgentSessi
         if participant.identity in self._sessions:
             return self._sessions[participant.identity]
 
-        session = AgentSession(
-            vad=inference.VAD(model="silero"),
-        )
+        session = AgentSession()
         await session.start(
             agent=Transcriber(
                 participant_identity=participant.identity,

diff --git a/examples/other/transcription/translator.py b/examples/other/transcription/translator.py
@@ -12,7 +12,6 @@
     MetricsCollectedEvent,
     StopResponse,
     cli,
-    inference,
     llm,
     metrics,
     room_io,
@@ -77,7 +76,6 @@ async def entrypoint(ctx: JobContext):
 
     session = AgentSession(
         # vad is only needed for non-streaming STT implementations
-        vad=inference.VAD(model="silero"),
     )
 
     @session.on("metrics_collected")

diff --git a/examples/survey/agent.py b/examples/survey/agent.py
@@ -353,7 +353,6 @@ async def entrypoint(ctx: JobContext):
         llm=inference.LLM("google/gemini-2.5-flash"),
         stt=inference.STT("deepgram/nova-3", language="multi"),
         tts=inference.TTS("inworld/inworld-tts-1"),
-        vad=inference.VAD(model="silero"),
         turn_detection=AudioTurnDetector(),
         preemptive_generation=True,
     )

diff --git a/examples/telephony/amd.py b/examples/telephony/amd.py
@@ -46,7 +46,6 @@ async def entrypoint(ctx: JobContext):
         llm=inference.LLM("openai/gpt-4.1-mini"),
         tts=inference.TTS("cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"),
         turn_detection=AudioTurnDetector(),
-        vad=inference.VAD(model="silero"),
         preemptive_generation=True,
     )
 

diff --git a/examples/telephony/bank-ivr/ivr_navigator_agent.py b/examples/telephony/bank-ivr/ivr_navigator_agent.py
@@ -82,7 +82,6 @@ async def dtmf_session(ctx: JobContext) -> None:
     }
 
     session: AgentSession = AgentSession(
-        vad=inference.VAD(model="silero"),
         llm=inference.LLM("openai/gpt-4.1"),
         stt=inference.STT("deepgram/nova-3"),
         tts=inference.TTS("rime/arcana"),

diff --git a/examples/telephony/bank-ivr/ivr_system_agent.py b/examples/telephony/bank-ivr/ivr_system_agent.py
@@ -630,7 +630,6 @@ async def bank_ivr_session(ctx: JobContext) -> None:
     state = SessionState()
 
     session: AgentSession[SessionState] = AgentSession(
-        vad=inference.VAD(model="silero"),
         llm=inference.LLM("openai/gpt-4.1"),
         stt=inference.STT("deepgram/nova-3"),
         tts=inference.TTS("cartesia/sonic-3"),

diff --git a/examples/telephony/basic_dtmf_agent.py b/examples/telephony/basic_dtmf_agent.py
@@ -135,7 +135,6 @@ async def entrypoint(ctx: JobContext) -> None:
     }
 
     session: AgentSession = AgentSession(
-        vad=inference.VAD(model="silero"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         stt=inference.STT("deepgram/nova-3"),
         tts=inference.TTS("inworld/inworld-tts-1"),

diff --git a/examples/voice_agents/annotated_tool_args.py b/examples/voice_agents/annotated_tool_args.py
@@ -91,7 +91,6 @@ async def get_number(
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
     agent = AgentSession(
-        vad=inference.VAD(model="silero"),
         stt=inference.STT("deepgram/nova-3"),
         llm=inference.LLM("google/gemini-2.5-flash"),
         tts=inference.TTS("rime/arcana"),

diff --git a/examples/voice_agents/async_tool_agent.py b/examples/voice_agents/async_tool_agent.py
@@ -193,7 +193,6 @@ async def entrypoint(ctx: JobContext):
         llm=inference.LLM("openai/gpt-5.3-chat-latest"),
         tts=inference.TTS("cartesia/sonic-3", voice="e07c00bc-4134-4eae-9ea4-1a55fb45746b"),
         # llm=google.realtime.RealtimeModel(),
-        vad=inference.VAD(model="silero"),
         turn_handling={"interruption": {"mode": "vad"}},
     )
 

diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py
@@ -85,7 +85,6 @@ async def entrypoint(ctx: JobContext) -> None:
         # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
         # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
         tts=inference.TTS("cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"),
-        vad=inference.VAD(model="silero"),
         turn_handling=TurnHandlingOptions(
             # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
             # See more at https://docs.livekit.io/agents/build/turns

diff --git a/examples/voice_agents/dynamic_tool_creation.py b/examples/voice_agents/dynamic_tool_creation.py
@@ -114,7 +114,6 @@ async def _random_number() -> int:
     )
 
     session = AgentSession(
-        vad=inference.VAD(model="silero"),
         stt=inference.STT("deepgram/nova-3"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         tts=inference.TTS("cartesia/sonic-3"),

diff --git a/examples/voice_agents/email_example.py b/examples/voice_agents/email_example.py
@@ -60,7 +60,6 @@ async def register_for_event(self, context: RunContext):
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
     session = AgentSession(
-        vad=inference.VAD(model="silero"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         stt=inference.STT("deepgram/nova-3"),
         tts=inference.TTS("cartesia/sonic-3"),

diff --git a/examples/voice_agents/error_callback.py b/examples/voice_agents/error_callback.py
@@ -29,7 +29,6 @@ async def entrypoint(ctx: JobContext):
         stt=inference.STT("deepgram/nova-3"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         tts=inference.TTS("cartesia/sonic-3"),
-        vad=inference.VAD(model="silero"),
     )
 
     custom_error_audio = os.path.join(pathlib.Path(__file__).parent.absolute(), "error_message.ogg")

diff --git a/examples/voice_agents/fast-preresponse.py b/examples/voice_agents/fast-preresponse.py
@@ -10,7 +10,6 @@
     AgentSession,
     JobContext,
     cli,
-    inference,
     llm,
 )
 from livekit.agents.llm.chat_context import ChatContext, ChatMessage
@@ -83,7 +82,6 @@ async def entrypoint(ctx: JobContext):
     session = AgentSession(
         stt=deepgram.STT(),
         tts=openai.TTS(),
-        vad=inference.VAD(model="silero"),
     )
     await session.start(PreResponseAgent(), room=ctx.room)
 

diff --git a/examples/voice_agents/flush_llm_node.py b/examples/voice_agents/flush_llm_node.py
@@ -15,7 +15,6 @@
     ModelSettings,
     cli,
     function_tool,
-    inference,
     llm,
     metrics,
 )
@@ -111,7 +110,6 @@ async def llm_node(
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
     session = AgentSession(
-        vad=inference.VAD(model="silero"),
         llm="openai/gpt-4.1-mini",
         stt="deepgram/nova-3:en",
         tts="cartesia/sonic-3:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",

diff --git a/examples/voice_agents/grok/grok_voice_agent_api.py b/examples/voice_agents/grok/grok_voice_agent_api.py
@@ -8,7 +8,6 @@
     AgentSession,
     JobContext,
     cli,
-    inference,
     room_io,
 )
 from livekit.agents.inference import AudioTurnDetector
@@ -45,7 +44,6 @@ async def my_agent(ctx: JobContext):
         llm=xai.realtime.RealtimeModel(voice="ara"),
         turn_detection=AudioTurnDetector(),
         tools=[xai.realtime.XSearch(), xai.realtime.WebSearch()],
-        vad=inference.VAD(model="silero"),
         preemptive_generation=True,
     )
 

diff --git a/examples/voice_agents/inactive_user.py b/examples/voice_agents/inactive_user.py
@@ -23,7 +23,6 @@
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
     session = AgentSession(
-        vad=inference.VAD(model="silero"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         stt=inference.STT("deepgram/nova-3"),
         tts=inference.TTS("cartesia/sonic-3"),

diff --git a/examples/voice_agents/instructions_per_modality.py b/examples/voice_agents/instructions_per_modality.py
@@ -86,7 +86,6 @@ async def entrypoint(ctx: JobContext) -> None:
         stt=inference.STT("deepgram/nova-3"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         tts=inference.TTS("cartesia/sonic-3"),
-        vad=inference.VAD(model="silero"),
     )
 
     await session.start(agent=SchedulingAgent(), room=ctx.room)

diff --git a/examples/voice_agents/langfuse_trace.py b/examples/voice_agents/langfuse_trace.py
@@ -149,7 +149,7 @@ async def flush_trace():
 
     ctx.add_shutdown_callback(flush_trace)
 
-    session = AgentSession(vad=inference.VAD(model="silero"))
+    session = AgentSession()
 
     @session.on("metrics_collected")
     def _on_metrics_collected(ev: MetricsCollectedEvent):

diff --git a/examples/voice_agents/langgraph_agent.py b/examples/voice_agents/langgraph_agent.py
@@ -63,7 +63,6 @@ async def entrypoint(ctx: JobContext):
     )
 
     session = AgentSession(
-        vad=inference.VAD(model="silero"),
         # any combination of STT, LLM, TTS, or realtime API can be used
         stt=inference.STT("deepgram/nova-3", language="multi"),
         tts=inference.TTS("cartesia/sonic-3"),

diff --git a/examples/voice_agents/llamaindex-rag/chat_engine.py b/examples/voice_agents/llamaindex-rag/chat_engine.py
@@ -53,7 +53,6 @@ def __init__(self, index: VectorStoreIndex):
                 "with users will be voice. You should use short and concise "
                 "responses, and avoiding usage of unpronouncable punctuation."
             ),
-            vad=inference.VAD(model="silero"),
             stt=inference.STT("deepgram/nova-3"),
             llm=DummyLLM(),  # use a dummy LLM to enable the pipeline reply
             tts=inference.TTS("cartesia/sonic-3"),

diff --git a/examples/voice_agents/llamaindex-rag/query_engine.py b/examples/voice_agents/llamaindex-rag/query_engine.py
@@ -58,7 +58,6 @@ async def entrypoint(ctx: JobContext):
             "with users will be voice. You should use short and concise "
             "responses, and avoiding usage of unpronouncable punctuation."
         ),
-        vad=inference.VAD(model="silero"),
         stt=inference.STT("deepgram/nova-3"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         tts=inference.TTS("cartesia/sonic-3"),

diff --git a/examples/voice_agents/llamaindex-rag/retrieval.py b/examples/voice_agents/llamaindex-rag/retrieval.py
@@ -16,7 +16,6 @@
     AutoSubscribe,
     JobContext,
     cli,
-    inference,
     llm,
 )
 from livekit.agents.voice.agent import ModelSettings
@@ -47,7 +46,6 @@ def __init__(self, index: VectorStoreIndex):
                 "with users will be voice. You should use short and concise "
                 "responses, and avoiding usage of unpronouncable punctuation."
             ),
-            vad=inference.VAD(model="silero"),
             stt=deepgram.STT(),
             llm=openai.LLM(),
             tts=openai.TTS(),

diff --git a/examples/voice_agents/long_running_function.py b/examples/voice_agents/long_running_function.py
@@ -68,7 +68,6 @@ async def entrypoint(ctx: JobContext):
         stt=inference.STT("deepgram/nova-3"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         tts=inference.TTS("cartesia/sonic-3"),
-        vad=inference.VAD(model="silero"),
     )
 
     await session.start(agent=MyAgent(), room=ctx.room)

diff --git a/examples/voice_agents/mcp/mcp-agent.py b/examples/voice_agents/mcp/mcp-agent.py
@@ -31,7 +31,6 @@ async def on_enter(self):
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
     session = AgentSession(
-        vad=inference.VAD(model="silero"),
         stt=inference.STT("deepgram/nova-3", language="multi"),
         llm=inference.LLM("openai/gpt-4.1-mini"),
         tts=inference.TTS("cartesia/sonic-3"),

diff --git a/examples/voice_agents/multi_agent.py b/examples/voice_agents/multi_agent.py
@@ -12,7 +12,6 @@
     JobContext,
     RunContext,
     cli,
-    inference,
     metrics,
 )
 from livekit.agents.job import get_job_context
@@ -135,7 +134,6 @@ async def story_finished(self, context: RunContext[StoryData]):
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
     session = AgentSession[StoryData](
-        vad=inference.VAD(model="silero"),
         # any combination of STT, LLM, TTS, or realtime API can be used
         llm=openai.LLM(model="gpt-4.1-mini"),
         stt=deepgram.STT(model="nova-3"),

diff --git a/examples/voice_agents/nvidia_test.py b/examples/voice_agents/nvidia_test.py
@@ -8,7 +8,6 @@
     JobContext,
     WorkerOptions,
     cli,
-    inference,
 )
 from livekit.agents.inference import AudioTurnDetector
 from livekit.plugins import nvidia, openai
@@ -20,7 +19,6 @@
 
 async def entrypoint(ctx: JobContext):
     session = AgentSession(
-        vad=inference.VAD(model="silero"),
         llm=openai.LLM(model="gpt-4.1-mini"),
         stt=nvidia.STT(),
         tts=nvidia.TTS(),

diff --git a/examples/voice_agents/realtime_joke_teller.py b/examples/voice_agents/realtime_joke_teller.py
@@ -53,7 +53,6 @@
     AgentSession,
     AutoSubscribe,
     ToolError,
-    inference,
     room_io,
 )
 from livekit.agents.llm import function_tool
@@ -222,7 +221,6 @@ async def _on_shutdown(_reason: str) -> None:
                     stt=aws.STT(),
                     llm=aws.LLM(),
                     tts=aws.TTS(),
-                    vad=inference.VAD(model="silero"),
                 )
             else:
                 print("⚡ Using REALTIME mode: Nova Sonic 2.0")

diff --git a/examples/voice_agents/realtime_turn_detector.py b/examples/voice_agents/realtime_turn_detector.py
@@ -3,7 +3,7 @@
 from dotenv import load_dotenv
 from google.genai import types  # noqa: F401
 
-from livekit.agents import Agent, AgentServer, AgentSession, JobContext, cli, inference
+from livekit.agents import Agent, AgentServer, AgentSession, JobContext, cli
 from livekit.agents.inference import AudioTurnDetector
 from livekit.plugins import deepgram, google, openai  # noqa: F401
 
@@ -25,7 +25,6 @@ async def entrypoint(ctx: JobContext):
     session = AgentSession(
         allow_interruptions=True,
         turn_detection=AudioTurnDetector(),
-        vad=inference.VAD(model="silero"),
         stt=deepgram.STT(),
         # To use OpenAI Realtime API
         llm=openai.realtime.RealtimeModel(

diff --git a/examples/voice_agents/realtime_video_agent.py b/examples/voice_agents/realtime_video_agent.py
@@ -8,7 +8,6 @@
     AgentSession,
     JobContext,
     cli,
-    inference,
     room_io,
     voice,  # noqa: F401
 )
@@ -24,7 +23,6 @@
 @server.rtc_session()
 async def entrypoint(ctx: JobContext):
     session = AgentSession(
-        vad=inference.VAD(model="silero"),
         # both Gemini and OpenAI Realtime API support streaming video input
         llm=google.realtime.RealtimeModel(),
         # customize how video frames are sampled