livekit · he-yufeng · May 23, 2026 · devin-ai-integration · May 23, 2026
diff --git a/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py b/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py
@@ -15,7 +15,7 @@
 from __future__ import annotations
 
 import os
-from collections.abc import Awaitable
+from collections.abc import Awaitable, Callable
 from dataclasses import dataclass
 from typing import Any, Literal, cast
 
@@ -244,27 +244,31 @@ def chat(
                     content[-1]["cache_control"] = CACHE_CONTROL_EPHEMERAL  # type: ignore
                     break
 
-        if beta_flag:
-            stream = self._client.beta.messages.create(
-                betas=[beta_flag],
-                messages=messages,  # type: ignore[arg-type]
-                model=self._opts.model,
-                stream=True,
-                timeout=conn_options.timeout,
-                **extra,
-            )
-        else:
-            stream = self._client.messages.create(
-                messages=messages,
-                model=self._opts.model,
-                stream=True,
-                timeout=conn_options.timeout,
-                **extra,
-            )
+        async def create_anthropic_stream() -> anthropic.AsyncStream[
+            anthropic.types.RawMessageStreamEvent
+        ]:  # factory awaited by LLMStream._run; every call sends a new streaming request
+            if beta_flag:  # a beta flag is set: go through the beta messages endpoint
+                stream = await self._client.beta.messages.create(
+                    betas=[beta_flag],
+                    messages=messages,  # type: ignore[arg-type]
+                    model=self._opts.model,
+                    stream=True,
+                    timeout=conn_options.timeout,
+                    **extra,
+                )
+            else:  # no beta flag: use the regular messages endpoint
+                stream = await self._client.messages.create(
+                    messages=messages,
+                    model=self._opts.model,
+                    stream=True,
+                    timeout=conn_options.timeout,
+                    **extra,
+                )
+            return cast(anthropic.AsyncStream[anthropic.types.RawMessageStreamEvent], stream)
 
         return LLMStream(
             self,
-            anthropic_stream=stream,  # type: ignore[arg-type]
+            create_anthropic_stream=create_anthropic_stream,
             chat_ctx=chat_ctx,
             tools=tools or [],
             conn_options=conn_options,
@@ -276,16 +280,15 @@ def __init__(
         self,
         llm: LLM,
         *,
-        anthropic_stream: Awaitable[anthropic.AsyncStream[anthropic.types.RawMessageStreamEvent]],
+        create_anthropic_stream: Callable[
+            [], Awaitable[anthropic.AsyncStream[anthropic.types.RawMessageStreamEvent]]
+        ],
         chat_ctx: llm.ChatContext,
         tools: list[Tool],
         conn_options: APIConnectOptions,
     ) -> None:
         super().__init__(llm, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
-        self._awaitable_anthropic_stream = anthropic_stream
-        self._anthropic_stream: (
-            anthropic.AsyncStream[anthropic.types.RawMessageStreamEvent] | None
-        ) = None
+        self._create_anthropic_stream = create_anthropic_stream
 
         # current function call that we're waiting for full completion (args are streamed)
         self._tool_call_id: str | None = None
@@ -302,10 +305,7 @@ def __init__(
     async def _run(self) -> None:
         retryable = True
         try:
-            if not self._anthropic_stream:
-                self._anthropic_stream = await self._awaitable_anthropic_stream
-
-            async with self._anthropic_stream as stream:
+            async with await self._create_anthropic_stream() as stream:
-    async def _run(self) -> None:
-        retryable = True
-        try:
-            if not self._anthropic_stream:
-                self._anthropic_stream = await self._awaitable_anthropic_stream
-
-            async with self._anthropic_stream as stream:
-            async with await self._create_anthropic_stream() as stream:
+    async def _run(self) -> None:
+        # Reset per-attempt state so retries start clean
+        self._tool_call_id = None
+        self._fnc_name = None
+        self._fnc_raw_arguments = None
+        self._request_id = ""
+        self._ignoring_cot = False
+        self._input_tokens = 0
+        self._cache_creation_tokens = 0
+        self._cache_read_tokens = 0
+        self._output_tokens = 0
+        retryable = True
+        try:
+            async with await self._create_anthropic_stream() as stream:
-    async def _run(self) -> None:
-        retryable = True
-        try:
-            if not self._anthropic_stream:
-                self._anthropic_stream = await self._awaitable_anthropic_stream
-
-            async with self._anthropic_stream as stream:
-            async with await self._create_anthropic_stream() as stream:
+    async def _run(self) -> None:
+        # Reset per-attempt state so retries start clean
+        self._tool_call_id = None
+        self._fnc_name = None
+        self._fnc_raw_arguments = None
+        self._request_id = ""
+        self._ignoring_cot = False
+        self._input_tokens = 0
+        self._cache_creation_tokens = 0
+        self._cache_read_tokens = 0
+        self._output_tokens = 0
+        retryable = True
+        try:
+            async with await self._create_anthropic_stream() as stream:
                 async for event in stream:
                     chat_chunk = self._parse_event(event)
                     if chat_chunk is not None:

diff --git a/tests/test_plugin_anthropic.py b/tests/test_plugin_anthropic.py
@@ -3,6 +3,10 @@
 from __future__ import annotations
 
 import httpx
+import pytest
+
+from livekit.agents import APIConnectOptions, llm
+from livekit.plugins.anthropic.llm import LLMStream
 
 
 def _make_llm(**kwargs):
@@ -57,3 +61,47 @@ def test_explicit_client_bypasses_timeout_param(self) -> None:
         # timeout= argument should have no effect here
         llm = _make_llm(client=tight_client, timeout=httpx.Timeout(5.0, read=999.0))
         assert llm._client._client.timeout.read == 1.0
+
+
+class _EmptyAnthropicStream:  # test stub: async context manager + async iterator with no events
+    async def __aenter__(self):  # `async with ... as stream` binds the stub itself
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb):
+        return None  # falsy result: an exception raised inside the block is not suppressed
+
+    def __aiter__(self):  # the stub acts as its own iterator
+        return self
+
+    async def __anext__(self):
+        raise StopAsyncIteration  # ends `async for` before any item is produced
+
+
+class TestAnthropicStreamRetry:  # drives LLMStream with an injected stream factory
+    @pytest.mark.asyncio
+    async def test_retry_creates_a_fresh_stream_awaitable(self) -> None:
+        calls = 0  # how many times the factory below has been invoked
+
+        async def failing_stream():  # awaiting this coroutine raises
+            raise RuntimeError("transient connect failure")
+
+        async def empty_stream():  # awaiting this coroutine yields the no-event stub
+            return _EmptyAnthropicStream()
+
+        def create_stream():  # factory: first call hands out the failing coroutine, later calls the empty one
+            nonlocal calls
+            calls += 1
+            return failing_stream() if calls == 1 else empty_stream()
+
+        stream = LLMStream(
+            _make_llm(),
+            create_anthropic_stream=create_stream,
+            chat_ctx=llm.ChatContext.empty(),
+            tools=[],
+            conn_options=APIConnectOptions(max_retry=1, retry_interval=0),  # presumably one retry, no delay; confirm
+        )
+
+        response = await stream.collect()
+
+        assert calls == 2  # factory ran once for the failing attempt and once more afterwards
+        assert response.usage is not None