diff --git a/src/agents/extensions/handoff_filters.py b/src/agents/extensions/handoff_filters.py index f24dd1658c..2986685d5a 100644 --- a/src/agents/extensions/handoff_filters.py +++ b/src/agents/extensions/handoff_filters.py @@ -104,6 +104,7 @@ def _remove_tool_types_from_input( "apply_patch_call_output", "custom_tool_call", "custom_tool_call_output", + "hosted_tool_call", ] filtered_items: list[TResponseInputItem] = [] diff --git a/src/agents/extensions/sandbox/vercel/sandbox.py b/src/agents/extensions/sandbox/vercel/sandbox.py index 92513077ab..44812c1788 100644 --- a/src/agents/extensions/sandbox/vercel/sandbox.py +++ b/src/agents/extensions/sandbox/vercel/sandbox.py @@ -79,6 +79,12 @@ httpx.ProtocolError, ) +# Sandbox status values from which the sandbox can still transition to RUNNING. +# Only "pending" qualifies: a freshly created sandbox transitions PENDING -> RUNNING. +# Other non-RUNNING states ("stopping", "stopped", "failed", "aborted", +# "snapshotting") cannot reach RUNNING, so waiting is futile. +_VERCEL_TRANSIENT_SANDBOX_STATUSES: frozenset[str] = frozenset({"pending"}) + def _is_transient_create_error(exc: BaseException) -> bool: if exception_chain_has_status_code(exc, {408, 425, 429, 500, 502, 503, 504}): @@ -754,15 +760,22 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession: project_id=resolved_project_id, team_id=resolved_team_id, ) - # XXX(scotttrinh): This will wait even if in a terminal state. - # We should make wait_for_status smarter about the possible - # transitions to avoid waiting for a status if it's impossible - # to transition to it from the current status. - await sandbox.wait_for_status( - SandboxStatus.RUNNING, - timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S, - ) - reconnected = True + current_status = str(sandbox.status) + if current_status == str(SandboxStatus.RUNNING): + # Already running; skip the wait entirely. + reconnected = True + elif current_status in _VERCEL_TRANSIENT_SANDBOX_STATUSES: + # Still transitioning toward RUNNING (e.g. PENDING); wait normally. + await sandbox.wait_for_status( + SandboxStatus.RUNNING, + timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S, + ) + reconnected = True + else: + # Cannot reach RUNNING from here (STOPPING, STOPPED, FAILED, + # ABORTED, SNAPSHOTTING). Drop the handle and recreate below. + await sandbox.client.aclose() + sandbox = None except TimeoutError: if sandbox is not None: await sandbox.client.aclose() diff --git a/src/agents/items.py b/src/agents/items.py index 50a017c221..c761cc221f 100644 --- a/src/agents/items.py +++ b/src/agents/items.py @@ -684,7 +684,12 @@ def extract_last_content(cls, message: TResponseOutputItem) -> str: return "" last_content = message.content[-1] if isinstance(last_content, ResponseOutputText): - return last_content.text + # ``last_content.text`` is typed as ``str`` per the Responses API schema, + # but provider gateways (e.g. LiteLLM) and ``model_construct`` paths during + # streaming have been observed surfacing ``None``. Coerce so callers relying + # on the ``-> str`` return type don't see a ``None``. Same rationale as + # ``extract_text`` below. + return last_content.text or "" elif isinstance(last_content, ResponseOutputRefusal): return last_content.refusal else: diff --git a/tests/extensions/sandbox/test_vercel.py b/tests/extensions/sandbox/test_vercel.py index 306acf9527..71c4130b4c 100644 --- a/tests/extensions/sandbox/test_vercel.py +++ b/tests/extensions/sandbox/test_vercel.py @@ -793,13 +793,70 @@ async def test_vercel_resume_reconnects_existing_running_sandbox( "team_id": None, } ] + assert resumed._inner.state.sandbox_id == "sandbox-existing" + assert _FakeAsyncSandbox.create_calls == [] + # Sandbox is already RUNNING, so wait_for_status should not be called. + assert existing.wait_for_status_calls == [] + assert resumed._inner._workspace_state_preserved_on_start() is True # noqa: SLF001 + assert resumed._inner._system_state_preserved_on_start() is True # noqa: SLF001 + + +@pytest.mark.asyncio +async def test_vercel_resume_waits_when_sandbox_pending( + monkeypatch: pytest.MonkeyPatch, +) -> None: + vercel_module = _load_vercel_module(monkeypatch) + existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending") + _FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing + + state = vercel_module.VercelSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000200", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id=existing.sandbox_id, + ) + + client = vercel_module.VercelSandboxClient() + resumed = await client.resume(state) + assert resumed._inner.state.sandbox_id == "sandbox-existing" assert _FakeAsyncSandbox.create_calls == [] assert existing.wait_for_status_calls == [ ("running", vercel_module.DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S) ] assert resumed._inner._workspace_state_preserved_on_start() is True # noqa: SLF001 - assert resumed._inner._system_state_preserved_on_start() is True # noqa: SLF001 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "terminal_status", ["stopping", "stopped", "failed", "aborted", "snapshotting"] +) +async def test_vercel_resume_recreates_sandbox_when_cannot_reach_running( + monkeypatch: pytest.MonkeyPatch, + terminal_status: str, +) -> None: + """A sandbox in any state that cannot transition to RUNNING must be recreated + immediately, without waiting for the wait_for_status timeout.""" + vercel_module = _load_vercel_module(monkeypatch) + existing = _FakeAsyncSandbox(sandbox_id="sandbox-terminal", status=terminal_status) + _FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing + + state = vercel_module.VercelSandboxSessionState( + session_id="00000000-0000-0000-0000-000000000201", + manifest=Manifest(), + snapshot=NoopSnapshot(id="snapshot"), + sandbox_id=existing.sandbox_id, + ) + + client = vercel_module.VercelSandboxClient() + resumed = await client.resume(state) + + assert existing.wait_for_status_calls == [] + assert existing.client.closed is True + assert len(_FakeAsyncSandbox.create_calls) == 1 + assert resumed._inner.state.sandbox_id != "sandbox-terminal" + assert resumed._inner.state.workspace_root_ready is False + assert resumed._inner._workspace_state_preserved_on_start() is False # noqa: SLF001 @pytest.mark.asyncio @@ -837,7 +894,8 @@ async def test_vercel_resume_recreates_sandbox_after_wait_timeout( monkeypatch: pytest.MonkeyPatch, ) -> None: vercel_module = _load_vercel_module(monkeypatch) - existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing") + # Use "pending" so that the code enters the wait path (not already RUNNING). + existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending") existing.wait_for_status_error = TimeoutError() _FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py index a4b95f792a..113340c1f4 100644 --- a/tests/test_extension_filters.py +++ b/tests/test_extension_filters.py @@ -1130,6 +1130,7 @@ def test_removes_hosted_tool_types_from_input_history() -> None: "apply_patch_call_output", "custom_tool_call", "custom_tool_call_output", + "hosted_tool_call", ] input_items: list[TResponseInputItem] = [_get_message_input_item("Hello")] for t in hosted_types: diff --git a/tests/utils/test_pretty_print_and_items.py b/tests/utils/test_pretty_print_and_items.py index 34939df368..ab0fd6b821 100644 --- a/tests/utils/test_pretty_print_and_items.py +++ b/tests/utils/test_pretty_print_and_items.py @@ -38,6 +38,30 @@ def test_text_message_outputs_handles_none_text_across_items(): assert ItemHelpers.text_message_outputs(items) == "world" +def _make_output_message(text: str | None) -> ResponseOutputMessage: + return ResponseOutputMessage.model_construct( + id="msg_1", + role="assistant", + status="completed", + content=[ResponseOutputText.model_construct(type="output_text", text=text, annotations=[])], + ) + + +def test_extract_last_content_returns_empty_string_for_none_text(): + """extract_last_content is declared `-> str` and must not return None even if + the underlying ResponseOutputText.text is None (observed via LiteLLM gateways + and ``model_construct`` paths during streaming, per items.py:714-720).""" + msg = _make_output_message(None) + result = ItemHelpers.extract_last_content(msg) + assert isinstance(result, str) + assert result == "" + + +def test_extract_last_content_returns_text_normally(): + msg = _make_output_message("hello") + assert ItemHelpers.extract_last_content(msg) == "hello" + + def _make_run_error_details(n_input: int = 0, n_output: int = 0) -> RunErrorDetails: return RunErrorDetails( input="hi",