Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/agents/extensions/handoff_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def _remove_tool_types_from_input(
"apply_patch_call_output",
"custom_tool_call",
"custom_tool_call_output",
"hosted_tool_call",
]

filtered_items: list[TResponseInputItem] = []
Expand Down
31 changes: 22 additions & 9 deletions src/agents/extensions/sandbox/vercel/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@
httpx.ProtocolError,
)

# Sandbox status values from which the sandbox can still transition to RUNNING,
# i.e. the only statuses for which waiting on RUNNING during resume is worthwhile.
# Only "pending" qualifies: a freshly created sandbox transitions PENDING -> RUNNING.
# Other non-RUNNING states ("stopping", "stopped", "failed", "aborted",
# "snapshotting") cannot reach RUNNING, so waiting is futile and would only
# run out the wait timeout before the sandbox is recreated.
# Membership is checked against the string form of the sandbox status.
_VERCEL_TRANSIENT_SANDBOX_STATUSES: frozenset[str] = frozenset({"pending"})


def _is_transient_create_error(exc: BaseException) -> bool:
if exception_chain_has_status_code(exc, {408, 425, 429, 500, 502, 503, 504}):
Expand Down Expand Up @@ -754,15 +760,22 @@ async def resume(self, state: SandboxSessionState) -> SandboxSession:
project_id=resolved_project_id,
team_id=resolved_team_id,
)
# XXX(scotttrinh): This will wait even if in a terminal state.
# We should make wait_for_status smarter about the possible
# transitions to avoid waiting for a status if it's impossible
# to transition to it from the current status.
await sandbox.wait_for_status(
SandboxStatus.RUNNING,
timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
)
reconnected = True
current_status = str(sandbox.status)
if current_status == str(SandboxStatus.RUNNING):
# Already running; skip the wait entirely.
reconnected = True
elif current_status in _VERCEL_TRANSIENT_SANDBOX_STATUSES:
# Still transitioning toward RUNNING (e.g. PENDING); wait normally.
await sandbox.wait_for_status(
SandboxStatus.RUNNING,
timeout=DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S,
)
reconnected = True
else:
# Cannot reach RUNNING from here (STOPPING, STOPPED, FAILED,
# ABORTED, SNAPSHOTTING). Drop the handle and recreate below.
await sandbox.client.aclose()
sandbox = None
except TimeoutError:
if sandbox is not None:
await sandbox.client.aclose()
Expand Down
7 changes: 6 additions & 1 deletion src/agents/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,12 @@ def extract_last_content(cls, message: TResponseOutputItem) -> str:
return ""
last_content = message.content[-1]
if isinstance(last_content, ResponseOutputText):
return last_content.text
# ``last_content.text`` is typed as ``str`` per the Responses API schema,
# but provider gateways (e.g. LiteLLM) and ``model_construct`` paths during
# streaming have been observed surfacing ``None``. Coerce so callers relying
# on the ``-> str`` return type don't see a ``None``. Same rationale as
# ``extract_text`` below.
return last_content.text or ""
elif isinstance(last_content, ResponseOutputRefusal):
return last_content.refusal
else:
Expand Down
62 changes: 60 additions & 2 deletions tests/extensions/sandbox/test_vercel.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,13 +793,70 @@ async def test_vercel_resume_reconnects_existing_running_sandbox(
"team_id": None,
}
]
assert resumed._inner.state.sandbox_id == "sandbox-existing"
assert _FakeAsyncSandbox.create_calls == []
# Sandbox is already RUNNING, so wait_for_status should not be called.
assert existing.wait_for_status_calls == []
assert resumed._inner._workspace_state_preserved_on_start() is True # noqa: SLF001
assert resumed._inner._system_state_preserved_on_start() is True # noqa: SLF001


@pytest.mark.asyncio
async def test_vercel_resume_waits_when_sandbox_pending(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Resuming a sandbox that is still PENDING waits for RUNNING and reuses it.

    The existing sandbox must be kept (no new sandbox is created), exactly one
    wait for the running status must be issued with the default timeout, and
    both workspace and system state must be reported as preserved.
    """
    vercel_module = _load_vercel_module(monkeypatch)

    # Register a fake sandbox that has not reached RUNNING yet.
    pending = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending")
    _FakeAsyncSandbox.sandboxes[pending.sandbox_id] = pending

    session_state = vercel_module.VercelSandboxSessionState(
        session_id="00000000-0000-0000-0000-000000000200",
        manifest=Manifest(),
        snapshot=NoopSnapshot(id="snapshot"),
        sandbox_id=pending.sandbox_id,
    )

    resumed = await vercel_module.VercelSandboxClient().resume(session_state)

    # The same sandbox is reused rather than recreated.
    assert resumed._inner.state.sandbox_id == "sandbox-existing"
    assert _FakeAsyncSandbox.create_calls == []

    # Exactly one wait for RUNNING, using the module's default timeout.
    expected_wait = ("running", vercel_module.DEFAULT_VERCEL_WAIT_FOR_RUNNING_TIMEOUT_S)
    assert pending.wait_for_status_calls == [expected_wait]

    assert resumed._inner._workspace_state_preserved_on_start() is True  # noqa: SLF001
    assert resumed._inner._system_state_preserved_on_start() is True  # noqa: SLF001


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "terminal_status",
    ["stopping", "stopped", "failed", "aborted", "snapshotting"],
)
async def test_vercel_resume_recreates_sandbox_when_cannot_reach_running(
    monkeypatch: pytest.MonkeyPatch,
    terminal_status: str,
) -> None:
    """Resume must replace a sandbox whose status can never become RUNNING.

    For each such status the old sandbox is expected to be dropped right away
    (its client closed, no wait_for_status call made, so no timeout is spent)
    and a fresh sandbox created in its place, with workspace state reported
    as not preserved.
    """
    vercel_module = _load_vercel_module(monkeypatch)

    # Register a fake sandbox stuck in the parametrized non-recoverable status.
    stale = _FakeAsyncSandbox(sandbox_id="sandbox-terminal", status=terminal_status)
    _FakeAsyncSandbox.sandboxes[stale.sandbox_id] = stale

    session_state = vercel_module.VercelSandboxSessionState(
        session_id="00000000-0000-0000-0000-000000000201",
        manifest=Manifest(),
        snapshot=NoopSnapshot(id="snapshot"),
        sandbox_id=stale.sandbox_id,
    )

    resumed = await vercel_module.VercelSandboxClient().resume(session_state)

    # The stale handle was released without ever waiting on it.
    assert stale.wait_for_status_calls == []
    assert stale.client.closed is True

    # Exactly one replacement sandbox was created and adopted.
    assert len(_FakeAsyncSandbox.create_calls) == 1
    assert resumed._inner.state.sandbox_id != "sandbox-terminal"

    # A fresh sandbox starts with no preserved workspace.
    assert resumed._inner.state.workspace_root_ready is False
    assert resumed._inner._workspace_state_preserved_on_start() is False  # noqa: SLF001


@pytest.mark.asyncio
Expand Down Expand Up @@ -837,7 +894,8 @@ async def test_vercel_resume_recreates_sandbox_after_wait_timeout(
monkeypatch: pytest.MonkeyPatch,
) -> None:
vercel_module = _load_vercel_module(monkeypatch)
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing")
# Use "pending" so that the code enters the wait path (not already RUNNING).
existing = _FakeAsyncSandbox(sandbox_id="sandbox-existing", status="pending")
existing.wait_for_status_error = TimeoutError()
_FakeAsyncSandbox.sandboxes[existing.sandbox_id] = existing

Expand Down
1 change: 1 addition & 0 deletions tests/test_extension_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,7 @@ def test_removes_hosted_tool_types_from_input_history() -> None:
"apply_patch_call_output",
"custom_tool_call",
"custom_tool_call_output",
"hosted_tool_call",
]
input_items: list[TResponseInputItem] = [_get_message_input_item("Hello")]
for t in hosted_types:
Expand Down
24 changes: 24 additions & 0 deletions tests/utils/test_pretty_print_and_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,30 @@ def test_text_message_outputs_handles_none_text_across_items():
assert ItemHelpers.text_message_outputs(items) == "world"


def _make_output_message(text: str | None) -> ResponseOutputMessage:
    """Build a completed assistant message holding one output-text part.

    Both models are created with ``model_construct`` so that ``text`` may be
    ``None`` even though the field is typed as ``str``.
    """
    text_part = ResponseOutputText.model_construct(
        type="output_text",
        text=text,
        annotations=[],
    )
    return ResponseOutputMessage.model_construct(
        id="msg_1",
        role="assistant",
        status="completed",
        content=[text_part],
    )


def test_extract_last_content_returns_empty_string_for_none_text():
    """extract_last_content is declared ``-> str`` and must never return None.

    A ``ResponseOutputText.text`` of ``None`` (observed via LiteLLM gateways and
    ``model_construct`` paths during streaming) has to come back as ``""``.
    """
    extracted = ItemHelpers.extract_last_content(_make_output_message(None))
    assert isinstance(extracted, str)
    assert extracted == ""


def test_extract_last_content_returns_text_normally():
    """A regular, non-empty text part is returned unchanged."""
    message = _make_output_message("hello")
    extracted = ItemHelpers.extract_last_content(message)
    assert extracted == "hello"


def _make_run_error_details(n_input: int = 0, n_output: int = 0) -> RunErrorDetails:
return RunErrorDetails(
input="hi",
Expand Down
Loading