diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs
index 540ae7229bc..06e47c350fa 100644
--- a/actions/setup/js/copilot_sdk_driver.test.cjs
+++ b/actions/setup/js/copilot_sdk_driver.test.cjs
@@ -119,6 +119,157 @@ describe("copilot_sdk_driver.cjs", () => {
     expect(stop).toHaveBeenCalledTimes(1);
   });
 
+  it("resolves exitCode 0 on SDK idle-timeout when output collected and all tool calls complete", async () => {
+    // Regression test: when sendAndWait throws an idle-timeout error but the agent
+    // produced output and all tool calls completed, the driver must return exitCode 0.
+    // This covers the case where the SDK drops the session.idle signal on long runs.
+    const disconnect = vi.fn().mockResolvedValue(undefined);
+    const stop = vi.fn().mockResolvedValue(undefined);
+    let onEvent = () => {};
+    const session = {
+      sessionId: "session-idle-timeout-success",
+      on: handler => {
+        onEvent = handler;
+      },
+      sendAndWait: vi.fn().mockImplementation(async () => {
+        // Simulate tool execution events before the idle-timeout
+        onEvent({
+          type: "tool.execution_start",
+          ephemeral: false,
+          timestamp: new Date().toISOString(),
+          data: { toolName: "bash", mcpServerName: "terminal", toolCallId: "call-1" },
+        });
+        onEvent({
+          type: "assistant.message",
+          ephemeral: false,
+          timestamp: new Date().toISOString(),
+          data: { content: "I found the answer" },
+        });
+        onEvent({
+          type: "tool.execution_complete",
+          ephemeral: false,
+          timestamp: new Date().toISOString(),
+          data: { toolCallId: "call-1", success: true },
+        });
+        throw new Error("Timeout after 870000ms waiting for session.idle");
+      }),
+      disconnect,
+    };
+    class FakeCopilotClient {
+      start = vi.fn().mockResolvedValue(undefined);
+      createSession = vi.fn().mockResolvedValue(session);
+      stop = stop;
+    }
+
+    const result = await runWithCopilotSDK({
+      sdkUri: "http://127.0.0.1:3002",
+      prompt: "test prompt",
+      logger: () => {},
+      sdkModule: {
+        CopilotClient: FakeCopilotClient,
+        RuntimeConnection: { forUri: vi.fn(() => ({})) },
+        approveAll: () => "allow",
+      },
+    });
+
+    expect(result.exitCode).toBe(0);
+    expect(result.hasOutput).toBe(true);
+    expect(result.output).toContain("I found the answer");
+    expect(disconnect).toHaveBeenCalledTimes(1);
+    expect(stop).toHaveBeenCalledTimes(1);
+  });
+
+  it("returns exitCode 1 on SDK idle-timeout when tool calls are still pending", async () => {
+    // When the idle-timeout fires with in-flight (unmatched) tool calls, the agent did
+    // not finish cleanly — the driver must NOT treat it as success.
+    const disconnect = vi.fn().mockResolvedValue(undefined);
+    const stop = vi.fn().mockResolvedValue(undefined);
+    let onEvent = () => {};
+    const session = {
+      sessionId: "session-idle-timeout-pending-tools",
+      on: handler => {
+        onEvent = handler;
+      },
+      sendAndWait: vi.fn().mockImplementation(async () => {
+        onEvent({
+          type: "tool.execution_start",
+          ephemeral: false,
+          timestamp: new Date().toISOString(),
+          data: { toolName: "bash", mcpServerName: "terminal", toolCallId: "call-pending" },
+        });
+        onEvent({
+          type: "assistant.message",
+          ephemeral: false,
+          timestamp: new Date().toISOString(),
+          data: { content: "working on it" },
+        });
+        // tool.execution_complete is never emitted — tool call remains pending
+        throw new Error("Timeout after 870000ms waiting for session.idle");
+      }),
+      disconnect,
+    };
+    class FakeCopilotClient {
+      start = vi.fn().mockResolvedValue(undefined);
+      createSession = vi.fn().mockResolvedValue(session);
+      stop = stop;
+    }
+
+    const result = await runWithCopilotSDK({
+      sdkUri: "http://127.0.0.1:3002",
+      prompt: "test prompt",
+      logger: () => {},
+      sdkModule: {
+        CopilotClient: FakeCopilotClient,
+        RuntimeConnection: { forUri: vi.fn(() => ({})) },
+        approveAll: () => "allow",
+      },
+    });
+
+    expect(result.exitCode).toBe(1);
+    expect(result.hasOutput).toBe(true);
+    expect(result.output).toContain("working on it");
+    // Teardown must still run on the failure path.
+    expect(disconnect).toHaveBeenCalledTimes(1);
+    expect(stop).toHaveBeenCalledTimes(1);
+  });
+
+  it("returns exitCode 1 on SDK idle-timeout with no output collected", async () => {
+    // When the idle-timeout fires before the agent produces any output, the driver
+    // must return exitCode 1 — there is nothing useful to surface.
+    const disconnect = vi.fn().mockResolvedValue(undefined);
+    const stop = vi.fn().mockResolvedValue(undefined);
+    const session = {
+      sessionId: "session-idle-timeout-no-output",
+      on: () => {},
+      sendAndWait: vi.fn().mockRejectedValue(new Error("Timeout after 870000ms waiting for session.idle")),
+      disconnect,
+    };
+    class FakeCopilotClient {
+      start = vi.fn().mockResolvedValue(undefined);
+      createSession = vi.fn().mockResolvedValue(session);
+      stop = stop;
+    }
+
+    const result = await runWithCopilotSDK({
+      sdkUri: "http://127.0.0.1:3002",
+      prompt: "test prompt",
+      logger: () => {},
+      sdkModule: {
+        CopilotClient: FakeCopilotClient,
+        RuntimeConnection: { forUri: vi.fn(() => ({})) },
+        approveAll: () => "allow",
+      },
+    });
+
+    expect(result.exitCode).toBe(1);
+    expect(result.hasOutput).toBe(false);
+    // With nothing collected, the error message is surfaced as the output.
+    expect(result.output).toContain("waiting for session.idle");
+    // Teardown must still run on the failure path.
+    expect(disconnect).toHaveBeenCalledTimes(1);
+    expect(stop).toHaveBeenCalledTimes(1);
+  });
+
   it("passes custom provider and model through to SDK createSession", async () => {
     const disconnect = vi.fn().mockResolvedValue(undefined);
     const stop = vi.fn().mockResolvedValue(undefined);
diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs
index b709fb83987..a2991741161 100644
--- a/actions/setup/js/copilot_sdk_session.cjs
+++ b/actions/setup/js/copilot_sdk_session.cjs
@@ -36,6 +36,12 @@ const { buildCopilotSDKPermissionHandler, getEnvPositiveIntOrDefault, parseMaxTo
 // Override via the COPILOT_SDK_SEND_TIMEOUT_MS environment variable.
 const SDK_SEND_TIMEOUT_MS_DEFAULT = 10 * 60 * 1000;
 
+// Pattern matching the SDK idle-timeout error emitted when sendAndWait reaches its
+// deadline waiting for the session.idle event. This matches the message format
+// "Timeout after NNNms waiting for session.idle" (NNN = one or more decimal digits).
+// Keep in sync with SDK_SESSION_IDLE_TIMEOUT_PATTERN in copilot_harness.cjs.
+const SDK_IDLE_TIMEOUT_PATTERN = /Timeout after \d+ms waiting for session\.idle/;
+
 /**
  * Extract the prompt text from a resolved args array.
  * Looks for the first occurrence of "-p " or "--prompt ".
@@ -141,6 +147,13 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
   let toolDenialCount = 0;
   let catastrophicToolDenialsError = null;
   let catastrophicToolDenialsTriggered = false;
+  /**
+   * Map from toolCallId → {toolName, mcpServerName} for enriching tool.execution_complete
+   * events and for tracking in-flight tool calls when the idle-timeout fires.
+   * Declared at function scope so the catch block can check pendingToolCalls.size.
+   * @type {Map}
+   */
+  const pendingToolCalls = new Map();
 
   /**
    * Best-effort write of a driver-level event to events.jsonl and stderr.
@@ -214,13 +227,6 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
     const stream = eventsStream;
     log(`serialising SDK events to ${eventsPath}`);
 
-    /**
-     * Map from toolCallId → {toolName, mcpServerName} so that tool.execution_complete
-     * events (which carry no mcpServerName) can be enriched from the matching start event.
-     * @type {Map}
-     */
-    const pendingToolCalls = new Map();
-
     /**
      * Write one JSONL entry to the events file and stderr.
      * Uses the event's own ISO-8601 timestamp when available.
@@ -316,10 +322,24 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
     const durationMs = Date.now() - startTime;
     const failure = catastrophicToolDenialsError ?? (err instanceof Error ? err : new Error(String(err)));
     log(`error: ${failure.message}`);
+
+    // When sendAndWait times out waiting for session.idle but the agent produced
+    // output and all tracked tool calls have already completed, the session work is
+    // done — the SDK simply failed to emit the idle signal. Treat it as a successful
+    // run so the harness does not classify it as a failure or waste retry attempts.
+    const isIdleTimeout = !catastrophicToolDenialsError && SDK_IDLE_TIMEOUT_PATTERN.test(failure.message);
+    if (isIdleTimeout && hasOutput && pendingToolCalls.size === 0) {
+      log(`warning: SDK idle-timeout with collected output and no pending tool calls — treating as completed`);
+      log(`session completed: hasOutput=${hasOutput} durationMs=${durationMs}`);
+      return { exitCode: 0, output, hasOutput, durationMs };
+    }
+
+    // Preserve any output collected before the error so the harness can use it for
+    // retry decisions and diagnostics; with no output, surface the error message instead.
     return {
       exitCode: 1,
-      output: failure.message,
-      hasOutput: false,
+      output: hasOutput ? output : failure.message,
+      hasOutput,
       durationMs,
     };
   } finally {
@@ -345,4 +365,4 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
   }
 }
 
-module.exports = { SDK_SEND_TIMEOUT_MS_DEFAULT, extractPromptFromArgs, runWithCopilotSDK };
+module.exports = { SDK_SEND_TIMEOUT_MS_DEFAULT, SDK_IDLE_TIMEOUT_PATTERN, extractPromptFromArgs, runWithCopilotSDK };