From 267c5d5cee5912a3d2db2bf106ff580b8b46fb21 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 20 Jun 2026 03:00:49 +0000 Subject: [PATCH 1/3] Initial plan From dae20f1a531a44169a6cde8921a82c5d30889593 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 20 Jun 2026 03:15:15 +0000 Subject: [PATCH 2/3] fix(sdk-driver): treat idle-timeout as success when output collected and no pending tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When session.sendAndWait times out waiting for session.idle but the agent produced output and all tracked tool calls have already completed, the SDK simply failed to emit the idle signal — the agent's work is done. Resolve with exitCode 0 instead of 1 so the harness does not classify the run as a failure or waste retry budget. Also fix the catch block to: - Use the accumulated hasOutput value (was hardcoded false) - Preserve the accumulated output in the error return (was discarded) Adds three regression tests covering: - idle-timeout with output + all tool calls complete → exitCode 0 (success) - idle-timeout with pending tool calls → exitCode 1 (genuine failure, retry) - idle-timeout with no output collected → exitCode 1 (no work done) Closes #40418 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/copilot_sdk_driver.test.cjs | 143 +++++++++++++++++++ actions/setup/js/copilot_sdk_session.cjs | 32 +++-- 2 files changed, 166 insertions(+), 9 deletions(-) diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs index 540ae7229bc..06e47c350fa 100644 --- a/actions/setup/js/copilot_sdk_driver.test.cjs +++ b/actions/setup/js/copilot_sdk_driver.test.cjs @@ -119,6 +119,149 @@ describe("copilot_sdk_driver.cjs", () => { expect(stop).toHaveBeenCalledTimes(1); }); + it("resolves exitCode 0 on SDK 
idle-timeout when output collected and all tool calls complete", async () => { + // Regression test: when sendAndWait throws an idle-timeout error but the agent + // produced output and all tool calls completed, the driver must return exitCode 0. + // This covers the case where the SDK drops the session.idle signal on long runs. + const disconnect = vi.fn().mockResolvedValue(undefined); + const stop = vi.fn().mockResolvedValue(undefined); + let onEvent = () => {}; + const session = { + sessionId: "session-idle-timeout-success", + on: handler => { + onEvent = handler; + }, + sendAndWait: vi.fn().mockImplementation(async () => { + // Simulate tool execution events before the idle-timeout + onEvent({ + type: "tool.execution_start", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { toolName: "bash", mcpServerName: "terminal", toolCallId: "call-1" }, + }); + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "I found the answer" }, + }); + onEvent({ + type: "tool.execution_complete", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { toolCallId: "call-1", success: true }, + }); + throw new Error("Timeout after 870000ms waiting for session.idle"); + }), + disconnect, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(0); + expect(result.hasOutput).toBe(true); + expect(result.output).toContain("I found the answer"); + expect(disconnect).toHaveBeenCalledTimes(1); + expect(stop).toHaveBeenCalledTimes(1); + }); + + it("returns exitCode 1 on SDK idle-timeout when tool calls are 
still pending", async () => { + // When the idle-timeout fires with in-flight (unmatched) tool calls, the agent did + // not finish cleanly — the driver must NOT treat it as success. + const disconnect = vi.fn().mockResolvedValue(undefined); + const stop = vi.fn().mockResolvedValue(undefined); + let onEvent = () => {}; + const session = { + sessionId: "session-idle-timeout-pending-tools", + on: handler => { + onEvent = handler; + }, + sendAndWait: vi.fn().mockImplementation(async () => { + onEvent({ + type: "tool.execution_start", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { toolName: "bash", mcpServerName: "terminal", toolCallId: "call-pending" }, + }); + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "working on it" }, + }); + // tool.execution_complete is never emitted — tool call remains pending + throw new Error("Timeout after 870000ms waiting for session.idle"); + }), + disconnect, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(1); + expect(result.hasOutput).toBe(true); + expect(result.output).toContain("working on it"); + }); + + it("returns exitCode 1 on SDK idle-timeout with no output collected", async () => { + // When the idle-timeout fires before the agent produces any output, the driver + // must return exitCode 1 — there is nothing useful to surface. 
+ const disconnect = vi.fn().mockResolvedValue(undefined); + const stop = vi.fn().mockResolvedValue(undefined); + const session = { + sessionId: "session-idle-timeout-no-output", + on: () => {}, + sendAndWait: vi.fn().mockRejectedValue(new Error("Timeout after 870000ms waiting for session.idle")), + disconnect, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(1); + expect(result.hasOutput).toBe(false); + }); + it("passes custom provider and model through to SDK createSession", async () => { const disconnect = vi.fn().mockResolvedValue(undefined); const stop = vi.fn().mockResolvedValue(undefined); diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs index b709fb83987..a41b196f123 100644 --- a/actions/setup/js/copilot_sdk_session.cjs +++ b/actions/setup/js/copilot_sdk_session.cjs @@ -141,6 +141,13 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c let toolDenialCount = 0; let catastrophicToolDenialsError = null; let catastrophicToolDenialsTriggered = false; + /** + * Map from toolCallId → {toolName, mcpServerName} for enriching tool.execution_complete + * events and for tracking in-flight tool calls when the idle-timeout fires. + * Declared at function scope so the catch block can check pendingToolCalls.size. + * @type {Map} + */ + const pendingToolCalls = new Map(); /** * Best-effort write of a driver-level event to events.jsonl and stderr. 
@@ -214,13 +221,6 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c const stream = eventsStream; log(`serialising SDK events to ${eventsPath}`); - /** - * Map from toolCallId → {toolName, mcpServerName} so that tool.execution_complete - * events (which carry no mcpServerName) can be enriched from the matching start event. - * @type {Map} - */ - const pendingToolCalls = new Map(); - /** * Write one JSONL entry to the events file and stderr. * Uses the event's own ISO-8601 timestamp when available. @@ -316,10 +316,24 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c const durationMs = Date.now() - startTime; const failure = catastrophicToolDenialsError ?? (err instanceof Error ? err : new Error(String(err))); log(`error: ${failure.message}`); + + // When sendAndWait times out waiting for session.idle but the agent produced + // output and all tracked tool calls have already completed, the session work is + // done — the SDK simply failed to emit the idle signal. Treat it as a successful + // run so the harness does not classify it as a failure or waste retry attempts. + const isIdleTimeout = !catastrophicToolDenialsError && /waiting for session\.idle/i.test(failure.message); + if (isIdleTimeout && hasOutput && pendingToolCalls.size === 0) { + log(`warning: SDK idle-timeout with collected output and no pending tool calls — treating as completed`); + log(`session completed: hasOutput=${hasOutput} durationMs=${durationMs}`); + return { exitCode: 0, output, hasOutput, durationMs }; + } + + // Preserve any output collected before the error so the harness can use it + // for retry decisions and diagnostics. return { exitCode: 1, - output: failure.message, - hasOutput: false, + output: hasOutput ? 
output : failure.message, + hasOutput, durationMs, }; } finally { From 112d83eca273e69eae26a36fddeda22573419fe8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 20 Jun 2026 03:16:29 +0000 Subject: [PATCH 3/3] refactor: extract SDK_IDLE_TIMEOUT_PATTERN constant for clarity Extract the inline idle-timeout regex into a named module-level constant SDK_IDLE_TIMEOUT_PATTERN, matching the more specific format already used in copilot_harness.cjs (SDK_SESSION_IDLE_TIMEOUT_PATTERN). This makes the intent clear and is easy to update if the SDK error message format changes. Also export the constant for testability. Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- actions/setup/js/copilot_sdk_session.cjs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs index a41b196f123..a2991741161 100644 --- a/actions/setup/js/copilot_sdk_session.cjs +++ b/actions/setup/js/copilot_sdk_session.cjs @@ -36,6 +36,12 @@ const { buildCopilotSDKPermissionHandler, getEnvPositiveIntOrDefault, parseMaxTo // Override via the COPILOT_SDK_SEND_TIMEOUT_MS environment variable. const SDK_SEND_TIMEOUT_MS_DEFAULT = 10 * 60 * 1000; +// Pattern matching the SDK idle-timeout error emitted when sendAndWait reaches its +// deadline waiting for the session.idle event. This matches the message format +// "Timeout after <N>ms waiting for session.idle" produced by the Copilot SDK. +// Keep in sync with SDK_SESSION_IDLE_TIMEOUT_PATTERN in copilot_harness.cjs. +const SDK_IDLE_TIMEOUT_PATTERN = /Timeout after \d+ms waiting for session\.idle/; + /** * Extract the prompt text from a resolved args array. * Looks for the first occurrence of "-p " or "--prompt ". 
@@ -321,7 +327,7 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c // output and all tracked tool calls have already completed, the session work is // done — the SDK simply failed to emit the idle signal. Treat it as a successful // run so the harness does not classify it as a failure or waste retry attempts. - const isIdleTimeout = !catastrophicToolDenialsError && /waiting for session\.idle/i.test(failure.message); + const isIdleTimeout = !catastrophicToolDenialsError && SDK_IDLE_TIMEOUT_PATTERN.test(failure.message); if (isIdleTimeout && hasOutput && pendingToolCalls.size === 0) { log(`warning: SDK idle-timeout with collected output and no pending tool calls — treating as completed`); log(`session completed: hasOutput=${hasOutput} durationMs=${durationMs}`); @@ -359,4 +365,4 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c } } -module.exports = { SDK_SEND_TIMEOUT_MS_DEFAULT, extractPromptFromArgs, runWithCopilotSDK }; +module.exports = { SDK_SEND_TIMEOUT_MS_DEFAULT, SDK_IDLE_TIMEOUT_PATTERN, extractPromptFromArgs, runWithCopilotSDK };