github · pelikhan · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs
@@ -119,6 +119,149 @@ describe("copilot_sdk_driver.cjs", () => {
       expect(stop).toHaveBeenCalledTimes(1);
     });
 
+    it("resolves exitCode 0 on SDK idle-timeout when output collected and all tool calls complete", async () => {
+      // Regression test: when sendAndWait throws an idle-timeout error but the agent
+      // produced output and all tool calls completed, the driver must return exitCode 0.
+      // This covers the case where the SDK drops the session.idle signal on long runs.
+      const disconnect = vi.fn().mockResolvedValue(undefined);
+      const stop = vi.fn().mockResolvedValue(undefined);
+      let onEvent = () => {};
+      const session = {
+        sessionId: "session-idle-timeout-success",
+        on: handler => {
+          onEvent = handler;
+        },
+        sendAndWait: vi.fn().mockImplementation(async () => {
+          // Simulate tool execution events before the idle-timeout
+          onEvent({
+            type: "tool.execution_start",
+            ephemeral: false,
+            timestamp: new Date().toISOString(),
+            data: { toolName: "bash", mcpServerName: "terminal", toolCallId: "call-1" },
+          });
+          onEvent({
+            type: "assistant.message",
+            ephemeral: false,
+            timestamp: new Date().toISOString(),
+            data: { content: "I found the answer" },
+          });
+          onEvent({
+            type: "tool.execution_complete",
+            ephemeral: false,
+            timestamp: new Date().toISOString(),
+            data: { toolCallId: "call-1", success: true },
+          });
+          throw new Error("Timeout after 870000ms waiting for session.idle");
+        }),
+        disconnect,
+      };
+      class FakeCopilotClient {
+        start = vi.fn().mockResolvedValue(undefined);
+        createSession = vi.fn().mockResolvedValue(session);
+        stop = stop;
+      }
+
+      const result = await runWithCopilotSDK({
+        sdkUri: "http://127.0.0.1:3002",
+        prompt: "test prompt",
+        logger: () => {},
+        sdkModule: {
+          CopilotClient: FakeCopilotClient,
+          RuntimeConnection: { forUri: vi.fn(() => ({})) },
+          approveAll: () => "allow",
+        },
+      });
+
+      expect(result.exitCode).toBe(0);
+      expect(result.hasOutput).toBe(true);
+      expect(result.output).toContain("I found the answer");
+      expect(disconnect).toHaveBeenCalledTimes(1);
+      expect(stop).toHaveBeenCalledTimes(1);
+    });
+
+    it("returns exitCode 1 on SDK idle-timeout when tool calls are still pending", async () => {
+      // When the idle-timeout fires with in-flight (unmatched) tool calls, the agent did
+      // not finish cleanly — the driver must NOT treat it as success.
+      const disconnect = vi.fn().mockResolvedValue(undefined);
+      const stop = vi.fn().mockResolvedValue(undefined);
+      let onEvent = () => {};
+      const session = {
+        sessionId: "session-idle-timeout-pending-tools",
+        on: handler => {
+          onEvent = handler;
+        },
+        sendAndWait: vi.fn().mockImplementation(async () => {
+          onEvent({
+            type: "tool.execution_start",
+            ephemeral: false,
+            timestamp: new Date().toISOString(),
+            data: { toolName: "bash", mcpServerName: "terminal", toolCallId: "call-pending" },
+          });
+          onEvent({
+            type: "assistant.message",
+            ephemeral: false,
+            timestamp: new Date().toISOString(),
+            data: { content: "working on it" },
+          });
+          // tool.execution_complete is never emitted — tool call remains pending
+          throw new Error("Timeout after 870000ms waiting for session.idle");
+        }),
+        disconnect,
+      };
+      class FakeCopilotClient {
+        start = vi.fn().mockResolvedValue(undefined);
+        createSession = vi.fn().mockResolvedValue(session);
+        stop = stop;
+      }
+
+      const result = await runWithCopilotSDK({
+        sdkUri: "http://127.0.0.1:3002",
+        prompt: "test prompt",
+        logger: () => {},
+        sdkModule: {
+          CopilotClient: FakeCopilotClient,
+          RuntimeConnection: { forUri: vi.fn(() => ({})) },
+          approveAll: () => "allow",
+        },
+      });
+
+      expect(result.exitCode).toBe(1);
+      expect(result.hasOutput).toBe(true);
+      expect(result.output).toContain("working on it");
+    });
+
+    it("returns exitCode 1 on SDK idle-timeout with no output collected", async () => {
+      // When the idle-timeout fires before the agent produces any output, the driver
+      // must return exitCode 1 — there is nothing useful to surface.
+      const disconnect = vi.fn().mockResolvedValue(undefined);
+      const stop = vi.fn().mockResolvedValue(undefined);
+      const session = {
+        sessionId: "session-idle-timeout-no-output",
+        on: () => {},
+        sendAndWait: vi.fn().mockRejectedValue(new Error("Timeout after 870000ms waiting for session.idle")),
+        disconnect,
+      };
+      class FakeCopilotClient {
+        start = vi.fn().mockResolvedValue(undefined);
+        createSession = vi.fn().mockResolvedValue(session);
+        stop = stop;
+      }
+
+      const result = await runWithCopilotSDK({
+        sdkUri: "http://127.0.0.1:3002",
+        prompt: "test prompt",
+        logger: () => {},
+        sdkModule: {
+          CopilotClient: FakeCopilotClient,
+          RuntimeConnection: { forUri: vi.fn(() => ({})) },
+          approveAll: () => "allow",
+        },
+      });
+
+      expect(result.exitCode).toBe(1);
+      expect(result.hasOutput).toBe(false);
+    });
+
     it("passes custom provider and model through to SDK createSession", async () => {
       const disconnect = vi.fn().mockResolvedValue(undefined);
       const stop = vi.fn().mockResolvedValue(undefined);

diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs
@@ -36,6 +36,12 @@ const { buildCopilotSDKPermissionHandler, getEnvPositiveIntOrDefault, parseMaxTo
 // Override via the COPILOT_SDK_SEND_TIMEOUT_MS environment variable.
 const SDK_SEND_TIMEOUT_MS_DEFAULT = 10 * 60 * 1000;
 
+// Pattern matching the SDK idle-timeout error emitted when sendAndWait reaches its
+// deadline waiting for the session.idle event.  This matches the message format
+// "Timeout after <N>ms waiting for session.idle" produced by the Copilot SDK.
+// Keep in sync with SDK_SESSION_IDLE_TIMEOUT_PATTERN in copilot_harness.cjs.
+const SDK_IDLE_TIMEOUT_PATTERN = /Timeout after \d+ms waiting for session\.idle/;
+
 /**
  * Extract the prompt text from a resolved args array.
  * Looks for the first occurrence of "-p <value>" or "--prompt <value>".
@@ -141,6 +147,13 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
   let toolDenialCount = 0;
   let catastrophicToolDenialsError = null;
   let catastrophicToolDenialsTriggered = false;
+  /**
+   * Map from toolCallId → {toolName, mcpServerName} for enriching tool.execution_complete
+   * events and for tracking in-flight tool calls when the idle-timeout fires.
+   * Declared at function scope so the catch block can check pendingToolCalls.size.
+   * @type {Map<string, {toolName: string, mcpServerName: string}>}
+   */
+  const pendingToolCalls = new Map();
 
   /**
    * Best-effort write of a driver-level event to events.jsonl and stderr.
@@ -214,13 +227,6 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
     const stream = eventsStream;
     log(`serialising SDK events to ${eventsPath}`);
 
-    /**
-     * Map from toolCallId → {toolName, mcpServerName} so that tool.execution_complete
-     * events (which carry no mcpServerName) can be enriched from the matching start event.
-     * @type {Map<string, {toolName: string, mcpServerName: string}>}
-     */
-    const pendingToolCalls = new Map();
-
     /**
      * Write one JSONL entry to the events file and stderr.
      * Uses the event's own ISO-8601 timestamp when available.
@@ -316,10 +322,24 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
     const durationMs = Date.now() - startTime;
     const failure = catastrophicToolDenialsError ?? (err instanceof Error ? err : new Error(String(err)));
     log(`error: ${failure.message}`);
+
+    // When sendAndWait times out waiting for session.idle but the agent produced
+    // output and all tracked tool calls have already completed, the session work is
+    // done — the SDK simply failed to emit the idle signal.  Treat it as a successful
+    // run so the harness does not classify it as a failure or waste retry attempts.
+    const isIdleTimeout = !catastrophicToolDenialsError && SDK_IDLE_TIMEOUT_PATTERN.test(failure.message);
+    if (isIdleTimeout && hasOutput && pendingToolCalls.size === 0) {
+      log(`warning: SDK idle-timeout with collected output and no pending tool calls — treating as completed`);
+      log(`session completed: hasOutput=${hasOutput} durationMs=${durationMs}`);
+      return { exitCode: 0, output, hasOutput, durationMs };
+    }
+
+    // Preserve any output collected before the error so the harness can use it
+    // for retry decisions and diagnostics.
     return {
       exitCode: 1,
-      output: failure.message,
-      hasOutput: false,
+      output: hasOutput ? output : failure.message,
+      hasOutput,
       durationMs,
     };
   } finally {
@@ -345,4 +365,4 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
   }
 }
 
-module.exports = { SDK_SEND_TIMEOUT_MS_DEFAULT, extractPromptFromArgs, runWithCopilotSDK };
+module.exports = { SDK_SEND_TIMEOUT_MS_DEFAULT, SDK_IDLE_TIMEOUT_PATTERN, extractPromptFromArgs, runWithCopilotSDK };