From cef4429c480c4627f8d452b247fba17d629463f1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Jun 2026 13:12:25 +0000
Subject: [PATCH 1/4] Initial plan


From 0b9cb686a815d8adb6e1dc8537c9623a6693b530 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Jun 2026 13:28:48 +0000
Subject: [PATCH 2/4] fix: preserve isError from handler result; add
 defense-in-depth content check in apply_samples

Co-authored-by: dsyme <7204669+dsyme@users.noreply.github.com>
---
 actions/setup/js/apply_samples.cjs            | 30 ++++++++++++-
 actions/setup/js/apply_samples.test.cjs       | 40 +++++++++++++++++
 actions/setup/js/mcp_server_core.cjs          |  8 +++-
 actions/setup/js/mcp_server_core.test.cjs     | 44 +++++++++++++++++++
 .../setup/js/safe_outputs_mcp_server_http.cjs | 10 +++--
 5 files changed, 124 insertions(+), 8 deletions(-)

diff --git a/actions/setup/js/apply_samples.cjs b/actions/setup/js/apply_samples.cjs
index c632661b98f..9089d09e4ba 100644
--- a/actions/setup/js/apply_samples.cjs
+++ b/actions/setup/js/apply_samples.cjs
@@ -392,6 +392,30 @@ function resolveMcpServerPath() {
   throw new Error(`${ERR_CONFIG}: apply_samples: could not locate safe_outputs_mcp_server.cjs. Looked in: ${candidates.join(", ")}`);
 }
 
+/**
+ * Return true when an MCP tool-call result carries an application-level error
+ * payload, regardless of whether the transport-level `isError` flag was set.
+ *
+ * Safe-output handlers encode errors as `{"result":"error", ...}` inside the
+ * first content text item.  This helper provides a defense-in-depth check so
+ * that the driver fails even when an older server version did not set `isError`.
+ *
+ * @param {any} result - The `result` object from a JSON-RPC tools/call response.
+ * @returns {boolean}
+ */
+function sampleResultIsError(result) {
+  if (!result) return false;
+  if (result.isError) return true;
+  const text = result.content && result.content[0] && result.content[0].text;
+  if (!text) return false;
+  try {
+    const parsed = JSON.parse(text);
+    return !!(parsed && parsed.result === "error");
+  } catch {
+    return false;
+  }
+}
+
 /**
  * Append a synthetic terminal_reason: completed marker to the engine stdio log
  * so downstream parsers / handle_agent_failure recognize the replay as a
@@ -489,8 +513,8 @@ async function main() {
         continue;
       }
       const result = callRsp.result;
-      if (result && result.isError) {
-        const text = result.content && result.content[0] && result.content[0].text;
+      if (sampleResultIsError(result)) {
+        const text = result && result.content && result.content[0] && result.content[0].text;
         failures.push(`sample[${i}] (tool=${sample.tool}): ${text || JSON.stringify(result)}`);
       } else {
         core.info(`apply_samples: sample[${i}] (tool=${sample.tool}) ok`);
@@ -543,4 +567,6 @@ module.exports = {
   // Exported for unit testing of the 3-tier PR head ref resolution logic.
   derivePrHeadRef,
   fetchPullRequestHeadRef,
+  // Exported for unit testing of the defense-in-depth error detection.
+  sampleResultIsError,
 };
diff --git a/actions/setup/js/apply_samples.test.cjs b/actions/setup/js/apply_samples.test.cjs
index 5a0a6ac45a3..f0a16ab0442 100644
--- a/actions/setup/js/apply_samples.test.cjs
+++ b/actions/setup/js/apply_samples.test.cjs
@@ -489,3 +489,43 @@ describe("apply_samples.cjs preStagePatch (create_pull_request / push_to_pull_re
     expect(log).toEqual(["seed"]);
   });
 });
+
+describe("apply_samples.cjs sampleResultIsError", () => {
+  const { sampleResultIsError } = require("./apply_samples.cjs");
+
+  it("returns false for a successful result (no isError, no error content)", () => {
+    const result = { content: [{ type: "text", text: JSON.stringify({ result: "success", id: 1 }) }], isError: false };
+    expect(sampleResultIsError(result)).toBe(false);
+  });
+
+  it("returns true when isError is true on the result", () => {
+    const result = {
+      content: [{ type: "text", text: JSON.stringify({ result: "error", error: "something failed" }) }],
+      isError: true,
+    };
+    expect(sampleResultIsError(result)).toBe(true);
+  });
+
+  it("returns true (defense-in-depth) when content text has result:error but isError is false", () => {
+    // This simulates the bug: an older server that forgot to set isError:true.
+    const result = {
+      content: [{ type: "text", text: JSON.stringify({ result: "error", error: "patch failed", details: "no merge base" }) }],
+      isError: false,
+    };
+    expect(sampleResultIsError(result)).toBe(true);
+  });
+
+  it("returns false for non-JSON content text", () => {
+    const result = { content: [{ type: "text", text: "plain text, not JSON" }], isError: false };
+    expect(sampleResultIsError(result)).toBe(false);
+  });
+
+  it("returns false for null result", () => {
+    expect(sampleResultIsError(null)).toBe(false);
+  });
+
+  it("returns false for empty content array", () => {
+    const result = { content: [], isError: false };
+    expect(sampleResultIsError(result)).toBe(false);
+  });
+});
diff --git a/actions/setup/js/mcp_server_core.cjs b/actions/setup/js/mcp_server_core.cjs
index ab85a617c8f..ca51dc2f6a5 100644
--- a/actions/setup/js/mcp_server_core.cjs
+++ b/actions/setup/js/mcp_server_core.cjs
@@ -796,7 +796,9 @@ async function handleRequest(server, request, defaultHandler) {
       // Call handler and await the result (supports both sync and async handlers)
       const handlerResult = await Promise.resolve(handler(args));
       const content = handlerResult && handlerResult.content ? handlerResult.content : [];
-      result = { content, isError: false };
+      // Preserve isError from the handler result (e.g. safe-output handlers return isError:true on error)
+      const isError = !!(handlerResult && handlerResult.isError);
+      result = { content, isError };
     } else if (/^notifications\//.test(method)) {
       // Notifications don't need a response
       return null;
@@ -953,7 +955,9 @@ async function handleMessage(server, req, defaultHandler) {
       const result = await Promise.resolve(handler(args));
       server.debug(`Handler returned for tool: ${name}`);
       const content = result && result.content ? result.content : [];
-      server.replyResult(id, { content, isError: false });
+      // Preserve isError from the handler result (e.g. safe-output handlers return isError:true on error)
+      const isError = !!(result && result.isError);
+      server.replyResult(id, { content, isError });
     } else if (/^notifications\//.test(method)) {
       server.debug(`ignore ${method}`);
     } else {
diff --git a/actions/setup/js/mcp_server_core.test.cjs b/actions/setup/js/mcp_server_core.test.cjs
index ee25725de8b..e6da6aa43f4 100644
--- a/actions/setup/js/mcp_server_core.test.cjs
+++ b/actions/setup/js/mcp_server_core.test.cjs
@@ -187,6 +187,50 @@ describe("mcp_server_core.cjs", () => {
       expect(results[0].result.isError).toBe(false);
     });
 
+    it("should propagate isError:true from handler result to MCP response", async () => {
+      const { createServer, registerTool, handleMessage } = await import("./mcp_server_core.cjs");
+      results = [];
+
+      // Suppress stderr output during tests
+      vi.spyOn(process.stderr, "write").mockImplementation(() => true);
+
+      server = createServer({ name: "test-server", version: "1.0.0" });
+      server.writeMessage = msg => results.push(msg);
+      server.replyResult = (id, result) => {
+        if (id === undefined || id === null) return;
+        results.push({ jsonrpc: "2.0", id, result });
+      };
+      server.replyError = (id, code, message) => {
+        if (id === undefined || id === null) return;
+        results.push({ jsonrpc: "2.0", id, error: { code, message } });
+      };
+
+      registerTool(server, {
+        name: "failing_tool",
+        description: "A tool that returns an application-level error",
+        inputSchema: {
+          type: "object",
+          properties: { input: { type: "string", description: "Input" } },
+          required: ["input"],
+        },
+        handler: () => ({
+          content: [{ type: "text", text: JSON.stringify({ result: "error", error: "something went wrong" }) }],
+          isError: true,
+        }),
+      });
+
+      await handleMessage(server, {
+        jsonrpc: "2.0",
+        id: 42,
+        method: "tools/call",
+        params: { name: "failing_tool", arguments: { input: "test" } },
+      });
+
+      expect(results).toHaveLength(1);
+      expect(results[0].result.isError).toBe(true);
+      expect(results[0].result.content[0].text).toContain('"result":"error"');
+    });
+
     it("should normalize tool arguments before required-field validation", async () => {
       const { createServer, registerTool, handleMessage } = await import("./mcp_server_core.cjs");
       results = [];
diff --git a/actions/setup/js/safe_outputs_mcp_server_http.cjs b/actions/setup/js/safe_outputs_mcp_server_http.cjs
index 094389d9d3c..0cf2eab79b1 100644
--- a/actions/setup/js/safe_outputs_mcp_server_http.cjs
+++ b/actions/setup/js/safe_outputs_mcp_server_http.cjs
@@ -189,9 +189,10 @@ function createMCPServer(options = {}) {
       const result = await Promise.resolve(toolHandler(args));
       logger.debug(`Handler returned for tool: ${tool.name}`);
 
-      // Normalize result to MCP format
+      // Normalize result to MCP format; preserve isError from the handler result
       const content = result && result.content ? result.content : [];
-      return { content, isError: false };
+      const isError = !!(result && result.isError);
+      return { content, isError };
     });
 
     registeredCount++;
@@ -226,9 +227,10 @@ function createMCPServer(options = {}) {
         const result = await Promise.resolve(defaultHandler({ toolName, ...args }));
         logger.debug(`Handler returned for dynamic tool: ${toolName}`);
 
-        // Normalize result to MCP format
+        // Normalize result to MCP format; preserve isError from the handler result
         const content = result && result.content ? result.content : [];
-        return { content, isError: false };
+        const isError = !!(result && result.isError);
+        return { content, isError };
       });
 
       registeredCount++;

From f419df0b5cadede3133e4eb57170193f466fb695 Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@users.noreply.github.com>
Date: Thu, 18 Jun 2026 14:56:24 +0100
Subject: [PATCH 3/4] Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 actions/setup/js/mcp_server_core.test.cjs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/actions/setup/js/mcp_server_core.test.cjs b/actions/setup/js/mcp_server_core.test.cjs
index e6da6aa43f4..e5e58d83b0f 100644
--- a/actions/setup/js/mcp_server_core.test.cjs
+++ b/actions/setup/js/mcp_server_core.test.cjs
@@ -191,9 +191,7 @@ describe("mcp_server_core.cjs", () => {
       const { createServer, registerTool, handleMessage } = await import("./mcp_server_core.cjs");
       results = [];
 
-      // Suppress stderr output during tests
-      vi.spyOn(process.stderr, "write").mockImplementation(() => true);
-
+      // Stderr is already mocked in this describe's beforeEach.
       server = createServer({ name: "test-server", version: "1.0.0" });
       server.writeMessage = msg => results.push(msg);
       server.replyResult = (id, result) => {

From 68370f9b793e41bc6dde3da96fc350b7472fd1f4 Mon Sep 17 00:00:00 2001
From: Don Syme <dsyme@github.com>
Date: Thu, 18 Jun 2026 15:15:58 +0100
Subject: [PATCH 4/4] test: address review feedback on isError propagation
 coverage

- Simplify handleMessage isError test to reuse beforeEach server/stderr spy
- Add handleRequest isError propagation regression tests
- Isolate flag path in sampleResultIsError flag-precedence test (success content)
- Extract normalizeMcpToolResult helper in HTTP server + unit-test both reg paths
---
 actions/setup/js/apply_samples.test.cjs       |  6 +-
 actions/setup/js/mcp_server_core.test.cjs     | 82 +++++++++++++++----
 .../setup/js/safe_outputs_mcp_server_http.cjs | 23 ++++--
 .../js/safe_outputs_mcp_server_http.test.cjs  | 45 ++++++++++
 4 files changed, 134 insertions(+), 22 deletions(-)
 create mode 100644 actions/setup/js/safe_outputs_mcp_server_http.test.cjs

diff --git a/actions/setup/js/apply_samples.test.cjs b/actions/setup/js/apply_samples.test.cjs
index f0a16ab0442..b5a1e3ef738 100644
--- a/actions/setup/js/apply_samples.test.cjs
+++ b/actions/setup/js/apply_samples.test.cjs
@@ -498,9 +498,11 @@ describe("apply_samples.cjs sampleResultIsError", () => {
     expect(sampleResultIsError(result)).toBe(false);
   });
 
-  it("returns true when isError is true on the result", () => {
+  it("returns true when isError is true on the result (flag takes precedence)", () => {
     const result = {
-      content: [{ type: "text", text: JSON.stringify({ result: "error", error: "something failed" }) }],
+      // Content is a success payload — proves the early-return on the isError flag,
+      // not the content-text fallback path.
+      content: [{ type: "text", text: JSON.stringify({ result: "success", id: 1 }) }],
       isError: true,
     };
     expect(sampleResultIsError(result)).toBe(true);
diff --git a/actions/setup/js/mcp_server_core.test.cjs b/actions/setup/js/mcp_server_core.test.cjs
index e5e58d83b0f..e8e41c2fbaf 100644
--- a/actions/setup/js/mcp_server_core.test.cjs
+++ b/actions/setup/js/mcp_server_core.test.cjs
@@ -188,20 +188,8 @@ describe("mcp_server_core.cjs", () => {
     });
 
     it("should propagate isError:true from handler result to MCP response", async () => {
-      const { createServer, registerTool, handleMessage } = await import("./mcp_server_core.cjs");
-      results = [];
-
-      // Stderr is already mocked in this describe's beforeEach.
-      server = createServer({ name: "test-server", version: "1.0.0" });
-      server.writeMessage = msg => results.push(msg);
-      server.replyResult = (id, result) => {
-        if (id === undefined || id === null) return;
-        results.push({ jsonrpc: "2.0", id, result });
-      };
-      server.replyError = (id, code, message) => {
-        if (id === undefined || id === null) return;
-        results.push({ jsonrpc: "2.0", id, error: { code, message } });
-      };
+      // Reuse the server, capture hooks, and stderr spy already set up in beforeEach.
+      const { registerTool, handleMessage } = await import("./mcp_server_core.cjs");
 
       registerTool(server, {
         name: "failing_tool",
@@ -482,6 +470,72 @@ describe("mcp_server_core.cjs", () => {
     });
   });
 
+  describe("handleRequest", () => {
+    beforeEach(() => {
+      // Suppress stderr output during tests
+      vi.spyOn(process.stderr, "write").mockImplementation(() => true);
+    });
+
+    it("should propagate isError:true from handler result into the JSON-RPC response", async () => {
+      const { createServer, registerTool, handleRequest } = await import("./mcp_server_core.cjs");
+      const server = createServer({ name: "test-server", version: "1.0.0" });
+
+      registerTool(server, {
+        name: "failing_tool",
+        description: "A tool that returns an application-level error",
+        inputSchema: {
+          type: "object",
+          properties: { input: { type: "string", description: "Input" } },
+          required: ["input"],
+        },
+        handler: () => ({
+          content: [{ type: "text", text: JSON.stringify({ result: "error", error: "something went wrong" }) }],
+          isError: true,
+        }),
+      });
+
+      const response = await handleRequest(server, {
+        jsonrpc: "2.0",
+        id: 7,
+        method: "tools/call",
+        params: { name: "failing_tool", arguments: { input: "test" } },
+      });
+
+      expect(response.id).toBe(7);
+      expect(response.result.isError).toBe(true);
+      expect(response.result.content[0].text).toContain('"result":"error"');
+    });
+
+    it("should report isError:false for a successful handler result", async () => {
+      const { createServer, registerTool, handleRequest } = await import("./mcp_server_core.cjs");
+      const server = createServer({ name: "test-server", version: "1.0.0" });
+
+      registerTool(server, {
+        name: "ok_tool",
+        description: "A tool that succeeds",
+        inputSchema: {
+          type: "object",
+          properties: { input: { type: "string", description: "Input" } },
+          required: ["input"],
+        },
+        handler: args => ({
+          content: [{ type: "text", text: `received: ${args.input}` }],
+        }),
+      });
+
+      const response = await handleRequest(server, {
+        jsonrpc: "2.0",
+        id: 8,
+        method: "tools/call",
+        params: { name: "ok_tool", arguments: { input: "hello" } },
+      });
+
+      expect(response.id).toBe(8);
+      expect(response.result.isError).toBe(false);
+      expect(response.result.content[0].text).toBe("received: hello");
+    });
+  });
+
   describe("loadToolHandlers", () => {
     let server;
     const fs = require("fs");
diff --git a/actions/setup/js/safe_outputs_mcp_server_http.cjs b/actions/setup/js/safe_outputs_mcp_server_http.cjs
index 0cf2eab79b1..f8d3a5f58df 100644
--- a/actions/setup/js/safe_outputs_mcp_server_http.cjs
+++ b/actions/setup/js/safe_outputs_mcp_server_http.cjs
@@ -49,6 +49,20 @@ const { runHttpServer, logStartupError } = require("./mcp_http_server_runner.cjs
 const { normalizeSafeOutputToolArguments, stripInternalSafeOutputSchemaMetadata } = require("./safe_outputs_mcp_arguments.cjs");
 moduleLogger.debug("All modules loaded successfully");
 
+/**
+ * Normalize a handler result into the MCP tool-call response shape, preserving
+ * the handler-provided `isError` flag (e.g. safe-output handlers return
+ * `isError: true` on error). Hardcoding `isError: false` here would mask
+ * application-level errors from clients such as the samples replay driver.
+ * @param {any} result - Raw result returned by a tool handler.
+ * @returns {{ content: any[], isError: boolean }}
+ */
+function normalizeMcpToolResult(result) {
+  const content = result && result.content ? result.content : [];
+  const isError = !!(result && result.isError);
+  return { content, isError };
+}
+
 /**
  * Create and configure the MCP server with tools
  * @param {Object} [options] - Additional options
@@ -190,9 +204,7 @@ function createMCPServer(options = {}) {
       logger.debug(`Handler returned for tool: ${tool.name}`);
 
       // Normalize result to MCP format; preserve isError from the handler result
-      const content = result && result.content ? result.content : [];
-      const isError = !!(result && result.isError);
-      return { content, isError };
+      return normalizeMcpToolResult(result);
     });
 
     registeredCount++;
@@ -228,9 +240,7 @@ function createMCPServer(options = {}) {
         logger.debug(`Handler returned for dynamic tool: ${toolName}`);
 
         // Normalize result to MCP format; preserve isError from the handler result
-        const content = result && result.content ? result.content : [];
-        const isError = !!(result && result.isError);
-        return { content, isError };
+        return normalizeMcpToolResult(result);
       });
 
       registeredCount++;
@@ -348,4 +358,5 @@ if (require.main === module) {
 module.exports = {
   startHttpServer,
   createMCPServer,
+  normalizeMcpToolResult,
 };
diff --git a/actions/setup/js/safe_outputs_mcp_server_http.test.cjs b/actions/setup/js/safe_outputs_mcp_server_http.test.cjs
new file mode 100644
index 00000000000..fbf4f1ac831
--- /dev/null
+++ b/actions/setup/js/safe_outputs_mcp_server_http.test.cjs
@@ -0,0 +1,45 @@
+import { describe, it, expect } from "vitest";
+
+// normalizeMcpToolResult is the shared helper used by BOTH tool-registration
+// paths in safe_outputs_mcp_server_http.cjs (predefined tools and dynamic
+// safe-job tools). The HTTP server cannot be unit-tested without real
+// transport/config bootstrap, so the isError-preservation logic that both
+// `server.tool(...)` callbacks rely on is exercised here directly.
+const { normalizeMcpToolResult } = require("./safe_outputs_mcp_server_http.cjs");
+
+describe("safe_outputs_mcp_server_http.cjs normalizeMcpToolResult", () => {
+  it("preserves isError:true from the handler result", () => {
+    const result = {
+      content: [{ type: "text", text: JSON.stringify({ result: "error", error: "boom" }) }],
+      isError: true,
+    };
+    expect(normalizeMcpToolResult(result)).toEqual({
+      content: result.content,
+      isError: true,
+    });
+  });
+
+  it("reports isError:false for a successful handler result", () => {
+    const result = {
+      content: [{ type: "text", text: JSON.stringify({ result: "success", id: 1 }) }],
+    };
+    expect(normalizeMcpToolResult(result)).toEqual({
+      content: result.content,
+      isError: false,
+    });
+  });
+
+  it("coerces a truthy non-boolean isError to a boolean", () => {
+    const result = { content: [{ type: "text", text: "x" }], isError: "yes" };
+    expect(normalizeMcpToolResult(result).isError).toBe(true);
+  });
+
+  it("defaults content to an empty array when the handler returns no content", () => {
+    expect(normalizeMcpToolResult({ isError: true })).toEqual({ content: [], isError: true });
+  });
+
+  it("returns a safe shape for null/undefined results", () => {
+    expect(normalizeMcpToolResult(null)).toEqual({ content: [], isError: false });
+    expect(normalizeMcpToolResult(undefined)).toEqual({ content: [], isError: false });
+  });
+});