From 3e4d546bd037113131edc5fe73b62567fb9a0950 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 16:22:51 +0000 Subject: [PATCH 1/3] Initial plan From 9eb3c0b16b1371cf291eb8f0da18140c0090ff77 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 16:41:31 +0000 Subject: [PATCH 2/3] Enforce per-type safe-output max count at MCP invocation time (MCE4) Add session-scoped per-type operation counters inside createHandlers that enforce the user-configured max at tool-invocation time (the MCP-server half of MCE4 dual enforcement). Previously the count cap was only applied downstream in collect_ndjson_output.cjs / safe_output_processor.cjs, so the agent received no in-loop feedback and silently over-produced outputs that were then dropped. Changes: - operationCounts Map tracks how many items of each type have been appended during the current MCP session. - getExplicitMax(type) reads the user's explicit max: setting via getSafeOutputsToolConfig; returns null when no explicit limit is set or when max: -1 (unlimited), so only intentional limits are enforced. - enforcePerTypeMax(type) throws a JSON-RPC -32602 error with E002 code and actionable guidance when the configured limit has already been reached. - appendSafeOutputCounted(entry) wraps appendSafeOutput: checks the limit, delegates to the real append, then increments the counter only on success (mirrors the inlineReviewCommentCount pattern). - All appendSafeOutput(entry) call sites within createHandlers replaced with appendSafeOutputCounted(entry). Downstream enforcement in collect_ndjson_output.cjs and safe_output_processor.cjs is preserved unchanged as the processor-time defence-in-depth layer (MCE4). 10 new tests added covering: limit enforcement, key normalisation, addCommentHandler, independent per-type budgets, unlimited (-1), unconfigured types, error message content, failed-write counter isolation, and fresh counter per createHandlers() call. Closes #40311 Co-authored-by: dsyme <7204669+dsyme@users.noreply.github.com> --- actions/setup/js/safe_outputs_handlers.cjs | 100 +++++++-- .../setup/js/safe_outputs_handlers.test.cjs | 191 ++++++++++++++++++ 2 files changed, 278 insertions(+), 13 deletions(-) diff --git a/actions/setup/js/safe_outputs_handlers.cjs b/actions/setup/js/safe_outputs_handlers.cjs index 846041ef455..8104a738114 100644 --- a/actions/setup/js/safe_outputs_handlers.cjs +++ b/actions/setup/js/safe_outputs_handlers.cjs @@ -203,6 +203,80 @@ function resolvePatchWorkspacePath(workspacePath) { function createHandlers(server, appendSafeOutput, config = {}) { const TOKEN_THRESHOLD = 16000; + /** + * Session-scoped per-type operation counters. + * Incremented on every successful appendSafeOutput call (MCE4 dual enforcement). + * @type {Map} + */ + const operationCounts = new Map(); + + /** + * Return the explicitly user-configured max for a safe-output type, or null if not set / unlimited. + * Uses getSafeOutputsToolConfig for consistent key-normalisation (hyphens → underscores). + * Does NOT fall back to validation-config defaults: MCP-time enforcement is only + * applied when the user has explicitly set a limit; downstream enforcement covers defaults. + * Per Safe Outputs Specification MCE5: the same config source as the processor. + * @param {string} type - normalised safe-output type name (e.g. "add_comment") + * @returns {number | null} + */ + function getExplicitMax(type) { + const toolConfig = getSafeOutputsToolConfig(config, type); + if (!toolConfig || typeof toolConfig !== "object") return null; + if (!("max" in toolConfig)) return null; + const maxVal = toolConfig.max; + if (maxVal === -1) return null; // -1 means unlimited + if (typeof maxVal === "number" && Number.isInteger(maxVal) && maxVal > 0) { + return maxVal; + } + return null; + } + + /** + * Enforce the per-type operation count limit at invocation time. + * Throws a JSON-RPC -32602 error when the configured max has already been reached. + * Per Safe Outputs Specification MCE4: Dual Enforcement — constraints MUST be + * enforced at both invocation time (MCP server) and processing time (safe output + * processor) to provide defence-in-depth. + * @param {string} type - normalised safe-output type name + */ + function enforcePerTypeMax(type) { + const maxAllowed = getExplicitMax(type); + if (maxAllowed === null) return; // no explicit limit configured + const current = operationCounts.get(type) || 0; + if (current >= maxAllowed) { + throw { + code: -32602, + message: `E002: ${type} limit reached — ${current} of ${maxAllowed} already used this run`, + data: { + constraint: "max", + type, + limit: maxAllowed, + guidance: + `You have used all ${maxAllowed} ${type} operations for this run. ` + + `Further ${type} calls will be ignored. Prioritize the most important items ` + + `(e.g. consolidate multiple updates into one), or call noop. ` + + `Note: other safe-output types have independent budgets, so applying one type ` + + `without its companion type can leave inconsistent state.`, + }, + }; + } + } + + /** + * Append a safe-output entry after enforcing the per-type max count. + * Increments the session counter only after a successful write, mirroring the + * approach used by inlineReviewCommentCount so that write errors do not advance + * the counter. + * Per Safe Outputs Specification MCE4: invocation-time half of dual enforcement. + * @param {Record} entry + */ + const appendSafeOutputCounted = entry => { + const type = entry?.type; + if (type) enforcePerTypeMax(type); + appendSafeOutput(entry); + if (type) operationCounts.set(type, (operationCounts.get(type) || 0) + 1); + }; + /** * Validate schema-declared explicit target parameters for wildcard-target tools. * @param {Record} entry @@ -258,7 +332,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { const fileInfo = writeLargeContentToFile(largeContent); entry[largeFieldName] = `[Content too large, saved to file: ${fileInfo.filename}]`; - appendSafeOutput(entry); + appendSafeOutputCounted(entry); return { content: [ @@ -286,7 +360,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { if (largeContentResponse) return largeContentResponse; // Normal case - no large content - appendSafeOutput(entry); + appendSafeOutputCounted(entry); return { content: [ { @@ -416,7 +490,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { targetFileName: targetFileName, }; - appendSafeOutput(entry); + appendSafeOutputCounted(entry); return { content: [ @@ -620,7 +694,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { if (allowEmpty) { server.debug(`allow-empty is enabled for create_pull_request - skipping patch generation`); // Append the safe output entry without generating a patch - appendSafeOutput(entry); + appendSafeOutputCounted(entry); return { content: [ { @@ -834,7 +908,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { entry.base_commit = bundleResult.baseCommit; } - appendSafeOutput(entry); + appendSafeOutputCounted(entry); return { content: [ { @@ -856,7 +930,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { }; } - appendSafeOutput(entry); + appendSafeOutputCounted(entry); return { content: [ { @@ -1277,7 +1351,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { entry.base_commit = bundleResult.baseCommit; } - appendSafeOutput(entry); + appendSafeOutputCounted(entry); return { content: [ { @@ -1299,7 +1373,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { }; } - appendSafeOutput(entry); + appendSafeOutputCounted(entry); return { content: [ { @@ -1551,7 +1625,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { }; const largeContentResponse = maybeHandleLargeContent(droppedEntry); if (!largeContentResponse) { - appendSafeOutput(droppedEntry); + appendSafeOutputCounted(droppedEntry); } return { content: [ @@ -1572,7 +1646,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { const largeContentResponse = maybeHandleLargeContent(entry); if (largeContentResponse) return largeContentResponse; - appendSafeOutput(entry); + appendSafeOutputCounted(entry); return { content: [ { @@ -1603,7 +1677,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { server.debug(`temporary_id for create_project: ${entry.temporary_id}`); // Append to safe outputs - appendSafeOutput(entry); + appendSafeOutputCounted(entry); // Return the temporary_id to the agent so it can reference this project return { @@ -1714,7 +1788,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { server.debug(`temporary_id for add_comment: ${entry.temporary_id}`); // Append to safe outputs - appendSafeOutput(entry); + appendSafeOutputCounted(entry); // Return the temporary_id to the agent so it can reference this comment return { @@ -1893,7 +1967,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { server.debug(`upload_artifact: staged ${filePath} as ${destName}`); } - appendSafeOutput(entry); + appendSafeOutputCounted(entry); const temporaryId = entry.temporary_id || null; return { diff --git a/actions/setup/js/safe_outputs_handlers.test.cjs b/actions/setup/js/safe_outputs_handlers.test.cjs index 934b02a10b2..c3795d4fec3 100644 --- a/actions/setup/js/safe_outputs_handlers.test.cjs +++ b/actions/setup/js/safe_outputs_handlers.test.cjs @@ -2736,6 +2736,197 @@ describe("safe_outputs_handlers", () => { }); }); +describe("per-type max enforcement (MCE4 dual enforcement)", () => { + let mockServer; + let mockAppendSafeOutput; + + beforeEach(() => { + vi.clearAllMocks(); + mockServer = { debug: vi.fn() }; + mockAppendSafeOutput = vi.fn(); + }); + + it("allows calls up to the configured max and rejects the (max+1)th call via defaultHandler", () => { + const h = createHandlers(mockServer, mockAppendSafeOutput, { + add_labels: { max: 2 }, + }); + + // First two calls succeed + expect(h.defaultHandler("add_labels")({ labels: ["approved"] })).not.toHaveProperty("isError"); + expect(h.defaultHandler("add_labels")({ labels: ["approved"] })).not.toHaveProperty("isError"); + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(2); + + // Third call must throw E002 + expect(() => h.defaultHandler("add_labels")({ labels: ["approved"] })).toThrow( + expect.objectContaining({ + code: -32602, + message: expect.stringContaining("E002"), + }) + ); + // No additional append after limit + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(2); + }); + + it("rejects immediately when max is 0 and config uses hyphen-keyed type (key normalisation)", () => { + // Ensure getSafeOutputsToolConfig's hyphen→underscore lookup works for max checks + const h = createHandlers(mockServer, mockAppendSafeOutput, { + "add-labels": { max: 1 }, + }); + + h.defaultHandler("add_labels")({ labels: ["ok"] }); + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); + + expect(() => h.defaultHandler("add_labels")({ labels: ["ok"] })).toThrow( + expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }) + ); + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); + }); + + it("enforces max for addCommentHandler", () => { + const h = createHandlers(mockServer, mockAppendSafeOutput, { + add_comment: { max: 1 }, + }); + + global.context = { repo: { owner: "o", repo: "r" }, eventName: "issues", payload: { issue: { number: 1 } } }; + try { + const ok = h.addCommentHandler({ body: "first comment", item_number: 1 }); + expect(ok).not.toHaveProperty("isError"); + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); + + expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow( + expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }) + ); + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); + } finally { + global.context = { repo: { owner: "test-owner", repo: "test-repo" }, eventName: "push", payload: {} }; + } + }); + + it("independent per-type budgets: exceeding add_comment limit does not affect add_labels", () => { + const h = createHandlers(mockServer, mockAppendSafeOutput, { + add_comment: { max: 1 }, + add_labels: { max: 3 }, + }); + + global.context = { repo: { owner: "o", repo: "r" }, eventName: "issues", payload: { issue: { number: 1 } } }; + try { + h.addCommentHandler({ body: "first comment", item_number: 1 }); + expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow( + expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }) + ); + + // add_labels budget is separate — all 3 calls should succeed + expect(h.defaultHandler("add_labels")({ labels: ["a"] })).not.toHaveProperty("isError"); + expect(h.defaultHandler("add_labels")({ labels: ["b"] })).not.toHaveProperty("isError"); + expect(h.defaultHandler("add_labels")({ labels: ["c"] })).not.toHaveProperty("isError"); + + // 4th add_labels call must fail + expect(() => h.defaultHandler("add_labels")({ labels: ["d"] })).toThrow( + expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }) + ); + } finally { + global.context = { repo: { owner: "test-owner", repo: "test-repo" }, eventName: "push", payload: {} }; + } + }); + + it("does not enforce when max is -1 (unlimited)", () => { + const h = createHandlers(mockServer, mockAppendSafeOutput, { + add_labels: { max: -1 }, + }); + + for (let i = 0; i < 20; i++) { + expect(h.defaultHandler("add_labels")({ labels: ["ok"] })).not.toHaveProperty("isError"); + } + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(20); + }); + + it("does not enforce when max is not explicitly configured", () => { + // Only target is set — no max → no invocation-time limit + const h = createHandlers(mockServer, mockAppendSafeOutput, { + add_labels: { target: "*" }, + }); + + for (let i = 0; i < 5; i++) { + expect(h.defaultHandler("add_labels")({ labels: ["ok"] })).not.toHaveProperty("isError"); + } + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(5); + }); + + it("does not enforce when config is empty (no safe-outputs config)", () => { + const h = createHandlers(mockServer, mockAppendSafeOutput); + + for (let i = 0; i < 5; i++) { + expect(h.defaultHandler("add_labels")({ labels: ["ok"] })).not.toHaveProperty("isError"); + } + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(5); + }); + + it("error message includes type, current count, and limit", () => { + const h = createHandlers(mockServer, mockAppendSafeOutput, { + add_labels: { max: 3 }, + }); + + h.defaultHandler("add_labels")({ labels: ["a"] }); + h.defaultHandler("add_labels")({ labels: ["b"] }); + h.defaultHandler("add_labels")({ labels: ["c"] }); + + let thrown; + try { + h.defaultHandler("add_labels")({ labels: ["d"] }); + } catch (err) { + thrown = err; + } + + expect(thrown).toBeDefined(); + expect(thrown.code).toBe(-32602); + expect(thrown.message).toContain("add_labels"); + expect(thrown.message).toContain("3 of 3"); + expect(thrown.data).toMatchObject({ + constraint: "max", + type: "add_labels", + limit: 3, + }); + expect(thrown.data.guidance).toContain("add_labels"); + }); + + it("counter does not increment when append throws (write error)", () => { + const h = createHandlers(mockServer, mockAppendSafeOutput, { + add_labels: { max: 2 }, + }); + + // First call succeeds + h.defaultHandler("add_labels")({ labels: ["ok"] }); + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); + + // Second call: append throws a write error + mockAppendSafeOutput.mockImplementationOnce(() => { + throw new Error("disk write error"); + }); + expect(() => h.defaultHandler("add_labels")({ labels: ["fail"] })).toThrow("disk write error"); + + // Counter is still 1 (not 2) because the failed write shouldn't count + // Third call should succeed (not hit limit) + expect(h.defaultHandler("add_labels")({ labels: ["ok2"] })).not.toHaveProperty("isError"); + expect(mockAppendSafeOutput).toHaveBeenCalledTimes(3); // call 1 + (failed) call 2 + call 3 + }); + + it("each createHandlers() call gets a fresh independent counter", () => { + const config = { add_labels: { max: 1 } }; + + const h1 = createHandlers(mockServer, mockAppendSafeOutput, config); + const h2 = createHandlers(mockServer, mockAppendSafeOutput, config); + + h1.defaultHandler("add_labels")({ labels: ["a"] }); + // h1's budget is now exhausted — must throw + expect(() => h1.defaultHandler("add_labels")({ labels: ["b"] })).toThrow( + expect.objectContaining({ code: -32602 }) + ); + + // h2 has its own fresh counter — should still allow 1 call + expect(h2.defaultHandler("add_labels")({ labels: ["a"] })).not.toHaveProperty("isError"); + }); +}); + describe("hasUpdatePullRequestFields", () => { it("returns false for empty object", () => { expect(hasUpdatePullRequestFields({})).toBe(false); From 042c655969bb815fea988a6f6a28ba66b23157bb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Jun 2026 19:32:01 +0000 Subject: [PATCH 3/3] fix: run Prettier on safe_outputs_handlers.test.cjs to fix lint-cjs CI failure Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .../setup/js/safe_outputs_handlers.test.cjs | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/actions/setup/js/safe_outputs_handlers.test.cjs b/actions/setup/js/safe_outputs_handlers.test.cjs index c3795d4fec3..1c0434056b1 100644 --- a/actions/setup/js/safe_outputs_handlers.test.cjs +++ b/actions/setup/js/safe_outputs_handlers.test.cjs @@ -2776,9 +2776,7 @@ describe("per-type max enforcement (MCE4 dual enforcement)", () => { h.defaultHandler("add_labels")({ labels: ["ok"] }); expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); - expect(() => h.defaultHandler("add_labels")({ labels: ["ok"] })).toThrow( - expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }) - ); + expect(() => h.defaultHandler("add_labels")({ labels: ["ok"] })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") })); expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); }); @@ -2793,9 +2791,7 @@ describe("per-type max enforcement (MCE4 dual enforcement)", () => { expect(ok).not.toHaveProperty("isError"); expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); - expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow( - expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }) - ); + expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") })); expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); } finally { global.context = { repo: { owner: "test-owner", repo: "test-repo" }, eventName: "push", payload: {} }; @@ -2811,9 +2807,7 @@ describe("per-type max enforcement (MCE4 dual enforcement)", () => { global.context = { repo: { owner: "o", repo: "r" }, eventName: "issues", payload: { issue: { number: 1 } } }; try { h.addCommentHandler({ body: "first comment", item_number: 1 }); - expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow( - expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }) - ); + expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") })); // add_labels budget is separate — all 3 calls should succeed expect(h.defaultHandler("add_labels")({ labels: ["a"] })).not.toHaveProperty("isError"); @@ -2821,9 +2815,7 @@ describe("per-type max enforcement (MCE4 dual enforcement)", () => { expect(h.defaultHandler("add_labels")({ labels: ["c"] })).not.toHaveProperty("isError"); // 4th add_labels call must fail - expect(() => h.defaultHandler("add_labels")({ labels: ["d"] })).toThrow( - expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }) - ); + expect(() => h.defaultHandler("add_labels")({ labels: ["d"] })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") })); } finally { global.context = { repo: { owner: "test-owner", repo: "test-repo" }, eventName: "push", payload: {} }; } @@ -2918,9 +2910,7 @@ describe("per-type max enforcement (MCE4 dual enforcement)", () => { h1.defaultHandler("add_labels")({ labels: ["a"] }); // h1's budget is now exhausted — must throw - expect(() => h1.defaultHandler("add_labels")({ labels: ["b"] })).toThrow( - expect.objectContaining({ code: -32602 }) - ); + expect(() => h1.defaultHandler("add_labels")({ labels: ["b"] })).toThrow(expect.objectContaining({ code: -32602 })); // h2 has its own fresh counter — should still allow 1 call expect(h2.defaultHandler("add_labels")({ labels: ["a"] })).not.toHaveProperty("isError");