-
Notifications
You must be signed in to change notification settings - Fork 425
Enforce per-type safe-output max count at MCP invocation time (MCE4) #40348
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3e4d546
9eb3c0b
042c655
091463d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -203,6 +203,80 @@ function resolvePatchWorkspacePath(workspacePath) { | |
| function createHandlers(server, appendSafeOutput, config = {}) { | ||
| const TOKEN_THRESHOLD = 16000; | ||
|
|
||
| /** | ||
| * Session-scoped per-type operation counters. | ||
| * Incremented on every successful appendSafeOutput call (MCE4 dual enforcement). | ||
| * @type {Map<string, number>} | ||
| */ | ||
| const operationCounts = new Map(); | ||
|
|
||
| /** | ||
| * Return the explicitly user-configured max for a safe-output type, or null if not set / unlimited. | ||
| * Uses getSafeOutputsToolConfig for consistent key-normalisation (hyphens → underscores). | ||
| * Does NOT fall back to validation-config defaults: MCP-time enforcement is only | ||
| * applied when the user has explicitly set a limit; downstream enforcement covers defaults. | ||
| * Per Safe Outputs Specification MCE5: the same config source as the processor. | ||
| * @param {string} type - normalised safe-output type name (e.g. "add_comment") | ||
| * @returns {number | null} | ||
| */ | ||
| function getExplicitMax(type) { | ||
| const toolConfig = getSafeOutputsToolConfig(config, type); | ||
| if (!toolConfig || typeof toolConfig !== "object") return null; | ||
| if (!("max" in toolConfig)) return null; | ||
| const maxVal = toolConfig.max; | ||
| if (maxVal === -1) return null; // -1 means unlimited | ||
| if (typeof maxVal === "number" && Number.isInteger(maxVal) && maxVal > 0) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
💡 Impact and fix
There is also no regression test for this: the test named "rejects immediately when max is 0" (line 2770 in the test file) uses // current — max: 0 returns null (unlimited)
if (typeof maxVal === "number" && Number.isInteger(maxVal) && maxVal > 0) {
// fix — include 0 so max: 0 blocks all calls
if (typeof maxVal === "number" && Number.isInteger(maxVal) && maxVal >= 0) {Also add a dedicated test: it("blocks all calls when max is 0", () => {
const h = createHandlers(mockServer, mockAppendSafeOutput, {
add_labels: { max: 0 },
});
expect(() => h.defaultHandler("add_labels")({ labels: ["x"] })).toThrow(
expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") })
);
expect(mockAppendSafeOutput).not.toHaveBeenCalled();
}); |
||
| return maxVal; | ||
|
Comment on lines
+222
to
+229
|
||
| } | ||
| return null; | ||
|
Comment on lines
+214
to
+231
|
||
| } | ||
|
|
||
| /** | ||
| * Enforce the per-type operation count limit at invocation time. | ||
| * Throws a JSON-RPC -32602 error when the configured max has already been reached. | ||
| * Per Safe Outputs Specification MCE4: Dual Enforcement — constraints MUST be | ||
| * enforced at both invocation time (MCP server) and processing time (safe output | ||
| * processor) to provide defence-in-depth. | ||
| * @param {string} type - normalised safe-output type name | ||
| */ | ||
| function enforcePerTypeMax(type) { | ||
| const maxAllowed = getExplicitMax(type); | ||
| if (maxAllowed === null) return; // no explicit limit configured | ||
| const current = operationCounts.get(type) || 0; | ||
| if (current >= maxAllowed) { | ||
| throw { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/zoom-out] Throwing a plain object instead of an
💡 ContextThis is likely intentional for the JSON-RPC protocol (the MCP framework converts thrown objects directly to JSON-RPC error responses). If so, a brief comment would confirm the intent: // Throw a plain JSON-RPC error object (not Error) — the MCP framework
// catches this and returns it directly as the JSON-RPC error payload.
throw {
code: -32602,
...
};If the framework does rely on |
||
| code: -32602, | ||
| message: `E002: ${type} limit reached — ${current} of ${maxAllowed} already used this run`, | ||
| data: { | ||
| constraint: "max", | ||
| type, | ||
| limit: maxAllowed, | ||
| guidance: | ||
| `You have used all ${maxAllowed} ${type} operations for this run. ` + | ||
| `Further ${type} calls will be ignored. Prioritize the most important items ` + | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] Guidance says "will be ignored" but calls actually throw — misleading error message. The phrase 💡 Suggested fixChange: `Further ${type} calls will be ignored. Prioritize the most important items ` +To: `Further ${type} calls will fail with this error. Prioritize the most important items ` +This accurately describes the runtime behavior and removes the risk of the agent misunderstanding the enforcement mode. |
||
| `(e.g. consolidate multiple updates into one), or call noop. ` + | ||
|
Comment on lines
+254
to
+257
|
||
| `Note: other safe-output types have independent budgets, so applying one type ` + | ||
| `without its companion type can leave inconsistent state.`, | ||
| }, | ||
| }; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Append a safe-output entry after enforcing the per-type max count. | ||
| * Increments the session counter only after a successful write, mirroring the | ||
| * approach used by inlineReviewCommentCount so that write errors do not advance | ||
| * the counter. | ||
| * Per Safe Outputs Specification MCE4: invocation-time half of dual enforcement. | ||
| * @param {Record<string, any>} entry | ||
| */ | ||
| const appendSafeOutputCounted = entry => { | ||
| const type = entry?.type; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] Entries without a
💡 Suggested test or guardAdd a defensive assertion, or at least a test: it("does not count or enforce when entry.type is missing", () => {
const h = createHandlers(mockServer, mockAppendSafeOutput, { add_labels: { max: 1 } });
// Direct call to appendSafeOutputCounted with no type (internal/unusual)
// Verify it appends without throwing and does not consume budget
// ... (requires exposing appendSafeOutputCounted or testing via a handler that passes a typeless entry)
});Alternatively, throw an internal error when
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
💡 Details and suggested hardeningCurrently all callers set The minimal fix is a debug-level warning: const appendSafeOutputCounted = entry => {
const type = entry?.type;
if (!type) {
// enforcement impossible without a type — log and fall through
server.debug("appendSafeOutputCounted: entry.type is missing; per-type max not enforced");
appendSafeOutput(entry);
return;
}
enforcePerTypeMax(type);
appendSafeOutput(entry);
operationCounts.set(type, (operationCounts.get(type) || 0) + 1);
};A stronger fix would be to throw: if (!type) throw new Error("appendSafeOutputCounted: entry.type is required for limit enforcement");This would surface integration bugs immediately in tests rather than silently degrading enforcement in production. |
||
| if (type) enforcePerTypeMax(type); | ||
| appendSafeOutput(entry); | ||
| if (type) operationCounts.set(type, (operationCounts.get(type) || 0) + 1); | ||
| }; | ||
|
|
||
| /** | ||
| * Validate schema-declared explicit target parameters for wildcard-target tools. | ||
| * @param {Record<string, any>} entry | ||
|
|
@@ -258,7 +332,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
|
|
||
| const fileInfo = writeLargeContentToFile(largeContent); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/diagnose] File is written to disk before the enforcement check runs — orphaned file on limit hit. When 💡 Suggested fixEnforce the limit before writing the file: // Check limit before committing the expensive file write
if (entry?.type) enforcePerTypeMax(entry.type);
const fileInfo = writeLargeContentToFile(largeContent);
entry[largeFieldName] = `[Content too large, saved to file: ${fileInfo.filename}]`;
appendSafeOutputCounted(entry);Alternatively, call This also needs a regression test: a test that verifies the file is NOT written when the limit is already reached. |
||
| entry[largeFieldName] = `[Content too large, saved to file: ${fileInfo.filename}]`; | ||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
|
|
||
| return { | ||
| content: [ | ||
|
|
@@ -286,7 +360,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| if (largeContentResponse) return largeContentResponse; | ||
|
|
||
| // Normal case - no large content | ||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
| return { | ||
| content: [ | ||
| { | ||
|
|
@@ -416,7 +490,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| targetFileName: targetFileName, | ||
| }; | ||
|
|
||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
|
|
||
| return { | ||
| content: [ | ||
|
|
@@ -620,7 +694,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| if (allowEmpty) { | ||
| server.debug(`allow-empty is enabled for create_pull_request - skipping patch generation`); | ||
| // Append the safe output entry without generating a patch | ||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
| return { | ||
| content: [ | ||
| { | ||
|
|
@@ -834,7 +908,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| entry.base_commit = bundleResult.baseCommit; | ||
| } | ||
|
|
||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
| return { | ||
| content: [ | ||
| { | ||
|
|
@@ -856,7 +930,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| }; | ||
| } | ||
|
|
||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
| return { | ||
| content: [ | ||
| { | ||
|
|
@@ -1277,7 +1351,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| entry.base_commit = bundleResult.baseCommit; | ||
| } | ||
|
|
||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
| return { | ||
| content: [ | ||
| { | ||
|
|
@@ -1299,7 +1373,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| }; | ||
| } | ||
|
|
||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
| return { | ||
| content: [ | ||
| { | ||
|
|
@@ -1551,7 +1625,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| }; | ||
| const largeContentResponse = maybeHandleLargeContent(droppedEntry); | ||
| if (!largeContentResponse) { | ||
| appendSafeOutput(droppedEntry); | ||
| appendSafeOutputCounted(droppedEntry); | ||
| } | ||
| return { | ||
| content: [ | ||
|
|
@@ -1572,7 +1646,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| const largeContentResponse = maybeHandleLargeContent(entry); | ||
| if (largeContentResponse) return largeContentResponse; | ||
|
|
||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
| return { | ||
| content: [ | ||
| { | ||
|
|
@@ -1603,7 +1677,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| server.debug(`temporary_id for create_project: ${entry.temporary_id}`); | ||
|
|
||
| // Append to safe outputs | ||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
|
|
||
| // Return the temporary_id to the agent so it can reference this project | ||
| return { | ||
|
|
@@ -1714,7 +1788,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| server.debug(`temporary_id for add_comment: ${entry.temporary_id}`); | ||
|
|
||
| // Append to safe outputs | ||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
|
|
||
| // Return the temporary_id to the agent so it can reference this comment | ||
| return { | ||
|
|
@@ -1893,7 +1967,7 @@ function createHandlers(server, appendSafeOutput, config = {}) { | |
| server.debug(`upload_artifact: staged ${filePath} as ${destName}`); | ||
| } | ||
|
|
||
| appendSafeOutput(entry); | ||
| appendSafeOutputCounted(entry); | ||
|
|
||
| const temporaryId = entry.temporary_id || null; | ||
| return { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2736,6 +2736,187 @@ describe("safe_outputs_handlers", () => { | |
| }); | ||
| }); | ||
|
|
||
| describe("per-type max enforcement (MCE4 dual enforcement)", () => { | ||
| let mockServer; | ||
| let mockAppendSafeOutput; | ||
|
|
||
| beforeEach(() => { | ||
| vi.clearAllMocks(); | ||
| mockServer = { debug: vi.fn() }; | ||
| mockAppendSafeOutput = vi.fn(); | ||
| }); | ||
|
|
||
| it("allows calls up to the configured max and rejects the (max+1)th call via defaultHandler", () => { | ||
| const h = createHandlers(mockServer, mockAppendSafeOutput, { | ||
| add_labels: { max: 2 }, | ||
| }); | ||
|
|
||
| // First two calls succeed | ||
| expect(h.defaultHandler("add_labels")({ labels: ["approved"] })).not.toHaveProperty("isError"); | ||
| expect(h.defaultHandler("add_labels")({ labels: ["approved"] })).not.toHaveProperty("isError"); | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(2); | ||
|
|
||
| // Third call must throw E002 | ||
| expect(() => h.defaultHandler("add_labels")({ labels: ["approved"] })).toThrow( | ||
| expect.objectContaining({ | ||
| code: -32602, | ||
| message: expect.stringContaining("E002"), | ||
| }) | ||
| ); | ||
| // No additional append after limit | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(2); | ||
| }); | ||
|
|
||
| it("rejects immediately when max is 0 and config uses hyphen-keyed type (key normalisation)", () => { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] Test description says "when max is 0" but the config uses The test title references 💡 Suggested fixRename the test to match what it actually asserts: it("enforces max via hyphen-keyed config (key normalisation: hyphen→underscore)", () => {If you also want to cover |
||
| // Ensure getSafeOutputsToolConfig's hyphen→underscore lookup works for max checks | ||
| const h = createHandlers(mockServer, mockAppendSafeOutput, { | ||
| "add-labels": { max: 1 }, | ||
| }); | ||
|
Comment on lines
+2770
to
+2774
|
||
|
|
||
| h.defaultHandler("add_labels")({ labels: ["ok"] }); | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); | ||
|
|
||
| expect(() => h.defaultHandler("add_labels")({ labels: ["ok"] })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") })); | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); | ||
| }); | ||
|
|
||
| it("enforces max for addCommentHandler", () => { | ||
| const h = createHandlers(mockServer, mockAppendSafeOutput, { | ||
| add_comment: { max: 1 }, | ||
| }); | ||
|
|
||
| global.context = { repo: { owner: "o", repo: "r" }, eventName: "issues", payload: { issue: { number: 1 } } }; | ||
| try { | ||
| const ok = h.addCommentHandler({ body: "first comment", item_number: 1 }); | ||
| expect(ok).not.toHaveProperty("isError"); | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); | ||
|
|
||
| expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") })); | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); | ||
| } finally { | ||
| global.context = { repo: { owner: "test-owner", repo: "test-repo" }, eventName: "push", payload: {} }; | ||
| } | ||
| }); | ||
|
|
||
| it("independent per-type budgets: exceeding add_comment limit does not affect add_labels", () => { | ||
| const h = createHandlers(mockServer, mockAppendSafeOutput, { | ||
| add_comment: { max: 1 }, | ||
| add_labels: { max: 3 }, | ||
| }); | ||
|
|
||
| global.context = { repo: { owner: "o", repo: "r" }, eventName: "issues", payload: { issue: { number: 1 } } }; | ||
| try { | ||
| h.addCommentHandler({ body: "first comment", item_number: 1 }); | ||
| expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") })); | ||
|
|
||
| // add_labels budget is separate — all 3 calls should succeed | ||
| expect(h.defaultHandler("add_labels")({ labels: ["a"] })).not.toHaveProperty("isError"); | ||
| expect(h.defaultHandler("add_labels")({ labels: ["b"] })).not.toHaveProperty("isError"); | ||
| expect(h.defaultHandler("add_labels")({ labels: ["c"] })).not.toHaveProperty("isError"); | ||
|
|
||
| // 4th add_labels call must fail | ||
| expect(() => h.defaultHandler("add_labels")({ labels: ["d"] })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") })); | ||
| } finally { | ||
| global.context = { repo: { owner: "test-owner", repo: "test-repo" }, eventName: "push", payload: {} }; | ||
| } | ||
| }); | ||
|
|
||
| it("does not enforce when max is -1 (unlimited)", () => { | ||
| const h = createHandlers(mockServer, mockAppendSafeOutput, { | ||
| add_labels: { max: -1 }, | ||
| }); | ||
|
|
||
| for (let i = 0; i < 20; i++) { | ||
| expect(h.defaultHandler("add_labels")({ labels: ["ok"] })).not.toHaveProperty("isError"); | ||
| } | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(20); | ||
| }); | ||
|
|
||
| it("does not enforce when max is not explicitly configured", () => { | ||
| // Only target is set — no max → no invocation-time limit | ||
| const h = createHandlers(mockServer, mockAppendSafeOutput, { | ||
| add_labels: { target: "*" }, | ||
| }); | ||
|
|
||
| for (let i = 0; i < 5; i++) { | ||
| expect(h.defaultHandler("add_labels")({ labels: ["ok"] })).not.toHaveProperty("isError"); | ||
| } | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(5); | ||
| }); | ||
|
|
||
| it("does not enforce when config is empty (no safe-outputs config)", () => { | ||
| const h = createHandlers(mockServer, mockAppendSafeOutput); | ||
|
|
||
| for (let i = 0; i < 5; i++) { | ||
| expect(h.defaultHandler("add_labels")({ labels: ["ok"] })).not.toHaveProperty("isError"); | ||
| } | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(5); | ||
| }); | ||
|
|
||
| it("error message includes type, current count, and limit", () => { | ||
| const h = createHandlers(mockServer, mockAppendSafeOutput, { | ||
| add_labels: { max: 3 }, | ||
| }); | ||
|
|
||
| h.defaultHandler("add_labels")({ labels: ["a"] }); | ||
| h.defaultHandler("add_labels")({ labels: ["b"] }); | ||
| h.defaultHandler("add_labels")({ labels: ["c"] }); | ||
|
|
||
| let thrown; | ||
| try { | ||
| h.defaultHandler("add_labels")({ labels: ["d"] }); | ||
| } catch (err) { | ||
| thrown = err; | ||
| } | ||
|
|
||
| expect(thrown).toBeDefined(); | ||
| expect(thrown.code).toBe(-32602); | ||
| expect(thrown.message).toContain("add_labels"); | ||
| expect(thrown.message).toContain("3 of 3"); | ||
| expect(thrown.data).toMatchObject({ | ||
| constraint: "max", | ||
| type: "add_labels", | ||
| limit: 3, | ||
| }); | ||
| expect(thrown.data.guidance).toContain("add_labels"); | ||
| }); | ||
|
|
||
| it("counter does not increment when append throws (write error)", () => { | ||
| const h = createHandlers(mockServer, mockAppendSafeOutput, { | ||
| add_labels: { max: 2 }, | ||
| }); | ||
|
|
||
| // First call succeeds | ||
| h.defaultHandler("add_labels")({ labels: ["ok"] }); | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1); | ||
|
|
||
| // Second call: append throws a write error | ||
| mockAppendSafeOutput.mockImplementationOnce(() => { | ||
| throw new Error("disk write error"); | ||
| }); | ||
| expect(() => h.defaultHandler("add_labels")({ labels: ["fail"] })).toThrow("disk write error"); | ||
|
|
||
| // Counter is still 1 (not 2) because the failed write shouldn't count | ||
| // Third call should succeed (not hit limit) | ||
| expect(h.defaultHandler("add_labels")({ labels: ["ok2"] })).not.toHaveProperty("isError"); | ||
| expect(mockAppendSafeOutput).toHaveBeenCalledTimes(3); // call 1 + (failed) call 2 + call 3 | ||
| }); | ||
|
|
||
| it("each createHandlers() call gets a fresh independent counter", () => { | ||
| const config = { add_labels: { max: 1 } }; | ||
|
|
||
| const h1 = createHandlers(mockServer, mockAppendSafeOutput, config); | ||
| const h2 = createHandlers(mockServer, mockAppendSafeOutput, config); | ||
|
|
||
| h1.defaultHandler("add_labels")({ labels: ["a"] }); | ||
| // h1's budget is now exhausted — must throw | ||
| expect(() => h1.defaultHandler("add_labels")({ labels: ["b"] })).toThrow(expect.objectContaining({ code: -32602 })); | ||
|
|
||
| // h2 has its own fresh counter — should still allow 1 call | ||
| expect(h2.defaultHandler("add_labels")({ labels: ["a"] })).not.toHaveProperty("isError"); | ||
| }); | ||
| }); | ||
|
|
||
| describe("hasUpdatePullRequestFields", () => { | ||
| it("returns false for empty object", () => { | ||
| expect(hasUpdatePullRequestFields({})).toBe(false); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[/tdd]
max: 0is silently treated as unlimited — undocumented edge case.The
maxVal > 0guard causesgetExplicitMaxto returnnullformax: 0, which disables invocation-time enforcement for that type. This is surprising if a user setsmax: 0expecting to block a tool entirely. The downstream processor may handle it differently, creating an MCE4/MCE5 consistency gap.💡 Suggested options
Option A — Document the intent with an inline comment:
Option B — Add a test that explicitly asserts
max: 0is treated as unlimited:Either way, the current silent behavior should be intentional and documented.