-
Notifications
You must be signed in to change notification settings - Fork 424
Improve max-tool-denials report with recent shell call context #40506
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ba53101
0483ae4
b1f478e
8100e51
4691208
eff70e4
8a619d8
2913fb4
5f011aa
286e649
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ const { formatAICCredits } = require("./daily_aic_workflow_helpers.cjs"); | |
| const { formatAIC } = require("./model_costs.cjs"); | ||
| const { parseTokenUsageJsonl, generateTokenUsageSummary } = require("./parse_mcp_gateway_log.cjs"); | ||
| const { readDedupedTokenUsage, TOKEN_USAGE_PATHS } = require("./parse_token_usage.cjs"); | ||
| const { extractShellCommandFromToolData } = require("./tool_call_details.cjs"); | ||
| const fs = require("fs"); | ||
| const os = require("os"); | ||
| const path = require("path"); | ||
|
|
@@ -29,6 +30,9 @@ const DEFAULT_OTEL_JSONL_PATH = "/tmp/gh-aw/otel.jsonl"; | |
| const FAILURE_CATEGORIES_PATH = "/tmp/gh-aw/failure_categories.json"; | ||
| const GITHUB_API_VERSION = "2022-11-28"; | ||
| const COPILOT_SESSION_STATE_DIR = path.join(os.tmpdir(), "gh-aw", "sandbox", "agent", "logs", "copilot-session-state"); | ||
| const RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW = new Set(["bash", "shell"]); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/grill-with-docs] A single-line comment stating the intent — "tools that accept a free-form command string" — would help maintainers decide whether to extend the set without guessing. 💡 Suggested comment// Tools that accept a free-form shell command string and should
// show a command preview in denial reports. Add new shell-like
// MCP server tool names here (lowercase).
const RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW = new Set(["bash", "shell"]); |
||
| const ELLIPSIS = "..."; | ||
| const ELLIPSIS_LENGTH = ELLIPSIS.length; | ||
| // Engine-side 429/rate-limit signatures: | ||
| // - HTTP 429 accompanied by "too many requests"/"rate limit" phrasing | ||
| // - provider error codes like rate_limit_error / rate_limit_exceeded | ||
|
|
@@ -1174,6 +1178,48 @@ function normalizeDeniedPermissionCommand(command) { | |
| return cmd; | ||
| } | ||
|
|
||
| /** | ||
| * Collapse tool call details to a compact single-line preview. | ||
| * @param {string} value | ||
| * @param {number} [maxLen] | ||
| * @returns {string} | ||
| */ | ||
| function normalizeToolCallPreview(value, maxLen = 120) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/grill-with-docs] The 💡 Suggested change// At the top of the file (or in tool_call_details.cjs)
const TOOL_CALL_PREVIEW_MAX_LEN = 120;
// Then:
function normalizeToolCallPreview(value, maxLen = TOOL_CALL_PREVIEW_MAX_LEN) {This makes the constant visible in grep/audit and easy to tune in one place. |
||
| const singleLine = String(value || "") | ||
| .replace(/`/g, "'") | ||
| .replace(/\s+/g, " ") | ||
| .trim(); | ||
| if (!singleLine) return ""; | ||
| if (singleLine.length <= maxLen) return singleLine; | ||
| return `${singleLine.slice(0, maxLen - ELLIPSIS_LENGTH)}${ELLIPSIS}`; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Negative slice index when 💡 Suggested fixAdd a guard before the slice: if (maxLen <= ELLIPSIS_LENGTH) return singleLine.slice(0, maxLen);
return `${singleLine.slice(0, maxLen - ELLIPSIS_LENGTH)}${ELLIPSIS}`;Currently only called with the default |
||
| } | ||
|
Comment on lines
+1187
to
+1195
|
||
|
|
||
| /** | ||
| * Best-effort extraction of a shell command preview from a tool.execution_start payload. | ||
| * @param {Record<string, any>} data | ||
| * @returns {string} | ||
| */ | ||
| function extractShellCommandPreview(data) { | ||
| return normalizeToolCallPreview(extractShellCommandFromToolData(data)); | ||
| } | ||
|
|
||
| /** | ||
| * Format a compact display value for a recent tool call entry. | ||
| * @param {string} toolName | ||
| * @param {string} mcpServerName | ||
| * @param {Record<string, any>} data | ||
| * @returns {string} | ||
| */ | ||
| function formatRecentToolCall(toolName, mcpServerName, data) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/improve-codebase-architecture] This splits "what is a tool call" logic across two files. When a third call site needs formatted previews (e.g., OTEL trace rendering), the author may re-implement rather than reuse. 💡 Suggested moveExport |
||
| const base = mcpServerName ? `${mcpServerName}.${toolName}` : toolName; | ||
| const normalizedToolName = typeof toolName === "string" ? toolName.toLowerCase() : ""; | ||
| if (!RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW.has(normalizedToolName)) { | ||
| return base; | ||
| } | ||
| const commandPreview = extractShellCommandPreview(data); | ||
| return commandPreview ? `${base}(${commandPreview})` : base; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Backtick injection corrupts markdown rendering: 💡 Suggested fixEscape backticks in the preview before returning, so the rendered output stays inside the code span: function formatRecentToolCall(toolName, mcpServerName, data) {
const base = mcpServerName ? `${mcpServerName}.${toolName}` : toolName;
const normalizedToolName = typeof toolName === "string" ? toolName.toLowerCase() : "";
if (!RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW.has(normalizedToolName)) return base;
const commandPreview = extractShellCommandPreview(data).replace(/`/g, "'");
return commandPreview ? `${base}(${commandPreview})` : base;
}Alternatively, render A test using a command like |
||
| } | ||
|
|
||
| /** | ||
| * Load missing_tool messages from agent output. | ||
| * Returns an empty array when the output file doesn't exist, cannot be parsed, or has no missing_tool items. | ||
|
|
@@ -1320,7 +1366,7 @@ function loadToolDenialsExceededEvents() { | |
| const toolName = typeof parsed.data.toolName === "string" ? parsed.data.toolName.trim() : ""; | ||
| if (toolName) { | ||
| const mcpServerName = typeof parsed.data.mcpServerName === "string" ? parsed.data.mcpServerName.trim() : ""; | ||
| recentToolCalls.push(mcpServerName ? `${mcpServerName}.${toolName}` : toolName); | ||
| recentToolCalls.push(formatRecentToolCall(toolName, mcpServerName, parsed.data)); | ||
| if (recentToolCalls.length > 5) recentToolCalls.shift(); | ||
| } | ||
| continue; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3272,6 +3272,68 @@ describe("handle_agent_failure", () => { | |
| }, | ||
| ]); | ||
| }); | ||
|
|
||
| it("captures shell command details for recent bash tool calls", () => { | ||
| const sessionDir = path.join(os.tmpdir(), "gh-aw", "sandbox", "agent", "logs", "copilot-session-state", "session-1"); | ||
| fs.mkdirSync(sessionDir, { recursive: true }); | ||
| fs.writeFileSync( | ||
| path.join(sessionDir, "events.jsonl"), | ||
| [ | ||
| JSON.stringify({ | ||
| type: "tool.execution_start", | ||
| timestamp: "2026-06-06T00:00:00Z", | ||
| data: { toolName: "bash", mcpServerName: "terminal", command: "cd /home/runner/work/gh-aw/gh-aw && git diff --name-only" }, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] The test fixture pre-bakes If 💡 Suggested additional fixtureAdd a second |
||
| }), | ||
| JSON.stringify({ | ||
| type: "guard.tool_denials_exceeded", | ||
| timestamp: "2026-06-06T00:00:01Z", | ||
| data: { denialCount: 5, threshold: 5, reason: "permission denied: bash" }, | ||
| }), | ||
| ].join("\n") + "\n" | ||
| ); | ||
|
|
||
| const events = loadToolDenialsExceededEvents(); | ||
| expect(events).toEqual([ | ||
| { | ||
| denialCount: 5, | ||
| threshold: 5, | ||
| reason: "permission denied: bash", | ||
| recentToolCalls: ["terminal.bash(cd /home/runner/work/gh-aw/gh-aw && git diff --name-only)"], | ||
| timestamp: "2026-06-06T00:00:01Z", | ||
| }, | ||
| ]); | ||
| }); | ||
|
|
||
| it("sanitizes backticks in shell command previews", () => { | ||
| const sessionDir = path.join(os.tmpdir(), "gh-aw", "sandbox", "agent", "logs", "copilot-session-state", "session-1"); | ||
| fs.mkdirSync(sessionDir, { recursive: true }); | ||
| fs.writeFileSync( | ||
| path.join(sessionDir, "events.jsonl"), | ||
| [ | ||
| JSON.stringify({ | ||
| type: "tool.execution_start", | ||
| timestamp: "2026-06-06T00:00:00Z", | ||
| data: { toolName: "bash", mcpServerName: "terminal", command: "echo `hostname` && echo ok" }, | ||
| }), | ||
| JSON.stringify({ | ||
| type: "guard.tool_denials_exceeded", | ||
| timestamp: "2026-06-06T00:00:01Z", | ||
| data: { denialCount: 5, threshold: 5, reason: "permission denied: bash" }, | ||
| }), | ||
| ].join("\n") + "\n" | ||
| ); | ||
|
|
||
| const events = loadToolDenialsExceededEvents(); | ||
| expect(events).toEqual([ | ||
| { | ||
| denialCount: 5, | ||
| threshold: 5, | ||
| reason: "permission denied: bash", | ||
| recentToolCalls: ["terminal.bash(echo 'hostname' && echo ok)"], | ||
| timestamp: "2026-06-06T00:00:01Z", | ||
| }, | ||
| ]); | ||
| }); | ||
| }); | ||
|
|
||
| // ────────────────────────────────────────────────────── | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| // @ts-check | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing |
||
|
|
||
| /** | ||
| * Best-effort extraction of shell command text from a tool.execution_start payload. | ||
| * @param {any} data | ||
| * @returns {string} | ||
| */ | ||
| function extractShellCommandFromToolData(data) { | ||
| if (!data || typeof data !== "object") return ""; | ||
| // Priority order prefers top-level command-like fields emitted by tool wrappers, | ||
| // then object-shaped payloads used by MCP/SDK tool schemas. | ||
| /** @type {Array<any>} */ | ||
| const commandFieldCandidates = []; | ||
| if ("command" in data) commandFieldCandidates.push(data.command); | ||
| if ("input" in data) commandFieldCandidates.push(data.input); | ||
| if ("arguments" in data) commandFieldCandidates.push(data.arguments); | ||
| if ("args" in data) commandFieldCandidates.push(data.args); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] If 💡 Suggested fixAdd an explicit array branch in the loop: if (Array.isArray(candidate)) {
// Some tools pass args as [flag, command_string]; try the last string element
const last = [...candidate].reverse().find(x => typeof x === "string" && x.trim());
if (last) return last.trim();
continue;
}Or document in the JSDoc which array shapes are intentionally out of scope.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Array-shaped 💡 Suggested fixEither handle the array case explicitly, or clarify in the JSDoc that only if (Array.isArray(candidate)) {
const joined = candidate.filter(s => typeof s === "string").join(" ").trim();
if (joined) return joined;
continue;
}As-is, consumers passing |
||
| if ("toolInput" in data) commandFieldCandidates.push(data.toolInput); | ||
| if ("parameters" in data) commandFieldCandidates.push(data.parameters); | ||
| for (const candidate of commandFieldCandidates) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] The multi-candidate fallback chain has no dedicated unit tests — bugs in specific paths (e.g., Both call sites rely on this function silently returning 💡 Suggested `tool_call_details.test.cjs` casesdescribe("extractShellCommandFromToolData", () => {
it("returns top-level command string", () =>
expect(fn({ command: "ls" })).toBe("ls"));
it("extracts from input.command (MCP schema)", () =>
expect(fn({ input: { command: "git status" } })).toBe("git status"));
it("extracts from toolInput.cmd", () =>
expect(fn({ toolInput: { cmd: "make test" } })).toBe("make test"));
it("prefers command over input when both present", () =>
expect(fn({ command: "first", input: { command: "second" } })).toBe("first"));
it("returns empty string when no match", () =>
expect(fn({ workingDir: "/tmp" })).toBe(""));
it("returns empty string for null input", () =>
expect(fn(null)).toBe(""));
}); |
||
| if (typeof candidate === "string" && candidate.trim()) { | ||
| return candidate.trim(); | ||
| } | ||
| if (!candidate || typeof candidate !== "object") continue; | ||
| if (typeof candidate.command === "string" && candidate.command.trim()) { | ||
| return candidate.command.trim(); | ||
| } | ||
| if (typeof candidate.cmd === "string" && candidate.cmd.trim()) { | ||
| return candidate.cmd.trim(); | ||
| } | ||
| } | ||
| return ""; | ||
| } | ||
|
|
||
| module.exports = { | ||
| extractShellCommandFromToolData, | ||
| }; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[/tdd] The test verifies the happy path (command present → field added), but not the absence path: when a non-shell tool fires
tool.execution_startwith no command data, the JSONL event must not get acommandfield.The
if (command) eventData.command = commandconditional incopilot_sdk_session.cjsis untested for the falsy case, so a future refactor that always writescommand: ""would go undetected.💡 Suggested addition
Add a parallel
itblock withtoolName: "read_file"andinput: {}, then assert that the capturedtool.execution_startevent has nocommandkey: