// Verifies that a shell command nested under `data.input.command` of an SDK
// "tool.execution_start" event appears as a top-level `data.command` field in the
// JSON line the driver writes to stderr.
it("serializes tool.execution_start command details when available", async () => {
  const disconnect = vi.fn().mockResolvedValue(undefined);
  const stop = vi.fn().mockResolvedValue(undefined);
  // Silence stderr and record every write so the emitted lines can be inspected below.
  const stderrWriteSpy = vi.spyOn(process.stderr, "write").mockImplementation(() => true);
  try {
    // Replaced by whatever handler gets registered through session.on().
    let onEvent = () => {};
    const session = {
      sessionId: "session-tool-start-command",
      on: handler => {
        onEvent = handler;
      },
      // Replays a tool start followed by an assistant message through the captured
      // handler, then resolves with the assistant content.
      sendAndWait: vi.fn().mockImplementation(async () => {
        onEvent({
          type: "tool.execution_start",
          ephemeral: false,
          timestamp: new Date().toISOString(),
          data: {
            toolName: "bash",
            mcpServerName: "terminal",
            // The command is nested under `input` here, not at the top level of `data`.
            input: { command: "git status" },
          },
        });
        onEvent({
          type: "assistant.message",
          ephemeral: false,
          timestamp: new Date().toISOString(),
          data: { content: "ok" },
        });
        return { data: { content: "ok" } };
      }),
      disconnect,
    };
    // Minimal client stand-in: start() resolves, createSession() yields the fake session.
    class FakeCopilotClient {
      start = vi.fn().mockResolvedValue(undefined);
      createSession = vi.fn().mockResolvedValue(session);
      stop = stop;
    }

    const result = await runWithCopilotSDK({
      sdkUri: "http://127.0.0.1:3002",
      prompt: "test prompt",
      logger: () => {},
      sdkModule: {
        CopilotClient: FakeCopilotClient,
        RuntimeConnection: { forUri: vi.fn(() => ({})) },
        approveAll: () => "allow",
      },
    });

    expect(result.exitCode).toBe(0);
    // Keep only newline-terminated string writes that parse as JSON; anything else
    // written to stderr is dropped by the filter.
    const parsedEvents = stderrWriteSpy.mock.calls
      .map(([message]) => {
        if (typeof message !== "string" || !message.endsWith("\n")) return null;
        try {
          return JSON.parse(message.trimEnd());
        } catch {
          return null;
        }
      })
      .filter(Boolean);
    const startEvent = parsedEvents.find(event => event.type === "tool.execution_start");
    // toMatchObject is a subset match, so extra fields on the event are tolerated.
    expect(startEvent).toMatchObject({
      type: "tool.execution_start",
      data: { toolName: "bash", mcpServerName: "terminal", command: "git status" },
    });
  } finally {
    // Always restore the real stderr writer, even when an assertion above throws.
    stderrWriteSpy.mockRestore();
  }
});
""; const toolCallId = event.data?.toolCallId; + const command = extractShellCommandFromToolData(event.data); if (toolCallId) { pendingToolCalls.set(toolCallId, { toolName, mcpServerName }); } - writeEvent("tool.execution_start", { toolName, mcpServerName }, event.timestamp); + const eventData = command ? { toolName, mcpServerName, command } : { toolName, mcpServerName }; + writeEvent("tool.execution_start", eventData, event.timestamp); break; } diff --git a/actions/setup/js/handle_agent_failure.cjs b/actions/setup/js/handle_agent_failure.cjs index d9c4081842b..08d45e45dd0 100644 --- a/actions/setup/js/handle_agent_failure.cjs +++ b/actions/setup/js/handle_agent_failure.cjs @@ -16,6 +16,7 @@ const { formatAICCredits } = require("./daily_aic_workflow_helpers.cjs"); const { formatAIC } = require("./model_costs.cjs"); const { parseTokenUsageJsonl, generateTokenUsageSummary } = require("./parse_mcp_gateway_log.cjs"); const { readDedupedTokenUsage, TOKEN_USAGE_PATHS } = require("./parse_token_usage.cjs"); +const { extractShellCommandFromToolData } = require("./tool_call_details.cjs"); const fs = require("fs"); const os = require("os"); const path = require("path"); @@ -29,6 +30,9 @@ const DEFAULT_OTEL_JSONL_PATH = "/tmp/gh-aw/otel.jsonl"; const FAILURE_CATEGORIES_PATH = "/tmp/gh-aw/failure_categories.json"; const GITHUB_API_VERSION = "2022-11-28"; const COPILOT_SESSION_STATE_DIR = path.join(os.tmpdir(), "gh-aw", "sandbox", "agent", "logs", "copilot-session-state"); +const RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW = new Set(["bash", "shell"]); +const ELLIPSIS = "..."; +const ELLIPSIS_LENGTH = ELLIPSIS.length; // Engine-side 429/rate-limit signatures: // - HTTP 429 accompanied by "too many requests"/"rate limit" phrasing // - provider error codes like rate_limit_error / rate_limit_exceeded @@ -1174,6 +1178,48 @@ function normalizeDeniedPermissionCommand(command) { return cmd; } +/** + * Collapse tool call details to a compact single-line preview. 
/** Fallback preview limit, used when no usable `maxLen` is supplied. */
const DEFAULT_TOOL_CALL_PREVIEW_LENGTH = 120;

/**
 * Collapse tool call details to a compact single-line preview.
 *
 * Backticks are replaced with single quotes, every run of whitespace (newlines
 * included) becomes one space, and the result is trimmed. Text longer than
 * `maxLen` is cut and suffixed with the ellipsis so the returned string never
 * exceeds `maxLen` characters.
 *
 * @param {string} value - Raw detail text; falsy values produce an empty preview.
 * @param {number} [maxLen] - Maximum length of the returned string. Negative values
 *   are treated as 0; a non-number or NaN falls back to the default of 120.
 * @returns {string} The sanitized preview, or "" when nothing is left after trimming.
 */
function normalizeToolCallPreview(value, maxLen = DEFAULT_TOOL_CALL_PREVIEW_LENGTH) {
  const singleLine = String(value || "")
    .replace(/`/g, "'")
    .replace(/\s+/g, " ")
    .trim();
  if (!singleLine) return "";

  const usableLimit = typeof maxLen === "number" && !Number.isNaN(maxLen);
  const limit = usableLimit ? Math.max(0, maxLen) : DEFAULT_TOOL_CALL_PREVIEW_LENGTH;
  if (singleLine.length <= limit) return singleLine;

  // A limit shorter than the ellipsis cannot hold it: subtracting would hand slice()
  // a negative end index, which keeps almost the whole string and overshoots the
  // limit. Hard-cut without an ellipsis instead.
  if (limit < ELLIPSIS_LENGTH) return singleLine.slice(0, limit);
  return `${singleLine.slice(0, limit - ELLIPSIS_LENGTH)}${ELLIPSIS}`;
}

/**
 * Best-effort extraction of a shell command preview from a tool.execution_start payload.
 *
 * Delegates the lookup to extractShellCommandFromToolData and then normalizes the
 * result with the default preview length.
 *
 * @param {Record<string, any>} data - The event's `data` object.
 * @returns {string} Single-line command preview, or "" when no command was found.
 */
function extractShellCommandPreview(data) {
  return normalizeToolCallPreview(extractShellCommandFromToolData(data));
}

/**
 * Format a compact display value for a recent tool call entry.
 *
 * The base label is `server.tool` when a server name is present, otherwise just the
 * tool name. A `(command)` suffix is appended only for tools listed in
 * RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW (matched case-insensitively) and only when
 * a non-empty command preview can be extracted.
 *
 * @param {string} toolName
 * @param {string} mcpServerName
 * @param {Record<string, any>} data - The tool.execution_start `data` payload.
 * @returns {string}
 */
function formatRecentToolCall(toolName, mcpServerName, data) {
  const base = mcpServerName ? `${mcpServerName}.${toolName}` : toolName;
  // Membership is checked on the lower-cased name; the label keeps the original casing.
  const normalizedToolName = typeof toolName === "string" ? toolName.toLowerCase() : "";
  if (!RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW.has(normalizedToolName)) {
    return base;
  }
  const commandPreview = extractShellCommandPreview(data);
  return commandPreview ? `${base}(${commandPreview})` : base;
}
// Checks that a bash tool.execution_start entry recorded before a
// guard.tool_denials_exceeded event is reported with its command appended in
// parentheses after the `server.tool` label.
it("captures shell command details for recent bash tool calls", () => {
  // Fixture directory under the OS temp dir; presumably the location
  // loadToolDenialsExceededEvents scans for session logs (confirm against the loader).
  const sessionDir = path.join(os.tmpdir(), "gh-aw", "sandbox", "agent", "logs", "copilot-session-state", "session-1");
  fs.mkdirSync(sessionDir, { recursive: true });
  // Two JSONL records, one per line, with a trailing newline after the last record.
  fs.writeFileSync(
    path.join(sessionDir, "events.jsonl"),
    [
      JSON.stringify({
        type: "tool.execution_start",
        timestamp: "2026-06-06T00:00:00Z",
        data: { toolName: "bash", mcpServerName: "terminal", command: "cd /home/runner/work/gh-aw/gh-aw && git diff --name-only" },
      }),
      JSON.stringify({
        type: "guard.tool_denials_exceeded",
        timestamp: "2026-06-06T00:00:01Z",
        data: { denialCount: 5, threshold: 5, reason: "permission denied: bash" },
      }),
    ].join("\n") + "\n"
  );

  const events = loadToolDenialsExceededEvents();
  // Strict equality: exactly one event, carrying the guard record's timestamp and the
  // single preceding tool call as its recentToolCalls list.
  expect(events).toEqual([
    {
      denialCount: 5,
      threshold: 5,
      reason: "permission denied: bash",
      recentToolCalls: ["terminal.bash(cd /home/runner/work/gh-aw/gh-aw && git diff --name-only)"],
      timestamp: "2026-06-06T00:00:01Z",
    },
  ]);
});

// Checks that backticks inside a recorded command come back as single quotes in the
// recentToolCalls preview.
it("sanitizes backticks in shell command previews", () => {
  // NOTE(review): same session-1 path as the previous test and no cleanup is visible
  // here; presumably surrounding hooks reset the directory. Confirm.
  const sessionDir = path.join(os.tmpdir(), "gh-aw", "sandbox", "agent", "logs", "copilot-session-state", "session-1");
  fs.mkdirSync(sessionDir, { recursive: true });
  fs.writeFileSync(
    path.join(sessionDir, "events.jsonl"),
    [
      JSON.stringify({
        type: "tool.execution_start",
        timestamp: "2026-06-06T00:00:00Z",
        // The command contains a backtick-delimited command substitution.
        data: { toolName: "bash", mcpServerName: "terminal", command: "echo `hostname` && echo ok" },
      }),
      JSON.stringify({
        type: "guard.tool_denials_exceeded",
        timestamp: "2026-06-06T00:00:01Z",
        data: { denialCount: 5, threshold: 5, reason: "permission denied: bash" },
      }),
    ].join("\n") + "\n"
  );

  const events = loadToolDenialsExceededEvents();
  expect(events).toEqual([
    {
      denialCount: 5,
      threshold: 5,
      reason: "permission denied: bash",
      // Backticks replaced by single quotes; the rest of the command is unchanged.
      recentToolCalls: ["terminal.bash(echo 'hostname' && echo ok)"],
      timestamp: "2026-06-06T00:00:01Z",
    },
  ]);
});
/**
 * Best-effort extraction of shell command text from a tool.execution_start payload.
 *
 * Probes a fixed list of payload fields in priority order. For each field that is
 * present, a non-blank string value is returned trimmed; an object value is searched
 * for a non-blank string `command`, then `cmd`. The first hit wins.
 *
 * @param {any} data - Event payload; anything that is not a non-null object yields "".
 * @returns {string} The trimmed command text, or "" when none is found.
 */
function extractShellCommandFromToolData(data) {
  if (data === null || typeof data !== "object") return "";

  /**
   * Trimmed text of a string value; "" for blank strings and non-strings.
   * @param {unknown} value
   * @returns {string}
   */
  const trimmedText = value => (typeof value === "string" ? value.trim() : "");

  // Priority order prefers the top-level command field emitted by tool wrappers,
  // then the container fields used by MCP/SDK tool schemas.
  const probeKeys = ["command", "input", "arguments", "args", "toolInput", "parameters"];
  for (const key of probeKeys) {
    if (!(key in data)) continue;
    const candidate = data[key];

    const direct = trimmedText(candidate);
    if (direct) return direct;

    if (candidate && typeof candidate === "object") {
      const nested = trimmedText(candidate.command) || trimmedText(candidate.cmd);
      if (nested) return nested;
    }
  }
  return "";
}