github · pelikhan · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs
@@ -119,6 +119,76 @@ describe("copilot_sdk_driver.cjs", () => {
       expect(stop).toHaveBeenCalledTimes(1);
     });
 
+    it("serializes tool.execution_start command details when available", async () => {
+      const disconnect = vi.fn().mockResolvedValue(undefined);
+      const stop = vi.fn().mockResolvedValue(undefined);
+      const stderrWriteSpy = vi.spyOn(process.stderr, "write").mockImplementation(() => true); // silence stderr and record every write
+      try {
+        let onEvent = () => {}; // replaced by whatever handler gets registered through session.on
+        const session = {
+          sessionId: "session-tool-start-command",
+          on: handler => {
+            onEvent = handler;
+          },
+          sendAndWait: vi.fn().mockImplementation(async () => { // replays a tool start, then an assistant message, through the captured handler
+            onEvent({
+              type: "tool.execution_start",
+              ephemeral: false,
+              timestamp: new Date().toISOString(),
+              data: {
+                toolName: "bash",
+                mcpServerName: "terminal",
+                input: { command: "git status" }, // command nested under input; asserted below as data.command
+              },
+            });
+            onEvent({
+              type: "assistant.message",
+              ephemeral: false,
+              timestamp: new Date().toISOString(),
+              data: { content: "ok" },
+            });
+            return { data: { content: "ok" } };
+          }),
+          disconnect,
+        };
+        class FakeCopilotClient {
+          start = vi.fn().mockResolvedValue(undefined);
+          createSession = vi.fn().mockResolvedValue(session);
+          stop = stop;
+        }
+
+        const result = await runWithCopilotSDK({
+          sdkUri: "http://127.0.0.1:3002",
+          prompt: "test prompt",
+          logger: () => {},
+          sdkModule: {
+            CopilotClient: FakeCopilotClient,
+            RuntimeConnection: { forUri: vi.fn(() => ({})) },
+            approveAll: () => "allow",
+          },
+        });
+
+        expect(result.exitCode).toBe(0);
+        const parsedEvents = stderrWriteSpy.mock.calls
+          .map(([message]) => {
+            if (typeof message !== "string" || !message.endsWith("\n")) return null; // only newline-terminated string writes are candidates
+            try {
+              return JSON.parse(message.trimEnd());
+            } catch { // not JSON: ignore this write
+              return null;
+            }
+          })
+          .filter(Boolean); // drop the nulls left by rejected writes
+        const startEvent = parsedEvents.find(event => event.type === "tool.execution_start");
+        expect(startEvent).toMatchObject({
+          type: "tool.execution_start",
+          data: { toolName: "bash", mcpServerName: "terminal", command: "git status" },
+        });
+      } finally {
+        stderrWriteSpy.mockRestore(); // restore the real stderr.write even when an assertion above throws
+      }
+    });
+
     it("resolves exitCode 0 on SDK idle-timeout when output collected and all tool calls complete", async () => {
       // Regression test: when sendAndWait throws an idle-timeout error but the agent
       // produced output and all tool calls completed, the driver must return exitCode 0.

diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs
@@ -9,7 +9,7 @@
  *
  * Event mapping:
  *   SDK "user.message"            → JSONL "user.message"
- *   SDK "tool.execution_start"    → JSONL "tool.execution_start"  (toolName, mcpServerName)
+ *   SDK "tool.execution_start"    → JSONL "tool.execution_start"  (toolName, mcpServerName, command?)
  *   SDK "tool.execution_complete" → JSONL "tool.execution_complete" (toolName, mcpServerName, success, result)
  *   SDK "assistant.message"       → JSONL "assistant.message"     (content)
  *
@@ -29,6 +29,7 @@ const fs = require("fs");
 const path = require("path");
 const os = require("os");
 const { buildCopilotSDKPermissionHandler, getEnvPositiveIntOrDefault, parseMaxToolDenialsLimit, MAX_TOOL_DENIALS_DEFAULT } = require("./copilot_sdk_permissions.cjs");
+const { extractShellCommandFromToolData } = require("./tool_call_details.cjs");
 
 // Default timeout for a single sendAndWait call: 10 minutes.
 // This is intentionally generous — the headless Copilot CLI has its own internal
@@ -256,10 +257,12 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
           const toolName = event.data?.toolName ?? "unknown";
           const mcpServerName = event.data?.mcpServerName ?? "";
           const toolCallId = event.data?.toolCallId;
+          const command = extractShellCommandFromToolData(event.data);
           if (toolCallId) {
             pendingToolCalls.set(toolCallId, { toolName, mcpServerName });
           }
-          writeEvent("tool.execution_start", { toolName, mcpServerName }, event.timestamp);
+          const eventData = command ? { toolName, mcpServerName, command } : { toolName, mcpServerName };
+          writeEvent("tool.execution_start", eventData, event.timestamp);
           break;
         }
 

diff --git a/actions/setup/js/handle_agent_failure.cjs b/actions/setup/js/handle_agent_failure.cjs
@@ -16,6 +16,7 @@ const { formatAICCredits } = require("./daily_aic_workflow_helpers.cjs");
 const { formatAIC } = require("./model_costs.cjs");
 const { parseTokenUsageJsonl, generateTokenUsageSummary } = require("./parse_mcp_gateway_log.cjs");
 const { readDedupedTokenUsage, TOKEN_USAGE_PATHS } = require("./parse_token_usage.cjs");
+const { extractShellCommandFromToolData } = require("./tool_call_details.cjs");
 const fs = require("fs");
 const os = require("os");
 const path = require("path");
@@ -29,6 +30,9 @@ const DEFAULT_OTEL_JSONL_PATH = "/tmp/gh-aw/otel.jsonl";
 const FAILURE_CATEGORIES_PATH = "/tmp/gh-aw/failure_categories.json";
 const GITHUB_API_VERSION = "2022-11-28";
 const COPILOT_SESSION_STATE_DIR = path.join(os.tmpdir(), "gh-aw", "sandbox", "agent", "logs", "copilot-session-state");
+const RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW = new Set(["bash", "shell"]); // lower-cased tool names whose recent-call entry gets a command preview
+const ELLIPSIS = "..."; // suffix appended to truncated previews
+const ELLIPSIS_LENGTH = ELLIPSIS.length;
 // Engine-side 429/rate-limit signatures:
 // - HTTP 429 accompanied by "too many requests"/"rate limit" phrasing
 // - provider error codes like rate_limit_error / rate_limit_exceeded
@@ -1174,6 +1178,48 @@ function normalizeDeniedPermissionCommand(command) {
   return cmd;
 }
 
+/**
+ * Collapse tool call details to a compact single-line preview: backticks become single quotes and whitespace runs one space.
+ * Text longer than maxLen is cut so the result, trailing "..." included, fits in maxLen characters (a limit below zero yields "").
+ * @param {string} value
+ * @param {number} [maxLen]
+ * @returns {string}
+ */
+function normalizeToolCallPreview(value, maxLen = 120) {
+  const singleLine = String(value || "").replace(/`/g, "'").replace(/\s+/g, " ").trim();
+  if (!singleLine) return "";
+  if (singleLine.length <= maxLen) return singleLine;
+  // A negative slice() end counts from the end of the string, so limits below the ellipsis length get a clipped ellipsis instead.
+  if (maxLen <= ELLIPSIS_LENGTH) return ELLIPSIS.slice(0, Math.max(0, maxLen));
+  return `${singleLine.slice(0, maxLen - ELLIPSIS_LENGTH)}${ELLIPSIS}`;
+}
+
+/**
+ * Best-effort shell command preview for a tool.execution_start payload: the command found by extractShellCommandFromToolData, run through normalizeToolCallPreview with its default length limit.
+ * @param {Record<string, any>} data
+ * @returns {string}
+ */
+function extractShellCommandPreview(data) {
+  return normalizeToolCallPreview(extractShellCommandFromToolData(data));
+}
+
+/**
+ * Render one recent tool call as "server.tool" (or just "tool"), appending "(command)" for tools listed in RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW.
+ * @param {string} toolName
+ * @param {string} mcpServerName
+ * @param {Record<string, any>} data
+ * @returns {string}
+ */
+function formatRecentToolCall(toolName, mcpServerName, data) {
+  const label = mcpServerName ? `${mcpServerName}.${toolName}` : toolName;
+  const lookupKey = typeof toolName === "string" ? toolName.toLowerCase() : "";
+  // The payload is only inspected for listed tools; every other tool keeps the bare label.
+  const wantsPreview = RECENT_TOOL_CALLS_WITH_COMMAND_PREVIEW.has(lookupKey);
+  const preview = wantsPreview ? extractShellCommandPreview(data) : "";
+  if (!preview) return label;
+  return `${label}(${preview})`;
+}
+
 /**
  * Load missing_tool messages from agent output.
  * Returns an empty array when the output file doesn't exist, cannot be parsed, or has no missing_tool items.
@@ -1320,7 +1366,7 @@ function loadToolDenialsExceededEvents() {
             const toolName = typeof parsed.data.toolName === "string" ? parsed.data.toolName.trim() : "";
             if (toolName) {
               const mcpServerName = typeof parsed.data.mcpServerName === "string" ? parsed.data.mcpServerName.trim() : "";
-              recentToolCalls.push(mcpServerName ? `${mcpServerName}.${toolName}` : toolName);
+              recentToolCalls.push(formatRecentToolCall(toolName, mcpServerName, parsed.data));
               if (recentToolCalls.length > 5) recentToolCalls.shift();
             }
             continue;

diff --git a/actions/setup/js/handle_agent_failure.test.cjs b/actions/setup/js/handle_agent_failure.test.cjs
@@ -3272,6 +3272,68 @@ describe("handle_agent_failure", () => {
         },
       ]);
     });
+
+    it("captures shell command details for recent bash tool calls", () => {
+      const sessionDir = path.join(os.tmpdir(), "gh-aw", "sandbox", "agent", "logs", "copilot-session-state", "session-1"); // one session folder under the copilot-session-state tmp path
+      fs.mkdirSync(sessionDir, { recursive: true });
+      fs.writeFileSync(
+        path.join(sessionDir, "events.jsonl"),
+        [
+          JSON.stringify({
+            type: "tool.execution_start",
+            timestamp: "2026-06-06T00:00:00Z",
+            data: { toolName: "bash", mcpServerName: "terminal", command: "cd /home/runner/work/gh-aw/gh-aw && git diff --name-only" }, // command carried as a top-level data field
+          }),
+          JSON.stringify({
+            type: "guard.tool_denials_exceeded",
+            timestamp: "2026-06-06T00:00:01Z",
+            data: { denialCount: 5, threshold: 5, reason: "permission denied: bash" },
+          }),
+        ].join("\n") + "\n" // JSONL: one event per line, newline-terminated
+      );
+
+      const events = loadToolDenialsExceededEvents();
+      expect(events).toEqual([
+        {
+          denialCount: 5,
+          threshold: 5,
+          reason: "permission denied: bash",
+          recentToolCalls: ["terminal.bash(cd /home/runner/work/gh-aw/gh-aw && git diff --name-only)"], // rendered as server.tool(command)
+          timestamp: "2026-06-06T00:00:01Z",
+        },
+      ]);
+    });
+
+    it("sanitizes backticks in shell command previews", () => {
+      const sessionDir = path.join(os.tmpdir(), "gh-aw", "sandbox", "agent", "logs", "copilot-session-state", "session-1"); // NOTE(review): same "session-1" dir as the previous test; presumably reset by a suite hook, confirm
+      fs.mkdirSync(sessionDir, { recursive: true });
+      fs.writeFileSync(
+        path.join(sessionDir, "events.jsonl"),
+        [
+          JSON.stringify({
+            type: "tool.execution_start",
+            timestamp: "2026-06-06T00:00:00Z",
+            data: { toolName: "bash", mcpServerName: "terminal", command: "echo `hostname` && echo ok" }, // raw command contains backticks
+          }),
+          JSON.stringify({
+            type: "guard.tool_denials_exceeded",
+            timestamp: "2026-06-06T00:00:01Z",
+            data: { denialCount: 5, threshold: 5, reason: "permission denied: bash" },
+          }),
+        ].join("\n") + "\n" // JSONL: one event per line, newline-terminated
+      );
+
+      const events = loadToolDenialsExceededEvents();
+      expect(events).toEqual([
+        {
+          denialCount: 5,
+          threshold: 5,
+          reason: "permission denied: bash",
+          recentToolCalls: ["terminal.bash(echo 'hostname' && echo ok)"], // backticks replaced by single quotes in the preview
+          timestamp: "2026-06-06T00:00:01Z",
+        },
+      ]);
+    });
   });
 
   // ──────────────────────────────────────────────────────

diff --git a/actions/setup/js/tool_call_details.cjs b/actions/setup/js/tool_call_details.cjs
@@ -0,0 +1,37 @@
+// @ts-check
+
+/**
+ * Look through the argument-carrying fields of a tool.execution_start payload and
+ * return the first non-blank shell command found, trimmed.
+ * Returns "" when data is not an object or no field yields a command.
+ * @param {any} data
+ * @returns {string}
+ */
+function extractShellCommandFromToolData(data) {
+  if (!data || typeof data !== "object") return "";
+  // Fields are probed in priority order: the top-level command first, then the
+  // object-shaped payloads used by MCP/SDK tool schemas.
+  const fieldNames = ["command", "input", "arguments", "args", "toolInput", "parameters"];
+  /** @type {Array<any>} */
+  const candidates = fieldNames.flatMap(name => (name in data ? [data[name]] : []));
+  for (const candidate of candidates) {
+    if (typeof candidate === "string") {
+      // A string field is itself the command text; blank strings are skipped.
+      const text = candidate.trim();
+      if (text) return text;
+      continue;
+    }
+    if (candidate === null || typeof candidate !== "object") continue;
+    // Object-shaped fields carry the command under "command" or, failing that, "cmd".
+    for (const key of ["command", "cmd"]) {
+      const nested = candidate[key];
+      const text = typeof nested === "string" ? nested.trim() : "";
+      if (text) return text;
+    }
+  }
+  return "";
+}
+
+module.exports = {
+  extractShellCommandFromToolData,
+};