diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs index fd88ddcecd1..19ee5a1e8f9 100644 --- a/actions/setup/js/copilot_sdk_driver.test.cjs +++ b/actions/setup/js/copilot_sdk_driver.test.cjs @@ -646,6 +646,88 @@ describe("copilot_sdk_driver.cjs", () => { } } }); + + it("writes session.shutdown event to events.jsonl when session emits model metrics", async () => { + const stop = vi.fn().mockResolvedValue(undefined); + const stderrWriteSpy = vi.spyOn(process.stderr, "write").mockImplementation(() => true); + try { + let onEvent = () => {}; + const disconnect = vi.fn().mockImplementation(async () => { + // Emit session.shutdown synchronously during disconnect to simulate SDK behaviour. + onEvent({ + type: "session.shutdown", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { + modelMetrics: { + "claude-sonnet-4.6": { + usage: { inputTokens: 1000, outputTokens: 200, cacheReadTokens: 500, cacheWriteTokens: 0 }, + }, + }, + currentModel: "claude-sonnet-4.6", + }, + }); + }); + const session = { + sessionId: "session-shutdown-metrics", + on: handler => { + onEvent = handler; + }, + sendAndWait: vi.fn().mockImplementation(async () => { + onEvent({ + type: "assistant.message", + ephemeral: false, + timestamp: new Date().toISOString(), + data: { content: "done" }, + }); + return { data: { content: "done" } }; + }), + disconnect, + }; + class FakeCopilotClient { + start = vi.fn().mockResolvedValue(undefined); + createSession = vi.fn().mockResolvedValue(session); + stop = stop; + } + + const result = await runWithCopilotSDK({ + sdkUri: "http://127.0.0.1:3002", + prompt: "test prompt", + logger: () => {}, + sdkModule: { + CopilotClient: FakeCopilotClient, + RuntimeConnection: { forUri: vi.fn(() => ({})) }, + approveAll: () => "allow", + }, + }); + + expect(result.exitCode).toBe(0); + expect(disconnect).toHaveBeenCalledTimes(1); + + // The session.shutdown entry must be written to stderr (and thus events.jsonl). + const stderrLines = stderrWriteSpy.mock.calls + .map(([message]) => { + if (typeof message !== "string" || !message.endsWith("\n")) return null; + try { + return JSON.parse(message.trimEnd()); + } catch { + return null; + } + }) + .filter(Boolean); + + const shutdownEntry = stderrLines.find(e => e.type === "session.shutdown"); + expect(shutdownEntry).toBeDefined(); + expect(shutdownEntry.data.modelMetrics).toEqual({ + "claude-sonnet-4.6": { + usage: { inputTokens: 1000, outputTokens: 200, cacheReadTokens: 500, cacheWriteTokens: 0 }, + }, + }); + expect(shutdownEntry.data.currentModel).toBe("claude-sonnet-4.6"); + } finally { + stderrWriteSpy.mockRestore(); + } + }); }); describe("parsePermissionConfigFromServerArgs", () => { diff --git a/actions/setup/js/copilot_sdk_session.cjs b/actions/setup/js/copilot_sdk_session.cjs index c8510abb884..23936c2abd1 100644 --- a/actions/setup/js/copilot_sdk_session.cjs +++ b/actions/setup/js/copilot_sdk_session.cjs @@ -138,6 +138,8 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c let toolDenialCount = 0; let catastrophicToolDenialsError = null; let catastrophicToolDenialsTriggered = false; + /** @type {{ modelMetrics: Record; currentModel?: string } | null} */ + let capturedShutdownData = null; /** * Best-effort write of a driver-level event to events.jsonl and stderr. @@ -277,6 +279,18 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c break; } + case "session.shutdown": { + // Capture model metrics for AIC computation. + // The event fires when the session disconnects, so data is stored in a + // variable and written to the stream in the finally block before closing. + const modelMetrics = event.data?.modelMetrics; + const currentModel = event.data?.currentModel; + if (modelMetrics && typeof modelMetrics === "object" && Object.keys(modelMetrics).length > 0) { + capturedShutdownData = { modelMetrics, ...(currentModel ? { currentModel } : {}) }; + } + break; + } + default: // Other event types are not consumed by unified_timeline.cjs; skip them. break; @@ -318,9 +332,12 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c } finally { // Snapshot for null-safe cleanup in this scope. const stream = eventsStream; - if (stream) { - await new Promise(resolve => stream.end(resolve)); - } + // Disconnect the session first so that the session.shutdown event fires + // and capturedShutdownData is populated before we write to the stream. + // The Copilot SDK emits session.shutdown synchronously inside disconnect() + // (standard Node.js EventEmitter semantics), so capturedShutdownData is + // populated before the await resolves. If the SDK ever changes to async + // emission the write is simply skipped — no data is lost. if (session) { try { await session.disconnect(); @@ -328,6 +345,17 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c // best-effort cleanup } } + // Write captured session.shutdown data (model metrics) to events.jsonl + // so that log_parser_bootstrap.cjs can compute AIC from it. + // Only the last shutdown event is used; a session emits exactly one. + if (stream && capturedShutdownData) { + const shutdownEntry = JSON.stringify({ type: "session.shutdown", timestamp: new Date().toISOString(), data: capturedShutdownData }) + "\n"; + stream.write(shutdownEntry); + process.stderr.write(shutdownEntry); + } + if (stream) { + await new Promise(resolve => stream.end(resolve)); + } if (clientStarted) { try { await client.stop(); diff --git a/actions/setup/js/log_parser_bootstrap.cjs b/actions/setup/js/log_parser_bootstrap.cjs index 99a06b93de1..a15c863711b 100644 --- a/actions/setup/js/log_parser_bootstrap.cjs +++ b/actions/setup/js/log_parser_bootstrap.cjs @@ -4,6 +4,7 @@ const { generatePlainTextSummary, generateCopilotCliStyleSummary, wrapAgentLogInSection, formatSafeOutputsPreview } = require("./log_parser_shared.cjs"); const { getErrorMessage } = require("./error_helpers.cjs"); const { ERR_API, ERR_CONFIG, ERR_VALIDATION } = require("./error_codes.cjs"); +const { computeInferenceAIC } = require("./model_costs.cjs"); /** * Bootstrap helper for log parser entry points. @@ -160,6 +161,36 @@ async function runLogParser(options) { } } + // For copilot-sdk runs, inference bypasses the AWF API proxy so token-usage.jsonl + // contains no pricing-eligible entries. Compute AIC here from the modelMetrics + // captured in the session.shutdown SDK event and export it so that the subsequent + // parse-mcp-gateway step can propagate it as the job-level `aic` output. + if (logEntries && Array.isArray(logEntries)) { + const resultEntry = logEntries.find(entry => entry.type === "result"); + const modelMetrics = resultEntry?.modelMetrics; + if (modelMetrics && typeof modelMetrics === "object") { + let totalAIC = 0; + for (const [modelName, metrics] of Object.entries(modelMetrics)) { + const usage = metrics?.usage; + if (!usage) continue; + totalAIC += computeInferenceAIC({ + provider: "github-copilot", + model: modelName, + inputTokens: usage.inputTokens || 0, + outputTokens: usage.outputTokens || 0, + cacheReadTokens: usage.cacheReadTokens || 0, + cacheWriteTokens: usage.cacheWriteTokens || 0, + reasoningTokens: usage.reasoningTokens || 0, + }); + } + if (totalAIC > 0) { + const roundedAIC = totalAIC.toFixed(3); + core.exportVariable("GH_AW_AIC", roundedAIC); + process.stderr.write(JSON.stringify({ level: "info", message: `AI Credits (copilot-sdk): ${roundedAIC}` }) + "\n"); + } + } + } + if (markdown) { // Generate lightweight plain text summary for core.info and Copilot CLI style for step summary if (logEntries && Array.isArray(logEntries) && logEntries.length > 0) { diff --git a/actions/setup/js/log_parser_bootstrap.test.cjs b/actions/setup/js/log_parser_bootstrap.test.cjs index cf7a7e5403a..4beb42e4acf 100644 --- a/actions/setup/js/log_parser_bootstrap.test.cjs +++ b/actions/setup/js/log_parser_bootstrap.test.cjs @@ -237,6 +237,63 @@ describe("log_parser_bootstrap.cjs", () => { (fs.writeFileSync(logFile, "content"), (process.env.GH_AW_AGENT_OUTPUT = logFile), (process.env.GH_AW_SAFE_OUTPUTS = "/non/existent/file.jsonl")); const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: [], maxTurnsHit: false }); (runLogParser({ parseLog: mockParseLog, parserName: "TestParser" }), expect(mockCore.warning).not.toHaveBeenCalled(), fs.unlinkSync(logFile), fs.rmdirSync(tmpDir)); + }), + it("exports GH_AW_AIC via exportVariable when result entry has modelMetrics (copilot-sdk run)", () => { + const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-")); + const logFile = path.join(tmpDir, "test.log"); + fs.writeFileSync(logFile, "content"); + process.env.GH_AW_AGENT_OUTPUT = logFile; + + const mockParseLog = vi.fn().mockReturnValue({ + markdown: "## Result\n", + mcpFailures: [], + maxTurnsHit: false, + logEntries: [ + { type: "system", subtype: "init", model: "claude-sonnet-4.6" }, + { + type: "result", + num_turns: 1, + modelMetrics: { + "claude-sonnet-4.6": { + usage: { inputTokens: 10000, outputTokens: 500, cacheReadTokens: 5000, cacheWriteTokens: 0 }, + }, + }, + }, + ], + }); + + runLogParser({ parseLog: mockParseLog, parserName: "Copilot" }); + + expect(mockCore.exportVariable).toHaveBeenCalledWith("GH_AW_AIC", expect.any(String)); + const aicCall = mockCore.exportVariable.mock.calls.find(c => c[0] === "GH_AW_AIC"); + expect(parseFloat(aicCall[1])).toBeGreaterThan(0); + + fs.unlinkSync(logFile); + fs.rmdirSync(tmpDir); + }), + it("does not export GH_AW_AIC when result entry has no modelMetrics", () => { + const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-")); + const logFile = path.join(tmpDir, "test.log"); + fs.writeFileSync(logFile, "content"); + process.env.GH_AW_AGENT_OUTPUT = logFile; + + const mockParseLog = vi.fn().mockReturnValue({ + markdown: "## Result\n", + mcpFailures: [], + maxTurnsHit: false, + logEntries: [ + { type: "system", subtype: "init", model: "claude-sonnet-4.6" }, + { type: "result", num_turns: 1 }, + ], + }); + + runLogParser({ parseLog: mockParseLog, parserName: "Copilot" }); + + const aicCall = mockCore.exportVariable.mock.calls.find(c => c[0] === "GH_AW_AIC"); + expect(aicCall).toBeUndefined(); + + fs.unlinkSync(logFile); + fs.rmdirSync(tmpDir); })); })); }); diff --git a/actions/setup/js/parse_copilot_log.cjs b/actions/setup/js/parse_copilot_log.cjs index dc1388e95fb..50763736251 100644 --- a/actions/setup/js/parse_copilot_log.cjs +++ b/actions/setup/js/parse_copilot_log.cjs @@ -106,6 +106,10 @@ function normalizeCopilotSdkEventsToTrace(sdkEntries) { let assistantMessageCount = 0; let firstTimestampMs = null; let lastTimestampMs = null; + /** @type {Record | null} */ + let capturedModelMetrics = null; + /** @type {string | null} */ + let capturedCurrentModel = null; const addPendingId = (toolName, toolId) => { const existing = pendingIdsByToolName.get(toolName); @@ -244,6 +248,20 @@ function normalizeCopilotSdkEventsToTrace(sdkEntries) { break; } + case "session.shutdown": { + // Capture model metrics and current model for AIC computation. + // A session emits exactly one shutdown event; last-write-wins is fine. + const modelMetrics = entry.data?.modelMetrics; + const currentModel = entry.data?.currentModel; + if (modelMetrics && typeof modelMetrics === "object" && Object.keys(modelMetrics).length > 0) { + capturedModelMetrics = modelMetrics; + } + if (typeof currentModel === "string" && currentModel.trim()) { + capturedCurrentModel = currentModel.trim(); + } + break; + } + default: break; } @@ -256,7 +274,7 @@ function normalizeCopilotSdkEventsToTrace(sdkEntries) { normalizedEntries.unshift({ type: "system", subtype: "init", - model: "copilot-sdk", + model: capturedCurrentModel ?? "copilot-sdk", session_id: null, tools: Array.from(toolNames), }); @@ -268,6 +286,9 @@ function normalizeCopilotSdkEventsToTrace(sdkEntries) { if (firstTimestampMs !== null && lastTimestampMs !== null && lastTimestampMs >= firstTimestampMs) { resultEntry.duration_ms = lastTimestampMs - firstTimestampMs; } + if (capturedModelMetrics !== null) { + resultEntry.modelMetrics = capturedModelMetrics; + } normalizedEntries.push(resultEntry); return normalizedEntries; diff --git a/actions/setup/js/parse_copilot_log.test.cjs b/actions/setup/js/parse_copilot_log.test.cjs index b81fe3511c6..0d4040bb5da 100644 --- a/actions/setup/js/parse_copilot_log.test.cjs +++ b/actions/setup/js/parse_copilot_log.test.cjs @@ -122,6 +122,55 @@ describe("parse_copilot_log.cjs", () => { expect(resultEntry?.num_turns).toBe(1); }); + it("extracts modelMetrics and currentModel from session.shutdown into the result entry", () => { + const modelMetrics = { + "claude-sonnet-4.6": { + usage: { inputTokens: 5000, outputTokens: 300, cacheReadTokens: 10000, cacheWriteTokens: 0 }, + }, + }; + const sdkEventsLog = [ + '{"type":"user.message","timestamp":"2026-06-05T00:44:01.367Z","data":{}}', + '{"type":"assistant.message","timestamp":"2026-06-05T00:44:59.769Z","data":{"content":"Done"}}', + JSON.stringify({ + type: "session.shutdown", + timestamp: "2026-06-05T00:45:01.000Z", + data: { modelMetrics, currentModel: "claude-sonnet-4.6" }, + }), + ].join("\n"); + + const result = parseCopilotLog(sdkEventsLog); + + const initEntry = result.logEntries.find(e => e.type === "system" && e.subtype === "init"); + expect(initEntry?.model).toBe("claude-sonnet-4.6"); + + const resultEntry = result.logEntries.find(e => e.type === "result"); + expect(resultEntry?.modelMetrics).toEqual(modelMetrics); + }); + + it("keeps model as copilot-sdk when session.shutdown has no currentModel", () => { + const sdkEventsLog = [ + '{"type":"user.message","timestamp":"2026-06-05T00:44:01.367Z","data":{}}', + '{"type":"assistant.message","timestamp":"2026-06-05T00:44:59.769Z","data":{"content":"Done"}}', + JSON.stringify({ + type: "session.shutdown", + timestamp: "2026-06-05T00:45:01.000Z", + data: { + modelMetrics: { + "claude-sonnet-4.6": { usage: { inputTokens: 100, outputTokens: 50, cacheReadTokens: 0, cacheWriteTokens: 0 } }, + }, + }, + }), + ].join("\n"); + + const result = parseCopilotLog(sdkEventsLog); + + const initEntry = result.logEntries.find(e => e.type === "system" && e.subtype === "init"); + expect(initEntry?.model).toBe("copilot-sdk"); + + const resultEntry = result.logEntries.find(e => e.type === "result"); + expect(resultEntry?.modelMetrics).toBeDefined(); + }); + it("should handle tool calls with details in HTML format", () => { const logWithHtmlDetails = JSON.stringify([ { type: "system", subtype: "init", session_id: "html-test", tools: ["Bash"], model: "gpt-5" }, diff --git a/actions/setup/js/parse_mcp_gateway_log.cjs b/actions/setup/js/parse_mcp_gateway_log.cjs index fbbc88197ff..2344292f2bb 100644 --- a/actions/setup/js/parse_mcp_gateway_log.cjs +++ b/actions/setup/js/parse_mcp_gateway_log.cjs @@ -214,6 +214,17 @@ function writeStepSummaryWithTokenUsage(coreObj) { coreObj.exportVariable("GH_AW_AIC", roundedAIC); coreObj.setOutput("aic", roundedAIC); coreObj.info(`AI Credits: ${roundedAIC}`); + } else { + // For copilot-sdk runs, inference bypasses the AWF proxy so token-usage.jsonl + // contains no pricing-eligible entries (model name is absent/unknown). The + // preceding log_parser_bootstrap step computes AIC from the session.shutdown + // SDK event and exports it via core.exportVariable. Propagate that value here + // as the step output so downstream jobs receive it through needs.agent.outputs.aic. + const copilotSdkAIC = process.env.GH_AW_AIC; + if (copilotSdkAIC) { + coreObj.setOutput("aic", copilotSdkAIC); + process.stderr.write(JSON.stringify({ level: "info", message: `AI Credits (copilot-sdk, from prior step): ${copilotSdkAIC}` }) + "\n"); + } } if (parsedSummary && typeof parsedSummary.ambientContextTokens === "number" && parsedSummary.ambientContextTokens > 0) { const roundedAmbientContext = String(Math.round(parsedSummary.ambientContextTokens)); diff --git a/actions/setup/js/parse_mcp_gateway_log.test.cjs b/actions/setup/js/parse_mcp_gateway_log.test.cjs index 3843ab232b7..657703dd250 100644 --- a/actions/setup/js/parse_mcp_gateway_log.test.cjs +++ b/actions/setup/js/parse_mcp_gateway_log.test.cjs @@ -413,6 +413,79 @@ Some content here.`; } }); + test("falls back to process.env.GH_AW_AIC for aic step output when token-usage.jsonl has no eligible model (copilot-sdk run)", async () => { + const originalEnvAIC = process.env.GH_AW_AIC; + const originalExistsSync = fs.existsSync; + const originalReadFileSync = fs.readFileSync; + + try { + // Simulate a copilot-sdk run: token-usage.jsonl exists but all entries have unknown model → totalAIC = 0. + // GH_AW_AIC is set by the preceding log_parser_bootstrap step. + process.env.GH_AW_AIC = "42.123"; + + const unknownModelEntry = JSON.stringify({ + model: "", + input_tokens: 20000, + output_tokens: 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + duration_ms: 500, + }); + + const mockCore = { + info: vi.fn(), + debug: vi.fn(), + startGroup: vi.fn(), + endGroup: vi.fn(), + notice: vi.fn(), + warning: vi.fn(), + error: vi.fn(), + setFailed: vi.fn(), + exportVariable: vi.fn(), + setOutput: vi.fn(), + summary: { + addRaw: vi.fn().mockReturnThis(), + addDetails: vi.fn().mockReturnThis(), + write: vi.fn(), + }, + }; + + fs.existsSync = vi.fn(filepath => { + if (filepath === "/tmp/gh-aw/mcp-logs/gateway.md") return false; + if (filepath === "/tmp/gh-aw/mcp-logs/gateway.log") return false; + if (filepath === "/tmp/gh-aw/mcp-logs/gateway.jsonl") return false; + if (filepath === "/tmp/gh-aw/sandbox/firewall/logs/api-proxy-logs/token-usage.jsonl") return true; + return originalExistsSync(filepath); + }); + + fs.readFileSync = vi.fn((filepath, encoding) => { + if (filepath === "/tmp/gh-aw/sandbox/firewall/logs/api-proxy-logs/token-usage.jsonl") { + return unknownModelEntry; + } + return originalReadFileSync(filepath, encoding); + }); + + global.core = mockCore; + + const { main } = require("./parse_mcp_gateway_log.cjs"); + await main(); + + // The aic step output must be set from the env var fallback. + expect(mockCore.setOutput).toHaveBeenCalledWith("aic", "42.123"); + // GH_AW_AIC must NOT be re-exported (it's already in the env from the prior step). + expect(mockCore.exportVariable).not.toHaveBeenCalledWith("GH_AW_AIC", expect.any(String)); + } finally { + fs.existsSync = originalExistsSync; + fs.readFileSync = originalReadFileSync; + if (originalEnvAIC === undefined) { + delete process.env.GH_AW_AIC; + } else { + process.env.GH_AW_AIC = originalEnvAIC; + } + delete global.core; + } + }); + test("appends steering from rpc-messages.jsonl after gateway.md", async () => { const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "mcp-test-")); const gatewayMdPath = path.join(tmpDir, "gateway.md");