Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions actions/setup/js/copilot_sdk_driver.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,88 @@ describe("copilot_sdk_driver.cjs", () => {
}
}
});

it("writes session.shutdown event to events.jsonl when session emits model metrics", async () => {
const stop = vi.fn().mockResolvedValue(undefined);
const stderrWriteSpy = vi.spyOn(process.stderr, "write").mockImplementation(() => true);
try {
let onEvent = () => {};
const disconnect = vi.fn().mockImplementation(async () => {
// Emit session.shutdown synchronously during disconnect to simulate SDK behaviour.
onEvent({
type: "session.shutdown",
ephemeral: false,
timestamp: new Date().toISOString(),
data: {
modelMetrics: {
"claude-sonnet-4.6": {
usage: { inputTokens: 1000, outputTokens: 200, cacheReadTokens: 500, cacheWriteTokens: 0 },
},
},
currentModel: "claude-sonnet-4.6",
},
});
});
const session = {
sessionId: "session-shutdown-metrics",
on: handler => {
onEvent = handler;
},
sendAndWait: vi.fn().mockImplementation(async () => {
onEvent({
type: "assistant.message",
ephemeral: false,
timestamp: new Date().toISOString(),
data: { content: "done" },
});
return { data: { content: "done" } };
}),
disconnect,
};
class FakeCopilotClient {
start = vi.fn().mockResolvedValue(undefined);
createSession = vi.fn().mockResolvedValue(session);
stop = stop;
}

const result = await runWithCopilotSDK({
sdkUri: "http://127.0.0.1:3002",
prompt: "test prompt",
logger: () => {},
sdkModule: {
CopilotClient: FakeCopilotClient,
RuntimeConnection: { forUri: vi.fn(() => ({})) },
approveAll: () => "allow",
},
});

expect(result.exitCode).toBe(0);
expect(disconnect).toHaveBeenCalledTimes(1);

// The session.shutdown entry must be written to stderr (and thus events.jsonl).
const stderrLines = stderrWriteSpy.mock.calls
.map(([message]) => {
if (typeof message !== "string" || !message.endsWith("\n")) return null;
try {
return JSON.parse(message.trimEnd());
} catch {
return null;
}
})
.filter(Boolean);

const shutdownEntry = stderrLines.find(e => e.type === "session.shutdown");
expect(shutdownEntry).toBeDefined();
expect(shutdownEntry.data.modelMetrics).toEqual({
"claude-sonnet-4.6": {
usage: { inputTokens: 1000, outputTokens: 200, cacheReadTokens: 500, cacheWriteTokens: 0 },
},
});
expect(shutdownEntry.data.currentModel).toBe("claude-sonnet-4.6");
} finally {
stderrWriteSpy.mockRestore();
}
});
});

describe("parsePermissionConfigFromServerArgs", () => {
Expand Down
34 changes: 31 additions & 3 deletions actions/setup/js/copilot_sdk_session.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
let toolDenialCount = 0;
let catastrophicToolDenialsError = null;
let catastrophicToolDenialsTriggered = false;
/** @type {{ modelMetrics: Record<string, { usage: { inputTokens: number; outputTokens: number; cacheReadTokens?: number; cacheWriteTokens?: number; reasoningTokens?: number } }>; currentModel?: string } | null} */
let capturedShutdownData = null;

/**
* Best-effort write of a driver-level event to events.jsonl and stderr.
Expand Down Expand Up @@ -277,6 +279,18 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
break;
}

case "session.shutdown": {
// Capture model metrics for AIC computation.
// The event fires when the session disconnects, so data is stored in a
// variable and written to the stream in the finally block before closing.
const modelMetrics = event.data?.modelMetrics;
const currentModel = event.data?.currentModel;
if (modelMetrics && typeof modelMetrics === "object" && Object.keys(modelMetrics).length > 0) {
capturedShutdownData = { modelMetrics, ...(currentModel ? { currentModel } : {}) };
}
break;
}

default:
// Other event types are not consumed by unified_timeline.cjs; skip them.
break;
Expand Down Expand Up @@ -318,16 +332,30 @@ async function runWithCopilotSDK({ sdkUri, prompt, logger, attempt = 0, model, c
} finally {
// Snapshot for null-safe cleanup in this scope.
const stream = eventsStream;
if (stream) {
await new Promise(resolve => stream.end(resolve));
}
// Disconnect the session first so that the session.shutdown event fires
// and capturedShutdownData is populated before we write to the stream.
// The Copilot SDK emits session.shutdown synchronously inside disconnect()
// (standard Node.js EventEmitter semantics), so capturedShutdownData is
// populated before the await resolves. If the SDK ever changes to async
// emission the write is simply skipped — no data is lost.
if (session) {
try {
await session.disconnect();
} catch {
// best-effort cleanup
}
}
// Write captured session.shutdown data (model metrics) to events.jsonl
// so that log_parser_bootstrap.cjs can compute AIC from it.
// Only the last shutdown event is used; a session emits exactly one.
if (stream && capturedShutdownData) {
const shutdownEntry = JSON.stringify({ type: "session.shutdown", timestamp: new Date().toISOString(), data: capturedShutdownData }) + "\n";
stream.write(shutdownEntry);
process.stderr.write(shutdownEntry);
}
if (stream) {
await new Promise(resolve => stream.end(resolve));
}
if (clientStarted) {
try {
await client.stop();
Expand Down
31 changes: 31 additions & 0 deletions actions/setup/js/log_parser_bootstrap.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
const { generatePlainTextSummary, generateCopilotCliStyleSummary, wrapAgentLogInSection, formatSafeOutputsPreview } = require("./log_parser_shared.cjs");
const { getErrorMessage } = require("./error_helpers.cjs");
const { ERR_API, ERR_CONFIG, ERR_VALIDATION } = require("./error_codes.cjs");
const { computeInferenceAIC } = require("./model_costs.cjs");

/**
* Bootstrap helper for log parser entry points.
Expand Down Expand Up @@ -160,6 +161,36 @@ async function runLogParser(options) {
}
}

// For copilot-sdk runs, inference bypasses the AWF API proxy so token-usage.jsonl
// contains no pricing-eligible entries. Compute AIC here from the modelMetrics
// captured in the session.shutdown SDK event and export it so that the subsequent
// parse-mcp-gateway step can propagate it as the job-level `aic` output.
if (logEntries && Array.isArray(logEntries)) {
const resultEntry = logEntries.find(entry => entry.type === "result");
const modelMetrics = resultEntry?.modelMetrics;
if (modelMetrics && typeof modelMetrics === "object") {
let totalAIC = 0;
for (const [modelName, metrics] of Object.entries(modelMetrics)) {
const usage = metrics?.usage;
if (!usage) continue;
totalAIC += computeInferenceAIC({
provider: "github-copilot",
model: modelName,
inputTokens: usage.inputTokens || 0,
outputTokens: usage.outputTokens || 0,
cacheReadTokens: usage.cacheReadTokens || 0,
cacheWriteTokens: usage.cacheWriteTokens || 0,
reasoningTokens: usage.reasoningTokens || 0,
});
}
if (totalAIC > 0) {
const roundedAIC = totalAIC.toFixed(3);
core.exportVariable("GH_AW_AIC", roundedAIC);
process.stderr.write(JSON.stringify({ level: "info", message: `AI Credits (copilot-sdk): ${roundedAIC}` }) + "\n");
}
}
}

if (markdown) {
// Generate lightweight plain text summary for core.info and Copilot CLI style for step summary
if (logEntries && Array.isArray(logEntries) && logEntries.length > 0) {
Expand Down
57 changes: 57 additions & 0 deletions actions/setup/js/log_parser_bootstrap.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,63 @@ describe("log_parser_bootstrap.cjs", () => {
(fs.writeFileSync(logFile, "content"), (process.env.GH_AW_AGENT_OUTPUT = logFile), (process.env.GH_AW_SAFE_OUTPUTS = "/non/existent/file.jsonl"));
const mockParseLog = vi.fn().mockReturnValue({ markdown: "## Result\n", mcpFailures: [], maxTurnsHit: false });
(runLogParser({ parseLog: mockParseLog, parserName: "TestParser" }), expect(mockCore.warning).not.toHaveBeenCalled(), fs.unlinkSync(logFile), fs.rmdirSync(tmpDir));
}),
it("exports GH_AW_AIC via exportVariable when result entry has modelMetrics (copilot-sdk run)", () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
fs.writeFileSync(logFile, "content");
process.env.GH_AW_AGENT_OUTPUT = logFile;

const mockParseLog = vi.fn().mockReturnValue({
markdown: "## Result\n",
mcpFailures: [],
maxTurnsHit: false,
logEntries: [
{ type: "system", subtype: "init", model: "claude-sonnet-4.6" },
{
type: "result",
num_turns: 1,
modelMetrics: {
"claude-sonnet-4.6": {
usage: { inputTokens: 10000, outputTokens: 500, cacheReadTokens: 5000, cacheWriteTokens: 0 },
},
},
},
],
});

runLogParser({ parseLog: mockParseLog, parserName: "Copilot" });

expect(mockCore.exportVariable).toHaveBeenCalledWith("GH_AW_AIC", expect.any(String));
const aicCall = mockCore.exportVariable.mock.calls.find(c => c[0] === "GH_AW_AIC");
expect(parseFloat(aicCall[1])).toBeGreaterThan(0);

fs.unlinkSync(logFile);
fs.rmdirSync(tmpDir);
}),
it("does not export GH_AW_AIC when result entry has no modelMetrics", () => {
const tmpDir = fs.mkdtempSync(path.join(__dirname, "test-"));
const logFile = path.join(tmpDir, "test.log");
fs.writeFileSync(logFile, "content");
process.env.GH_AW_AGENT_OUTPUT = logFile;

const mockParseLog = vi.fn().mockReturnValue({
markdown: "## Result\n",
mcpFailures: [],
maxTurnsHit: false,
logEntries: [
{ type: "system", subtype: "init", model: "claude-sonnet-4.6" },
{ type: "result", num_turns: 1 },
],
});

runLogParser({ parseLog: mockParseLog, parserName: "Copilot" });

const aicCall = mockCore.exportVariable.mock.calls.find(c => c[0] === "GH_AW_AIC");
expect(aicCall).toBeUndefined();

fs.unlinkSync(logFile);
fs.rmdirSync(tmpDir);
}));
}));
});
23 changes: 22 additions & 1 deletion actions/setup/js/parse_copilot_log.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ function normalizeCopilotSdkEventsToTrace(sdkEntries) {
let assistantMessageCount = 0;
let firstTimestampMs = null;
let lastTimestampMs = null;
/** @type {Record<string, { usage: { inputTokens: number; outputTokens: number; cacheReadTokens?: number; cacheWriteTokens?: number; reasoningTokens?: number } }> | null} */
let capturedModelMetrics = null;
/** @type {string | null} */
let capturedCurrentModel = null;

const addPendingId = (toolName, toolId) => {
const existing = pendingIdsByToolName.get(toolName);
Expand Down Expand Up @@ -244,6 +248,20 @@ function normalizeCopilotSdkEventsToTrace(sdkEntries) {
break;
}

case "session.shutdown": {
// Capture model metrics and current model for AIC computation.
// A session emits exactly one shutdown event; last-write-wins is fine.
const modelMetrics = entry.data?.modelMetrics;
const currentModel = entry.data?.currentModel;
if (modelMetrics && typeof modelMetrics === "object" && Object.keys(modelMetrics).length > 0) {
capturedModelMetrics = modelMetrics;
}
if (typeof currentModel === "string" && currentModel.trim()) {
capturedCurrentModel = currentModel.trim();
}
break;
}

default:
break;
}
Expand All @@ -256,7 +274,7 @@ function normalizeCopilotSdkEventsToTrace(sdkEntries) {
normalizedEntries.unshift({
type: "system",
subtype: "init",
model: "copilot-sdk",
model: capturedCurrentModel ?? "copilot-sdk",
session_id: null,
tools: Array.from(toolNames),
});
Expand All @@ -268,6 +286,9 @@ function normalizeCopilotSdkEventsToTrace(sdkEntries) {
if (firstTimestampMs !== null && lastTimestampMs !== null && lastTimestampMs >= firstTimestampMs) {
resultEntry.duration_ms = lastTimestampMs - firstTimestampMs;
}
if (capturedModelMetrics !== null) {
resultEntry.modelMetrics = capturedModelMetrics;
}
normalizedEntries.push(resultEntry);

return normalizedEntries;
Expand Down
49 changes: 49 additions & 0 deletions actions/setup/js/parse_copilot_log.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,55 @@ describe("parse_copilot_log.cjs", () => {
expect(resultEntry?.num_turns).toBe(1);
});

it("extracts modelMetrics and currentModel from session.shutdown into the result entry", () => {
const modelMetrics = {
"claude-sonnet-4.6": {
usage: { inputTokens: 5000, outputTokens: 300, cacheReadTokens: 10000, cacheWriteTokens: 0 },
},
};
const sdkEventsLog = [
'{"type":"user.message","timestamp":"2026-06-05T00:44:01.367Z","data":{}}',
'{"type":"assistant.message","timestamp":"2026-06-05T00:44:59.769Z","data":{"content":"Done"}}',
JSON.stringify({
type: "session.shutdown",
timestamp: "2026-06-05T00:45:01.000Z",
data: { modelMetrics, currentModel: "claude-sonnet-4.6" },
}),
].join("\n");

const result = parseCopilotLog(sdkEventsLog);

const initEntry = result.logEntries.find(e => e.type === "system" && e.subtype === "init");
expect(initEntry?.model).toBe("claude-sonnet-4.6");

const resultEntry = result.logEntries.find(e => e.type === "result");
expect(resultEntry?.modelMetrics).toEqual(modelMetrics);
});

it("keeps model as copilot-sdk when session.shutdown has no currentModel", () => {
const sdkEventsLog = [
'{"type":"user.message","timestamp":"2026-06-05T00:44:01.367Z","data":{}}',
'{"type":"assistant.message","timestamp":"2026-06-05T00:44:59.769Z","data":{"content":"Done"}}',
JSON.stringify({
type: "session.shutdown",
timestamp: "2026-06-05T00:45:01.000Z",
data: {
modelMetrics: {
"claude-sonnet-4.6": { usage: { inputTokens: 100, outputTokens: 50, cacheReadTokens: 0, cacheWriteTokens: 0 } },
},
},
}),
].join("\n");

const result = parseCopilotLog(sdkEventsLog);

const initEntry = result.logEntries.find(e => e.type === "system" && e.subtype === "init");
expect(initEntry?.model).toBe("copilot-sdk");

const resultEntry = result.logEntries.find(e => e.type === "result");
expect(resultEntry?.modelMetrics).toBeDefined();
});

it("should handle tool calls with details in HTML format", () => {
const logWithHtmlDetails = JSON.stringify([
{ type: "system", subtype: "init", session_id: "html-test", tools: ["Bash"], model: "gpt-5" },
Expand Down
11 changes: 11 additions & 0 deletions actions/setup/js/parse_mcp_gateway_log.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,17 @@ function writeStepSummaryWithTokenUsage(coreObj) {
coreObj.exportVariable("GH_AW_AIC", roundedAIC);
coreObj.setOutput("aic", roundedAIC);
coreObj.info(`AI Credits: ${roundedAIC}`);
} else {
// For copilot-sdk runs, inference bypasses the AWF proxy so token-usage.jsonl
// contains no pricing-eligible entries (model name is absent/unknown). The
// preceding log_parser_bootstrap step computes AIC from the session.shutdown
// SDK event and exports it via core.exportVariable. Propagate that value here
// as the step output so downstream jobs receive it through needs.agent.outputs.aic.
const copilotSdkAIC = process.env.GH_AW_AIC;
if (copilotSdkAIC) {
coreObj.setOutput("aic", copilotSdkAIC);
process.stderr.write(JSON.stringify({ level: "info", message: `AI Credits (copilot-sdk, from prior step): ${copilotSdkAIC}` }) + "\n");
}
}
if (parsedSummary && typeof parsedSummary.ambientContextTokens === "number" && parsedSummary.ambientContextTokens > 0) {
const roundedAmbientContext = String(Math.round(parsedSummary.ambientContextTokens));
Expand Down
Loading