diff --git a/pkg/cli/audit_diff.go b/pkg/cli/audit_diff.go index 4ac6f830c31..ace68ffb3af 100644 --- a/pkg/cli/audit_diff.go +++ b/pkg/cli/audit_diff.go @@ -260,9 +260,9 @@ type TokenUsageDiff struct { Run1CacheWriteTokens int `json:"run1_cache_write_tokens"` Run2CacheWriteTokens int `json:"run2_cache_write_tokens"` CacheWriteTokensChange string `json:"cache_write_tokens_change,omitempty"` - Run1EffectiveTokens int `json:"run1_effective_tokens"` - Run2EffectiveTokens int `json:"run2_effective_tokens"` - EffectiveTokensChange string `json:"effective_tokens_change,omitempty"` + Run1AIC float64 `json:"run1_aic,omitempty"` + Run2AIC float64 `json:"run2_aic,omitempty"` + AICChange string `json:"aic_change,omitempty"` Run1TotalRequests int `json:"run1_total_requests"` Run2TotalRequests int `json:"run2_total_requests"` RequestsDelta string `json:"requests_delta,omitempty"` // Absolute request-count delta, e.g. "+4" @@ -327,8 +327,8 @@ type RunMetricsDiff struct { Run1Turns int `json:"run1_turns,omitempty"` Run2Turns int `json:"run2_turns,omitempty"` TurnsChange int `json:"turns_change,omitempty"` - Run1TokensPerTurn int `json:"run1_tokens_per_turn,omitempty"` // Avg effective tokens per turn in run 1 - Run2TokensPerTurn int `json:"run2_tokens_per_turn,omitempty"` // Avg effective tokens per turn in run 2 + Run1TokensPerTurn int `json:"run1_tokens_per_turn,omitempty"` // Avg token usage per turn in run 1 + Run2TokensPerTurn int `json:"run2_tokens_per_turn,omitempty"` // Avg token usage per turn in run 2 TokensPerTurnChange string `json:"tokens_per_turn_change,omitempty"` // e.g. "+20%", "-10%" TokenUsageDetails *TokenUsageDiff `json:"token_usage_details,omitempty"` // Detailed breakdown from firewall proxy GitHubRateLimitDetails *GitHubRateLimitDiff `json:"github_rate_limit_details,omitempty"` // GitHub API quota consumption diff @@ -561,21 +561,14 @@ func computeRunMetricsDiff(summary1, summary2 *RunSummary) *RunMetricsDiff { } } - // Compute tokens per turn using effective tokens from firewall proxy when available, - // otherwise fall back to the engine-level token count. - run1Effective := run1Tokens - run2Effective := run2Tokens - if tu1 != nil && tu1.TotalEffectiveTokens > 0 { - run1Effective = tu1.TotalEffectiveTokens - } - if tu2 != nil && tu2.TotalEffectiveTokens > 0 { - run2Effective = tu2.TotalEffectiveTokens - } + // Compute tokens per turn using engine-level token usage. + run1PerTurn := run1Tokens + run2PerTurn := run2Tokens if run1Turns > 0 { - diff.Run1TokensPerTurn = run1Effective / run1Turns + diff.Run1TokensPerTurn = run1PerTurn / run1Turns } if run2Turns > 0 { - diff.Run2TokensPerTurn = run2Effective / run2Turns + diff.Run2TokensPerTurn = run2PerTurn / run2Turns } if diff.Run1TokensPerTurn > 0 || diff.Run2TokensPerTurn > 0 { diff.TokensPerTurnChange = formatVolumeChange(diff.Run1TokensPerTurn, diff.Run2TokensPerTurn) @@ -861,7 +854,7 @@ func computeTokenUsageDiff(tu1, tu2 *TokenUsageSummary) *TokenUsageDiff { run1Output, run2Output int run1CacheRead, run2CacheRead int run1CacheWrite, run2CacheWrite int - run1Effective, run2Effective int + run1AIC, run2AIC float64 run1Requests, run2Requests int run1CacheEff, run2CacheEff float64 ) @@ -871,7 +864,7 @@ func computeTokenUsageDiff(tu1, tu2 *TokenUsageSummary) *TokenUsageDiff { run1Output = tu1.TotalOutputTokens run1CacheRead = tu1.TotalCacheReadTokens run1CacheWrite = tu1.TotalCacheWriteTokens - run1Effective = tu1.TotalEffectiveTokens + run1AIC = tu1.TotalAIC run1Requests = tu1.TotalRequests run1CacheEff = tu1.CacheEfficiency } @@ -880,7 +873,7 @@ func computeTokenUsageDiff(tu1, tu2 *TokenUsageSummary) *TokenUsageDiff { run2Output = tu2.TotalOutputTokens run2CacheRead = tu2.TotalCacheReadTokens run2CacheWrite = tu2.TotalCacheWriteTokens - run2Effective = tu2.TotalEffectiveTokens + run2AIC = tu2.TotalAIC run2Requests = tu2.TotalRequests run2CacheEff = tu2.CacheEfficiency } @@ -894,8 +887,8 @@ func computeTokenUsageDiff(tu1, tu2 *TokenUsageSummary) *TokenUsageDiff { Run2CacheReadTokens: run2CacheRead, Run1CacheWriteTokens: run1CacheWrite, Run2CacheWriteTokens: run2CacheWrite, - Run1EffectiveTokens: run1Effective, - Run2EffectiveTokens: run2Effective, + Run1AIC: run1AIC, + Run2AIC: run2AIC, Run1TotalRequests: run1Requests, Run2TotalRequests: run2Requests, Run1CacheEfficiency: run1CacheEff, @@ -914,8 +907,8 @@ func computeTokenUsageDiff(tu1, tu2 *TokenUsageSummary) *TokenUsageDiff { if run1CacheWrite > 0 || run2CacheWrite > 0 { diff.CacheWriteTokensChange = formatVolumeChange(run1CacheWrite, run2CacheWrite) } - if run1Effective > 0 || run2Effective > 0 { - diff.EffectiveTokensChange = formatVolumeChange(run1Effective, run2Effective) + if run1AIC > 0 || run2AIC > 0 { + diff.AICChange = formatFloatDelta(run1AIC, run2AIC) } if run1Requests > 0 || run2Requests > 0 { diff.RequestsDelta = formatCountChange(run1Requests, run2Requests) diff --git a/pkg/cli/audit_diff_command.go b/pkg/cli/audit_diff_command.go index a2b0e7c83ea..80dfc88ccc4 100644 --- a/pkg/cli/audit_diff_command.go +++ b/pkg/cli/audit_diff_command.go @@ -40,7 +40,7 @@ analyzes their data, and produces a diff showing: - Anomaly flags (new denied domains, previously-denied now allowed) - MCP tool invocation changes (new/removed tools, call count and error count diffs) - Run metrics comparison (token usage, duration, turns) when cached data is available -- Detailed token usage breakdown (input/output/cache/effective tokens) from firewall proxy +- Detailed token usage breakdown (input/output/cache + AI Credits) from firewall proxy Examples: ` + string(constants.CLIExtensionPrefix) + ` audit diff 12345 12346 # Compare two runs diff --git a/pkg/cli/audit_diff_render.go b/pkg/cli/audit_diff_render.go index 58b3f959361..35bdd977734 100644 --- a/pkg/cli/audit_diff_render.go +++ b/pkg/cli/audit_diff_render.go @@ -298,8 +298,8 @@ func renderTokenUsageDiffMarkdownSection(run1ID, run2ID int64, diff *TokenUsageD if diff.Run1CacheWriteTokens > 0 || diff.Run2CacheWriteTokens > 0 { fmt.Fprintf(os.Stdout, "| Cache write | %d | %d | %s |\n", diff.Run1CacheWriteTokens, diff.Run2CacheWriteTokens, diff.CacheWriteTokensChange) } - if diff.Run1EffectiveTokens > 0 || diff.Run2EffectiveTokens > 0 { - fmt.Fprintf(os.Stdout, "| Effective | %d | %d | %s |\n", diff.Run1EffectiveTokens, diff.Run2EffectiveTokens, diff.EffectiveTokensChange) + if diff.Run1AIC > 0 || diff.Run2AIC > 0 { + fmt.Fprintf(os.Stdout, "| AI Credits | %.3f | %.3f | %s |\n", diff.Run1AIC, diff.Run2AIC, diff.AICChange) } if diff.Run1TotalRequests > 0 || diff.Run2TotalRequests > 0 { fmt.Fprintf(os.Stdout, "| API requests | %d | %d | %s |\n", diff.Run1TotalRequests, diff.Run2TotalRequests, diff.RequestsDelta) @@ -578,12 +578,12 @@ func renderTokenUsageDiffPrettySection(run1ID, run2ID int64, diff *TokenUsageDif diff.CacheWriteTokensChange, }) } - if diff.Run1EffectiveTokens > 0 || diff.Run2EffectiveTokens > 0 { + if diff.Run1AIC > 0 || diff.Run2AIC > 0 { config.Rows = append(config.Rows, []string{ - "Effective", - strconv.Itoa(diff.Run1EffectiveTokens), - strconv.Itoa(diff.Run2EffectiveTokens), - diff.EffectiveTokensChange, + "AI Credits", + fmt.Sprintf("%.3f", diff.Run1AIC), + fmt.Sprintf("%.3f", diff.Run2AIC), + diff.AICChange, }) } if diff.Run1TotalRequests > 0 || diff.Run2TotalRequests > 0 { diff --git a/pkg/cli/audit_diff_test.go b/pkg/cli/audit_diff_test.go index 6a30074cfce..99feb73f73f 100644 --- a/pkg/cli/audit_diff_test.go +++ b/pkg/cli/audit_diff_test.go @@ -705,7 +705,7 @@ func TestComputeTokenUsageDiff_WithData(t *testing.T) { TotalOutputTokens: 2000, TotalCacheReadTokens: 5000, TotalCacheWriteTokens: 1000, - TotalEffectiveTokens: 8000, + TotalAIC: 0.8, TotalRequests: 10, CacheEfficiency: 0.333, } @@ -714,7 +714,7 @@ func TestComputeTokenUsageDiff_WithData(t *testing.T) { TotalOutputTokens: 3000, TotalCacheReadTokens: 7000, TotalCacheWriteTokens: 800, - TotalEffectiveTokens: 12000, + TotalAIC: 1.4, TotalRequests: 14, CacheEfficiency: 0.318, } @@ -738,9 +738,9 @@ func TestComputeTokenUsageDiff_WithData(t *testing.T) { assert.Equal(t, 800, diff.Run2CacheWriteTokens, "Run2 cache write tokens should be 800") assert.Equal(t, "-20%", diff.CacheWriteTokensChange, "Cache write tokens should decrease by 20%") - assert.Equal(t, 8000, diff.Run1EffectiveTokens, "Run1 effective tokens should be 8000") - assert.Equal(t, 12000, diff.Run2EffectiveTokens, "Run2 effective tokens should be 12000") - assert.Equal(t, "+50%", diff.EffectiveTokensChange, "Effective tokens should increase by 50%") + assert.InDelta(t, 0.8, diff.Run1AIC, 1e-9, "Run1 AI Credits should be 0.8") + assert.InDelta(t, 1.4, diff.Run2AIC, 1e-9, "Run2 AI Credits should be 1.4") + assert.Equal(t, "+0.600", diff.AICChange, "AI Credits delta should be +0.600") assert.Equal(t, 10, diff.Run1TotalRequests, "Run1 requests should be 10") assert.Equal(t, 14, diff.Run2TotalRequests, "Run2 requests should be 14") @@ -785,22 +785,22 @@ func TestComputeRunMetricsDiff_WithTokenUsageDetails(t *testing.T) { RunID: 100, Run: WorkflowRun{Duration: 5 * time.Minute, Turns: 4}, TokenUsage: &TokenUsageSummary{ - TotalInputTokens: 8000, - TotalOutputTokens: 1500, - TotalEffectiveTokens: 6000, - TotalRequests: 8, - CacheEfficiency: 0.25, + TotalInputTokens: 8000, + TotalOutputTokens: 1500, + TotalAIC: 0.6, + TotalRequests: 8, + CacheEfficiency: 0.25, }, } summary2 := &RunSummary{ RunID: 200, Run: WorkflowRun{Duration: 7 * time.Minute, Turns: 6}, TokenUsage: &TokenUsageSummary{ - TotalInputTokens: 12000, - TotalOutputTokens: 2000, - TotalEffectiveTokens: 9000, - TotalRequests: 11, - CacheEfficiency: 0.30, + TotalInputTokens: 12000, + TotalOutputTokens: 2000, + TotalAIC: 0.9, + TotalRequests: 11, + CacheEfficiency: 0.30, }, } @@ -813,9 +813,9 @@ func TestComputeRunMetricsDiff_WithTokenUsageDetails(t *testing.T) { assert.Equal(t, 12000, diff.TokenUsageDetails.Run2InputTokens, "Run2 input tokens should be 12000") assert.Equal(t, "+50%", diff.TokenUsageDetails.InputTokensChange, "Input tokens change should be +50%") - assert.Equal(t, 6000, diff.TokenUsageDetails.Run1EffectiveTokens, "Run1 effective tokens should be 6000") - assert.Equal(t, 9000, diff.TokenUsageDetails.Run2EffectiveTokens, "Run2 effective tokens should be 9000") - assert.Equal(t, "+50%", diff.TokenUsageDetails.EffectiveTokensChange, "Effective tokens change should be +50%") + assert.InDelta(t, 0.6, diff.TokenUsageDetails.Run1AIC, 1e-9, "Run1 AI Credits should be 0.6") + assert.InDelta(t, 0.9, diff.TokenUsageDetails.Run2AIC, 1e-9, "Run2 AI Credits should be 0.9") + assert.Equal(t, "+0.300", diff.TokenUsageDetails.AICChange, "AI Credits delta should be +0.300") } func TestComputeRunMetricsDiff_TokenUsageDetailsAloneNotNil(t *testing.T) { @@ -1320,14 +1320,14 @@ func TestComputeRunMetricsDiff_TokensPerTurn(t *testing.T) { diff := computeRunMetricsDiff(summary1, summary2) require.NotNil(t, diff, "Should produce metrics diff") - // Without effective tokens, falls back to engine token count + // Uses engine token counts for tokens/turn. assert.Equal(t, 2000, diff.Run1TokensPerTurn, "Run1 tokens/turn should be 10000/5=2000") assert.Equal(t, 3000, diff.Run2TokensPerTurn, "Run2 tokens/turn should be 18000/6=3000") assert.Equal(t, "+50%", diff.TokensPerTurnChange, "Tokens/turn should increase by 50%") } -func TestComputeRunMetricsDiff_TokensPerTurnFromEffective(t *testing.T) { - // When effective token data is available it should be used for tokens/turn +func TestComputeRunMetricsDiff_TokensPerTurnIgnoresEffectiveTokenTotals(t *testing.T) { + // Tokens/turn should continue to use engine token usage even when effective totals exist. summary1 := &RunSummary{ Run: WorkflowRun{ TokenUsage: 10000, @@ -1354,9 +1354,8 @@ func TestComputeRunMetricsDiff_TokensPerTurnFromEffective(t *testing.T) { diff := computeRunMetricsDiff(summary1, summary2) require.NotNil(t, diff, "Should produce metrics diff") - // Effective tokens should be used: 8000/4=2000, 12000/4=3000 - assert.Equal(t, 2000, diff.Run1TokensPerTurn, "Run1 tokens/turn should use effective: 8000/4=2000") - assert.Equal(t, 3000, diff.Run2TokensPerTurn, "Run2 tokens/turn should use effective: 12000/4=3000") + assert.Equal(t, 2500, diff.Run1TokensPerTurn, "Run1 tokens/turn should use engine tokens: 10000/4=2500") + assert.Equal(t, 4000, diff.Run2TokensPerTurn, "Run2 tokens/turn should use engine tokens: 16000/4=4000") } func TestComputeRunMetricsDiff_TokensPerTurnZeroTurns(t *testing.T) { diff --git a/pkg/cli/audit_math_helpers.go b/pkg/cli/audit_math_helpers.go index b6f0473ed42..d719441a5ba 100644 --- a/pkg/cli/audit_math_helpers.go +++ b/pkg/cli/audit_math_helpers.go @@ -48,3 +48,12 @@ func formatCountChange(count1, count2 int) string { } return strconv.Itoa(delta) } + +// formatFloatDelta formats an absolute delta between two floating-point values. +func formatFloatDelta(value1, value2 float64) string { + delta := value2 - value1 + if delta >= 0 { + return fmt.Sprintf("+%.3f", delta) + } + return fmt.Sprintf("%.3f", delta) +} diff --git a/pkg/cli/audit_report.go b/pkg/cli/audit_report.go index 9fae0103d2d..417111767da 100644 --- a/pkg/cli/audit_report.go +++ b/pkg/cli/audit_report.go @@ -101,14 +101,13 @@ type OverviewData struct { // MetricsData contains execution metrics type MetricsData struct { - TokenUsage int `json:"token_usage,omitempty" console:"header:Token Usage,format:number,omitempty"` - EffectiveTokens int `json:"effective_tokens,omitempty" console:"header:Effective Tokens,format:number,omitempty"` - AIC float64 `json:"aic,omitempty"` - AmbientContext *AmbientContextMetrics `json:"ambient_context,omitempty" console:"title:Ambient Context,omitempty"` - ActionMinutes float64 `json:"action_minutes,omitempty" console:"header:Action Minutes,omitempty"` - Turns int `json:"turns,omitempty" console:"header:Turns,omitempty"` - ErrorCount int `json:"error_count" console:"header:Errors"` - WarningCount int `json:"warning_count" console:"header:Warnings"` + TokenUsage int `json:"token_usage,omitempty" console:"header:Token Usage,format:number,omitempty"` + AIC float64 `json:"aic,omitempty"` + AmbientContext *AmbientContextMetrics `json:"ambient_context,omitempty" console:"title:Ambient Context,omitempty"` + ActionMinutes float64 `json:"action_minutes,omitempty" console:"header:Action Minutes,omitempty"` + Turns int `json:"turns,omitempty" console:"header:Turns,omitempty"` + ErrorCount int `json:"error_count" console:"header:Errors"` + WarningCount int `json:"warning_count" console:"header:Warnings"` } // JobData contains information about individual jobs @@ -321,13 +320,6 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage metricsData.Turns = fallbackMetrics.Turns } - // Populate effective tokens from the firewall proxy summary when available, - // otherwise fall back to the effective tokens stored on the run itself. - if processedRun.TokenUsage != nil && processedRun.TokenUsage.TotalEffectiveTokens > 0 { - metricsData.EffectiveTokens = processedRun.TokenUsage.TotalEffectiveTokens - } else if run.EffectiveTokens > 0 { - metricsData.EffectiveTokens = run.EffectiveTokens - } if processedRun.TokenUsage != nil && processedRun.TokenUsage.TotalAIC > 0 { metricsData.AIC = processedRun.TokenUsage.TotalAIC } diff --git a/pkg/cli/logs_episode.go b/pkg/cli/logs_episode.go index defa90e38cc..6df9d3badbb 100644 --- a/pkg/cli/logs_episode.go +++ b/pkg/cli/logs_episode.go @@ -55,7 +55,7 @@ type EpisodeData struct { PrimaryWorkflow string `json:"primary_workflow,omitempty"` TotalRuns int `json:"total_runs"` TotalTokens int `json:"total_tokens"` - TotalEffectiveTokens int `json:"total_effective_tokens"` + TotalAIC float64 `json:"total_aic,omitempty"` TotalDuration string `json:"total_duration"` RiskyNodeCount int `json:"risky_node_count"` ChangedNodeCount int `json:"changed_node_count"` @@ -169,7 +169,7 @@ func buildEpisodeData(runs []RunData, processedRuns []ProcessedRun) ([]EpisodeDa acc.metadata.TotalRuns++ acc.metadata.TotalTokens += run.TokenUsage - acc.metadata.TotalEffectiveTokens += run.EffectiveTokens + acc.metadata.TotalAIC += run.AIC acc.metadata.ManifestEntryCount += run.ManifestEntryCount acc.metadata.TemporaryIDMappings += run.TemporaryIDMappings acc.metadata.ChainedTargetCount += run.ChainedTargetCount diff --git a/pkg/cli/logs_episode_test.go b/pkg/cli/logs_episode_test.go index 4a7f9b6477e..5da7ea8fcec 100644 --- a/pkg/cli/logs_episode_test.go +++ b/pkg/cli/logs_episode_test.go @@ -167,21 +167,21 @@ func TestBuildEpisodeDataNoToolCallsWhenMCPUsageAbsent(t *testing.T) { assert.Empty(t, ep.ToolCalls, "tool_calls should be absent when no MCP usage data") } -func TestBuildEpisodeDataAggregatesEffectiveTokens(t *testing.T) { +func TestBuildEpisodeDataAggregatesAIC(t *testing.T) { runs := []RunData{ { - RunID: 501, - WorkflowName: "effective-a", - Status: "completed", - EffectiveTokens: 1200, - CreatedAt: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC), + RunID: 501, + WorkflowName: "effective-a", + Status: "completed", + AIC: 1.2, + CreatedAt: time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC), }, { - RunID: 502, - WorkflowName: "effective-b", - Status: "completed", - EffectiveTokens: 345, - CreatedAt: time.Date(2024, 1, 1, 12, 1, 0, 0, time.UTC), + RunID: 502, + WorkflowName: "effective-b", + Status: "completed", + AIC: 0.345, + CreatedAt: time.Date(2024, 1, 1, 12, 1, 0, 0, time.UTC), }, } @@ -194,8 +194,8 @@ func TestBuildEpisodeDataAggregatesEffectiveTokens(t *testing.T) { byRunID[episode.RunIDs[0]] = episode } - assert.Equal(t, 1200, byRunID[501].TotalEffectiveTokens, "episode should preserve effective tokens from run 501") - assert.Equal(t, 345, byRunID[502].TotalEffectiveTokens, "episode should preserve effective tokens from run 502") + assert.InDelta(t, 1.2, byRunID[501].TotalAIC, 1e-9, "episode should preserve AIC from run 501") + assert.InDelta(t, 0.345, byRunID[502].TotalAIC, 1e-9, "episode should preserve AIC from run 502") } func TestBuildEpisodeDataAggregatesToolCallsAcrossRuns(t *testing.T) { diff --git a/pkg/cli/logs_format_compact.go b/pkg/cli/logs_format_compact.go index 139a9297308..2b247d4b0d0 100644 --- a/pkg/cli/logs_format_compact.go +++ b/pkg/cli/logs_format_compact.go @@ -230,7 +230,6 @@ func renderLogsCompactVerbose(data LogsData) { "runs=" + strconv.Itoa(s.TotalRuns), "duration=" + s.TotalDuration, "tokens=" + strconv.Itoa(s.TotalTokens), - "eff_tokens=" + strconv.Itoa(s.TotalEffectiveTokens), "action_min=" + fmt.Sprintf("%.1f", s.TotalActionMinutes), "turns=" + strconv.Itoa(s.TotalTurns), "errors=" + strconv.Itoa(s.TotalErrors), @@ -272,7 +271,7 @@ func renderLogsCompactVerbose(data LogsData) { // [runs] verbose aligned table fmt.Fprintln(os.Stdout, "[runs]") w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - fmt.Fprintln(w, "RUNID\tWORKFLOW\tENGINE\tSTATUS\tDUR\tTOKENS\tEFF_TOK\tAIC\tTURNS\tERR\tWARN\tEVENT\tACTOR\tTBT\tCLASS\tCREATED\tBRANCH") + fmt.Fprintln(w, "RUNID\tWORKFLOW\tENGINE\tSTATUS\tDUR\tTOKENS\tAIC\tTURNS\tERR\tWARN\tEVENT\tACTOR\tTBT\tCLASS\tCREATED\tBRANCH") for _, r := range data.Runs { status := r.Conclusion @@ -300,9 +299,9 @@ func renderLogsCompactVerbose(data LogsData) { } wfID := workflowIDFromRun(r.WorkflowPath, r.WorkflowName) - fmt.Fprintf(w, "%d\t%s\t%s\t%s\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n", + fmt.Fprintf(w, "%d\t%s\t%s\t%s\t%s\t%d\t%s\t%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n", r.RunID, wfID, r.EngineID, status, dur, - r.TokenUsage, r.EffectiveTokens, formatCompactAIC(r.AIC), + r.TokenUsage, formatCompactAIC(r.AIC), r.Turns, r.ErrorCount, r.WarningCount, r.Event, actor, tbt, classification, r.CreatedAt.Format("01-02 15:04"), r.Branch) diff --git a/pkg/cli/logs_format_tsv.go b/pkg/cli/logs_format_tsv.go index 0c8d7188cf3..f86c0305837 100644 --- a/pkg/cli/logs_format_tsv.go +++ b/pkg/cli/logs_format_tsv.go @@ -35,7 +35,7 @@ func renderLogsTSV(data LogsData) { // Header headers := []string{ "run_id", "workflow", "engine", "status", "duration", - "tokens", "eff_tokens", "turns", "errors", + "tokens", "aic", "turns", "errors", "event", "branch", "created_at", "classification", "url", } fmt.Fprintln(os.Stdout, strings.Join(headers, "\t")) @@ -67,7 +67,7 @@ func renderLogsTSV(data LogsData) { conclusion, duration, strconv.Itoa(r.TokenUsage), - strconv.Itoa(r.EffectiveTokens), + fmt.Sprintf("%.3f", r.AIC), strconv.Itoa(r.Turns), strconv.Itoa(r.ErrorCount), r.Event, @@ -117,7 +117,7 @@ func renderLogsTSVVerbose(data LogsData) { headers := []string{ "run_id", "workflow", "engine", "status", "duration", - "tokens", "eff_tokens", "turns", "errors", + "tokens", "aic", "turns", "errors", "warnings", "missing_tools", "missing_data", "github_api", "event", "branch", "actor", "created_at", "tbt", "classification", "action_min", "display_title", "url", @@ -153,7 +153,7 @@ func renderLogsTSVVerbose(data LogsData) { conclusion, duration, strconv.Itoa(r.TokenUsage), - strconv.Itoa(r.EffectiveTokens), + fmt.Sprintf("%.3f", r.AIC), strconv.Itoa(r.Turns), strconv.Itoa(r.ErrorCount), strconv.Itoa(r.WarningCount), diff --git a/pkg/cli/logs_report.go b/pkg/cli/logs_report.go index 7b8c9e4c52c..71c98eaac69 100644 --- a/pkg/cli/logs_report.go +++ b/pkg/cli/logs_report.go @@ -57,7 +57,6 @@ type LogsSummary struct { TotalRuns int `json:"total_runs" console:"header:Total Runs"` TotalDuration string `json:"total_duration" console:"header:Total Duration"` TotalTokens int `json:"total_tokens" console:"header:Total Tokens,format:number"` - TotalEffectiveTokens int `json:"total_effective_tokens" console:"header:Total Effective Tokens,format:number"` TotalAIC float64 `json:"total_aic,omitempty"` TotalActionMinutes float64 `json:"total_action_minutes" console:"header:Total Action Minutes"` TotalTurns int `json:"total_turns" console:"header:Total Turns"` @@ -109,7 +108,6 @@ type RunData struct { Duration string `json:"duration,omitempty" console:"header:Duration,omitempty"` ActionMinutes float64 `json:"action_minutes,omitempty" console:"header:Action Minutes,omitempty"` TokenUsage int `json:"token_usage,omitempty" console:"header:Tokens,format:number,omitempty"` - EffectiveTokens int `json:"effective_tokens,omitempty" console:"header:Effective Tokens,format:number,omitempty"` AIC float64 `json:"aic,omitempty"` AmbientContext *AmbientContextMetrics `json:"ambient_context,omitempty" console:"-"` Turns int `json:"turns,omitempty" console:"header:Turns,omitempty"` @@ -160,7 +158,6 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation // Build summary var totalDuration time.Duration var totalTokens int - var totalEffectiveTokens int var totalAIC float64 var totalActionMinutes float64 var totalTurns int @@ -195,7 +192,6 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation totalDuration += run.Duration } totalTokens += run.TokenUsage - totalEffectiveTokens += run.EffectiveTokens if pr.TokenUsage != nil { totalAIC += pr.TokenUsage.TotalAIC } @@ -277,7 +273,6 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation Conclusion: run.Conclusion, Classification: deriveRunClassification(comparison), TokenUsage: run.TokenUsage, - EffectiveTokens: run.EffectiveTokens, AIC: 0, AmbientContext: ambientContext, ActionMinutes: run.ActionMinutes, @@ -353,7 +348,6 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation TotalRuns: len(processedRuns), TotalDuration: timeutil.FormatDuration(totalDuration), TotalTokens: totalTokens, - TotalEffectiveTokens: totalEffectiveTokens, TotalAIC: totalAIC, TotalActionMinutes: totalActionMinutes, TotalTurns: totalTurns,