-
Notifications
You must be signed in to change notification settings - Fork 3
overhead: per-inference context-delta attribution (#432) #452
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,12 +9,14 @@ use std::io::{self, Write}; | |
| use std::path::{Path, PathBuf}; | ||
|
|
||
| use relayburn_sdk::{ | ||
| describe_applies_to, overhead as sdk_overhead, overhead_trim as sdk_overhead_trim, | ||
| OverheadFileSummary, OverheadOptions, OverheadPerFileEntry, OverheadResult, | ||
| OverheadSectionCost, OverheadTrimOptions, OverheadTrimResult, | ||
| context_delta as sdk_context_delta, describe_applies_to, overhead as sdk_overhead, | ||
| overhead_trim as sdk_overhead_trim, ContextDelta, ContextDeltaOpts, | ||
| ContextDeltaOwnerRail as OwnerRail, InterveningStep, OverheadFileSummary, OverheadOptions, | ||
| OverheadPerFileEntry, OverheadResult, OverheadSectionCost, OverheadTrimOptions, | ||
| OverheadTrimResult, | ||
| }; | ||
|
|
||
| use crate::cli::{GlobalArgs, OverheadAction, OverheadArgs}; | ||
| use crate::cli::{GlobalArgs, OverheadAction, OverheadArgs, OverheadDeltasArgs}; | ||
| use crate::render::error::report_error; | ||
| use crate::render::format::{ | ||
| coerce_whole_f64_to_int, format_tokens, format_uint, format_usd, render_table, | ||
|
|
@@ -27,6 +29,7 @@ pub fn run(globals: &GlobalArgs, args: OverheadArgs) -> i32 { | |
| Some(OverheadAction::Trim(trim)) => { | ||
| run_trim(globals, args.project, args.since, args.kind, trim.top) | ||
| } | ||
| Some(OverheadAction::Deltas(deltas)) => run_deltas(globals, args.since, deltas), | ||
| None => run_report(globals, args.project, args.since, args.kind), | ||
| } | ||
| } | ||
|
|
@@ -361,6 +364,241 @@ fn format_line_range(start: u64, end: u64) -> String { | |
| format!("{s}-{e}") | ||
| } | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // `burn overhead deltas` (#432) | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| fn run_deltas( | ||
| globals: &GlobalArgs, | ||
| since: Option<String>, | ||
| args: OverheadDeltasArgs, | ||
| ) -> i32 { | ||
| let opts = ContextDeltaOpts { | ||
| session: args.session.clone(), | ||
| since: since.as_deref().and_then(parse_since_duration), | ||
| top: args.top, | ||
| min_delta: args.min_delta, | ||
| owner: args.owner.into(), | ||
| }; | ||
| let progress = TaskProgress::new(globals, "overhead deltas"); | ||
| progress.set_task("computing context deltas"); | ||
| let deltas = match sdk_context_delta(opts, globals.ledger_path.clone()) { | ||
| Ok(d) => d, | ||
| Err(err) => { | ||
| progress.finish_and_clear(); | ||
| return report_error(&err, globals); | ||
| } | ||
| }; | ||
| progress.finish_and_clear(); | ||
|
|
||
| if globals.json { | ||
| let mut value = match serde_json::to_value(&deltas) { | ||
| Ok(v) => v, | ||
| Err(err) => return report_error(&io::Error::other(err), globals), | ||
| }; | ||
| coerce_whole_f64_to_int(&mut value); | ||
| if let Err(err) = render_json(&value) { | ||
| return report_error(&err, globals); | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| if let Err(err) = render_human_deltas(&deltas, args.explain) { | ||
| return report_error(&err, globals); | ||
| } | ||
| 0 | ||
| } | ||
|
|
||
| fn render_human_deltas(deltas: &[ContextDelta], explain: bool) -> io::Result<()> { | ||
| let stdout = io::stdout(); | ||
| let mut handle = stdout.lock(); | ||
|
|
||
| if deltas.is_empty() { | ||
| return handle.write_all(b"# no context deltas above threshold\n"); | ||
| } | ||
|
|
||
| let mut table: Vec<Vec<String>> = Vec::with_capacity(deltas.len() + 1); | ||
| table.push(vec![ | ||
| "Inference".to_string(), | ||
| "Owner".to_string(), | ||
| "Delta".to_string(), | ||
| "Cost".to_string(), | ||
| "Driver".to_string(), | ||
| ]); | ||
| for d in deltas { | ||
| let inf_label = format!("{}/inf{}", short_turn_label(&d.turn_id), d.inference_idx); | ||
| let owner_label = match &d.owner_rail { | ||
| OwnerRail::Main => "main".to_string(), | ||
| OwnerRail::Subagent { agent_id } => format!("sub:{}", short_agent_label(agent_id)), | ||
| }; | ||
| let delta_label = format_signed_tokens(d.delta_tokens); | ||
| let cost_label = format_usd(d.attributed_cost_usd); | ||
| let driver_label = driver_summary(&d.intervening); | ||
| table.push(vec![ | ||
| inf_label, | ||
| owner_label, | ||
| delta_label, | ||
| cost_label, | ||
| driver_label, | ||
| ]); | ||
| } | ||
| handle.write_all(render_table(&table).as_bytes())?; | ||
| handle.write_all(b"\n")?; | ||
|
|
||
| if explain { | ||
| handle.write_all(b"\n")?; | ||
| for d in deltas { | ||
| let inf_label = format!("{}/inf{}", short_turn_label(&d.turn_id), d.inference_idx); | ||
| let header = format!( | ||
| "{inf_label} — {} steps, prior {} -> current {} tok\n", | ||
| d.intervening.len(), | ||
| format_tokens(d.prior_context_tokens), | ||
| format_tokens(d.current_context_tokens), | ||
| ); | ||
| handle.write_all(header.as_bytes())?; | ||
| for step in &d.intervening { | ||
| let line = format!(" - {}\n", explain_step(step)); | ||
| handle.write_all(line.as_bytes())?; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| handle.write_all( | ||
| b"\n# token / cost figures are approximate (bytes/4 for tool results,\n\ | ||
| # cache-read rate for cost). Compaction rows surface separately and\n\ | ||
| # never appear as negative deltas.\n", | ||
| )?; | ||
| handle.flush()?; | ||
| Ok(()) | ||
| } | ||
|
|
||
/// Parse the CLI's relative-range `--since` form (`24h`, `7d`, `4w`, `2m`)
/// into a [`std::time::Duration`].
///
/// The input is an unsigned decimal count followed by exactly one unit
/// letter: `h` (hours), `d` (days), `w` (7 days) or `m` (30 days).
/// Surrounding whitespace is ignored.
///
/// Returns `None` for anything else — empty input, a missing or unknown
/// unit, a sign or non-digit in the count, or a count whose length in
/// seconds overflows `u64`. ISO-timestamp forms are therefore rejected
/// here; the deltas verb only takes a relative window
/// (`ContextDeltaOpts::since: Option<Duration>`).
fn parse_since_duration(s: &str) -> Option<std::time::Duration> {
    const HOUR: u64 = 3_600;
    const DAY: u64 = 24 * HOUR;

    let s = s.trim();
    let unit = s.chars().next_back()?;
    // One match both validates the unit and yields its length in seconds, so
    // there is no second, "unreachable" arm to keep in sync.
    let secs_per_unit = match unit {
        'h' => HOUR,
        'd' => DAY,
        'w' => 7 * DAY,
        'm' => 30 * DAY,
        _ => return None,
    };

    let count = &s[..s.len() - unit.len_utf8()];
    // `u64::from_str` accepts a leading `+`; insist on plain digits.
    if count.is_empty() || !count.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    let n: u64 = count.parse().ok()?;
    n.checked_mul(secs_per_unit)
        .map(std::time::Duration::from_secs)
}
|
|
||
/// Build the short table label for a turn id: `T` followed by at most the
/// first eight characters of the id, with one leading `msg_` / `msg-`
/// prefix removed. JSON output keeps the original id; this is display-only.
fn short_turn_label(turn_id: &str) -> String {
    // `strip_prefix` removes the prefix at most once. `trim_start_matches`
    // would keep stripping repeated prefixes and eat real id content
    // (e.g. `msg_msg_abc` would collapse to `abc`).
    let body = ["msg_", "msg-"]
        .iter()
        .find_map(|prefix| turn_id.strip_prefix(*prefix))
        .unwrap_or(turn_id);
    // Take characters, not bytes, so a non-ASCII id can never panic on a
    // cut in the middle of a UTF-8 sequence.
    let short: String = body.chars().take(8).collect();
    format!("T{short}")
}
|
|
||
/// Build the short table label for a subagent id: at most the first eight
/// characters of the id, with one leading `agent-` prefix removed.
fn short_agent_label(agent_id: &str) -> String {
    // `strip_prefix` removes the prefix at most once; `trim_start_matches`
    // would also swallow an id body that itself begins with `agent-`.
    // Characters (not bytes) are taken so non-ASCII ids cannot panic.
    agent_id
        .strip_prefix("agent-")
        .unwrap_or(agent_id)
        .chars()
        .take(8)
        .collect()
}
|
Comment on lines
+517
to
+520
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fn short_agent_label(agent_id: &str) -> String {
let trimmed = agent_id.trim_start_matches("agent-");
trimmed.chars().take(8).collect()
}
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in cf3c61e — Generated by Claude Code |
||
|
|
||
| fn format_signed_tokens(n: i64) -> String { | ||
| let sign = if n > 0 { | ||
| "+" | ||
| } else if n < 0 { | ||
| "-" | ||
| } else { | ||
| "" | ||
| }; | ||
| format!("{sign}{}", format_tokens(n.unsigned_abs())) | ||
| } | ||
|
Comment on lines
+522
to
+531
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fn format_signed_tokens(n: i64) -> String {
let sign = if n > 0 {
"+"
} else if n < 0 {
"-"
} else {
""
};
format!("{sign}{}", format_tokens(n.unsigned_abs()))
}
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in cf3c61e — Generated by Claude Code |
||
|
|
||
| fn driver_summary(steps: &[InterveningStep]) -> String { | ||
| if steps.is_empty() { | ||
| return "(no intervening leaves)".to_string(); | ||
| } | ||
| // Largest step by approx_tokens, with a "N steps" suffix when more | ||
| // than one. Compaction rows always win their summary because | ||
| // freeing tokens is the most explanatory signal. | ||
| if let Some(comp) = steps | ||
| .iter() | ||
| .find(|s| matches!(s, InterveningStep::Compaction { .. })) | ||
| { | ||
| return comp.driver_label(); | ||
| } | ||
| let largest = steps | ||
| .iter() | ||
| .max_by_key(|s| s.approx_tokens()) | ||
| .expect("non-empty"); | ||
| let extra = steps.len().saturating_sub(1); | ||
| if extra == 0 { | ||
| largest.driver_label() | ||
| } else { | ||
| format!( | ||
| "{} (+{extra} more step{})", | ||
| largest.driver_label(), | ||
| if extra == 1 { "" } else { "s" } | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| fn explain_step(step: &InterveningStep) -> String { | ||
| match step { | ||
| InterveningStep::ToolResult { | ||
| tool_use_id, | ||
| tool_name, | ||
| approx_tokens, | ||
| approx_bytes, | ||
| truncated, | ||
| } => format!( | ||
| "tool_result {tool_name} (id={tool_use_id}): ~{} tok / {} bytes{}", | ||
| format_tokens(*approx_tokens), | ||
| format_uint(*approx_bytes), | ||
| if *truncated { " [truncated]" } else { "" }, | ||
| ), | ||
| InterveningStep::UserPrompt { | ||
| approx_tokens, | ||
| has_system_reminder, | ||
| } => format!( | ||
| "user prompt: ~{} tok{}", | ||
| format_tokens(*approx_tokens), | ||
| if *has_system_reminder { | ||
| " (with system-reminder)" | ||
| } else { | ||
| "" | ||
| }, | ||
| ), | ||
| InterveningStep::SystemReminder { | ||
| source, | ||
| approx_tokens, | ||
| } => format!( | ||
| "system-reminder ({source:?}): ~{} tok", | ||
| format_tokens(*approx_tokens), | ||
| ), | ||
| InterveningStep::Compaction { tokens_freed } => { | ||
| format!("compaction: -{} tok freed", format_tokens(*tokens_freed)) | ||
| } | ||
| InterveningStep::Other => "other".to_string(), | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
|
|
@@ -370,4 +608,58 @@ mod tests { | |
| assert_eq!(format_line_range(7, 11), " 7- 11"); | ||
| assert_eq!(format_line_range(100, 200), " 100- 200"); | ||
| } | ||
|
|
||
#[test]
fn short_turn_label_trims_msg_prefix() {
    assert_eq!(short_turn_label("msg_abcdef1234"), "Tabcdef12");
    assert_eq!(short_turn_label("msg-deadbeef"), "Tdeadbeef");
    assert_eq!(short_turn_label("xyz"), "Txyz");
    // Regression guard: truncation counts characters, so an id containing
    // multi-byte UTF-8 must not panic on a mid-byte cut.
    assert_eq!(short_turn_label("msg_héllo-wörld"), "Théllo-wö");
    // Degenerate input still yields the bare marker.
    assert_eq!(short_turn_label(""), "T");
}
|
|
||
#[test]
fn driver_summary_singles_out_compaction() {
    // A compaction step should own the summary even when it is not first.
    let tool = InterveningStep::ToolResult {
        tool_use_id: "tu-1".into(),
        tool_name: "Bash".into(),
        approx_tokens: 100,
        approx_bytes: 400,
        truncated: false,
    };
    let compaction = InterveningStep::Compaction { tokens_freed: 5000 };
    let summary = driver_summary(&[tool, compaction]);
    assert!(summary.contains("compaction"));
}
|
|
||
#[test]
fn driver_summary_picks_largest_step() {
    // Without a compaction, the step with the most tokens names the driver
    // and the remaining steps are counted in the suffix.
    let small = InterveningStep::ToolResult {
        tool_use_id: "tu-1".into(),
        tool_name: "Bash".into(),
        approx_tokens: 100,
        approx_bytes: 400,
        truncated: false,
    };
    let large = InterveningStep::ToolResult {
        tool_use_id: "tu-2".into(),
        tool_name: "Read".into(),
        approx_tokens: 5000,
        approx_bytes: 20_000,
        truncated: false,
    };
    let s = driver_summary(&[small, large]);
    assert!(s.contains("Read"), "got {s}");
    assert!(s.contains("more"), "got {s}");
}
|
|
||
#[test]
fn format_signed_tokens_handles_positive_and_zero() {
    assert_eq!(format_signed_tokens(0), "0");
    assert!(format_signed_tokens(5_000).starts_with('+'));

    // The negative branch mirrors the positive one: same magnitude text,
    // opposite sign.
    let pos = format_signed_tokens(5_000);
    let neg = format_signed_tokens(-5_000);
    assert_eq!(neg.strip_prefix('-'), pos.strip_prefix('+'), "got {neg} vs {pos}");

    // `i64::MIN` has no positive `i64` counterpart; formatting it must not
    // panic and must still carry the minus sign.
    assert!(format_signed_tokens(i64::MIN).starts_with('-'));
}
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `short_turn_label` function uses string slicing `&trimmed[..8]`, which can panic if the 8th byte is not a character boundary (e.g., if the string contains multi-byte UTF-8 characters). Using `.chars().take(8).collect()` is safer and avoids potential panics.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in cf3c61e — `short_turn_label` now uses `chars().take(8).collect()` so multi-byte UTF-8 ids never panic on a mid-byte cut.
Generated by Claude Code