diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ef66a81..5bf2dcf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Cross-package release notes for relayburn. Package changelogs contain package-le - `relayburn-ingest` (Rust): port the standalone primitives — `pending_stamps` (binary-compatible with the TS `@relayburn/ingest` wire format), `walk` (`walk_jsonl` / `walk_opencode_sessions`), `watch_loop` (`tokio::time::interval`-driven `WatchController` with graceful stop), and the typed `cursors` module layered on the SQLite ledger's cursor blob. Public verb surface (`ingest_all`, per-harness verbs, `reingest_missing_content`) is wired; per-harness orchestration follow-ups deferred to dedicated sub-issues. (#245) - `relayburn-analyze` (Rust): port the `compare` aggregator — `build_compare_table` for the in-memory `(model, activity)` rollup with per-cell turn / edit / one-shot / priced / cost / cache-hit / median-retries metrics, plus `compare_from_archive` sourced from the SQLite ledger via `Ledger::query_turns`. Public surface: `CompareCell`, `CompareTable`, `CompareTotals`, `CompareOptions`, `CompareCategory`, `DEFAULT_MIN_SAMPLE`, `compare_from_archive`, `CompareFromArchiveResult`. (#269) +- `relayburn-analyze` (Rust): port `subagent_tree` and `claude_md` modules. `build_subagent_tree` / `aggregate_subagent_type_stats` walk per-session subagent invocations (relationship-row substrate with legacy `subagent` fallback) and roll up self/cumulative cost. `parse_claude_md` / `attribute_claude_md` / `build_trim_recommendations` / `render_unified_diff_for_recommendation` produce CLAUDE.md section attribution and trim diffs whose unified-diff format stays byte-aligned with the TS implementation. (#272) ## [1.9.0] - 2026-05-03 diff --git a/crates/relayburn-analyze/src/claude_md.rs b/crates/relayburn-analyze/src/claude_md.rs new file mode 100644 index 00000000..68a4c3e3 --- /dev/null +++ b/crates/relayburn-analyze/src/claude_md.rs @@ -0,0 +1,1003 @@ +//! 
CLAUDE.md attribution + trim recommendations — Rust port of +//! `packages/analyze/src/claude-md.ts`. +//! +//! Per-session CLAUDE.md cost is attributed to the cacheRead tariff for any +//! turn whose `cacheRead >= totalTokens` (treating CLAUDE.md as cached once +//! the prompt cache is large enough). Section-level cost is split by byte +//! share so Σ section.totalCost ≤ totalCost holds exactly. Trim +//! recommendations re-emit the largest non-preamble sections as a unified +//! diff that hand-applies cleanly. The diff format is byte-aligned with the +//! TS implementation since CLI/MCP consumers may grep on it. + +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; + +use indexmap::IndexMap; +use regex::Regex; +use relayburn_reader::TurnRecord; +use serde::{Deserialize, Serialize}; + +use crate::cost::lookup_model_rate; +use crate::pricing::PricingTable; + +const PER_MILLION: f64 = 1_000_000.0; +const CHARS_PER_TOKEN: u64 = 4; + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct MarkdownSection { + pub heading: String, + /// 0 for preamble, 1-6 for `#` through `######`. + pub level: u32, + /// 1-indexed. + pub start_line: u64, + /// 1-indexed inclusive. + pub end_line: u64, + pub bytes: u64, + pub tokens: u64, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ParsedClaudeMd { + pub path: String, + pub total_lines: u64, + pub bytes: u64, + pub tokens: u64, + pub sections: Vec, + /// 1 or 2; 0 if no headings. 
/// Locate the CLAUDE.md files that exist under `project_path`.
///
/// Two fixed locations are probed, in this order: `<project>/CLAUDE.md` and
/// `<project>/.claude/CLAUDE.md`. A candidate is kept only when its metadata
/// can be read and describes a regular file; any metadata error (missing
/// path, permission problem, ...) simply drops that candidate.
///
/// # Errors
///
/// The current implementation never returns `Err`; the `io::Result` wrapper
/// is part of the public signature and is kept for callers.
pub fn find_claude_md_files(project_path: &Path) -> io::Result<Vec<PathBuf>> {
    let root_level = project_path.join("CLAUDE.md");
    let nested = project_path.join(".claude").join("CLAUDE.md");

    let existing: Vec<PathBuf> = vec![root_level, nested]
        .into_iter()
        .filter(|candidate| {
            // Best-effort probe: an unreadable path counts as "not present".
            fs::metadata(candidate)
                .map(|meta| meta.is_file())
                .unwrap_or(false)
        })
        .collect();

    Ok(existing)
}
+ let normalized = text.replace("\r\n", "\n"); + let had_trailing_newline = normalized.ends_with('\n'); + let trimmed_end: &str = if had_trailing_newline { + &normalized[..normalized.len() - 1] + } else { + &normalized + }; + let lines: Vec<&str> = if trimmed_end.is_empty() { + Vec::new() + } else { + trimmed_end.split('\n').collect() + }; + let total_lines = lines.len() as u64; + let total_bytes = normalized.len() as u64; + let tokens = if total_bytes == 0 { + 0 + } else { + total_bytes.div_ceil(CHARS_PER_TOKEN) + }; + + let line_bytes: Vec = lines.iter().map(|l| l.len() as u64).collect(); + let line_with_newline_weight = |idx: usize| -> u64 { + let base = line_bytes.get(idx).copied().unwrap_or(0); + let is_last = idx + 1 == lines.len(); + if is_last && !had_trailing_newline { + base + } else { + base + 1 + } + }; + let range_bytes = |start1: u64, end1: u64| -> u64 { + let mut sum = 0u64; + let start = start1.saturating_sub(1) as usize; + let end = end1.saturating_sub(1) as usize; + for i in start..=end { + sum += line_with_newline_weight(i); + } + sum + }; + + let headings = find_headings(&lines); + let grouping_level = if headings.iter().any(|h| h.level == 2) { + 2 + } else if headings.iter().any(|h| h.level == 1) { + 1 + } else { + 0 + }; + + let mut sections: Vec = Vec::new(); + if grouping_level == 0 { + if total_lines > 0 && total_bytes > 0 { + sections.push(MarkdownSection { + heading: "(preamble)".to_string(), + level: 0, + start_line: 1, + end_line: total_lines, + bytes: total_bytes, + tokens, + }); + } + return ParsedClaudeMd { + path: file_path.to_string(), + total_lines, + bytes: total_bytes, + tokens, + sections, + grouping_level, + }; + } + + let group_headings: Vec<&HeadingInfo> = + headings.iter().filter(|h| h.level == grouping_level).collect(); + let first_start = group_headings.first().map(|h| h.line).unwrap_or(total_lines + 1); + if first_start > 1 { + let pb_bytes = range_bytes(1, first_start - 1); + if pb_bytes > 0 { + 
/// Ceiling division of `a` by `b`.
///
/// A zero numerator short-circuits to `0` before the divisor is touched, so
/// `ceil_div(0, 0)` yields `0` rather than a division-by-zero panic.
fn ceil_div(a: u64, b: u64) -> u64 {
    match a {
        0 => 0,
        numerator => numerator.div_ceil(b),
    }
}
/// Report whether `s` closes a fenced code block opened with `ch`.
///
/// `s` qualifies when it begins with a run of at least `min_len` consecutive
/// `ch` characters and everything after that run is whitespace (or nothing).
/// A line such as ```` ```python ```` therefore does not close a fence,
/// because non-whitespace text follows the run.
///
/// * `s` - the candidate line; callers pass it already trimmed.
/// * `ch` - the fence character of the opener.
/// * `min_len` - minimum run length, i.e. the length of the opening fence.
fn matches_close_fence(s: &str, ch: char, min_len: usize) -> bool {
    // Everything left once the leading run of `ch` has been stripped.
    let rest = s.trim_start_matches(ch);
    // The stripped prefix consists solely of `ch`, so its byte length divided
    // by the encoded width of `ch` is the number of characters in the run.
    let run = (s.len() - rest.len()) / ch.len_utf8();
    run >= min_len && rest.chars().all(char::is_whitespace)
}
/// Nearest-rank percentile over an ascending-sorted slice.
///
/// Returns `0.0` for an empty slice and the sole element for a one-element
/// slice. Otherwise the value at index `ceil(p * len) - 1` is returned, with
/// that index clamped into `0..len` so out-of-range `p` values still select
/// the first or last element.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    match sorted {
        [] => 0.0,
        [only] => *only,
        _ => {
            let last = sorted.len() as i64 - 1;
            let rank = (p * sorted.len() as f64).ceil() as i64 - 1;
            sorted[rank.max(0).min(last) as usize]
        }
    }
}
= "camelCase")] +pub struct TrimRecommendation { + pub file_path: String, + pub section: MarkdownSection, + pub projected_savings_per_session: f64, + pub projected_savings_across_window: f64, + pub token_share: f64, +} + +pub fn build_trim_recommendations( + attribution: &ClaudeMdAttributionResult, + top_n: usize, +) -> Vec { + attribution + .section_costs + .iter() + .filter(|s| s.section.level > 0) + .take(top_n) + .map(|s| TrimRecommendation { + file_path: s.file_path.clone(), + section: s.section.clone(), + projected_savings_per_session: s.cost_per_session, + projected_savings_across_window: s.total_cost, + token_share: s.token_share, + }) + .collect() +} + +pub fn render_unified_diff_for_recommendation( + file_path: &str, + file_text: &str, + rec: &TrimRecommendation, + base_dir: Option<&Path>, +) -> String { + let normalized = file_text.replace("\r\n", "\n"); + let had_trailing = normalized.ends_with('\n'); + let trimmed_end: &str = if had_trailing { + &normalized[..normalized.len() - 1] + } else { + &normalized + }; + let lines: Vec<&str> = if trimmed_end.is_empty() { + Vec::new() + } else { + trimmed_end.split('\n').collect() + }; + let start = rec.section.start_line as usize; + let end = rec.section.end_line as usize; + let removed: Vec<&str> = lines + .iter() + .copied() + .skip(start - 1) + .take(end - (start - 1)) + .collect(); + let display = to_posix_relative(file_path, base_dir); + let header_a = format!("--- a/{display}"); + let header_b = format!("+++ b/{display}"); + let hunk = format!("@@ -{},{} +{},0 @@", start, removed.len(), start); + let body = removed + .iter() + .map(|l| format!("-{l}")) + .collect::>() + .join("\n"); + format!( + "# TRIM: {heading}\n# projected savings per session: ${ps:.4}\n# projected savings across window: ${pw:.4}\n{header_a}\n{header_b}\n{hunk}\n{body}", + heading = rec.section.heading, + ps = rec.projected_savings_per_session, + pw = rec.projected_savings_across_window, + ) +} + +fn to_posix_relative(file_path: &str, 
base_dir: Option<&Path>) -> String { + let path = Path::new(file_path); + let mut p: PathBuf = path.to_path_buf(); + if let Some(base) = base_dir { + if let Ok(rel) = path.strip_prefix(base) { + if !rel.as_os_str().is_empty() && !rel.starts_with("..") { + p = rel.to_path_buf(); + } + } + } + let s = p.to_string_lossy().replace(std::path::MAIN_SEPARATOR, "/"); + // Strip leading slashes so headers aren't `--- a//abs/path`. + s.trim_start_matches('/').to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::pricing::load_builtin_pricing; + use relayburn_reader::{SourceKind, ToolCall, TurnRecord, Usage}; + use std::fs; + use tempfile::TempDir; + + fn make_turn(session_id: &str, message_id: &str, turn_index: u64, usage: Usage) -> TurnRecord { + TurnRecord { + v: 1, + source: SourceKind::ClaudeCode, + session_id: session_id.into(), + session_path: None, + message_id: message_id.into(), + turn_index, + ts: "2026-04-23T00:00:00.000Z".into(), + model: "claude-sonnet-4-6".into(), + project: None, + project_key: None, + usage, + tool_calls: Vec::::new(), + files_touched: None, + subagent: None, + stop_reason: None, + activity: None, + retries: None, + has_edits: None, + fidelity: None, + } + } + + fn zero_usage() -> Usage { + Usage { + input: 0, + output: 0, + reasoning: 0, + cache_read: 0, + cache_create_5m: 0, + cache_create_1h: 0, + } + } + + #[test] + fn returns_a_single_preamble_section_for_a_file_with_no_headings() { + let parsed = parse_claude_md("/p/CLAUDE.md", "just a paragraph\nwith some content"); + assert_eq!(parsed.sections.len(), 1); + assert_eq!(parsed.sections[0].level, 0); + assert_eq!(parsed.sections[0].heading, "(preamble)"); + assert_eq!(parsed.grouping_level, 0); + } + + #[test] + fn groups_by_h2_when_h2_sections_exist_treating_leading_content_as_preamble() { + let text = [ + "# Title", + "intro paragraph", + "", + "## Architecture", + "arch line 1", + "arch line 2", + "", + "## Testing", + "testing line 1", + ] + .join("\n"); + let 
parsed = parse_claude_md("/p/CLAUDE.md", &text); + assert_eq!(parsed.grouping_level, 2); + assert_eq!(parsed.sections.len(), 3); + assert_eq!(parsed.sections[0].level, 0); + assert_eq!(parsed.sections[1].heading, "## Architecture"); + assert_eq!(parsed.sections[2].heading, "## Testing"); + assert_eq!(parsed.sections[1].start_line, 4); + assert_eq!(parsed.sections[1].end_line, 7); + assert_eq!(parsed.sections[2].start_line, 8); + assert_eq!(parsed.sections[2].end_line, 9); + } + + #[test] + fn groups_by_h1_when_no_h2_exists() { + let text = ["# Section A", "a body", "# Section B", "b body"].join("\n"); + let parsed = parse_claude_md("/p/CLAUDE.md", &text); + assert_eq!(parsed.grouping_level, 1); + assert_eq!(parsed.sections.len(), 2); + assert_eq!(parsed.sections[0].heading, "# Section A"); + assert_eq!(parsed.sections[1].heading, "# Section B"); + } + + #[test] + fn ignores_headings_inside_fenced_code_blocks() { + let text = [ + "## Real heading", + "body", + "", + "```", + "## not a heading", + "```", + "", + "## Another real heading", + ] + .join("\n"); + let parsed = parse_claude_md("/p/CLAUDE.md", &text); + assert_eq!(parsed.sections.len(), 2); + assert_eq!(parsed.sections[0].heading, "## Real heading"); + assert_eq!(parsed.sections[1].heading, "## Another real heading"); + } + + #[test] + fn a_python_line_inside_a_3_backtick_fence_does_not_close_the_fence() { + let text = [ + "```", + "## inside block", + "````python", + "## should-be-inside", + "```", + "## should-be-outside", + ] + .join("\n"); + let parsed = parse_claude_md("/p/CLAUDE.md", &text); + let headings: Vec<&str> = parsed + .sections + .iter() + .filter(|s| s.level > 0) + .map(|s| s.heading.as_str()) + .collect(); + assert_eq!(headings, vec!["## should-be-outside"]); + } + + #[test] + fn does_not_count_a_trailing_newline_as_an_extra_line() { + let parsed = parse_claude_md("/p/CLAUDE.md", "## Section\nbody\n"); + assert_eq!(parsed.total_lines, 2); + assert_eq!(parsed.sections[0].end_line, 2); + } + 
+ #[test] + fn normalizes_crlf_line_endings() { + let parsed = parse_claude_md("/p/CLAUDE.md", "## A\r\nbody\r\n## B\r\nb\r\n"); + assert_eq!(parsed.sections.len(), 2); + assert_eq!(parsed.sections[0].heading, "## A"); + assert_eq!(parsed.sections[1].heading, "## B"); + } + + #[test] + fn returns_zero_sections_for_empty_input() { + let parsed = parse_claude_md("/p/CLAUDE.md", ""); + assert_eq!(parsed.total_lines, 0); + assert_eq!(parsed.sections.len(), 0); + } + + #[test] + fn attributes_per_turn_cost_within_10_pct_of_hand_computed_truth() { + let pricing = load_builtin_pricing(); + let rate = pricing.get("claude-sonnet-4-6").unwrap().clone(); + let mut text = String::from("# Title\n"); + text.push_str(&"x".repeat(4000 - 8)); + let parsed = parse_claude_md("/p/CLAUDE.md", &text); + + let session_id = "s-cm-1"; + let mut turns: Vec = Vec::new(); + for i in 0..5 { + turns.push(make_turn( + session_id, + &format!("m-{i}"), + i, + Usage { + input: 50, + output: 30, + reasoning: 0, + cache_read: parsed.tokens + 5000, + cache_create_5m: 0, + cache_create_1h: 0, + }, + )); + } + let files = vec![parsed.clone()]; + let result = attribute_claude_md(&AttributeClaudeMdInput { + files: &files, + turns: &turns, + pricing: &pricing, + }); + let expected = 5.0 * (parsed.tokens as f64 / 1_000_000.0) * rate.cache_read; + assert!( + (result.total_cost - expected).abs() <= expected * 0.10, + "total={} expected={} diff>10%", + result.total_cost, + expected, + ); + assert_eq!(result.session_count, 1); + assert_eq!(result.session_costs[0].riding_turns, 5); + } + + #[test] + fn section_cost_is_proportional_to_its_token_share() { + let pricing = load_builtin_pricing(); + let mut text = String::new(); + text.push_str("## Big\n"); + text.push_str(&"x".repeat(8000)); + text.push_str("\n## Small\n"); + text.push_str(&"x".repeat(2000)); + let parsed = parse_claude_md("/p/CLAUDE.md", &text); + let session_id = "s-cm-sec"; + let mut turns: Vec = Vec::new(); + for i in 0..3 { + 
turns.push(make_turn( + session_id, + &format!("m-{i}"), + i, + Usage { + input: 50, + output: 10, + reasoning: 0, + cache_read: parsed.tokens + 1000, + cache_create_5m: 0, + cache_create_1h: 0, + }, + )); + } + let files = vec![parsed]; + let result = attribute_claude_md(&AttributeClaudeMdInput { + files: &files, + turns: &turns, + pricing: &pricing, + }); + let big = result + .section_costs + .iter() + .find(|s| s.section.heading == "## Big") + .unwrap(); + let small = result + .section_costs + .iter() + .find(|s| s.section.heading == "## Small") + .unwrap(); + assert!(big.total_cost > small.total_cost); + let ratio = big.total_cost / small.total_cost; + let token_ratio = big.section.tokens as f64 / small.section.tokens as f64; + assert!((ratio - token_ratio).abs() / token_ratio < 0.05); + } + + #[test] + fn skips_turns_where_cache_read_is_below_claude_md_size() { + let pricing = load_builtin_pricing(); + let mut text = String::from("## Big\n"); + text.push_str(&"x".repeat(40_000)); + let parsed = parse_claude_md("/p/CLAUDE.md", &text); + let session_id = "s-cm-skip"; + let turns = vec![ + make_turn( + session_id, + "m0", + 0, + Usage { + input: 5000, + output: 10, + reasoning: 0, + cache_read: 100, + cache_create_5m: 0, + cache_create_1h: 0, + }, + ), + make_turn( + session_id, + "m1", + 1, + Usage { + input: 50, + output: 10, + reasoning: 0, + cache_read: parsed.tokens + 500, + cache_create_5m: 0, + cache_create_1h: 0, + }, + ), + ]; + let files = vec![parsed]; + let result = attribute_claude_md(&AttributeClaudeMdInput { + files: &files, + turns: &turns, + pricing: &pricing, + }); + assert_eq!(result.session_costs[0].riding_turns, 1); + } + + #[test] + fn returns_zero_cost_when_claude_md_is_empty() { + let parsed = parse_claude_md("/p/CLAUDE.md", ""); + let pricing = PricingTable::new(); + let turns = vec![make_turn("s", "m", 0, zero_usage())]; + let files = vec![parsed]; + let result = attribute_claude_md(&AttributeClaudeMdInput { + files: &files, + turns: 
&turns, + pricing: &pricing, + }); + assert_eq!(result.total_cost, 0.0); + assert_eq!(result.session_costs.len(), 0); + } + + #[test] + fn includes_zero_cost_sessions_in_session_count_so_avg_p95_are_not_biased_upward() { + let pricing = load_builtin_pricing(); + let mut text = String::from("## Body\n"); + text.push_str(&"x".repeat(4000)); + let parsed = parse_claude_md("/p/CLAUDE.md", &text); + let turns = vec![ + make_turn( + "s-A", + "m", + 0, + Usage { + input: 10, + output: 10, + reasoning: 0, + cache_read: parsed.tokens + 500, + cache_create_5m: 0, + cache_create_1h: 0, + }, + ), + make_turn( + "s-B", + "m", + 0, + Usage { + input: 500, + output: 10, + reasoning: 0, + cache_read: 0, + cache_create_5m: 0, + cache_create_1h: 0, + }, + ), + ]; + let files = vec![parsed]; + let result = attribute_claude_md(&AttributeClaudeMdInput { + files: &files, + turns: &turns, + pricing: &pricing, + }); + assert_eq!(result.session_count, 2); + let b = result + .session_costs + .iter() + .find(|s| s.session_id == "s-B") + .unwrap(); + assert_eq!(b.cost, 0.0); + assert_eq!(b.riding_turns, 0); + let a = result + .session_costs + .iter() + .find(|s| s.session_id == "s-A") + .unwrap(); + assert!((result.per_session_avg - a.cost / 2.0).abs() < 1e-9); + } + + #[test] + fn sum_of_section_costs_stays_below_or_equal_total_cost() { + let pricing = load_builtin_pricing(); + let mut parts = String::new(); + for i in 0..20 { + parts.push_str(&format!("## Section {i}\n{}\n", "x".repeat(123))); + } + let parsed = parse_claude_md("/p/CLAUDE.md", &parts); + let mut turns: Vec = Vec::new(); + for i in 0..5 { + turns.push(make_turn( + "s-sum", + &format!("m{i}"), + i, + Usage { + input: 10, + output: 10, + reasoning: 0, + cache_read: parsed.tokens + 500, + cache_create_5m: 0, + cache_create_1h: 0, + }, + )); + } + let files = vec![parsed]; + let result = attribute_claude_md(&AttributeClaudeMdInput { + files: &files, + turns: &turns, + pricing: &pricing, + }); + let sum: f64 = 
result.section_costs.iter().map(|s| s.total_cost).sum(); + assert!(sum <= result.total_cost + 1e-9); + let sum_shares: f64 = result.section_costs.iter().map(|s| s.token_share).sum(); + assert!((sum_shares - 1.0).abs() < 1e-9); + } + + #[test] + fn finds_root_claude_md_and_dot_claude_claude_md() { + let tmp = TempDir::new().unwrap(); + fs::write(tmp.path().join("CLAUDE.md"), "# Root").unwrap(); + fs::create_dir_all(tmp.path().join(".claude")).unwrap(); + fs::write(tmp.path().join(".claude").join("CLAUDE.md"), "# Nested").unwrap(); + let files = find_claude_md_files(tmp.path()).unwrap(); + assert_eq!(files.len(), 2); + assert!(files.iter().any(|f| { + f.file_name().unwrap() == "CLAUDE.md" + && f.parent().unwrap().file_name().map(|s| s != ".claude").unwrap_or(true) + })); + assert!(files.iter().any(|f| { + f.file_name().unwrap() == "CLAUDE.md" + && f.parent().unwrap().file_name().map(|s| s == ".claude").unwrap_or(false) + })); + } + + #[test] + fn loads_parsed_content_via_load_claude_md_file() { + let tmp = TempDir::new().unwrap(); + let target = tmp.path().join("CLAUDE.md"); + fs::write(&target, "## Section\nbody").unwrap(); + let parsed = load_claude_md_file(&target).unwrap(); + assert_eq!(parsed.sections[0].heading, "## Section"); + } + + #[test] + fn emits_a_trim_diff_for_the_largest_section_that_hand_applies_cleanly() { + let pricing = load_builtin_pricing(); + let mut text = String::new(); + text.push_str("## Big\n"); + text.push_str(&"x".repeat(8000)); + text.push_str("\n## Small\n"); + text.push_str(&"x".repeat(2000)); + let parsed = parse_claude_md("/p/CLAUDE.md", &text); + let turns = vec![make_turn( + "s-cm-advise", + "m0", + 0, + Usage { + input: 50, + output: 10, + reasoning: 0, + cache_read: parsed.tokens + 1000, + cache_create_5m: 0, + cache_create_1h: 0, + }, + )]; + let files = vec![parsed]; + let attribution = attribute_claude_md(&AttributeClaudeMdInput { + files: &files, + turns: &turns, + pricing: &pricing, + }); + let recs = 
build_trim_recommendations(&attribution, 1); + assert_eq!(recs.len(), 1); + assert_eq!(recs[0].section.heading, "## Big"); + assert!(recs[0].token_share > 0.0); + let diff = render_unified_diff_for_recommendation("/p/CLAUDE.md", &text, &recs[0], None); + assert!(diff.contains("# TRIM: ## Big")); + assert!(diff.contains("--- a/")); + assert!(diff.contains("+++ b/")); + assert!(diff.contains("@@ -1,2 +1,0 @@")); + } + + #[test] + fn emits_a_project_relative_posix_path_in_the_diff_header_when_base_dir_is_given() { + let pricing = load_builtin_pricing(); + let text = "## Only\nbody\n"; + let parsed = parse_claude_md("/home/u/repo/CLAUDE.md", text); + let turns = vec![make_turn( + "s", + "m", + 0, + Usage { + input: 10, + output: 10, + reasoning: 0, + cache_read: parsed.tokens + 100, + cache_create_5m: 0, + cache_create_1h: 0, + }, + )]; + let files = vec![parsed]; + let attribution = attribute_claude_md(&AttributeClaudeMdInput { + files: &files, + turns: &turns, + pricing: &pricing, + }); + let recs = build_trim_recommendations(&attribution, 1); + let diff = render_unified_diff_for_recommendation( + "/home/u/repo/CLAUDE.md", + text, + &recs[0], + Some(Path::new("/home/u/repo")), + ); + assert!(diff.contains("--- a/CLAUDE.md")); + assert!(diff.contains("+++ b/CLAUDE.md")); + assert!(!diff.contains("a//")); + } +} diff --git a/crates/relayburn-analyze/src/lib.rs b/crates/relayburn-analyze/src/lib.rs index 21e2b396..8ffd08ad 100644 --- a/crates/relayburn-analyze/src/lib.rs +++ b/crates/relayburn-analyze/src/lib.rs @@ -15,6 +15,7 @@ //! 1e-9 USD precision contract that the future `overhead` sub-issue gates //! against. 
+pub mod claude_md; pub mod compare; pub mod compare_archive; pub mod cost; @@ -24,7 +25,14 @@ pub mod pricing; pub mod provider; pub mod provider_reattribution; pub mod quality; +pub mod subagent_tree; +pub use claude_md::{ + attribute_claude_md, build_trim_recommendations, find_claude_md_files, load_claude_md_file, + parse_claude_md, render_unified_diff_for_recommendation, AttributeClaudeMdInput, + ClaudeMdAttributionResult, MarkdownSection, ParsedClaudeMd, SectionCost, SessionClaudeMdCost, + TrimRecommendation, +}; pub use compare::{ build_compare_table, CompareCategory, CompareCell, CompareOptions, CompareTable, CompareTotals, DEFAULT_MIN_SAMPLE, @@ -65,3 +73,7 @@ pub use quality::{ compute_one_shot_rate, compute_quality, infer_outcome, ComputeQualityOptions, OneShotMetrics, OutcomeConfidence, OutcomeLabel, OutcomeReason, QualityResult, SessionOutcome, }; +pub use subagent_tree::{ + aggregate_subagent_type_stats, build_subagent_tree, BuildSubagentTreeOptions, SubagentTreeNode, + SubagentTypeStats, +}; diff --git a/crates/relayburn-analyze/src/subagent_tree.rs b/crates/relayburn-analyze/src/subagent_tree.rs new file mode 100644 index 00000000..4d1a43db --- /dev/null +++ b/crates/relayburn-analyze/src/subagent_tree.rs @@ -0,0 +1,1198 @@ +//! Subagent tree / per-type rollups — Rust port of +//! `packages/analyze/src/subagent-tree.ts`. +//! +//! Walks the parent-uuid chains in `TurnRecord.subagent` (or +//! `SessionRelationshipRecord` rows when supplied) to build one tree per +//! session, with cost rolled up from leaves. The relationship-row path is +//! the primary substrate for newer ingests; the legacy path falls back to +//! `TurnRecord.subagent` only. 
+ +use indexmap::{IndexMap, IndexSet}; +use relayburn_reader::{RelationshipType, SessionRelationshipRecord, TurnRecord}; +use serde::{Deserialize, Serialize}; + +use crate::cost::cost_for_turn; +use crate::pricing::PricingTable; + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SubagentTreeNode { + pub node_id: String, + pub label: String, + pub relationship_type: RelationshipType, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub subagent_type: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub description: Option, + pub models: Vec, + pub self_turns: u64, + pub self_cost: f64, + pub cumulative_turns: u64, + pub cumulative_cost: f64, + pub depth: i32, + pub children: Vec, +} + +#[derive(Debug, Clone)] +pub struct BuildSubagentTreeOptions<'a> { + pub pricing: &'a PricingTable, + pub relationships: Option<&'a [SessionRelationshipRecord]>, +} + +impl<'a> BuildSubagentTreeOptions<'a> { + pub fn new(pricing: &'a PricingTable) -> Self { + Self { + pricing, + relationships: None, + } + } + + pub fn with_relationships(mut self, rels: &'a [SessionRelationshipRecord]) -> Self { + self.relationships = Some(rels); + self + } +} + +/// Build per-session subagent trees. Each session yields one tree whose root +/// is the main thread. Children are subagent invocations (grouped by +/// `subagent.agentId`), nested by `parentAgentId`. When relationship rows are +/// supplied, they are the primary substrate; per-turn `subagent` fields +/// attach turn cost and fill legacy gaps. 
+pub fn build_subagent_tree( + turns: &[TurnRecord], + opts: &BuildSubagentTreeOptions<'_>, +) -> IndexMap { + if let Some(rels) = opts.relationships { + if !rels.is_empty() { + return build_relationship_trees(turns, rels, opts.pricing); + } + } + build_legacy_subagent_trees(turns, opts.pricing) +} + +#[derive(Debug)] +struct MutableNode { + node_id: String, + label: String, + relationship_type: RelationshipType, + subagent_type: Option, + description: Option, + self_turns: u64, + self_cost: f64, + cumulative_turns: u64, + cumulative_cost: f64, + depth: i32, + children: Vec, +} + +impl MutableNode { + fn new(id: String, label: String, relationship_type: RelationshipType) -> Self { + Self { + node_id: id, + label, + relationship_type, + subagent_type: None, + description: None, + self_turns: 0, + self_cost: 0.0, + cumulative_turns: 0, + cumulative_cost: 0.0, + depth: -1, + children: Vec::new(), + } + } +} + +#[derive(Debug, Default)] +struct GraphState { + alias_by_id: IndexMap, + node_by_id: IndexMap, + models_by_node: IndexMap>, + parent_by_node: IndexMap, +} + +fn build_legacy_subagent_trees( + turns: &[TurnRecord], + pricing: &PricingTable, +) -> IndexMap { + let mut by_session: IndexMap> = IndexMap::new(); + for t in turns { + by_session.entry(t.session_id.clone()).or_default().push(t); + } + let mut out: IndexMap = IndexMap::new(); + for (session_id, session_turns) in by_session { + let root = build_session_tree(&session_id, &session_turns, pricing); + out.insert(session_id, root); + } + out +} + +fn build_session_tree( + session_id: &str, + turns: &[&TurnRecord], + pricing: &PricingTable, +) -> SubagentTreeNode { + let mut nodes: IndexMap = IndexMap::new(); + let mut models: IndexMap> = IndexMap::new(); + nodes.insert( + session_id.to_string(), + MutableNode { + depth: 0, + ..MutableNode::new(session_id.to_string(), "main".to_string(), RelationshipType::Root) + }, + ); + models.insert(session_id.to_string(), IndexSet::new()); + + let unresolved_id = 
format!("{session_id}:__unresolved"); + let mut unresolved_created = false; + + for t in turns { + let cost = cost_for_turn(t, pricing).map(|c| c.total).unwrap_or(0.0); + let Some(sub) = &t.subagent else { + let node = nodes.get_mut(session_id).unwrap(); + node.self_turns += 1; + node.self_cost += cost; + if !t.model.is_empty() { + models.get_mut(session_id).unwrap().insert(t.model.clone()); + } + continue; + }; + let Some(agent_id) = &sub.agent_id else { + if !unresolved_created { + let mut un = MutableNode::new( + unresolved_id.clone(), + "(unresolved)".to_string(), + RelationshipType::Subagent, + ); + un.depth = 1; + nodes.insert(unresolved_id.clone(), un); + models.insert(unresolved_id.clone(), IndexSet::new()); + nodes + .get_mut(session_id) + .unwrap() + .children + .push(unresolved_id.clone()); + unresolved_created = true; + } + let n = nodes.get_mut(&unresolved_id).unwrap(); + n.self_turns += 1; + n.self_cost += cost; + if !t.model.is_empty() { + models + .get_mut(&unresolved_id) + .unwrap() + .insert(t.model.clone()); + } + continue; + }; + if !nodes.contains_key(agent_id) { + let mut n = MutableNode::new( + agent_id.clone(), + sub.subagent_type + .clone() + .unwrap_or_else(|| "(unknown)".to_string()), + RelationshipType::Subagent, + ); + n.subagent_type = sub.subagent_type.clone(); + n.description = sub.description.clone(); + nodes.insert(agent_id.clone(), n); + models.insert(agent_id.clone(), IndexSet::new()); + } else { + let n = nodes.get_mut(agent_id).unwrap(); + if n.subagent_type.is_none() { + if let Some(st) = &sub.subagent_type { + n.subagent_type = Some(st.clone()); + if n.label == "(unknown)" { + n.label = st.clone(); + } + } + } + if n.description.is_none() { + if let Some(d) = &sub.description { + n.description = Some(d.clone()); + } + } + } + let n = nodes.get_mut(agent_id).unwrap(); + n.self_turns += 1; + n.self_cost += cost; + if !t.model.is_empty() { + models.get_mut(agent_id).unwrap().insert(t.model.clone()); + } + } + + // Build parent 
map (insertion order = first-encounter order in turns). + let mut parent_by_node: IndexMap = IndexMap::new(); + for t in turns { + let Some(sub) = &t.subagent else { continue }; + let Some(agent_id) = &sub.agent_id else { + continue; + }; + if parent_by_node.contains_key(agent_id) { + continue; + } + let pid = sub + .parent_agent_id + .clone() + .unwrap_or_else(|| session_id.to_string()); + parent_by_node.insert(agent_id.clone(), pid); + } + + // Attach children, redirecting cycles / self-parents to the session root. + for (id, parent_id) in parent_by_node.clone() { + if !nodes.contains_key(&id) { + continue; + } + let resolved = resolve_parent_or_root(&id, &parent_id, &parent_by_node, session_id); + let parent_target = if nodes.contains_key(&resolved) { + resolved + } else { + session_id.to_string() + }; + let parent_node = nodes.get_mut(&parent_target).unwrap(); + parent_node.children.push(id); + } + + // BFS depth assignment. + assign_depth(&mut nodes, session_id); + + fold_cumulative(&mut nodes, session_id); + sort_tree(&mut nodes, session_id); + + materialize_session_tree(&nodes, &models, session_id) +} + +fn build_relationship_trees( + turns: &[TurnRecord], + relationships: &[SessionRelationshipRecord], + pricing: &PricingTable, +) -> IndexMap { + let mut state = GraphState { + alias_by_id: build_relationship_aliases(turns, relationships), + ..GraphState::default() + }; + + for r in relationships { + let id = canonical_id(&state, &relationship_node_id(r)); + ensure_node(&mut state, &id, &label_for_relationship(r), r.relationship_type); + apply_relationship_metadata(&mut state, &id, r); + if r.relationship_type == RelationshipType::Root { + continue; + } + let Some(related) = &r.related_session_id else { + continue; + }; + let parent_id = canonical_id(&state, related); + ensure_node(&mut state, &parent_id, &parent_id, RelationshipType::Root); + if !state.parent_by_node.contains_key(&id) { + state.parent_by_node.insert(id.clone(), parent_id); + } + } + + 
add_legacy_subagent_gaps(&mut state, turns); + ensure_turn_session_roots(&mut state, turns); + attach_graph_children(&mut state); + attach_turn_costs(&mut state, turns, pricing); + + let child_ids = collect_attached_child_ids(&state); + let root_ids: Vec = state + .node_by_id + .keys() + .filter(|id| !child_ids.contains(*id)) + .cloned() + .collect(); + + let mut out: IndexMap = IndexMap::new(); + for id in root_ids { + finalize_tree(&mut state, &id); + let tree = materialize_session_tree(&state.node_by_id, &state.models_by_node, &id); + out.insert(id, tree); + } + out +} + +fn build_relationship_aliases( + turns: &[TurnRecord], + relationships: &[SessionRelationshipRecord], +) -> IndexMap { + let mut sessions_with_native_sidechains: IndexSet = IndexSet::new(); + for t in turns { + if let Some(sub) = &t.subagent { + if sub.agent_id.is_some() { + sessions_with_native_sidechains.insert(t.session_id.clone()); + } + } + } + for r in relationships { + if r.relationship_type == RelationshipType::Subagent { + if let Some(rs) = &r.related_session_id { + if rs == &r.session_id { + sessions_with_native_sidechains.insert(r.session_id.clone()); + } + } + } + } + + let mut aliases: IndexMap = IndexMap::new(); + for r in relationships { + aliases.insert(r.session_id.clone(), r.session_id.clone()); + } + for r in relationships { + if r.relationship_type != RelationshipType::Subagent { + continue; + } + let Some(agent_id) = &r.agent_id else { + aliases.insert(r.session_id.clone(), r.session_id.clone()); + continue; + }; + let target = if sessions_with_native_sidechains.contains(&r.session_id) { + agent_id.clone() + } else { + r.session_id.clone() + }; + aliases.insert(agent_id.clone(), target); + } + aliases +} + +fn relationship_node_id(r: &SessionRelationshipRecord) -> String { + if r.relationship_type == RelationshipType::Subagent { + r.agent_id.clone().unwrap_or_else(|| r.session_id.clone()) + } else { + r.session_id.clone() + } +} + +fn canonical_id(state: &GraphState, id: 
&str) -> String { + state + .alias_by_id + .get(id) + .cloned() + .unwrap_or_else(|| id.to_string()) +} + +fn ensure_node( + state: &mut GraphState, + id: &str, + label: &str, + relationship_type: RelationshipType, +) { + if !state.node_by_id.contains_key(id) { + state.node_by_id.insert( + id.to_string(), + MutableNode::new(id.to_string(), label.to_string(), relationship_type), + ); + state.models_by_node.insert(id.to_string(), IndexSet::new()); + } +} + +fn label_for_relationship(r: &SessionRelationshipRecord) -> String { + match r.relationship_type { + RelationshipType::Root => "main".to_string(), + RelationshipType::Subagent => r + .subagent_type + .clone() + .unwrap_or_else(|| "(unknown)".to_string()), + _ => r.session_id.clone(), + } +} + +fn apply_relationship_metadata(state: &mut GraphState, id: &str, r: &SessionRelationshipRecord) { + let node = state.node_by_id.get_mut(id).unwrap(); + if r.relationship_type == RelationshipType::Root { + if node.relationship_type == RelationshipType::Root { + node.label = "main".to_string(); + } + return; + } + node.relationship_type = r.relationship_type; + node.label = label_for_relationship(r); + if let Some(st) = &r.subagent_type { + node.subagent_type = Some(st.clone()); + } + if let Some(d) = &r.description { + node.description = Some(d.clone()); + } +} + +fn add_legacy_subagent_gaps(state: &mut GraphState, turns: &[TurnRecord]) { + for t in turns { + let Some(sub) = &t.subagent else { continue }; + let Some(agent_id) = &sub.agent_id else { + continue; + }; + let id = canonical_id(state, agent_id); + let label = sub + .subagent_type + .clone() + .unwrap_or_else(|| "(unknown)".to_string()); + ensure_node(state, &id, &label, RelationshipType::Subagent); + let node = state.node_by_id.get_mut(&id).unwrap(); + if node.relationship_type == RelationshipType::Root { + node.relationship_type = RelationshipType::Subagent; + } + if node.label == "(unknown)" { + if let Some(st) = &sub.subagent_type { + node.label = st.clone(); + 
} + } + if node.subagent_type.is_none() { + if let Some(st) = &sub.subagent_type { + node.subagent_type = Some(st.clone()); + } + } + if node.description.is_none() { + if let Some(d) = &sub.description { + node.description = Some(d.clone()); + } + } + if state.parent_by_node.contains_key(&id) { + continue; + } + let parent_raw = sub + .parent_agent_id + .clone() + .unwrap_or_else(|| t.session_id.clone()); + let parent_id = canonical_id(state, &parent_raw); + state.parent_by_node.insert(id, parent_id); + } +} + +fn ensure_turn_session_roots(state: &mut GraphState, turns: &[TurnRecord]) { + for t in turns { + let id = canonical_id(state, &t.session_id); + ensure_node(state, &id, "main", RelationshipType::Root); + let node = state.node_by_id.get_mut(&id).unwrap(); + if node.relationship_type == RelationshipType::Root { + node.label = "main".to_string(); + } + } + let parent_ids: Vec = state.parent_by_node.values().cloned().collect(); + for pid in parent_ids { + ensure_node(state, &pid, &pid, RelationshipType::Root); + } +} + +fn attach_graph_children(state: &mut GraphState) { + let parent_map = state.parent_by_node.clone(); + for (id, parent_id) in parent_map.iter() { + if !state.node_by_id.contains_key(id) { + continue; + } + let Some(resolved) = resolve_graph_parent(id, parent_id, &parent_map) else { + continue; + }; + let Some(parent) = state.node_by_id.get_mut(&resolved) else { + continue; + }; + if !parent.children.contains(id) { + parent.children.push(id.clone()); + } + } +} + +fn collect_attached_child_ids(state: &GraphState) -> IndexSet { + let mut out = IndexSet::new(); + for node in state.node_by_id.values() { + for c in &node.children { + out.insert(c.clone()); + } + } + out +} + +fn attach_turn_costs(state: &mut GraphState, turns: &[TurnRecord], pricing: &PricingTable) { + let mut unresolved_by_parent: IndexMap = IndexMap::new(); + for t in turns { + let cost = cost_for_turn(t, pricing).map(|c| c.total).unwrap_or(0.0); + let sub = t.subagent.as_ref(); + if 
let Some(s) = sub { + if s.agent_id.is_none() { + let parent_id = canonical_id(state, &t.session_id); + let unresolved_id = if let Some(existing) = unresolved_by_parent.get(&parent_id) { + existing.clone() + } else { + let uid = format!("{parent_id}:__unresolved"); + ensure_node(state, &uid, "(unresolved)", RelationshipType::Subagent); + state.parent_by_node.insert(uid.clone(), parent_id.clone()); + if let Some(parent) = state.node_by_id.get_mut(&parent_id) { + if !parent.children.contains(&uid) { + parent.children.push(uid.clone()); + } + } + unresolved_by_parent.insert(parent_id.clone(), uid.clone()); + uid + }; + add_turn_to_node(state, &unresolved_id, t, cost); + continue; + } + } + let id = match sub.and_then(|s| s.agent_id.as_deref()) { + Some(a) => canonical_id(state, a), + None => canonical_id(state, &t.session_id), + }; + let label = sub + .and_then(|s| s.subagent_type.clone()) + .unwrap_or_else(|| "main".to_string()); + let rel = if sub.is_some() { + RelationshipType::Subagent + } else { + RelationshipType::Root + }; + ensure_node(state, &id, &label, rel); + add_turn_to_node(state, &id, t, cost); + } +} + +fn add_turn_to_node(state: &mut GraphState, id: &str, turn: &TurnRecord, cost: f64) { + let Some(node) = state.node_by_id.get_mut(id) else { + return; + }; + node.self_turns += 1; + node.self_cost += cost; + if !turn.model.is_empty() { + let entry = state.models_by_node.entry(id.to_string()).or_default(); + entry.insert(turn.model.clone()); + } +} + +fn finalize_tree(state: &mut GraphState, root_id: &str) { + // BFS depth assignment with cycle protection. 
+ let mut queue: std::collections::VecDeque<(String, i32)> = std::collections::VecDeque::new(); + queue.push_back((root_id.to_string(), 0)); + let mut seen: IndexSet = IndexSet::new(); + while let Some((id, depth)) = queue.pop_front() { + if seen.contains(&id) { + continue; + } + seen.insert(id.clone()); + let children = if let Some(n) = state.node_by_id.get_mut(&id) { + n.depth = depth; + n.children.clone() + } else { + continue; + }; + for c in children { + queue.push_back((c, depth + 1)); + } + } + + fold_cumulative(&mut state.node_by_id, root_id); + sort_tree(&mut state.node_by_id, root_id); +} + +fn assign_depth(nodes: &mut IndexMap, root_id: &str) { + let mut queue: std::collections::VecDeque<(String, i32)> = std::collections::VecDeque::new(); + queue.push_back((root_id.to_string(), 0)); + let mut seen: IndexSet = IndexSet::new(); + while let Some((id, depth)) = queue.pop_front() { + if seen.contains(&id) { + continue; + } + seen.insert(id.clone()); + let children = if let Some(n) = nodes.get_mut(&id) { + n.depth = depth; + n.children.clone() + } else { + continue; + }; + for c in children { + queue.push_back((c, depth + 1)); + } + } +} + +fn fold_cumulative(nodes: &mut IndexMap, root_id: &str) { + let order = topo_post_order(nodes, root_id); + for id in order { + let (self_cost, self_turns, children) = { + let n = nodes.get(&id).unwrap(); + (n.self_cost, n.self_turns, n.children.clone()) + }; + let mut cost = self_cost; + let mut turns = self_turns; + for c in &children { + if let Some(child) = nodes.get(c) { + cost += child.cumulative_cost; + turns += child.cumulative_turns; + } + } + let n = nodes.get_mut(&id).unwrap(); + n.cumulative_cost = cost; + n.cumulative_turns = turns; + } +} + +fn topo_post_order(nodes: &IndexMap, root_id: &str) -> Vec { + let mut order: Vec = Vec::new(); + let mut seen: IndexSet = IndexSet::new(); + fn visit( + nodes: &IndexMap, + id: &str, + seen: &mut IndexSet, + order: &mut Vec, + ) { + if seen.contains(id) { + return; + } + 
seen.insert(id.to_string()); + if let Some(n) = nodes.get(id) { + for c in n.children.clone() { + visit(nodes, &c, seen, order); + } + } + order.push(id.to_string()); + } + visit(nodes, root_id, &mut seen, &mut order); + order +} + +fn sort_tree(nodes: &mut IndexMap, root_id: &str) { + let order = topo_post_order(nodes, root_id); + for id in order { + let mut children = nodes.get(&id).unwrap().children.clone(); + children.sort_by(|a, b| { + let ca = nodes.get(a).map(|n| n.cumulative_cost).unwrap_or(0.0); + let cb = nodes.get(b).map(|n| n.cumulative_cost).unwrap_or(0.0); + cb.partial_cmp(&ca).unwrap_or(std::cmp::Ordering::Equal) + }); + nodes.get_mut(&id).unwrap().children = children; + } +} + +fn resolve_parent_or_root( + id: &str, + parent_id: &str, + parent_by_node: &IndexMap, + session_id: &str, +) -> String { + if parent_id == id { + return session_id.to_string(); + } + let mut seen: IndexSet = IndexSet::new(); + seen.insert(id.to_string()); + let mut cursor = parent_id.to_string(); + while cursor != session_id { + if seen.contains(&cursor) { + return session_id.to_string(); + } + seen.insert(cursor.clone()); + match parent_by_node.get(&cursor) { + Some(next) => cursor = next.clone(), + None => return parent_id.to_string(), + } + } + parent_id.to_string() +} + +fn resolve_graph_parent( + id: &str, + parent_id: &str, + parent_by_node: &IndexMap, +) -> Option { + if parent_id == id { + return None; + } + let mut seen: IndexSet = IndexSet::new(); + seen.insert(id.to_string()); + let mut cursor = parent_id.to_string(); + while parent_by_node.contains_key(&cursor) { + if seen.contains(&cursor) { + return None; + } + seen.insert(cursor.clone()); + cursor = parent_by_node.get(&cursor).unwrap().clone(); + } + Some(parent_id.to_string()) +} + +fn materialize_session_tree( + nodes: &IndexMap, + models: &IndexMap>, + root_id: &str, +) -> SubagentTreeNode { + let n = nodes.get(root_id).unwrap(); + let mut model_vec: Vec = models + .get(root_id) + .map(|s| 
s.iter().cloned().collect()) + .unwrap_or_default(); + model_vec.sort(); + let mut children = Vec::with_capacity(n.children.len()); + for c in &n.children { + children.push(materialize_session_tree(nodes, models, c)); + } + SubagentTreeNode { + node_id: n.node_id.clone(), + label: n.label.clone(), + relationship_type: n.relationship_type, + subagent_type: n.subagent_type.clone(), + description: n.description.clone(), + models: model_vec, + self_turns: n.self_turns, + self_cost: n.self_cost, + cumulative_turns: n.cumulative_turns, + cumulative_cost: n.cumulative_cost, + depth: n.depth, + children, + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SubagentTypeStats { + pub subagent_type: String, + pub invocations: u64, + pub turns: u64, + pub total_cost: f64, + pub median_cost: f64, + pub p95_cost: f64, + pub mean_cost: f64, +} + +/// Aggregate subagent invocations across sessions by `subagentType`. An +/// invocation is the unique `(sessionId, agentId)` pair so the same agent id +/// re-used across sessions doesn't collide. 
+pub fn aggregate_subagent_type_stats( + turns: &[TurnRecord], + opts: &BuildSubagentTreeOptions<'_>, +) -> Vec { + #[derive(Default)] + struct Inv { + ty: String, + turns: u64, + cost: f64, + } + let mut by_invocation: IndexMap = IndexMap::new(); + for t in turns { + let Some(sub) = &t.subagent else { continue }; + let Some(agent_id) = &sub.agent_id else { + continue; + }; + let ty = sub + .subagent_type + .clone() + .unwrap_or_else(|| "(unknown)".to_string()); + let key = format!("{}:{}", t.session_id, agent_id); + let inv = by_invocation.entry(key).or_insert_with(|| Inv { + ty: ty.clone(), + turns: 0, + cost: 0.0, + }); + if inv.ty == "(unknown)" && ty != "(unknown)" { + inv.ty = ty; + } + inv.turns += 1; + inv.cost += cost_for_turn(t, opts.pricing).map(|c| c.total).unwrap_or(0.0); + } + let mut by_type: IndexMap> = IndexMap::new(); + let mut totals_by_type: IndexMap = IndexMap::new(); + for inv in by_invocation.values() { + by_type.entry(inv.ty.clone()).or_default().push(inv.cost); + let entry = totals_by_type.entry(inv.ty.clone()).or_insert((0, 0.0)); + entry.0 += inv.turns; + entry.1 += inv.cost; + } + let mut out: Vec = Vec::new(); + for (ty, mut costs) in by_type { + let (turns, total) = *totals_by_type.get(&ty).unwrap(); + costs.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + let invocations = costs.len() as u64; + out.push(SubagentTypeStats { + subagent_type: ty, + invocations, + turns, + total_cost: total, + median_cost: percentile(&costs, 0.5), + p95_cost: percentile(&costs, 0.95), + mean_cost: if invocations > 0 { + total / invocations as f64 + } else { + 0.0 + }, + }); + } + out.sort_by(|a, b| { + b.total_cost + .partial_cmp(&a.total_cost) + .unwrap_or(std::cmp::Ordering::Equal) + }); + out +} + +fn percentile(sorted: &[f64], p: f64) -> f64 { + if sorted.is_empty() { + return 0.0; + } + let len = sorted.len(); + // Nearest-rank with clamp. 
+ let raw = (p * len as f64).ceil() as i64 - 1; + let rank = raw.clamp(0, len as i64 - 1) as usize; + sorted[rank] +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::pricing::load_builtin_pricing; + use relayburn_reader::{ + RelationshipSourceKind, RelationshipType, SourceKind, Subagent, ToolCall, TurnRecord, Usage, + }; + + fn make_turn( + session_id: &str, + message_id: &str, + model: &str, + turn_index: u64, + source: SourceKind, + subagent: Option, + ) -> TurnRecord { + TurnRecord { + v: 1, + source, + session_id: session_id.into(), + session_path: None, + message_id: message_id.into(), + turn_index, + ts: "2026-04-20T00:00:00.000Z".into(), + model: model.into(), + project: None, + project_key: None, + usage: Usage { + input: 1000, + output: 1000, + reasoning: 0, + cache_read: 0, + cache_create_5m: 0, + cache_create_1h: 0, + }, + tool_calls: Vec::::new(), + files_touched: None, + subagent, + stop_reason: None, + activity: None, + retries: None, + has_edits: None, + fidelity: None, + } + } + + fn sub( + agent_id: Option<&str>, + parent_agent_id: Option<&str>, + subagent_type: Option<&str>, + description: Option<&str>, + ) -> Subagent { + Subagent { + is_sidechain: true, + parent_tool_use_id: None, + agent_id: agent_id.map(String::from), + parent_agent_id: parent_agent_id.map(String::from), + subagent_type: subagent_type.map(String::from), + description: description.map(String::from), + } + } + + fn rel( + session_id: &str, + rel_type: RelationshipType, + related: Option<&str>, + agent_id: Option<&str>, + subagent_type: Option<&str>, + description: Option<&str>, + source: RelationshipSourceKind, + ) -> SessionRelationshipRecord { + SessionRelationshipRecord { + v: 1, + source, + session_id: session_id.into(), + related_session_id: related.map(String::from), + relationship_type: rel_type, + ts: None, + source_session_id: None, + source_version: None, + parent_tool_use_id: None, + agent_id: agent_id.map(String::from), + subagent_type: 
subagent_type.map(String::from), + description: description.map(String::from), + } + } + + #[test] + fn folds_cumulative_cost_from_nested_subagents_up_to_the_main_root() { + let pricing = load_builtin_pricing(); + let session_id = "sess-1"; + let turns = vec![ + make_turn(session_id, "m1", "claude-sonnet-4-6", 0, SourceKind::ClaudeCode, None), + make_turn(session_id, "m2", "claude-sonnet-4-6", 1, SourceKind::ClaudeCode, None), + make_turn( + session_id, + "o1", + "claude-haiku-4-5", + 2, + SourceKind::ClaudeCode, + Some(sub(Some("u-outer"), Some(session_id), Some("Explore"), Some("Research"))), + ), + make_turn( + session_id, + "o2", + "claude-haiku-4-5", + 3, + SourceKind::ClaudeCode, + Some(sub(Some("u-outer"), Some(session_id), Some("Explore"), None)), + ), + make_turn( + session_id, + "i1", + "claude-haiku-4-5", + 4, + SourceKind::ClaudeCode, + Some(sub(Some("u-inner"), Some("u-outer"), Some("code-reviewer"), None)), + ), + ]; + + let opts = BuildSubagentTreeOptions::new(&pricing); + let trees = build_subagent_tree(&turns, &opts); + let root = trees.get(session_id).expect("root"); + assert_eq!(root.label, "main"); + assert_eq!(root.depth, 0); + assert_eq!(root.self_turns, 2); + assert_eq!(root.cumulative_turns, 5); + assert!(root.cumulative_cost > root.self_cost); + + assert_eq!(root.children.len(), 1); + let outer = &root.children[0]; + assert_eq!(outer.label, "Explore"); + assert_eq!(outer.depth, 1); + assert_eq!(outer.self_turns, 2); + assert_eq!(outer.cumulative_turns, 3); + assert_eq!(outer.children.len(), 1); + + let inner = &outer.children[0]; + assert_eq!(inner.label, "code-reviewer"); + assert_eq!(inner.depth, 2); + assert_eq!(inner.self_turns, 1); + assert_eq!(inner.cumulative_turns, 1); + assert!((inner.cumulative_cost - inner.self_cost).abs() < 1e-12); + + assert!( + (outer.cumulative_cost - (outer.self_cost + inner.cumulative_cost)).abs() < 1e-12, + "outer cumulative is selfCost + inner.cumulativeCost" + ); + } + + #[test] + fn 
buckets_sidechain_turns_without_agent_id_under_an_unresolved_node() { + let pricing = load_builtin_pricing(); + let session_id = "sess-2"; + let turns = vec![ + make_turn(session_id, "m1", "claude-sonnet-4-6", 0, SourceKind::ClaudeCode, None), + make_turn( + session_id, + "s1", + "claude-haiku-4-5", + 1, + SourceKind::ClaudeCode, + Some(Subagent { + is_sidechain: true, + parent_tool_use_id: None, + agent_id: None, + parent_agent_id: None, + subagent_type: None, + description: None, + }), + ), + ]; + let opts = BuildSubagentTreeOptions::new(&pricing); + let trees = build_subagent_tree(&turns, &opts); + let root = trees.get(session_id).unwrap(); + assert_eq!(root.children.len(), 1); + assert_eq!(root.children[0].label, "(unresolved)"); + assert_eq!(root.children[0].self_turns, 1); + } + + #[test] + fn builds_the_same_claude_tree_from_session_relationship_records() { + let pricing = load_builtin_pricing(); + let session_id = "sess-graph"; + let turns = vec![ + make_turn(session_id, "m1", "claude-sonnet-4-6", 0, SourceKind::ClaudeCode, None), + make_turn( + session_id, + "o1", + "claude-haiku-4-5", + 1, + SourceKind::ClaudeCode, + Some(sub(Some("u-outer"), Some(session_id), Some("Explore"), Some("Research"))), + ), + make_turn( + session_id, + "i1", + "claude-haiku-4-5", + 2, + SourceKind::ClaudeCode, + Some(sub(Some("u-inner"), Some("u-outer"), Some("code-reviewer"), None)), + ), + ]; + let relationships = vec![ + rel( + session_id, + RelationshipType::Root, + None, + None, + None, + None, + RelationshipSourceKind::ClaudeCode, + ), + rel( + session_id, + RelationshipType::Subagent, + Some(session_id), + Some("u-outer"), + Some("Explore"), + Some("Research"), + RelationshipSourceKind::NativeClaude, + ), + rel( + session_id, + RelationshipType::Subagent, + Some("u-outer"), + Some("u-inner"), + Some("code-reviewer"), + None, + RelationshipSourceKind::NativeClaude, + ), + ]; + + let legacy_opts = BuildSubagentTreeOptions::new(&pricing); + let legacy = 
build_subagent_tree(&turns, &legacy_opts).get(session_id).unwrap().clone(); + let graph_opts = BuildSubagentTreeOptions::new(&pricing).with_relationships(&relationships); + let graph = build_subagent_tree(&turns, &graph_opts).get(session_id).unwrap().clone(); + assert_eq!(graph, legacy); + assert_eq!(graph.relationship_type, RelationshipType::Root); + assert_eq!(graph.children[0].relationship_type, RelationshipType::Subagent); + } + + #[test] + fn joins_child_session_relationship_rows_to_turns_without_per_turn_subagent_metadata() { + let pricing = load_builtin_pricing(); + let turns = vec![ + make_turn("parent-session", "parent-1", "gpt-5.1-codex", 0, SourceKind::Codex, None), + make_turn("child-session", "child-1", "gpt-5.1-codex", 0, SourceKind::Codex, None), + ]; + let relationships = vec![ + rel( + "parent-session", + RelationshipType::Root, + None, + None, + None, + None, + RelationshipSourceKind::Codex, + ), + rel( + "child-session", + RelationshipType::Subagent, + Some("parent-session"), + Some("agent-child"), + Some("worker"), + None, + RelationshipSourceKind::Codex, + ), + ]; + + let opts = BuildSubagentTreeOptions::new(&pricing).with_relationships(&relationships); + let root = build_subagent_tree(&turns, &opts).get("parent-session").unwrap().clone(); + assert_eq!(root.self_turns, 1); + assert_eq!(root.cumulative_turns, 2); + assert_eq!(root.children.len(), 1); + assert_eq!(root.children[0].label, "worker"); + assert_eq!(root.children[0].node_id, "child-session"); + assert_eq!(root.children[0].relationship_type, RelationshipType::Subagent); + assert_eq!(root.children[0].self_turns, 1); + } + + #[test] + fn does_not_alias_native_sidechain_session_roots_onto_agent_ids_when_turns_lack_subagent_fields() { + let pricing = load_builtin_pricing(); + let session_id = "partial-claude"; + let turns = vec![make_turn( + session_id, + "main-1", + "claude-sonnet-4-6", + 0, + SourceKind::ClaudeCode, + None, + )]; + let relationships = vec![ + rel( + session_id, + 
RelationshipType::Root, + None, + None, + None, + None, + RelationshipSourceKind::ClaudeCode, + ), + rel( + session_id, + RelationshipType::Subagent, + Some(session_id), + Some("u-outer"), + Some("Explore"), + None, + RelationshipSourceKind::NativeClaude, + ), + ]; + let opts = BuildSubagentTreeOptions::new(&pricing).with_relationships(&relationships); + let root = build_subagent_tree(&turns, &opts).get(session_id).unwrap().clone(); + assert_eq!(root.node_id, session_id); + assert_eq!(root.label, "main"); + assert_eq!(root.self_turns, 1); + assert_eq!(root.children.len(), 1); + assert_eq!(root.children[0].node_id, "u-outer"); + assert_eq!(root.children[0].self_turns, 0); + } + + #[test] + fn reports_median_p95_mean_total_per_subagent_type_across_invocations() { + let pricing = load_builtin_pricing(); + let mut turns: Vec = Vec::new(); + for i in 0..3 { + let agent_id = format!("u-exp-{i}"); + for j in 0..=i { + turns.push(make_turn( + &format!("sess-{i}"), + &format!("m-{i}-{j}"), + "claude-haiku-4-5", + j as u64, + SourceKind::ClaudeCode, + Some(sub(Some(&agent_id), None, Some("Explore"), None)), + )); + } + } + turns.push(make_turn( + "sess-rev", + "mr", + "claude-haiku-4-5", + 0, + SourceKind::ClaudeCode, + Some(sub(Some("u-rev"), None, Some("code-reviewer"), None)), + )); + + let opts = BuildSubagentTreeOptions::new(&pricing); + let stats = aggregate_subagent_type_stats(&turns, &opts); + let explore = stats.iter().find(|s| s.subagent_type == "Explore").unwrap(); + assert_eq!(explore.invocations, 3); + assert_eq!(explore.turns, 6); + assert!(explore.median_cost > 0.0); + assert!(explore.p95_cost >= explore.median_cost); + assert!((explore.mean_cost - explore.total_cost / 3.0).abs() < 1e-12); + + let rev = stats.iter().find(|s| s.subagent_type == "code-reviewer").unwrap(); + assert_eq!(rev.invocations, 1); + assert_eq!(rev.turns, 1); + assert!((rev.median_cost - rev.total_cost).abs() < 1e-12); + assert!((rev.p95_cost - rev.total_cost).abs() < 1e-12); + } + +}