diff --git a/CHANGELOG.md b/CHANGELOG.md index f2c9a379..21df2bea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,10 @@ Cross-package release notes for relayburn. Package changelogs contain package-le ## [Unreleased] - ### Added +- `burn summary`: one-line `Turn outcomes: …` breakdown of assistant + `stop_reason` counts, plus a `stopReasons` block in `--json`. (#437) - Ledger fingerprint primitive (`{count}:{maxMtimeUnix}:{totalBytes}`) for cheap "did anything change" polling. Exposed as `LedgerHandle::fingerprint` on the Rust SDK, `sdk.fingerprint()` on `@relayburn/sdk`, @@ -23,7 +24,11 @@ Cross-package release notes for relayburn. Package changelogs contain package-le `queued_command` attachment with `commandMode`), so a real prompt that literally types `` is not filtered. Drops user-turn inflation from background Bash completions. - +- **BREAKING** `relayburn-sdk`: `TurnRecord.stop_reason` is now an + `Option` enum (kebab-case wire form); deserialization is + lenient so pre-3.0 ledgers replay cleanly. (#437) +- `relayburn-sdk` ledger schema bumps to v2: `turns` gains a + `stop_reason TEXT` column, migrated in place on `Ledger::open`. 
(#437) ## [2.10.0] - 2026-05-24 diff --git a/crates/relayburn-cli/src/commands/summary.rs b/crates/relayburn-cli/src/commands/summary.rs index b5e1830d..118a4436 100644 --- a/crates/relayburn-cli/src/commands/summary.rs +++ b/crates/relayburn-cli/src/commands/summary.rs @@ -23,10 +23,10 @@ use clap::Args; use relayburn_sdk::{ ingest_all, summary_fidelity_summary_to_value, summary_replacement_savings_to_value, CostBreakdown, CoverageField, Enrichment, FidelityClass, FidelitySummary, Ledger, LedgerHandle, - LedgerOpenOptions, OutcomeLabel, QualityResult, RelationshipType, SubagentTreeNode, - SubagentTypeStats, SummaryByToolReport, SummaryGroupBy, SummaryGroupedReport, - SummaryRelationshipReport, SummaryReport, SummaryReportMode, SummaryReportOptions, - SummarySubagentTreeReport, UsageCostAggregateRow, + LedgerOpenOptions, OutcomeLabel, QualityResult, RelationshipType, StopReasonCounts, + SubagentTreeNode, SubagentTypeStats, SummaryByToolReport, SummaryGroupBy, + SummaryGroupedReport, SummaryRelationshipReport, SummaryReport, SummaryReportMode, + SummaryReportOptions, SummarySubagentTreeReport, UsageCostAggregateRow, }; use serde_json::{json, Map, Value}; @@ -490,6 +490,7 @@ fn grouped_json_value( summary_replacement_savings_to_value(&report.replacement_savings), ); } + payload.insert("stopReasons".into(), stop_reasons_to_json(&report.stop_reasons)); if let Some(quality) = report.quality.as_ref() { payload.insert("quality".into(), json!(quality)); } @@ -968,6 +969,11 @@ fn emit_human(report: &SummaryGroupedReport, ingest_report: &relayburn_sdk::Inge lines.push(String::new()); } + if !report.stop_reasons.is_empty() { + lines.push(format_stop_reasons_line(&report.stop_reasons)); + lines.push(String::new()); + } + if any_partial { lines.push(format_partial_footer(&report.rows)); lines.push(String::new()); @@ -1036,6 +1042,52 @@ fn render_quality(q: &QualityResult) -> String { .join("\n") } +/// Human-readable outcome line for `burn summary`, e.g. 
+/// `Turn outcomes: 142 end_turn, 3 max_tokens, 1 refusal, 0 pause`. +/// +/// Always renders `end_turn` / `max_tokens` / `refusal` / `pause` because +/// users want to see the zero — "no refusals" is a meaningful signal. +/// Other buckets (`tool_use`, `stop_sequence`, `silent`, `none`) appear +/// only when non-zero so the line stays scannable. Labels stay snake_case +/// to match the historical Anthropic spelling the issue specified. +fn format_stop_reasons_line(s: &StopReasonCounts) -> String { + let mut parts: Vec = vec![ + format!("{} end_turn", format_uint(s.end_turn)), + format!("{} max_tokens", format_uint(s.max_tokens)), + format!("{} refusal", format_uint(s.refusal)), + format!("{} pause", format_uint(s.pause_turn)), + ]; + if s.tool_use > 0 { + parts.push(format!("{} tool_use", format_uint(s.tool_use))); + } + if s.stop_sequence > 0 { + parts.push(format!("{} stop_sequence", format_uint(s.stop_sequence))); + } + if s.silent > 0 { + parts.push(format!("{} silent", format_uint(s.silent))); + } + if s.none > 0 { + parts.push(format!("{} none", format_uint(s.none))); + } + format!("Turn outcomes: {}", parts.join(", ")) +} + +/// JSON shape for the outcome breakdown. Keys are camelCase to match the +/// rest of the summary surface; every bucket is emitted unconditionally so +/// downstream consumers can index keys without `?` plumbing. 
+fn stop_reasons_to_json(s: &StopReasonCounts) -> Value { + json!({ + "endTurn": s.end_turn, + "maxTokens": s.max_tokens, + "pauseTurn": s.pause_turn, + "stopSequence": s.stop_sequence, + "toolUse": s.tool_use, + "refusal": s.refusal, + "silent": s.silent, + "none": s.none, + }) +} + fn format_replacement_savings_line(s: &relayburn_sdk::ReplacementSavingsSummary) -> String { let call_word = if s.calls == 1 { "call" } else { "calls" }; format!( @@ -1181,6 +1233,7 @@ mod tests { fidelity: relayburn_sdk::summarize_fidelity(&[]), per_cell_fidelity: json!({"groupBy": "model"}), replacement_savings: relayburn_sdk::ReplacementSavingsSummary::default(), + stop_reasons: relayburn_sdk::StopReasonCounts::default(), quality: Some(QualityResult::default()), }; diff --git a/crates/relayburn-sdk/src/analyze/quality.rs b/crates/relayburn-sdk/src/analyze/quality.rs index 80e5db01..e21f82ff 100644 --- a/crates/relayburn-sdk/src/analyze/quality.rs +++ b/crates/relayburn-sdk/src/analyze/quality.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; -use crate::reader::{ContentKind, ContentRecord, ContentRole, TurnRecord}; +use crate::reader::{ContentKind, ContentRecord, ContentRole, StopReason, TurnRecord}; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] @@ -324,9 +324,9 @@ fn ending_role(turns: &[&TurnRecord]) -> EndingRole { // reason means user-ended (session died after a tool_use). When the // source doesn't record stopReason at all (e.g. Codex), return Unknown. 
let last = turns.last().expect("turns non-empty"); - match &last.stop_reason { + match last.stop_reason { None => EndingRole::Unknown, - Some(s) if s == "end_turn" => EndingRole::Assistant, + Some(StopReason::EndTurn) => EndingRole::Assistant, Some(_) => EndingRole::User, } } @@ -525,7 +525,7 @@ mod tests { ts: Option, session_id: Option, source: Option, - stop_reason: Option>, + stop_reason: Option>, tool_calls: Option>, retries: Option>, has_edits: Option, @@ -582,13 +582,13 @@ mod tests { turn(TurnOverrides { message_id: "m1".into(), turn_index: 0, - stop_reason: Some(Some("tool_use".into())), + stop_reason: Some(Some(StopReason::ToolUse)), ..Default::default() }), turn(TurnOverrides { message_id: "m2".into(), turn_index: 1, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), ]; @@ -603,7 +603,7 @@ mod tests { let turns = vec![turn(TurnOverrides { message_id: "m1".into(), turn_index: 0, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() })]; let o = infer_outcome("s", &turns, None, fixed_now()); @@ -617,7 +617,7 @@ mod tests { let turns = vec![turn(TurnOverrides { message_id: "m1".into(), turn_index: 0, - stop_reason: Some(Some("tool_use".into())), + stop_reason: Some(Some(StopReason::ToolUse)), ..Default::default() })]; let o = infer_outcome("s", &turns, None, fixed_now()); @@ -633,21 +633,21 @@ mod tests { message_id: "m1".into(), turn_index: 0, ts: Some("2026-04-20T00:00:00.000Z".into()), - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), turn(TurnOverrides { message_id: "m2".into(), turn_index: 1, ts: Some("2026-04-20T00:01:00.000Z".into()), - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), turn(TurnOverrides { message_id: "m3".into(), turn_index: 2, ts: Some("2026-04-20T00:02:00.000Z".into()), 
- stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), ]; @@ -664,9 +664,11 @@ mod tests { turn(TurnOverrides { message_id: format!("m{i}"), turn_index: i as u64, - stop_reason: Some(Some( - if i == 9 { "tool_use" } else { "end_turn" }.to_string(), - )), + stop_reason: Some(Some(if i == 9 { + StopReason::ToolUse + } else { + StopReason::EndTurn + })), ..Default::default() }) }) @@ -683,19 +685,19 @@ mod tests { turn(TurnOverrides { message_id: "m1".into(), turn_index: 0, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), turn(TurnOverrides { message_id: "m2".into(), turn_index: 1, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), turn(TurnOverrides { message_id: "m3".into(), turn_index: 2, - stop_reason: Some(Some("tool_use".into())), + stop_reason: Some(Some(StopReason::ToolUse)), ..Default::default() }), ]; @@ -711,27 +713,27 @@ mod tests { turn(TurnOverrides { message_id: "m1".into(), turn_index: 0, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), turn(TurnOverrides { message_id: "m2".into(), turn_index: 1, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), tool_calls: Some(vec![tc("u1", "Bash", Some(true))]), ..Default::default() }), turn(TurnOverrides { message_id: "m3".into(), turn_index: 2, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), tool_calls: Some(vec![tc("u2", "Bash", Some(true))]), ..Default::default() }), turn(TurnOverrides { message_id: "m4".into(), turn_index: 3, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), tool_calls: Some(vec![tc("u3", "Bash", Some(true))]), ..Default::default() }), @@ -748,7 +750,7 @@ mod tests { turn(TurnOverrides { 
message_id: format!("m{}", i + 1), turn_index: i, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }) }) @@ -825,7 +827,7 @@ mod tests { turn(TurnOverrides { message_id: format!("m{}", i + 1), turn_index: i, - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }) }) @@ -942,7 +944,7 @@ mod tests { turn_index: 0, session_id: Some("A".into()), has_edits: Some(true), - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), turn(TurnOverrides { @@ -950,21 +952,21 @@ mod tests { turn_index: 1, session_id: Some("A".into()), has_edits: Some(true), - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), turn(TurnOverrides { message_id: "a3".into(), turn_index: 2, session_id: Some("A".into()), - stop_reason: Some(Some("end_turn".into())), + stop_reason: Some(Some(StopReason::EndTurn)), ..Default::default() }), turn(TurnOverrides { message_id: "b1".into(), turn_index: 0, session_id: Some("B".into()), - stop_reason: Some(Some("tool_use".into())), + stop_reason: Some(Some(StopReason::ToolUse)), ..Default::default() }), ]; diff --git a/crates/relayburn-sdk/src/ledger/db.rs b/crates/relayburn-sdk/src/ledger/db.rs index 26af6bb1..34d64a43 100644 --- a/crates/relayburn-sdk/src/ledger/db.rs +++ b/crates/relayburn-sdk/src/ledger/db.rs @@ -57,6 +57,7 @@ impl Connections { let mut burn = Connection::open(burn_path)?; configure_pragmas(&burn)?; burn.execute_batch(BURN_DDL)?; + migrate_burn_schema(&burn)?; verify_schema_version(&burn)?; // Bootstrap from `ledger.jsonl` sibling if the sqlite mirror is @@ -92,6 +93,66 @@ fn configure_pragmas(conn: &Connection) -> Result<()> { Ok(()) } +/// In-place forward migrations for `burn.sqlite`. 
Re-applying is a no-op so +/// open is idempotent; called BEFORE [`verify_schema_version`] so the +/// version we read reflects the post-migration state. +/// +/// Migrations are tagged by destination schema version; each step is +/// guarded so re-running `Ledger::open` after a crash mid-migration picks +/// up where it left off without surfacing `duplicate column name` errors. +fn migrate_burn_schema(conn: &Connection) -> Result<()> { + let current_version: u32 = conn + .query_row( + "SELECT schema_version FROM archive_state WHERE id = 1", + [], + |r| r.get::<_, i64>(0), + ) + .map(|v| v as u32) + .unwrap_or(SCHEMA_VERSION); + + if current_version < 2 { + // v1 → v2: add the denormalized `turns.stop_reason` column for + // outcome aggregation. `CREATE TABLE IF NOT EXISTS` in the DDL + // already covers fresh DBs (the column lives in the DDL); this + // branch handles existing v1 ledgers whose `turns` table + // pre-existed the bump. + // + // We try the `ALTER TABLE` unconditionally and swallow the + // `duplicate column name` failure rather than pre-checking with + // `PRAGMA table_info`. The check-then-act sequence is racy under + // concurrent ledger opens: two processes can both observe the + // column missing, both issue the `ALTER`, and the second loses. + // Letting SQLite arbitrate via the duplicate-column error keeps + // the migration genuinely idempotent. We deliberately don't + // catch the `SqliteFailure(_, None)` shape — that's too broad + // and would mask real schema breakage. 
+ match conn.execute("ALTER TABLE turns ADD COLUMN stop_reason TEXT", []) { + Ok(_) => {} + Err(rusqlite::Error::SqliteFailure(_, Some(msg))) + if msg.contains("duplicate column name") => {} + Err(e) => return Err(e.into()), + } + conn.execute( + "UPDATE archive_state SET schema_version = 2 WHERE id = 1", + [], + )?; + } + + // The `idx_turns_stop_reason` index is created here rather than in + // the static DDL so a legacy v1 table (no `stop_reason` column yet) + // doesn't fail the DDL pre-pass. By this point the column either + // existed all along (fresh v2 DDL) or was just added by the v1 → v2 + // step above, so the index is safe to create idempotently every + // open. + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_turns_stop_reason \ + ON turns(stop_reason) WHERE stop_reason IS NOT NULL", + [], + )?; + + Ok(()) +} + fn verify_schema_version(conn: &Connection) -> Result<()> { let version: u32 = conn .query_row( diff --git a/crates/relayburn-sdk/src/ledger/schema.rs b/crates/relayburn-sdk/src/ledger/schema.rs index 3def6db7..4a0d9e5e 100644 --- a/crates/relayburn-sdk/src/ledger/schema.rs +++ b/crates/relayburn-sdk/src/ledger/schema.rs @@ -26,7 +26,16 @@ pub const DERIVABLE_TABLES: &[&str] = &[ /// Bumped when on-disk shape changes incompatibly. Stored in /// `archive_state.schema_version` after migration. -pub const SCHEMA_VERSION: u32 = 1; +/// +/// Version history: +/// - `1`: initial release; `turns` is `(source, session_id, message_id, ts, +/// project, project_key, record_json, content_fingerprint)`. +/// - `2`: 3.0 release — adds the denormalized `turns.stop_reason TEXT` column +/// so `burn summary` can roll outcome counts without re-deserializing every +/// `record_json` row. Existing rows are migrated by `ALTER TABLE … ADD +/// COLUMN` and left `NULL`; new inserts populate the column from +/// [`crate::reader::StopReason::wire_str`]. +pub const SCHEMA_VERSION: u32 = 2; /// DDL for `burn.sqlite`. 
Idempotent (`IF NOT EXISTS`) so re-applying on /// startup is a no-op once the tables exist. @@ -46,6 +55,12 @@ CREATE TABLE IF NOT EXISTS turns ( project_key TEXT, record_json TEXT NOT NULL, content_fingerprint TEXT NOT NULL, + -- Denormalized stop reason for the trailing assistant row (kebab-case + -- wire spelling of `crate::reader::StopReason`). Nullable so pre-3.0 + -- rows and harnesses that don't report one (Codex) survive the schema + -- bump without a backfill. New inserts populate this directly from the + -- parsed `TurnRecord.stop_reason`. See issue #437. + stop_reason TEXT, PRIMARY KEY (source, session_id, message_id) ) STRICT; @@ -55,6 +70,10 @@ CREATE INDEX IF NOT EXISTS idx_turns_session ON turns(session_id); CREATE INDEX IF NOT EXISTS idx_turns_ts ON turns(ts); +-- `idx_turns_stop_reason` is created from the in-place migration in +-- `db::migrate_burn_schema` so a legacy v1 table (no `stop_reason` column +-- yet) doesn't blow up on the DDL pre-pass. Once `ALTER TABLE` adds the +-- column, the migration runs `CREATE INDEX IF NOT EXISTS` itself. CREATE TABLE IF NOT EXISTS compactions ( id_fingerprint TEXT PRIMARY KEY, @@ -143,7 +162,7 @@ CREATE TABLE IF NOT EXISTS archive_state ( ); INSERT INTO archive_state (id, schema_version) - VALUES (1, 1) + VALUES (1, 2) ON CONFLICT(id) DO NOTHING; "#; diff --git a/crates/relayburn-sdk/src/ledger/tests.rs b/crates/relayburn-sdk/src/ledger/tests.rs index 8e089c13..065eaaf0 100644 --- a/crates/relayburn-sdk/src/ledger/tests.rs +++ b/crates/relayburn-sdk/src/ledger/tests.rs @@ -719,6 +719,100 @@ fn invalid_session_id_in_content_rejected() { assert!(matches!(err, LedgerError::InvalidSessionId(_))); } +/// Acceptance for issue #437: a v1 `burn.sqlite` (no `stop_reason` +/// column on `turns`, `archive_state.schema_version = 1`) opens cleanly +/// against the 3.0 SDK, the column is back-added by the in-place +/// migration, and the stored version bumps to 2. Existing rows stay +/// `NULL` until rewritten. 
+#[test] +fn legacy_v1_ledger_migrates_to_v2_on_open_and_adds_stop_reason_column() { + let tmp = TempDir::new().unwrap(); + let layout = LedgerLayout::under(tmp.path()); + // Step 1: write a synthetic v1 schema by hand. We bypass + // `Ledger::open` so the migration doesn't pre-emptively bump us + // past v1. + { + let conn = rusqlite::Connection::open(&layout.burn).unwrap(); + conn.execute_batch( + "CREATE TABLE turns ( + source TEXT NOT NULL, + session_id TEXT NOT NULL, + message_id TEXT NOT NULL, + ts TEXT NOT NULL, + project TEXT, + project_key TEXT, + record_json TEXT NOT NULL, + content_fingerprint TEXT NOT NULL, + PRIMARY KEY (source, session_id, message_id) + ) STRICT; + CREATE TABLE archive_state ( + id INTEGER PRIMARY KEY CHECK (id = 1), + schema_version INTEGER NOT NULL, + upstream_cursors_json TEXT NOT NULL DEFAULT '{}', + last_built_at TEXT, + last_rebuild_at TEXT + ); + INSERT INTO archive_state (id, schema_version) VALUES (1, 1); + -- A single legacy row. The record_json carries no stopReason + -- key so the reader's lenient deserializer reproduces None. + INSERT INTO turns (source, session_id, message_id, ts, + project, project_key, record_json, content_fingerprint) + VALUES ('claude-code', 'legacy-sess', 'legacy-msg', + '2025-01-01T00:00:00Z', NULL, NULL, + '{\"v\":1,\"source\":\"claude-code\",\"sessionId\":\"legacy-sess\",\"messageId\":\"legacy-msg\",\"turnIndex\":0,\"ts\":\"2025-01-01T00:00:00Z\",\"model\":\"claude-sonnet-4-6\",\"usage\":{\"input\":0,\"output\":0,\"reasoning\":0,\"cacheRead\":0,\"cacheCreate5m\":0,\"cacheCreate1h\":0},\"toolCalls\":[]}', + 'legacy-fp'); + ", + ) + .unwrap(); + } + + // Step 2: open through the SDK. The migration must: + // a) add `turns.stop_reason TEXT`, + // b) bump archive_state.schema_version to 2, + // c) leave the legacy row's stop_reason as NULL. 
+ let l = Ledger::open(&layout.burn, &layout.content).unwrap(); + let version: i64 = l + .conns + .burn + .query_row( + "SELECT schema_version FROM archive_state WHERE id = 1", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(version, 2, "open must bump v1 → v2"); + + let column_names: Vec = l + .conns + .burn + .prepare("PRAGMA table_info(turns)") + .unwrap() + .query_map([], |row| row.get::<_, String>(1)) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + assert!( + column_names.iter().any(|c| c == "stop_reason"), + "post-migration table must carry stop_reason; got: {column_names:?}" + ); + + let legacy_stop_reason: Option = l + .conns + .burn + .query_row( + "SELECT stop_reason FROM turns WHERE message_id = 'legacy-msg'", + [], + |r| r.get(0), + ) + .unwrap(); + assert!(legacy_stop_reason.is_none(), "legacy row stays NULL"); + + // Re-opening is idempotent: the migration probe sees the column and + // skips the ALTER, version stays at 2. + drop(l); + let _ = Ledger::open(&layout.burn, &layout.content).unwrap(); +} + #[test] fn schema_too_new_is_rejected() { // Defensive: if a future build wrote a higher schema_version, this diff --git a/crates/relayburn-sdk/src/ledger/writer.rs b/crates/relayburn-sdk/src/ledger/writer.rs index f602e0b9..9c9c48ae 100644 --- a/crates/relayburn-sdk/src/ledger/writer.rs +++ b/crates/relayburn-sdk/src/ledger/writer.rs @@ -52,8 +52,9 @@ pub(crate) fn append_turns(conn: &mut Connection, turns: &[TurnRecord]) -> Resul tx.prepare("SELECT 1 FROM turns WHERE content_fingerprint = ? 
LIMIT 1")?; let mut insert = tx.prepare( "INSERT OR IGNORE INTO turns - (source, session_id, message_id, ts, project, project_key, record_json, content_fingerprint) - VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (source, session_id, message_id, ts, project, project_key, + record_json, content_fingerprint, stop_reason) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", )?; for t in turns { let fingerprint = turn_content_fingerprint(t); @@ -64,6 +65,10 @@ pub(crate) fn append_turns(conn: &mut Connection, turns: &[TurnRecord]) -> Resul continue; } let json = serde_json::to_string(t)?; + // Denormalize `stop_reason` so summary aggregations don't have to + // re-deserialize `record_json`. NULL for Codex (no field) and + // pre-3.0 imports. + let stop_reason_str = t.stop_reason.as_ref().map(|s| s.wire_str()); let changed = insert.execute(params![ t.source.wire_str(), t.session_id, @@ -73,6 +78,7 @@ pub(crate) fn append_turns(conn: &mut Connection, turns: &[TurnRecord]) -> Resul t.project_key, json, fingerprint, + stop_reason_str, ])?; if changed > 0 { appended += 1; diff --git a/crates/relayburn-sdk/src/lib.rs b/crates/relayburn-sdk/src/lib.rs index 539aec1c..c7ae0dae 100644 --- a/crates/relayburn-sdk/src/lib.rs +++ b/crates/relayburn-sdk/src/lib.rs @@ -69,7 +69,7 @@ pub use crate::reader::{ ClassificationResult, CompactionEvent, ContentKind, ContentRecord, ContentRole, ContentStoreMode, ContentToolResult, ContentToolUse, Coverage, Fidelity, FidelityClass, Harness, ProjectResolver, RelationshipSourceKind, RelationshipType, ResolvedProject, - SessionRelationshipRecord, SourceKind, Subagent, ToolCall, ToolResultEventRecord, + SessionRelationshipRecord, SourceKind, StopReason, Subagent, ToolCall, ToolResultEventRecord, ToolResultEventSource, ToolResultStatus, TurnRecord, Usage, UsageAttribution, UsageGranularity, UserTurnBlock, UserTurnBlockKind, UserTurnRecord, }; diff --git a/crates/relayburn-sdk/src/query_verbs.rs b/crates/relayburn-sdk/src/query_verbs.rs index 482bba01..9d819b4c 100644 --- 
a/crates/relayburn-sdk/src/query_verbs.rs +++ b/crates/relayburn-sdk/src/query_verbs.rs @@ -41,8 +41,8 @@ use crate::analyze::{ use crate::ledger::{EnrichedTurn, Enrichment, Query}; use crate::reader::{ parse_bash_command, resolve_project, BashParse, ContentRecord, Coverage, FidelityClass, - RelationshipType, SessionRelationshipRecord, SourceKind, TurnRecord, Usage, UsageGranularity, - UserTurnBlockKind, UserTurnRecord, + RelationshipType, SessionRelationshipRecord, SourceKind, StopReason, TurnRecord, Usage, + UsageGranularity, UserTurnBlockKind, UserTurnRecord, }; use crate::{Ledger, LedgerHandle, LedgerOpenOptions}; @@ -390,6 +390,74 @@ pub struct SummaryTagRow { pub turn_count: u64, } +/// Per-outcome turn counts, surfaced by `burn summary` for the one-line +/// outcome breakdown (`142 end_turn, 3 max_tokens, 1 refusal, 0 pause`). +/// +/// Counts mirror the [`StopReason`] enum variants plus a `none` slot for +/// turns whose row carried no `stop_reason` field at all — that's Codex +/// today (no field in the rollout schema) and any pre-3.0 ledger row that +/// was ingested before the reader started populating the enum. +/// +/// `Silent` is reserved for "row exists, carries a stop_reason that we +/// don't recognize" — distinct from `none` so we can spot a future harness +/// regression rather than silently lumping it with Codex. +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct StopReasonCounts { + pub end_turn: u64, + pub max_tokens: u64, + pub pause_turn: u64, + pub stop_sequence: u64, + pub tool_use: u64, + pub refusal: u64, + pub silent: u64, + /// Turns whose record carried no `stop_reason` field — e.g. Codex + /// rollouts (the harness doesn't report one) or pre-3.0 ledger rows + /// from before the reader started parsing the field. + pub none: u64, +} + +impl StopReasonCounts { + /// Accumulate one turn's outcome into the bucket counts. 
`None` lands + /// in [`Self::none`]; unrecognized variants would already be normalized + /// to [`StopReason::Silent`] upstream by the lenient deserializer. + pub fn bump(&mut self, reason: Option) { + match reason { + None => self.none += 1, + Some(StopReason::EndTurn) => self.end_turn += 1, + Some(StopReason::MaxTokens) => self.max_tokens += 1, + Some(StopReason::PauseTurn) => self.pause_turn += 1, + Some(StopReason::StopSequence) => self.stop_sequence += 1, + Some(StopReason::ToolUse) => self.tool_use += 1, + Some(StopReason::Refusal) => self.refusal += 1, + Some(StopReason::Silent) => self.silent += 1, + } + } + + /// Fold every turn's `stop_reason` into a fresh counts struct. + pub fn from_turns(turns: &[TurnRecord]) -> Self { + let mut out = Self::default(); + for t in turns { + out.bump(t.stop_reason); + } + out + } + + /// True iff every counter is zero — useful for "skip the outcome line + /// entirely" presentation logic in summary. + pub fn is_empty(&self) -> bool { + self.end_turn + | self.max_tokens + | self.pause_turn + | self.stop_sequence + | self.tool_use + | self.refusal + | self.silent + | self.none + == 0 + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Summary { @@ -402,6 +470,10 @@ pub struct Summary { pub by_tag: Option>, #[serde(default, skip_serializing_if = "Option::is_none")] pub replacement_savings: Option, + /// Per-outcome breakdown — `end_turn` / `max_tokens` / `refusal` / etc. + /// Counts roll up the trailing `stop_reason` of every assistant turn + /// in the filtered slice. See #437. + pub stop_reasons: StopReasonCounts, } impl LedgerHandle { @@ -522,6 +594,7 @@ fn compute_summary(turns: &[TurnRecord], pricing: &PricingTable) -> Summary { .collect(), by_tag: None, replacement_savings, + stop_reasons: StopReasonCounts::from_turns(turns), } } @@ -697,6 +770,9 @@ pub struct SummaryGroupedReport { /// so presenters don't rebuild order-sensitive HashMap projections. 
pub per_cell_fidelity: serde_json::Value, pub replacement_savings: ReplacementSavingsSummary, + /// Per-outcome turn counts (issue #437). Always populated; presenters + /// decide whether to render the line based on `is_empty()`. + pub stop_reasons: StopReasonCounts, #[serde(default, skip_serializing_if = "Option::is_none")] pub quality: Option, } @@ -870,6 +946,7 @@ impl LedgerHandle { } else { None }; + let stop_reasons = StopReasonCounts::from_turns(&turns); Ok(SummaryReport::Grouped(SummaryGroupedReport { group_by, tag_key, @@ -880,6 +957,7 @@ impl LedgerHandle { fidelity, per_cell_fidelity, replacement_savings, + stop_reasons, quality, })) } @@ -4157,6 +4235,130 @@ mod tests { assert!(summary_fidelity_summary_to_value(&grouped.fidelity)["byClass"].is_object()); } + /// Acceptance test for issue #437: a turn carrying `stop_reason: + /// "max_tokens"` surfaces in the summary outcome counts. Mixes a + /// `max_tokens` turn with a `none` turn (no field on the row) to + /// confirm both buckets land in the right slot. 
+ #[test] + fn summary_report_aggregates_stop_reasons_per_outcome() { + let dir = tempfile::tempdir().unwrap(); + let opts = LedgerOpenOptions::with_home(dir.path()); + let mut handle = Ledger::open(opts).expect("open ledger"); + + let make_turn = |idx: u64, + msg: &str, + stop_reason: Option| + -> TurnRecord { + TurnRecord { + v: 1, + source: SourceKind::ClaudeCode, + session_id: "sess-stop".into(), + session_path: None, + message_id: msg.into(), + turn_index: idx, + ts: format!("2026-05-25T00:0{idx}:00.000Z"), + model: "claude-sonnet-4-6".into(), + project: Some("/tmp/proj".into()), + project_key: None, + usage: Usage { + input: 100 + idx, + output: 50, + reasoning: 0, + cache_read: 0, + cache_create_5m: 0, + cache_create_1h: 0, + }, + tool_calls: vec![], + files_touched: None, + subagent: None, + stop_reason, + activity: None, + retries: None, + has_edits: None, + fidelity: None, + } + }; + + handle + .raw_mut() + .append_turns(&[ + make_turn(0, "m-max", Some(StopReason::MaxTokens)), + make_turn(1, "m-end", Some(StopReason::EndTurn)), + make_turn(2, "m-refusal", Some(StopReason::Refusal)), + // Codex-style: no field on the row. + make_turn(3, "m-none", None), + ]) + .expect("append turns"); + + let report = handle + .summary_report(SummaryReportOptions::default()) + .expect("summary report"); + let SummaryReport::Grouped(grouped) = report else { + panic!("expected grouped report"); + }; + assert_eq!(grouped.turn_count, 4); + assert_eq!(grouped.stop_reasons.max_tokens, 1); + assert_eq!(grouped.stop_reasons.end_turn, 1); + assert_eq!(grouped.stop_reasons.refusal, 1); + assert_eq!(grouped.stop_reasons.none, 1); + // Untouched buckets stay at zero. 
+ assert_eq!(grouped.stop_reasons.tool_use, 0); + assert_eq!(grouped.stop_reasons.pause_turn, 0); + assert_eq!(grouped.stop_reasons.silent, 0); + assert!(!grouped.stop_reasons.is_empty()); + } + + /// Acceptance test for issue #437: the legacy `LedgerHandle::summary` + /// surface (the slim one) also exposes the new counts. Verifies a turn + /// without a stop_reason field round-trips to `None`/`none` rather + /// than silently leaking into a real bucket. + #[test] + fn summary_legacy_surface_includes_stop_reason_counts_with_none_for_missing_field() { + let dir = tempfile::tempdir().unwrap(); + let opts = LedgerOpenOptions::with_home(dir.path()); + let mut handle = Ledger::open(opts).expect("open ledger"); + + let mut turn = TurnRecord { + v: 1, + source: SourceKind::ClaudeCode, + session_id: "sess-legacy".into(), + session_path: None, + message_id: "m-legacy".into(), + turn_index: 0, + ts: "2026-05-25T00:00:00.000Z".into(), + model: "claude-sonnet-4-6".into(), + project: None, + project_key: None, + usage: Usage::default(), + tool_calls: vec![], + files_touched: None, + subagent: None, + stop_reason: None, + activity: None, + retries: None, + has_edits: None, + fidelity: None, + }; + handle + .raw_mut() + .append_turns(&[turn.clone()]) + .expect("append none turn"); + turn.message_id = "m-pause".into(); + turn.turn_index = 1; + turn.ts = "2026-05-25T00:01:00.000Z".into(); + turn.stop_reason = Some(StopReason::PauseTurn); + handle + .raw_mut() + .append_turns(&[turn]) + .expect("append pause turn"); + + let s = handle.summary(SummaryOptions::default()).expect("summary"); + assert_eq!(s.turn_count, 2); + assert_eq!(s.stop_reasons.none, 1); + assert_eq!(s.stop_reasons.pause_turn, 1); + assert_eq!(s.stop_reasons.end_turn, 0); + } + #[test] fn summary_report_by_tool_uses_predecessor_before_since_boundary() { let (_dir, handle) = fixture_handle(); @@ -4997,7 +5199,7 @@ mod tests { assert_eq!(s.burn.rows.stamps, 0); assert_eq!(s.burn.tracked_rows, 0); 
assert_eq!(s.content.rows, 0); - assert_eq!(s.archive.schema_version, 1); + assert_eq!(s.archive.schema_version, 2); assert!(s.archive.last_built_at.is_none()); assert!(s.archive.last_rebuild_at.is_none()); } diff --git a/crates/relayburn-sdk/src/reader.rs b/crates/relayburn-sdk/src/reader.rs index 5660e138..5b41acf2 100644 --- a/crates/relayburn-sdk/src/reader.rs +++ b/crates/relayburn-sdk/src/reader.rs @@ -36,9 +36,10 @@ pub use git::{resolve_project, ProjectResolver, ResolvedProject}; pub use types::{ ActivityCategory, CompactionEvent, ContentKind, ContentRecord, ContentRole, ContentStoreMode, ContentToolResult, ContentToolUse, Coverage, Fidelity, FidelityClass, Harness, - RelationshipSourceKind, RelationshipType, SessionRelationshipRecord, SourceKind, Subagent, - ToolCall, ToolResultEventRecord, ToolResultEventSource, ToolResultStatus, TurnRecord, Usage, - UsageAttribution, UsageGranularity, UserTurnBlock, UserTurnBlockKind, UserTurnRecord, + RelationshipSourceKind, RelationshipType, SessionRelationshipRecord, SourceKind, StopReason, + Subagent, ToolCall, ToolResultEventRecord, ToolResultEventSource, ToolResultStatus, + TurnRecord, Usage, UsageAttribution, UsageGranularity, UserTurnBlock, UserTurnBlockKind, + UserTurnRecord, }; pub use claude::{ parse_claude_session, parse_claude_session_incremental, diff --git a/crates/relayburn-sdk/src/reader/claude.rs b/crates/relayburn-sdk/src/reader/claude.rs index f2e9963c..46bf7499 100644 --- a/crates/relayburn-sdk/src/reader/claude.rs +++ b/crates/relayburn-sdk/src/reader/claude.rs @@ -21,7 +21,7 @@ use crate::reader::hash::{args_hash, content_hash}; use crate::reader::types::{ CompactionEvent, ContentKind, ContentRecord, ContentRole, ContentStoreMode, ContentToolResult, ContentToolUse, Coverage, Fidelity, RelationshipSourceKind, RelationshipType, - SessionRelationshipRecord, SourceKind, Subagent, ToolCall, ToolResultEventRecord, + SessionRelationshipRecord, SourceKind, StopReason, Subagent, ToolCall, 
ToolResultEventRecord, ToolResultEventSource, ToolResultStatus, TurnRecord, Usage, UsageGranularity, UserTurnBlock, UserTurnRecord, }; @@ -2419,7 +2419,10 @@ fn run_incremental( Some(files_touched) }, subagent, - stop_reason: w.stop_reason.clone(), + stop_reason: w + .stop_reason + .as_deref() + .map(|s| StopReason::from_wire(s).unwrap_or(StopReason::Silent)), activity: None, retries: None, has_edits: None, @@ -2635,7 +2638,7 @@ mod tests { subagent_type: Some("general-purpose".to_string()), description: Some("delegate".to_string()), }), - stop_reason: Some("end_turn".to_string()), + stop_reason: Some(StopReason::EndTurn), activity: Some(crate::reader::types::ActivityCategory::Coding), retries: Some(1), has_edits: Some(true), @@ -2792,7 +2795,7 @@ mod tests { assert_eq!(t.source, SourceKind::ClaudeCode); assert_eq!(t.message_id, "msg_simple_1"); assert_eq!(t.model, "claude-sonnet-4-6"); - assert_eq!(t.stop_reason.as_deref(), Some("end_turn")); + assert_eq!(t.stop_reason, Some(StopReason::EndTurn)); assert_eq!(t.usage.input, 10); assert_eq!(t.usage.output, 5); assert_eq!(t.usage.cache_read, 500); @@ -2837,7 +2840,7 @@ mod tests { ); assert_eq!(t.tool_calls[1].name, "Agent"); assert_eq!(t.tool_calls[1].target.as_deref(), Some("general-purpose")); - assert_eq!(t.stop_reason.as_deref(), Some("tool_use")); + assert_eq!(t.stop_reason, Some(StopReason::ToolUse)); assert_eq!(t.ts, "2026-04-20T00:00:01.000Z"); } @@ -3451,7 +3454,7 @@ mod tests { .unwrap(); assert_eq!(second.turns.len(), 1); assert_eq!(second.turns[0].message_id, "msg_inprog_1"); - assert_eq!(second.turns[0].stop_reason.as_deref(), Some("end_turn")); + assert_eq!(second.turns[0].stop_reason, Some(StopReason::EndTurn)); } #[test] diff --git a/crates/relayburn-sdk/src/reader/opencode.rs b/crates/relayburn-sdk/src/reader/opencode.rs index 35d2a9e1..132b9f15 100644 --- a/crates/relayburn-sdk/src/reader/opencode.rs +++ b/crates/relayburn-sdk/src/reader/opencode.rs @@ -32,7 +32,7 @@ use 
crate::reader::hash::{args_hash, content_hash}; use crate::reader::types::{ CompactionEvent, ContentKind, ContentRecord, ContentRole, ContentStoreMode, ContentToolResult, ContentToolUse, Coverage, Fidelity, RelationshipSourceKind, RelationshipType, - SessionRelationshipRecord, SourceKind, Subagent, ToolCall, ToolResultEventRecord, + SessionRelationshipRecord, SourceKind, StopReason, Subagent, ToolCall, ToolResultEventRecord, ToolResultEventSource, ToolResultStatus, TurnRecord, Usage, UsageAttribution, UsageGranularity, UserTurnBlock, UserTurnRecord, }; @@ -224,7 +224,9 @@ pub fn parse_opencode_session_incremental( Some(extracted.files_touched.clone()) }, subagent: None, - stop_reason: stop_reason.clone(), + stop_reason: stop_reason + .as_deref() + .map(|s| StopReason::from_wire(s).unwrap_or(StopReason::Silent)), activity: None, retries: None, has_edits: None, diff --git a/crates/relayburn-sdk/src/reader/opencode/tests.rs b/crates/relayburn-sdk/src/reader/opencode/tests.rs index 27d41083..ad5ce258 100644 --- a/crates/relayburn-sdk/src/reader/opencode/tests.rs +++ b/crates/relayburn-sdk/src/reader/opencode/tests.rs @@ -10,8 +10,8 @@ use tempfile::tempdir; use super::*; use crate::reader::types::{ - ContentKind, ContentRole, FidelityClass, RelationshipType, SourceKind, ToolResultEventSource, - ToolResultStatus, UsageAttribution, UsageGranularity, UserTurnBlockKind, + ContentKind, ContentRole, FidelityClass, RelationshipType, SourceKind, StopReason, + ToolResultEventSource, ToolResultStatus, UsageAttribution, UsageGranularity, UserTurnBlockKind, }; fn fixtures_root() -> PathBuf { @@ -60,7 +60,7 @@ fn parses_simple_one_turn_session() { assert_eq!(t.model, "anthropic/claude-sonnet-4-5"); assert_eq!(t.project.as_deref(), Some("/tmp/project")); assert_eq!(t.ts, "2026-04-24T00:00:02.000Z"); - assert_eq!(t.stop_reason.as_deref(), Some("end_turn")); + assert_eq!(t.stop_reason, Some(StopReason::EndTurn)); assert_eq!( t.usage, Usage { @@ -98,7 +98,8 @@ fn 
extracts_tool_calls_and_files_touched_for_file_tools() { files, vec!["/src/a.ts".to_string(), "/src/b.ts".to_string()] ); - assert_eq!(t.stop_reason.as_deref(), Some("tool-calls")); + // opencode emits `tool-calls`; the canonical mapping is `ToolUse`. + assert_eq!(t.stop_reason, Some(StopReason::ToolUse)); } #[test] diff --git a/crates/relayburn-sdk/src/reader/types.rs b/crates/relayburn-sdk/src/reader/types.rs index 6377789b..0280c071 100644 --- a/crates/relayburn-sdk/src/reader/types.rs +++ b/crates/relayburn-sdk/src/reader/types.rs @@ -10,9 +10,23 @@ use std::collections::BTreeMap; use std::fmt; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; use serde_json::Value; +/// Lenient deserializer for `TurnRecord.stop_reason`. Accepts the canonical +/// kebab-case variant (`end-turn`, `max-tokens`, …) plus the legacy free-text +/// shapes from upstream harnesses (`end_turn`, `tool_use`, opencode's +/// `tool-calls`, etc.). An unrecognized string decodes to +/// [`StopReason::Silent`] instead of an error so a pre-3.0 ledger replays +/// cleanly through the new column. +fn deserialize_optional_stop_reason<'de, D>(d: D) -> std::result::Result<Option<StopReason>, D::Error> +where + D: Deserializer<'de>, +{ + let opt: Option<String> = Option::deserialize(d)?; + Ok(opt.map(|s| StopReason::from_wire(&s).unwrap_or(StopReason::Silent))) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub enum SourceKind { @@ -258,6 +272,88 @@ pub struct Fidelity { pub class: FidelityClass, } +/// Coarse outcome of an assistant turn, derived from the harness-reported +/// stop reason on the trailing assistant row. +/// +/// Wire shape is kebab-case (`end-turn`, `max-tokens`, …). On-disk and on the +/// JSONL surface this is round-trippable as a string; absent rows decode as +/// `None`. 
`Silent` is reserved for the "we have an inference but it carries +/// no stop reason" case (mid-write, sidechain, harness that doesn't report +/// one in the row we parsed). The lossy mapping for non-Anthropic harnesses +/// is intentional; downstream presenters consume the variant, not the raw +/// string. See issue #437. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum StopReason { + EndTurn, + MaxTokens, + PauseTurn, + StopSequence, + ToolUse, + Refusal, + Silent, +} + +impl StopReason { + /// Kebab-case label as emitted on the wire (matches + /// `#[serde(rename_all = "kebab-case")]`). + pub fn wire_str(&self) -> &'static str { + match self { + Self::EndTurn => "end-turn", + Self::MaxTokens => "max-tokens", + Self::PauseTurn => "pause-turn", + Self::StopSequence => "stop-sequence", + Self::ToolUse => "tool-use", + Self::Refusal => "refusal", + Self::Silent => "silent", + } + } + + /// Map a harness-emitted stop-reason string (e.g. Anthropic's + /// `end_turn` / `max_tokens` or opencode's `tool-calls`) onto the + /// canonical [`StopReason`]. Returns `None` for unrecognized inputs so + /// callers can decide whether to fall back to [`StopReason::Silent`] + /// or keep `None`. + /// + /// Matching is case-insensitive and accepts either snake_case or + /// kebab-case so the same parser handles Anthropic (`max_tokens`), + /// opencode (`tool-calls`), and the kebab-case wire form we emit + /// ourselves on round-trip. + pub fn from_wire(raw: &str) -> Option<Self> { + let normalized: String = raw + .trim() + .chars() + .map(|c| match c { + '_' => '-', + c => c.to_ascii_lowercase(), + }) + .collect(); + match normalized.as_str() { + // OpenAI / AI-SDK convention emits `"stop"` for ordinary + // end-of-turn completions (this is what opencode forwards), + // so it maps to `EndTurn`, not `StopSequence`. 
Anthropic's + // actual stop-sequence outcome is the explicit + // `"stop_sequence"` / `"stop-sequence"` variant below. + "end-turn" | "stop" => Some(Self::EndTurn), + "max-tokens" | "length" => Some(Self::MaxTokens), + "pause-turn" => Some(Self::PauseTurn), + "stop-sequence" => Some(Self::StopSequence), + // `tool-calls` is the opencode AI-SDK wire form; `tool-use` + // is the Anthropic one and our canonical kebab spelling. + "tool-use" | "tool-calls" => Some(Self::ToolUse), + "refusal" => Some(Self::Refusal), + "silent" => Some(Self::Silent), + _ => None, + } + } +} + +impl fmt::Display for StopReason { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.wire_str()) + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub enum ActivityCategory { @@ -303,8 +399,19 @@ pub struct TurnRecord { pub files_touched: Option<Vec<String>>, #[serde(default, skip_serializing_if = "Option::is_none")] pub subagent: Option<Subagent>, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub stop_reason: Option<String>, + /// Outcome of the assistant inference, as reported by the harness on the + /// trailing assistant row (Anthropic `stop_reason`, opencode + /// `step-finish.reason`, etc.). `None` means the row carried no field at + /// all (e.g. Codex, which doesn't report one); deserialization is + /// tolerant of the historical free-text shape — unknown strings decode + /// as [`StopReason::Silent`] so a future harness value can't poison + /// reads. 
+ #[serde( + default, + skip_serializing_if = "Option::is_none", + deserialize_with = "deserialize_optional_stop_reason" + )] + pub stop_reason: Option<StopReason>, #[serde(default, skip_serializing_if = "Option::is_none")] pub activity: Option<ActivityCategory>, #[serde(default, skip_serializing_if = "Option::is_none")] @@ -628,6 +735,115 @@ mod tests { assert!(c.is_full()); } + #[test] + fn stop_reason_serializes_kebab_case() { + assert_eq!( + serde_json::to_string(&StopReason::EndTurn).unwrap(), + "\"end-turn\"", + ); + assert_eq!( + serde_json::to_string(&StopReason::MaxTokens).unwrap(), + "\"max-tokens\"", + ); + assert_eq!( + serde_json::to_string(&StopReason::ToolUse).unwrap(), + "\"tool-use\"", + ); + } + + #[test] + fn stop_reason_from_wire_normalizes_underscored_and_legacy_variants() { + // Anthropic snake_case. + assert_eq!( + StopReason::from_wire("end_turn"), + Some(StopReason::EndTurn) + ); + assert_eq!( + StopReason::from_wire("max_tokens"), + Some(StopReason::MaxTokens) + ); + assert_eq!( + StopReason::from_wire("tool_use"), + Some(StopReason::ToolUse) + ); + // Opencode finish reason for the same outcome ships as `tool-calls`. + assert_eq!( + StopReason::from_wire("tool-calls"), + Some(StopReason::ToolUse) + ); + // Canonical kebab-case round-trips identity. + assert_eq!( + StopReason::from_wire("pause-turn"), + Some(StopReason::PauseTurn) + ); + // OpenAI / AI-SDK (and therefore opencode) emit a bare `"stop"` + // for normal end-of-turn completions — that's `EndTurn`, NOT + // `StopSequence`. Misclassifying these would skew the + // `burn summary` outcome buckets every time opencode wraps an + // OpenAI-shaped provider. + assert_eq!(StopReason::from_wire("stop"), Some(StopReason::EndTurn)); + // Anthropic's actual stop-sequence outcome is the explicit + // `stop_sequence` (snake) / `stop-sequence` (kebab) variants and + // those still resolve to `StopSequence`. 
+ assert_eq!( + StopReason::from_wire("stop_sequence"), + Some(StopReason::StopSequence) + ); + assert_eq!( + StopReason::from_wire("stop-sequence"), + Some(StopReason::StopSequence) + ); + // Unknown / harness-specific strings don't map. + assert_eq!(StopReason::from_wire("magic"), None); + } + + #[test] + fn turn_record_stop_reason_deserializes_legacy_strings_into_enum() { + // Pre-3.0 ledgers stored the raw Anthropic stop_reason on + // `TurnRecord.stopReason` as a free-text string. The lenient + // deserializer must keep replaying those rows. + let raw = serde_json::json!({ + "v": 1, + "source": "claude-code", + "sessionId": "s", + "messageId": "m", + "turnIndex": 0, + "ts": "2026-04-20T00:00:00.000Z", + "model": "claude-sonnet-4-6", + "usage": { + "input": 0, "output": 0, "reasoning": 0, + "cacheRead": 0, "cacheCreate5m": 0, "cacheCreate1h": 0 + }, + "toolCalls": [], + "stopReason": "max_tokens" + }); + let rec: TurnRecord = serde_json::from_value(raw).unwrap(); + assert_eq!(rec.stop_reason, Some(StopReason::MaxTokens)); + } + + #[test] + fn turn_record_stop_reason_unknown_string_falls_back_to_silent() { + // A ledger written by a future / unknown harness shouldn't break + // reads — the parser maps to `Silent` instead of erroring. 
+ let raw = serde_json::json!({ + "v": 1, + "source": "claude-code", + "sessionId": "s", + "messageId": "m", + "turnIndex": 0, + "ts": "2026-04-20T00:00:00.000Z", + "model": "claude-sonnet-4-6", + "usage": { + "input": 0, "output": 0, "reasoning": 0, + "cacheRead": 0, "cacheCreate5m": 0, "cacheCreate1h": 0 + }, + "toolCalls": [], + "stopReason": "totally-unknown-future-value" + }); + let rec: TurnRecord = serde_json::from_value(raw).unwrap(); + assert_eq!(rec.stop_reason, Some(StopReason::Silent)); + } + #[test] fn coverage_round_trips_camel_case() { let c = Coverage { diff --git a/tests/fixtures/cli-golden/snapshots/state-status-json.stdout.txt b/tests/fixtures/cli-golden/snapshots/state-status-json.stdout.txt index b166c328..dc9cfd65 100644 --- a/tests/fixtures/cli-golden/snapshots/state-status-json.stdout.txt +++ b/tests/fixtures/cli-golden/snapshots/state-status-json.stdout.txt @@ -20,7 +20,7 @@ "rows": 0 }, "archive": { - "schemaVersion": 1 + "schemaVersion": 2 }, "config": { "store": "off", diff --git a/tests/fixtures/cli-golden/snapshots/state-status.stdout.txt b/tests/fixtures/cli-golden/snapshots/state-status.stdout.txt index 98e75959..99aed66f 100644 --- a/tests/fixtures/cli-golden/snapshots/state-status.stdout.txt +++ b/tests/fixtures/cli-golden/snapshots/state-status.stdout.txt @@ -13,7 +13,7 @@ content DB (content.sqlite): path: ${RELAYBURN_HOME}/content.sqlite rows: 0 archive state: - schema version: 1 + schema version: 2 last built: never last rebuild: never config: diff --git a/tests/fixtures/cli-golden/snapshots/summary-json.stdout.txt b/tests/fixtures/cli-golden/snapshots/summary-json.stdout.txt index b124e200..9721c222 100644 --- a/tests/fixtures/cli-golden/snapshots/summary-json.stdout.txt +++ b/tests/fixtures/cli-golden/snapshots/summary-json.stdout.txt @@ -190,5 +190,15 @@ } ] } + }, + "stopReasons": { + "endTurn": 0, + "maxTokens": 0, + "pauseTurn": 0, + "stopSequence": 0, + "toolUse": 0, + "refusal": 0, + "silent": 0, + "none": 7 } } diff 
--git a/tests/fixtures/cli-golden/snapshots/summary.stdout.txt b/tests/fixtures/cli-golden/snapshots/summary.stdout.txt index 1a015f57..83e70e7d 100644 --- a/tests/fixtures/cli-golden/snapshots/summary.stdout.txt +++ b/tests/fixtures/cli-golden/snapshots/summary.stdout.txt @@ -11,4 +11,6 @@ claude-haiku-4-5 2 1,700 220 0 4,500 0 $0.0 total cost: $0.044 input $0.019 / output $0.018 / reasoning $0.00 / cacheRead $0.0063 / cacheCreate $0.0008 +Turn outcomes: 0 end_turn, 0 max_tokens, 0 refusal, 0 pause, 7 none + fidelity: 6 full / 1 partial (use --json for per-field coverage)