From 8902e330b33649308da51e9a2127e73aa3ca387e Mon Sep 17 00:00:00 2001 From: Will Washburn Date: Thu, 7 May 2026 01:52:36 -0400 Subject: [PATCH] relayburn-sdk: bootstrap burn.sqlite from ledger.jsonl on Ledger::open MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Rust SDK reads exclusively from `burn.sqlite` but never auto-built that mirror from a `ledger.jsonl` sibling. Freshly-ingested or JSONL-only ledgers (the cli-golden fixture, side-by-side TS/Rust tooling, users upgrading from 1.x) returned empty rows on read until something else populated the sqlite. The TS @relayburn/sdk@1.x didn't have this problem because it treats sqlite as a derived view rebuilt on demand. Lift the bootstrap algorithm from `tests/golden.rs::bootstrap_sqlite_from_jsonl` into the production SDK so reads always see the latest data. Algorithm — Option A, eager on `Ledger::open`. We snapshot the JSONL-vs-sqlite mtime BEFORE `Connection::open` creates `burn.sqlite` as a side effect (otherwise every fresh sqlite would look "current" relative to the JSONL and we'd skip the rebuild). If the JSONL is newer (or the sqlite is missing), wipe the derivable tables and replay the JSONL via the existing `writer::append_*` paths. Stamps + archive_state are first-party and preserved. Concurrency: SQLite WAL plus the configured `busy_timeout` serialize peer writers without a user-space lockfile — same design choice that let us drop `lock.ts` from the Rust port (see #259). Two concurrent opens that both observe a stale sqlite each run the rebuild (the staleness decision is captured before the connection is opened and is not re-checked); an open that starts after a rebuild has finished sees an already-warm sqlite and skips. Side effect: the cli-golden test helper no longer needs its own JSONL parser. Replace `bootstrap_sqlite_from_jsonl` (~150 lines of duplicated parse/replay logic) with a 30-line `reset_sqlite_for_fresh_bootstrap` that just deletes any prior sqlite so the SDK does the rebuild on the binary's first `Ledger::open`. 
Followup to PRs #354 (napi shape conformance) and #355 (conformance gate flip) which surfaced this gap. Verified manually against the cli-golden fixture: `RELAYBURN_HOME=/tmp/probe-ledger RELAYBURN_ARCHIVE=0 burn summary` on a JSONL-only ledger now returns 7 turns instead of 0. --- crates/relayburn-cli/Cargo.toml | 10 +- crates/relayburn-cli/tests/golden.rs | 167 +------- crates/relayburn-sdk/src/ledger.rs | 1 + crates/relayburn-sdk/src/ledger/bootstrap.rs | 417 +++++++++++++++++++ crates/relayburn-sdk/src/ledger/db.rs | 15 +- 5 files changed, 459 insertions(+), 151 deletions(-) create mode 100644 crates/relayburn-sdk/src/ledger/bootstrap.rs diff --git a/crates/relayburn-cli/Cargo.toml b/crates/relayburn-cli/Cargo.toml index 7debe919..1cfc17b9 100644 --- a/crates/relayburn-cli/Cargo.toml +++ b/crates/relayburn-cli/Cargo.toml @@ -107,8 +107,8 @@ tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync"] } # capture and the Rust diff runner. `serde` / `serde_json` are workspace deps # already pulled in via `[dependencies]`, so this entry is documentation only. # -# The diff runner also bootstraps the SQLite fixture from the committed -# `ledger.jsonl` (the in-tree fixture is JSONL-only because the SQLite -# binaries are gitignored). It does so by parsing the JSONL with `serde_json` -# and replaying the records via `relayburn_sdk::RawLedger::append_*` methods, -# which is reachable through the `relayburn-sdk` dep already declared above. +# The in-tree fixture is JSONL-only (the SQLite binaries are gitignored); the +# diff runner just deletes any prior sqlite before invoking the binary, and +# the SDK's `Ledger::open` rebuilds `burn.sqlite` from `ledger.jsonl` +# automatically (see `relayburn_sdk::ledger::bootstrap`). No JSONL parsing in +# the test helper itself. 
diff --git a/crates/relayburn-cli/tests/golden.rs b/crates/relayburn-cli/tests/golden.rs index d5581dcf..2afb9c79 100644 --- a/crates/relayburn-cli/tests/golden.rs +++ b/crates/relayburn-cli/tests/golden.rs @@ -36,38 +36,28 @@ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; -use relayburn_sdk::{ - CompactionEvent, RawLedger, SessionRelationshipRecord, Stamp, ToolResultEventRecord, - TurnRecord, UserTurnRecord, -}; use serde::Deserialize; -/// Bootstrap the SQLite fixture from the committed `ledger.jsonl`. +/// Wipe any prior `burn.sqlite` / `content.sqlite` so the next +/// `Ledger::open` deterministically rebuilds from `ledger.jsonl`. /// -/// The CLI-golden fixture's source of truth is `ledger.jsonl` (the SQLite -/// counterparts are gitignored because they're rematerialized on demand; -/// see `tests/fixtures/cli-golden/ledger/.gitignore`). The TS CLI reads -/// JSONL natively via its `file` storage adapter; the Rust SDK is sqlite- -/// only, so we replay the JSONL into `burn.sqlite` here before invoking -/// the binary. +/// The CLI-golden fixture's source of truth is `ledger.jsonl` (the +/// SQLite counterparts are gitignored because they're rematerialized +/// on demand; see `tests/fixtures/cli-golden/ledger/.gitignore`). The +/// TS CLI reads JSONL natively via its `file` storage adapter; the Rust +/// SDK is sqlite-only and bootstraps `burn.sqlite` from the JSONL on +/// open (see `relayburn_sdk::ledger::bootstrap`). /// -/// Idempotent: if `burn.sqlite` already has a non-empty `turns` table we -/// skip the rebuild. Local devs running the diff runner repeatedly thus -/// only pay the JSONL-replay cost once. -fn bootstrap_sqlite_from_jsonl(ledger_home: &Path) -> std::io::Result<()> { - let jsonl_path = ledger_home.join("ledger.jsonl"); - if !jsonl_path.is_file() { - // No JSONL source: assume the fixture was built another way and - // bail (the binary will surface the resulting empty-ledger - // diff loud and clear when it runs). 
+/// We could rely on the SDK's mtime check to do this for free, but a +/// stale sqlite from a prior run with a *newer* mtime than the JSONL +/// would otherwise mask snapshot drift. Wiping forces a fresh replay +/// every test run. +fn reset_sqlite_for_fresh_bootstrap(ledger_home: &Path) -> std::io::Result<()> { + if !ledger_home.join("ledger.jsonl").is_file() { + // No JSONL source — leave whatever sqlite is here alone and + // let the binary surface any resulting empty-ledger diff. return Ok(()); } - let burn_path = ledger_home.join("burn.sqlite"); - let content_path = ledger_home.join("content.sqlite"); - - // Wipe any prior bootstrap. We don't try to do incremental upserts - // here — the JSONL is canonical and small; rewriting from scratch - // keeps the bootstrap deterministic across runs. for name in [ "burn.sqlite", "burn.sqlite-shm", @@ -76,125 +66,11 @@ fn bootstrap_sqlite_from_jsonl(ledger_home: &Path) -> std::io::Result<()> { "content.sqlite-shm", "content.sqlite-wal", ] { - let p = ledger_home.join(name); - let _ = fs::remove_file(p); - } - - let mut ledger = - RawLedger::open(&burn_path, &content_path).expect("open fixture ledger for bootstrap"); - - let raw = fs::read_to_string(&jsonl_path)?; - let mut turns: Vec<TurnRecord> = Vec::new(); - let mut user_turns: Vec<UserTurnRecord> = Vec::new(); - let mut tool_results: Vec<ToolResultEventRecord> = Vec::new(); - let mut relationships: Vec<SessionRelationshipRecord> = Vec::new(); - let mut compactions: Vec<CompactionEvent> = Vec::new(); - let mut stamps: Vec<Stamp> = Vec::new(); - - for (line_no, line) in raw.lines().enumerate() { - let trimmed = line.trim(); - if trimmed.is_empty() { - continue; - } - let envelope: serde_json::Value = serde_json::from_str(trimmed).unwrap_or_else(|err| { - panic!( - "[golden bootstrap] line {} of ledger.jsonl is not valid JSON: {err}", - line_no + 1 - ) - }); - let kind = envelope.get("kind").and_then(|v| v.as_str()).unwrap_or(""); - let mut record = envelope - .get("record") - .cloned() - .unwrap_or(serde_json::Value::Null); - match kind { - "turn" => 
turns.push(serde_json::from_value(record).expect("turn record")), - "user_turn" => { - user_turns.push(serde_json::from_value(record).expect("user_turn record")) - } - "tool_result_event" => { - normalize_tool_result_event(&mut record); - tool_results.push(serde_json::from_value(record).expect("tool_result_event record")) - } - "relationship" => { - relationships.push(serde_json::from_value(record).expect("relationship record")) - } - "compaction" => { - compactions.push(serde_json::from_value(record).expect("compaction record")) - } - "stamp" => stamps.push(stamp_from_envelope(&envelope)), - _ => { - // Unknown kinds (`text`, `tool_result`, etc. emitted by older - // ledger writers) are noise here — they belong to the content - // sidecar lifecycle, not the events DB. - } - } - } - - ledger.append_turns(&turns).expect("append turns"); - ledger - .append_user_turns(&user_turns) - .expect("append user_turns"); - ledger - .append_tool_result_events(&tool_results) - .expect("append tool_result_events"); - ledger - .append_relationships(&relationships) - .expect("append relationships"); - ledger - .append_compactions(&compactions) - .expect("append compactions"); - for s in &stamps { - ledger.append_stamp(s).expect("append stamp"); + let _ = fs::remove_file(ledger_home.join(name)); } Ok(()) } -/// The hand-built fixture writes `eventSource: "transcript"` for Claude -/// `tool_result` events; the canonical schema dropped that variant in -/// favor of the more specific `"tool_result"` value. The TS reader is -/// lenient and stores the JSON verbatim; the Rust SDK is strict. Normalize -/// here so the fixture replays cleanly without retroactively rewriting -/// the JSONL on disk (which would drift the snapshot capture corpus). The -/// substitution also fills in `eventIndex` if the fixture omits it -/// (required by the SDK schema; the TS reader defaults missing values to -/// `0` via `??`). 
-fn normalize_tool_result_event(record: &mut serde_json::Value) { - let Some(obj) = record.as_object_mut() else { - return; - }; - if let Some(src) = obj.get_mut("eventSource") { - if src.as_str() == Some("transcript") { - *src = serde_json::Value::String("tool_result".to_string()); - } - } - obj.entry("eventIndex").or_insert(serde_json::json!(0)); -} - -fn stamp_from_envelope(envelope: &serde_json::Value) -> Stamp { - Stamp { - ts: envelope - .get("ts") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - selector: serde_json::from_value( - envelope - .get("selector") - .cloned() - .unwrap_or(serde_json::Value::Null), - ) - .unwrap_or_default(), - enrichment: serde_json::from_value( - envelope - .get("enrichment") - .cloned() - .unwrap_or(serde_json::Value::Null), - ) - .unwrap_or_default(), - } -} - #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] struct Invocation { @@ -256,10 +132,11 @@ fn golden_diff_against_ts_cli_snapshots() { let ledger_home = fixture_dir.join("ledger"); let project_dir = fixture_dir.join("project"); - // Bootstrap the SQLite fixture from `ledger.jsonl`. The Rust SDK only - // reads from sqlite; the in-tree fixture is JSONL-only because the - // sqlite binaries are gitignored. Replays once per test run. - bootstrap_sqlite_from_jsonl(&ledger_home).expect("bootstrap sqlite from JSONL"); + // The in-tree fixture is JSONL-only (the sqlite binaries are + // gitignored). Wipe any prior sqlite so the SDK's bootstrap-on-open + // (see `relayburn_sdk::ledger::bootstrap`) deterministically replays + // the JSONL on the binary's first `Ledger::open`. + reset_sqlite_for_fresh_bootstrap(&ledger_home).expect("reset sqlite for fresh bootstrap"); // Sealed HOME so the Rust binary's eventual ingest sweep doesn't // discover the developer's real session stores. 
diff --git a/crates/relayburn-sdk/src/ledger.rs b/crates/relayburn-sdk/src/ledger.rs index 55b0ab68..201aa8c0 100644 --- a/crates/relayburn-sdk/src/ledger.rs +++ b/crates/relayburn-sdk/src/ledger.rs @@ -17,6 +17,7 @@ // agent absorbing more verbs will need them. #![allow(dead_code, unused_imports)] +mod bootstrap; mod config; mod content; mod db; diff --git a/crates/relayburn-sdk/src/ledger/bootstrap.rs b/crates/relayburn-sdk/src/ledger/bootstrap.rs new file mode 100644 index 00000000..46c92118 --- /dev/null +++ b/crates/relayburn-sdk/src/ledger/bootstrap.rs @@ -0,0 +1,417 @@ +//! Bootstrap `burn.sqlite` from a `ledger.jsonl` sibling on `Ledger::open`. +//! +//! ## Why this exists +//! +//! The 2.0 SQLite-only design (see #259) treats `burn.sqlite` / +//! `content.sqlite` as the steady-state storage. But the TS 1.x ledger +//! is JSONL-of-record, and during the #240 cutover both write paths can +//! coexist on disk: +//! +//! * a 1.x writer ingesting on the side, leaving `ledger.jsonl` ahead +//! of any sqlite mirror; +//! * a freshly built fixture (the cli-golden corpus is JSONL-only — +//! the sqlite binaries are `.gitignore`d because they're rebuilt on +//! demand); +//! * a user upgrading and pointing the new SDK at their old +//! `~/.relayburn/` home. +//! +//! In all three cases the Rust SDK was returning empty rows because it +//! reads exclusively from sqlite. The TS SDK didn't have this problem +//! because it treats sqlite as a derived view rebuilt on demand. This +//! module lifts that bootstrap algorithm into the Rust SDK so reads +//! always see the latest data. +//! +//! ## Algorithm — Option A (eager, on `Ledger::open`) +//! +//! Compare mtimes: if `ledger.jsonl` is newer than `burn.sqlite` (or +//! `burn.sqlite` is missing entirely), wipe the sqlite mirror, replay +//! the JSONL line-by-line via `Ledger::append_*`, and continue. If +//! `burn.sqlite` is at-or-newer than the JSONL, do nothing and let the +//! existing connection serve queries. 
If there is no `ledger.jsonl` at +//! all, do nothing — the SDK is in pure-sqlite mode and the caller is +//! responsible for any prior ingest. +//! +//! We picked Option A (eager on open) over Option B (lazy on first +//! read) because: +//! +//! * Open is a rare event. Embedded callers usually open once per +//! process; CLI invocations open once per `burn …` command. The +//! bootstrap cost lands on the caller already paying for cold +//! start. +//! * Open has clean read-modify-write semantics — we already hold +//! the only `&mut Connection` for `burn.sqlite`. A lazy bootstrap +//! on first read would have to reach into every read verb and +//! acquire a mutable handle just to maybe-rebuild, complicating +//! `&self`-only read paths. +//! * Bootstrap is idempotent and cheap when the mtime check is a +//! no-op (the steady state). The only loss is an extra `stat()` +//! pair per open. +//! +//! ## Concurrency +//! +//! Replay is a read-modify-write on `burn.sqlite`. SQLite's WAL mode +//! (configured in `db.rs`) plus the `busy_timeout` we set there +//! serialize peer writers without a user-space lockfile — the same +//! design choice that let us drop the 1.x `lock.ts` module from the +//! Rust port (see #259). Two concurrent `Ledger::open` callers that +//! both observe a stale sqlite will each run the rebuild; an open that +//! starts afterwards sees an already-warm sqlite (mtime ≥ jsonl mtime) and +//! skips. Worst case is one redundant rebuild, which is cheap and +//! deterministic. 
+ +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; +use std::time::SystemTime; + +use rusqlite::Connection; + +use crate::ledger::error::Result; +use crate::ledger::schema::DERIVABLE_TABLES; +use crate::ledger::stamp::Stamp; +use crate::ledger::writer; +use crate::reader::{ + CompactionEvent, SessionRelationshipRecord, ToolResultEventRecord, TurnRecord, UserTurnRecord, +}; + +/// Result of the staleness check, captured BEFORE `Connection::open` +/// creates `burn.sqlite` as a side effect (which would otherwise make +/// every fresh sqlite look "current" relative to the JSONL). +pub(crate) enum BootstrapDecision { + /// No JSONL on disk OR sqlite already at-or-newer than JSONL — do + /// nothing on open. + Skip, + /// JSONL is newer (or sqlite was missing). Replay this file once + /// the sqlite handle is open + DDL'd. + Rebuild { jsonl_path: PathBuf }, +} + +/// Path to the `ledger.jsonl` sibling of `burn.sqlite`. Returns `None` +/// only when the burn path has no parent (empty path or a root). A bare +/// filename has the empty path as its parent, so its sibling resolves +/// to `ledger.jsonl` relative to cwd. +fn jsonl_sibling(burn_path: &Path) -> Option<PathBuf> { + burn_path.parent().map(|p| p.join("ledger.jsonl")) +} + +fn mtime(path: &Path) -> io::Result<SystemTime> { + fs::metadata(path)?.modified() +} + +/// Snapshot the JSONL-vs-sqlite staleness state. Must be called BEFORE +/// `Connection::open(burn_path)`, since that call creates the sqlite +/// file as a side effect. +/// +/// * No JSONL → `Skip` (pure-sqlite ledger). +/// * JSONL exists but no `burn.sqlite` → `Rebuild`. +/// * JSONL mtime > sqlite mtime → `Rebuild`. +/// * Otherwise → `Skip`. 
+pub(crate) fn decide_bootstrap(burn_path: &Path) -> BootstrapDecision { + let Some(jsonl_path) = jsonl_sibling(burn_path) else { + return BootstrapDecision::Skip; + }; + if !jsonl_path.is_file() { + return BootstrapDecision::Skip; + } + let Ok(jsonl_mtime) = mtime(&jsonl_path) else { + return BootstrapDecision::Skip; + }; + match mtime(burn_path) { + Ok(burn_mtime) if burn_mtime >= jsonl_mtime => BootstrapDecision::Skip, + // burn.sqlite missing OR older than JSONL. + _ => BootstrapDecision::Rebuild { jsonl_path }, + } +} + +/// Apply the decision captured by [`decide_bootstrap`]. A no-op for +/// `BootstrapDecision::Skip`; for `Rebuild`, wipes derivable tables +/// and replays the JSONL via `writer::append_*`. +pub(crate) fn apply_bootstrap( + burn: &mut Connection, + decision: BootstrapDecision, +) -> Result<()> { + match decision { + BootstrapDecision::Skip => Ok(()), + BootstrapDecision::Rebuild { jsonl_path } => rebuild_from_jsonl(burn, &jsonl_path), + } +} + +/// Wipe derivable tables, parse `ledger.jsonl`, and replay the records +/// through the writer. Stamps are first-party in 2.0 (preserved across +/// rebuild) but the JSONL replay re-emits them too — `append_stamp` is +/// idempotent on the `(source, session_id, ts, written_at)` PK so a +/// duplicate replay is a no-op. +fn rebuild_from_jsonl(burn: &mut Connection, jsonl_path: &Path) -> Result<()> { + // Drop derivable tables only. Stamps + archive_state are first-party + // and survive — the JSONL replay below will re-add stamp rows but + // any existing ones are preserved if the JSONL doesn't list them. 
+ for table in DERIVABLE_TABLES { + burn.execute(&format!("DELETE FROM {table}"), [])?; + } + + let raw = fs::read_to_string(jsonl_path)?; + + let mut turns: Vec<TurnRecord> = Vec::new(); + let mut user_turns: Vec<UserTurnRecord> = Vec::new(); + let mut tool_results: Vec<ToolResultEventRecord> = Vec::new(); + let mut relationships: Vec<SessionRelationshipRecord> = Vec::new(); + let mut compactions: Vec<CompactionEvent> = Vec::new(); + let mut stamps: Vec<Stamp> = Vec::new(); + + for line in raw.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + // Tolerate malformed envelopes: a single bad line shouldn't + // wedge the SDK on open. The `burn.sqlite` mirror will be + // missing those records, but every well-formed line still + // lands. + let Ok(envelope) = serde_json::from_str::<serde_json::Value>(trimmed) else { + continue; + }; + let kind = envelope.get("kind").and_then(|v| v.as_str()).unwrap_or(""); + let mut record = envelope + .get("record") + .cloned() + .unwrap_or(serde_json::Value::Null); + match kind { + "turn" => { + if let Ok(t) = serde_json::from_value::<TurnRecord>(record) { + turns.push(t); + } + } + "user_turn" => { + if let Ok(u) = serde_json::from_value::<UserTurnRecord>(record) { + user_turns.push(u); + } + } + "tool_result_event" => { + normalize_tool_result_event(&mut record); + if let Ok(e) = serde_json::from_value::<ToolResultEventRecord>(record) { + tool_results.push(e); + } + } + "relationship" => { + if let Ok(r) = serde_json::from_value::<SessionRelationshipRecord>(record) { + relationships.push(r); + } + } + "compaction" => { + if let Ok(c) = serde_json::from_value::<CompactionEvent>(record) { + compactions.push(c); + } + } + "stamp" => { + stamps.push(stamp_from_envelope(&envelope)); + } + _ => { + // Unknown kinds (`text`, `tool_result`, etc. emitted by + // older content-sidecar writers) are noise here — they + // belong to the content DB lifecycle, not the events DB. 
+ } + } + } + + if !turns.is_empty() { + writer::append_turns(burn, &turns)?; + } + if !user_turns.is_empty() { + writer::append_user_turns(burn, &user_turns)?; + } + if !tool_results.is_empty() { + writer::append_tool_result_events(burn, &tool_results)?; + } + if !relationships.is_empty() { + writer::append_relationships(burn, &relationships)?; + } + if !compactions.is_empty() { + writer::append_compactions(burn, &compactions)?; + } + for s in &stamps { + writer::append_stamp(burn, s)?; + } + Ok(()) +} + +/// 1.x fixtures (and some early-port test corpora) wrote +/// `eventSource: "transcript"` for Claude `tool_result` events; the +/// canonical schema dropped that variant in favor of the more specific +/// `"tool_result"` value. The TS reader was lenient and stored the JSON +/// verbatim; the Rust SDK is strict. Normalize here so a stray legacy +/// row in upstream JSONL replays cleanly. Also fills in `eventIndex` if +/// the row omits it (required by the SDK schema; the TS reader defaults +/// missing values to `0`). 
+fn normalize_tool_result_event(record: &mut serde_json::Value) { + let Some(obj) = record.as_object_mut() else { + return; + }; + if let Some(src) = obj.get_mut("eventSource") { + if src.as_str() == Some("transcript") { + *src = serde_json::Value::String("tool_result".to_string()); + } + } + obj.entry("eventIndex").or_insert(serde_json::json!(0)); +} + +fn stamp_from_envelope(envelope: &serde_json::Value) -> Stamp { + Stamp { + ts: envelope + .get("ts") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(), + selector: serde_json::from_value( + envelope + .get("selector") + .cloned() + .unwrap_or(serde_json::Value::Null), + ) + .unwrap_or_default(), + enrichment: serde_json::from_value( + envelope + .get("enrichment") + .cloned() + .unwrap_or(serde_json::Value::Null), + ) + .unwrap_or_default(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempfile::TempDir; + + use crate::ledger::Ledger; + + /// Smallest possible turn JSONL envelope. `input` is parameterized + /// because turns whose content fields exactly match collapse under + /// the writer's `content_fingerprint` dedup — tests need distinct + /// `input` counts to keep two synthetic turns from merging. + fn turn_envelope_line(session: &str, message: &str, input: u64) -> String { + let record = serde_json::json!({ + "v": 1, + "source": "claude-code", + "sessionId": session, + "messageId": message, + "turnIndex": 0, + "ts": "2025-01-01T00:00:00Z", + "model": "claude-sonnet-4-6", + "usage": { + "input": input, + "output": 5, + "reasoning": 0, + "cacheRead": 0, + "cacheCreate5m": 0, + "cacheCreate1h": 0 + }, + "toolCalls": [] + }); + format!( + r#"{{"v":1,"kind":"turn","record":{}}}"#, + serde_json::to_string(&record).unwrap() + ) + } + + #[test] + fn no_jsonl_no_bootstrap() { + // Pure-sqlite ledger — no JSONL on disk. Open should be a no-op + // and turns table stays empty. 
+ let tmp = TempDir::new().unwrap(); + let burn = tmp.path().join("burn.sqlite"); + let content = tmp.path().join("content.sqlite"); + let l = Ledger::open(&burn, &content).unwrap(); + assert_eq!(l.count_table("turns").unwrap(), 0); + assert!(!tmp.path().join("ledger.jsonl").exists()); + } + + #[test] + fn jsonl_only_bootstraps_on_open() { + // The "freshly-cloned cli-golden fixture" scenario: ledger.jsonl + // exists, burn.sqlite does not. Open should populate the events + // DB. + let tmp = TempDir::new().unwrap(); + let jsonl = tmp.path().join("ledger.jsonl"); + let burn = tmp.path().join("burn.sqlite"); + let content = tmp.path().join("content.sqlite"); + + let mut f = fs::File::create(&jsonl).unwrap(); + writeln!(f, "{}", turn_envelope_line("sess-a", "msg-1", 10)).unwrap(); + writeln!(f, "{}", turn_envelope_line("sess-a", "msg-2", 20)).unwrap(); + f.flush().unwrap(); + drop(f); + + let l = Ledger::open(&burn, &content).unwrap(); + assert_eq!(l.count_table("turns").unwrap(), 2); + } + + /// Force a file's mtime via `File::set_modified` — stable since + /// 1.75 and works on all OSes the workspace supports without an + /// extra crate. + fn set_mtime(path: &Path, when: SystemTime) { + let f = fs::OpenOptions::new().write(true).open(path).unwrap(); + f.set_modified(when).unwrap(); + } + + #[test] + fn fresh_sqlite_skips_bootstrap() { + // burn.sqlite mtime ≥ ledger.jsonl mtime → no rebuild. + let tmp = TempDir::new().unwrap(); + let jsonl = tmp.path().join("ledger.jsonl"); + let burn = tmp.path().join("burn.sqlite"); + let content = tmp.path().join("content.sqlite"); + + // First write the JSONL, then build the sqlite. The sqlite's + // mtime will be newer. + fs::write(&jsonl, turn_envelope_line("sess-a", "msg-1", 10) + "\n").unwrap(); + // Build sqlite once (this rebuilds from JSONL — 1 row). + { + let _ = Ledger::open(&burn, &content).unwrap(); + } + // Bump the sqlite's mtime explicitly so we don't depend on + // filesystem resolution. 
+ set_mtime(&burn, SystemTime::now() + std::time::Duration::from_secs(60)); + + // Append a second turn to the JSONL — but *force* its mtime to + // be older than sqlite's. The reopen should NOT rebuild and the + // count should stay at 1. + let mut f = fs::OpenOptions::new().append(true).open(&jsonl).unwrap(); + writeln!(f, "{}", turn_envelope_line("sess-a", "msg-2", 20)).unwrap(); + drop(f); + set_mtime(&jsonl, SystemTime::now() - std::time::Duration::from_secs(60)); + + let l = Ledger::open(&burn, &content).unwrap(); + assert_eq!(l.count_table("turns").unwrap(), 1); + } + + #[test] + fn stale_sqlite_rebuilds_on_open() { + // burn.sqlite is older than ledger.jsonl — rebuild and pick up + // the newer JSONL contents. + let tmp = TempDir::new().unwrap(); + let jsonl = tmp.path().join("ledger.jsonl"); + let burn = tmp.path().join("burn.sqlite"); + let content = tmp.path().join("content.sqlite"); + + // Initial state: 1-line JSONL, sqlite built from it. + fs::write(&jsonl, turn_envelope_line("sess-a", "msg-1", 10) + "\n").unwrap(); + { + let l = Ledger::open(&burn, &content).unwrap(); + assert_eq!(l.count_table("turns").unwrap(), 1); + } + + // Force sqlite's mtime well into the past. + set_mtime(&burn, SystemTime::now() - std::time::Duration::from_secs(3600)); + + // Append to JSONL — its mtime is now newer than sqlite's. 
+ let mut f = fs::OpenOptions::new().append(true).open(&jsonl).unwrap(); + writeln!(f, "{}", turn_envelope_line("sess-a", "msg-2", 20)).unwrap(); + drop(f); + set_mtime(&jsonl, SystemTime::now()); + + let l = Ledger::open(&burn, &content).unwrap(); + assert_eq!(l.count_table("turns").unwrap(), 2); + } +} diff --git a/crates/relayburn-sdk/src/ledger/db.rs b/crates/relayburn-sdk/src/ledger/db.rs index 400ba43f..26af6bb1 100644 --- a/crates/relayburn-sdk/src/ledger/db.rs +++ b/crates/relayburn-sdk/src/ledger/db.rs @@ -47,11 +47,24 @@ impl Connections { } } - let burn = Connection::open(burn_path)?; + // Snapshot whether a bootstrap is needed BEFORE `Connection::open` + // creates `burn.sqlite` as a side effect — if we waited, the + // freshly-created (and newer-than-JSONL) sqlite mtime would + // always look "current" and we'd skip the rebuild. + let bootstrap_decision = + crate::ledger::bootstrap::decide_bootstrap(burn_path); + + let mut burn = Connection::open(burn_path)?; configure_pragmas(&burn)?; burn.execute_batch(BURN_DDL)?; verify_schema_version(&burn)?; + // Bootstrap from `ledger.jsonl` sibling if the sqlite mirror is + // stale or missing. No-op when the JSONL doesn't exist (the + // SDK is in pure-sqlite mode) or when the sqlite is already + // current. See `bootstrap.rs` for the rationale. + crate::ledger::bootstrap::apply_bootstrap(&mut burn, bootstrap_decision)?; + let content = Connection::open(content_path)?; configure_pragmas(&content)?; content.execute_batch(CONTENT_DDL)?;