diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2b65a95..966acaf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -72,6 +72,17 @@ Every version implements the `sql_ast_benchmark::Parser` trait (the same trait ` A new family is a new `families/<family>.rs` with its own adapter (each library has a different parse API) plus its aliases and registry entries. +Both runner passes are resumable family by family on a `--full` run. The memory pass writes `target/timemachine/<family>.mem.json` and skips families that already have one. The timing pass writes `target/timemachine/<family>.timing.json` and reuses it when it is no older than the memory sidecar, so a refreshed memory pass invalidates a stale timing checkpoint automatically. An interruption resumes at the next family (a family interrupted partway is recomputed from its start). Delete `target/timemachine/` for a from-scratch run. + +To correct only a few versions (for example after fixing the dialect mapping for some old releases) without redoing the whole history, use the incremental refresh. It reads the committed `history.json.zst`, recomputes only the listed versions, reuses every other version verbatim, and recomputes the family's deltas from freshly determined accepted sets, so the result matches a full re-run without re-measuring unchanged points: + +```bash +cargo run --release -p timemachine --bin timemachine-mem -- --refresh sqlparser-rs:0.8.0,0.14.0 +cargo run --release -p timemachine --bin timemachine -- --refresh sqlparser-rs:0.8.0,0.14.0 +``` + +Run the memory refresh first (it rewrites only those versions' entries in the memory sidecar), then the timing refresh (which merges them and writes `history.json.zst`). + ## Coverage ```bash diff --git a/timemachine/Cargo.toml b/timemachine/Cargo.toml index ca4c7e5..c29fa73 100644 --- a/timemachine/Cargo.toml +++ b/timemachine/Cargo.toml @@ -22,6 +22,30 @@ zstd = "0.13" # sqlparser-rs, the latest patch of every minor the shared adapter compiles # against. 
Each rename pins one release; cargo keeps them side by side because # different 0.x minors are semver-incompatible. +sqlparser_v0_6 = { package = "sqlparser", version = "=0.6.1" } +sqlparser_v0_7 = { package = "sqlparser", version = "=0.7.0" } +sqlparser_v0_8 = { package = "sqlparser", version = "=0.8.0" } +sqlparser_v0_9 = { package = "sqlparser", version = "=0.9.0" } +sqlparser_v0_10 = { package = "sqlparser", version = "=0.10.0" } +sqlparser_v0_11 = { package = "sqlparser", version = "=0.11.0" } +sqlparser_v0_12 = { package = "sqlparser", version = "=0.12.0" } +sqlparser_v0_13 = { package = "sqlparser", version = "=0.13.0" } +sqlparser_v0_14 = { package = "sqlparser", version = "=0.14.0" } +sqlparser_v0_15 = { package = "sqlparser", version = "=0.15.0" } +sqlparser_v0_16 = { package = "sqlparser", version = "=0.16.0" } +sqlparser_v0_17 = { package = "sqlparser", version = "=0.17.0" } +sqlparser_v0_18 = { package = "sqlparser", version = "=0.18.0" } +sqlparser_v0_19 = { package = "sqlparser", version = "=0.19.0" } +sqlparser_v0_20 = { package = "sqlparser", version = "=0.20.0" } +sqlparser_v0_21 = { package = "sqlparser", version = "=0.21.0" } +sqlparser_v0_22 = { package = "sqlparser", version = "=0.22.0" } +sqlparser_v0_23 = { package = "sqlparser", version = "=0.23.0" } +sqlparser_v0_24 = { package = "sqlparser", version = "=0.24.0" } +sqlparser_v0_25 = { package = "sqlparser", version = "=0.25.0" } +sqlparser_v0_26 = { package = "sqlparser", version = "=0.26.0" } +sqlparser_v0_27 = { package = "sqlparser", version = "=0.27.0" } +sqlparser_v0_28 = { package = "sqlparser", version = "=0.28.0" } +sqlparser_v0_29 = { package = "sqlparser", version = "=0.29.0" } sqlparser_v0_30 = { package = "sqlparser", version = "=0.30.0" } sqlparser_v0_31 = { package = "sqlparser", version = "=0.31.0" } sqlparser_v0_32 = { package = "sqlparser", version = "=0.32.0" } diff --git a/timemachine/src/bin/timemachine.rs b/timemachine/src/bin/timemachine.rs index a0d44a3..08d9085 
100644 --- a/timemachine/src/bin/timemachine.rs +++ b/timemachine/src/bin/timemachine.rs @@ -19,8 +19,10 @@ fn main() { eprintln!("ERROR: could not prepare datasets/: {e}"); std::process::exit(1); } - let full = std::env::args().any(|a| a == "--full"); - if !full { + let args: Vec = std::env::args().collect(); + let refresh = timemachine::run::parse_refresh(&args); + let full = args.iter().any(|a| a == "--full"); + if refresh.is_none() && !full { eprintln!( "(smoke run: first {} statements per dialect; pass --full for the whole corpus)", timemachine::run::SMOKE_LIMIT @@ -32,8 +34,17 @@ fn main() { .stack_size(WORKER_STACK) .spawn(move || { let versions = timemachine::registry::all(); - let written = timemachine::run::run_timing(&versions, full); - eprintln!("history written for: {written:?}"); + if let Some((family, vers)) = refresh { + eprintln!("refreshing {family} versions {vers:?}"); + if let Err(e) = timemachine::run::run_refresh(&versions, &family, &vers) { + eprintln!("ERROR: {e}"); + std::process::exit(1); + } + eprintln!("refreshed {family}"); + } else { + let written = timemachine::run::run_timing(&versions, full); + eprintln!("history written for: {written:?}"); + } }) .expect("spawn worker") .join() diff --git a/timemachine/src/bin/timemachine_mem.rs b/timemachine/src/bin/timemachine_mem.rs index 9476ce4..92c0bf5 100644 --- a/timemachine/src/bin/timemachine_mem.rs +++ b/timemachine/src/bin/timemachine_mem.rs @@ -49,8 +49,10 @@ fn main() { eprintln!("ERROR: could not prepare datasets/: {e}"); std::process::exit(1); } - let full = std::env::args().any(|a| a == "--full"); - if !full { + let args: Vec = std::env::args().collect(); + let refresh = timemachine::run::parse_refresh(&args); + let full = args.iter().any(|a| a == "--full"); + if refresh.is_none() && !full { eprintln!( "(smoke run: first {} statements per dialect; pass --full for the whole corpus)", timemachine::run::SMOKE_LIMIT @@ -60,7 +62,13 @@ fn main() { .stack_size(WORKER_STACK) .spawn(move 
|| { let versions = timemachine::registry::all(); - timemachine::run::run_memory(&versions, full); + if let Some((family, vers)) = refresh { + eprintln!("refreshing memory for {family} versions {vers:?}"); + timemachine::run::run_memory_refresh(&versions, &family, &vers); + eprintln!("memory refreshed for {family}"); + } else { + timemachine::run::run_memory(&versions, full); + } }) .expect("spawn worker") .join() diff --git a/timemachine/src/families/databend.rs b/timemachine/src/families/databend.rs index 0d14140..81b592e 100644 --- a/timemachine/src/families/databend.rs +++ b/timemachine/src/families/databend.rs @@ -20,6 +20,22 @@ macro_rules! databend_version { } impl Parser for $name { + // Surface a caught panic (the adapters fold one into `Err("panicked")`) + // so `grade_chunk` records the empirical panic rate across releases. + fn parse_outcome( + &self, + sql: &str, + dialect: Dialect, + ) -> sql_ast_benchmark::ParseOutcome { + use sql_ast_benchmark::ParseOutcome; + match self.try_parse(sql, dialect) { + None => ParseOutcome::Unsupported, + Some(Ok(())) => ParseOutcome::Accepted, + Some(Err(e)) if e == "panicked" => ParseOutcome::Panicked(e), + Some(Err(e)) => ParseOutcome::Rejected(e), + } + } + fn id(&self) -> ParserId { ParserId { family: "databend-common-ast", diff --git a/timemachine/src/families/orql.rs b/timemachine/src/families/orql.rs index cb69c4e..5ee4cf2 100644 --- a/timemachine/src/families/orql.rs +++ b/timemachine/src/families/orql.rs @@ -10,6 +10,22 @@ macro_rules! orql_version { pub struct $name; impl Parser for $name { + // Surface a caught panic (the adapters fold one into `Err("panicked")`) + // so `grade_chunk` records the empirical panic rate across releases. 
+ fn parse_outcome( + &self, + sql: &str, + dialect: Dialect, + ) -> sql_ast_benchmark::ParseOutcome { + use sql_ast_benchmark::ParseOutcome; + match self.try_parse(sql, dialect) { + None => ParseOutcome::Unsupported, + Some(Ok(())) => ParseOutcome::Accepted, + Some(Err(e)) if e == "panicked" => ParseOutcome::Panicked(e), + Some(Err(e)) => ParseOutcome::Rejected(e), + } + } + fn id(&self) -> ParserId { ParserId { family: "orql", diff --git a/timemachine/src/families/polyglot.rs b/timemachine/src/families/polyglot.rs index 3614d2c..11f4720 100644 --- a/timemachine/src/families/polyglot.rs +++ b/timemachine/src/families/polyglot.rs @@ -29,6 +29,22 @@ macro_rules! polyglot_version { } impl Parser for $name { + // Surface a caught panic (the adapters fold one into `Err("panicked")`) + // so `grade_chunk` records the empirical panic rate across releases. + fn parse_outcome( + &self, + sql: &str, + dialect: Dialect, + ) -> sql_ast_benchmark::ParseOutcome { + use sql_ast_benchmark::ParseOutcome; + match self.try_parse(sql, dialect) { + None => ParseOutcome::Unsupported, + Some(Ok(())) => ParseOutcome::Accepted, + Some(Err(e)) if e == "panicked" => ParseOutcome::Panicked(e), + Some(Err(e)) => ParseOutcome::Rejected(e), + } + } + fn id(&self) -> ParserId { ParserId { family: "polyglot-sql", diff --git a/timemachine/src/families/qusql.rs b/timemachine/src/families/qusql.rs index e0c14f3..242bdb2 100644 --- a/timemachine/src/families/qusql.rs +++ b/timemachine/src/families/qusql.rs @@ -27,6 +27,22 @@ macro_rules! qusql_version { } impl Parser for $name { + // Surface a caught panic (the adapters fold one into `Err("panicked")`) + // so `grade_chunk` records the empirical panic rate across releases. 
+ fn parse_outcome( + &self, + sql: &str, + dialect: Dialect, + ) -> sql_ast_benchmark::ParseOutcome { + use sql_ast_benchmark::ParseOutcome; + match self.try_parse(sql, dialect) { + None => ParseOutcome::Unsupported, + Some(Ok(())) => ParseOutcome::Accepted, + Some(Err(e)) if e == "panicked" => ParseOutcome::Panicked(e), + Some(Err(e)) => ParseOutcome::Rejected(e), + } + } + fn id(&self) -> ParserId { ParserId { family: "qusql-parse", diff --git a/timemachine/src/families/sqlglot.rs b/timemachine/src/families/sqlglot.rs index ac097ad..a3ceebd 100644 --- a/timemachine/src/families/sqlglot.rs +++ b/timemachine/src/families/sqlglot.rs @@ -29,6 +29,22 @@ macro_rules! sqlglot_version { } impl Parser for $name { + // Surface a caught panic (the adapters fold one into `Err("panicked")`) + // so `grade_chunk` records the empirical panic rate across releases. + fn parse_outcome( + &self, + sql: &str, + dialect: Dialect, + ) -> sql_ast_benchmark::ParseOutcome { + use sql_ast_benchmark::ParseOutcome; + match self.try_parse(sql, dialect) { + None => ParseOutcome::Unsupported, + Some(Ok(())) => ParseOutcome::Accepted, + Some(Err(e)) if e == "panicked" => ParseOutcome::Panicked(e), + Some(Err(e)) => ParseOutcome::Rejected(e), + } + } + fn id(&self) -> ParserId { ParserId { family: "sqlglot-rust", diff --git a/timemachine/src/families/sqlite3.rs b/timemachine/src/families/sqlite3.rs index 59baffd..6c660c1 100644 --- a/timemachine/src/families/sqlite3.rs +++ b/timemachine/src/families/sqlite3.rs @@ -10,6 +10,22 @@ macro_rules! sqlite3_version { pub struct $name; impl Parser for $name { + // Surface a caught panic (the adapters fold one into `Err("panicked")`) + // so `grade_chunk` records the empirical panic rate across releases. 
+ fn parse_outcome( + &self, + sql: &str, + dialect: Dialect, + ) -> sql_ast_benchmark::ParseOutcome { + use sql_ast_benchmark::ParseOutcome; + match self.try_parse(sql, dialect) { + None => ParseOutcome::Unsupported, + Some(Ok(())) => ParseOutcome::Accepted, + Some(Err(e)) if e == "panicked" => ParseOutcome::Panicked(e), + Some(Err(e)) => ParseOutcome::Rejected(e), + } + } + fn id(&self) -> ParserId { ParserId { family: "sqlite3-parser", diff --git a/timemachine/src/families/sqlparser.rs b/timemachine/src/families/sqlparser.rs index 8b3f693..ed76e76 100644 --- a/timemachine/src/families/sqlparser.rs +++ b/timemachine/src/families/sqlparser.rs @@ -14,28 +14,47 @@ use sql_ast_benchmark::{Parser, ParserId}; /// back to `GenericDialect` for the rest, so the same code compiles against each /// version and the trend stays internally consistent. macro_rules! sqlparser_version { + // Full dialect set, for releases that model every dialect we map (0.18+). ($name:ident, $cr:ident, $ver:literal, $released:literal) => { + sqlparser_version!($name, $cr, $ver, $released, [ + Postgresql => PostgreSqlDialect, + Mysql => MySqlDialect, + Sqlite => SQLiteDialect, + Clickhouse => ClickHouseDialect, + Hive => HiveDialect, + Tsql => MsSqlDialect, + Bigquery => BigQueryDialect, + ]); + }; + // Explicit dialect arms, for older releases that predate some dialects (SQLite + // arrived in 0.7, Hive in 0.8, ClickHouse in 0.14, BigQuery in 0.18). Any + // dialect not listed falls back to the generic dialect, the same approach the + // newest versions use for dialects they do not model. 
+ ($name:ident, $cr:ident, $ver:literal, $released:literal, [$($variant:ident => $dia:ident),* $(,)?]) => { pub struct $name; impl $name { fn dialect(d: Dialect) -> Box { match d { - Dialect::Postgresql => Box::new($cr::dialect::PostgreSqlDialect {}), - Dialect::Mysql => Box::new($cr::dialect::MySqlDialect {}), - Dialect::Sqlite => Box::new($cr::dialect::SQLiteDialect {}), - Dialect::Clickhouse => Box::new($cr::dialect::ClickHouseDialect {}), - Dialect::Hive => Box::new($cr::dialect::HiveDialect {}), - Dialect::Tsql => Box::new($cr::dialect::MsSqlDialect {}), - Dialect::Bigquery => Box::new($cr::dialect::BigQueryDialect {}), - // Oracle, DuckDB, Redshift, Spark, Trino and Multi did not all - // exist as dedicated dialects across these releases, so use the - // generic dialect uniformly for them. + $( Dialect::$variant => Box::new($cr::dialect::$dia {}), )* _ => Box::new($cr::dialect::GenericDialect {}), } } } impl Parser for $name { + // Surface a caught panic (the adapters fold one into `Err("panicked")`) + // so `grade_chunk` records the empirical panic rate across releases. + fn parse_outcome(&self, sql: &str, dialect: Dialect) -> sql_ast_benchmark::ParseOutcome { + use sql_ast_benchmark::ParseOutcome; + match self.try_parse(sql, dialect) { + None => ParseOutcome::Unsupported, + Some(Ok(())) => ParseOutcome::Accepted, + Some(Err(e)) if e == "panicked" => ParseOutcome::Panicked(e), + Some(Err(e)) => ParseOutcome::Rejected(e), + } + } + fn id(&self) -> ParserId { ParserId { family: "sqlparser-rs", @@ -114,6 +133,33 @@ macro_rules! sqlparser_version { }; } +// Older releases, with the reduced dialect sets of their era. 
+sqlparser_version!(SqlparserV0_6, sqlparser_v0_6, "0.6.1", "2020-07-20", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_7, sqlparser_v0_7, "0.7.0", "2020-12-28", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_8, sqlparser_v0_8, "0.8.0", "2021-02-09", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_9, sqlparser_v0_9, "0.9.0", "2021-03-21", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_10, sqlparser_v0_10, "0.10.0", "2021-08-23", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_11, sqlparser_v0_11, "0.11.0", "2021-09-25", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_12, sqlparser_v0_12, "0.12.0", "2021-10-14", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_13, sqlparser_v0_13, "0.13.0", "2021-12-10", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_14, sqlparser_v0_14, "0.14.0", "2022-02-09", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Clickhouse => ClickHouseDialect, Hive => HiveDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_15, sqlparser_v0_15, "0.15.0", "2022-03-08", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Clickhouse => ClickHouseDialect, Hive => 
HiveDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_16, sqlparser_v0_16, "0.16.0", "2022-04-03", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Clickhouse => ClickHouseDialect, Hive => HiveDialect, Tsql => MsSqlDialect]); +sqlparser_version!(SqlparserV0_17, sqlparser_v0_17, "0.17.0", "2022-05-10", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Clickhouse => ClickHouseDialect, Hive => HiveDialect, Tsql => MsSqlDialect]); +// 0.18 onward model every dialect we map (BigQuery landed in 0.18), so they use +// the full-set form. +sqlparser_version!(SqlparserV0_18, sqlparser_v0_18, "0.18.0", "2022-06-06"); +sqlparser_version!(SqlparserV0_19, sqlparser_v0_19, "0.19.0", "2022-07-28"); +sqlparser_version!(SqlparserV0_20, sqlparser_v0_20, "0.20.0", "2022-08-05"); +sqlparser_version!(SqlparserV0_21, sqlparser_v0_21, "0.21.0", "2022-08-18"); +sqlparser_version!(SqlparserV0_22, sqlparser_v0_22, "0.22.0", "2022-08-26"); +sqlparser_version!(SqlparserV0_23, sqlparser_v0_23, "0.23.0", "2022-09-08"); +sqlparser_version!(SqlparserV0_24, sqlparser_v0_24, "0.24.0", "2022-09-28"); +sqlparser_version!(SqlparserV0_25, sqlparser_v0_25, "0.25.0", "2022-10-03"); +sqlparser_version!(SqlparserV0_26, sqlparser_v0_26, "0.26.0", "2022-10-19"); +sqlparser_version!(SqlparserV0_27, sqlparser_v0_27, "0.27.0", "2022-11-11"); +sqlparser_version!(SqlparserV0_28, sqlparser_v0_28, "0.28.0", "2022-12-05"); +sqlparser_version!(SqlparserV0_29, sqlparser_v0_29, "0.29.0", "2022-12-29"); sqlparser_version!(SqlparserV0_30, sqlparser_v0_30, "0.30.0", "2023-01-02"); sqlparser_version!(SqlparserV0_31, sqlparser_v0_31, "0.31.0", "2023-03-01"); sqlparser_version!(SqlparserV0_32, sqlparser_v0_32, "0.32.0", "2023-03-06"); @@ -147,3 +193,30 @@ sqlparser_version!(SqlparserV0_59, sqlparser_v0_59, "0.59.0", "2025-09-24"); sqlparser_version!(SqlparserV0_60, sqlparser_v0_60, "0.60.0", "2025-12-07"); sqlparser_version!(SqlparserV0_61, 
sqlparser_v0_61, "0.61.0", "2026-02-10"); sqlparser_version!(SqlparserV0_62, sqlparser_v0_62, "0.62.0", "2026-05-07"); + +#[cfg(test)] +mod tests { + use super::*; + + // The old-era adapters (reduced dialect sets, generic fallback) must actually + // parse, not just compile. A plain SELECT is valid in every release back to + // 0.6, so each oldest-tier version should accept it without panicking. + #[test] + fn old_adapters_parse_basic_select() { + let sql = "SELECT a, b FROM t WHERE a > 1"; + for p in [ + Box::new(SqlparserV0_6) as Box, + Box::new(SqlparserV0_7), + Box::new(SqlparserV0_9), + Box::new(SqlparserV0_16), + Box::new(SqlparserV0_29), + ] { + let v = p.id().version; + assert_eq!( + p.try_parse(sql, Dialect::Postgresql), + Some(Ok(())), + "sqlparser {v} should parse a basic SELECT" + ); + } + } +} diff --git a/timemachine/src/families/turso.rs b/timemachine/src/families/turso.rs index 356410a..0f0c1d1 100644 --- a/timemachine/src/families/turso.rs +++ b/timemachine/src/families/turso.rs @@ -10,6 +10,22 @@ macro_rules! turso_version { pub struct $name; impl Parser for $name { + // Surface a caught panic (the adapters fold one into `Err("panicked")`) + // so `grade_chunk` records the empirical panic rate across releases. + fn parse_outcome( + &self, + sql: &str, + dialect: Dialect, + ) -> sql_ast_benchmark::ParseOutcome { + use sql_ast_benchmark::ParseOutcome; + match self.try_parse(sql, dialect) { + None => ParseOutcome::Unsupported, + Some(Ok(())) => ParseOutcome::Accepted, + Some(Err(e)) if e == "panicked" => ParseOutcome::Panicked(e), + Some(Err(e)) => ParseOutcome::Rejected(e), + } + } + fn id(&self) -> ParserId { ParserId { family: "turso_parser", diff --git a/timemachine/src/registry.rs b/timemachine/src/registry.rs index 8a381ce..6d4880b 100644 --- a/timemachine/src/registry.rs +++ b/timemachine/src/registry.rs @@ -3,6 +3,21 @@ //! One entry per (family, milestone). The current release of each family is //! 
included as the newest point so the trend ends at "now", measured under the //! same conditions as the older points. +//! +//! Each family is taken back as far as it still builds with a proportionate +//! adapter. Where a line stops, the reason is recorded so the gaps are explicit: +//! +//! - sqlparser-rs: back to 0.6.1 (July 2020). Below 0.6 `parse_sql` returns a +//! single `ASTNode` instead of `Vec<Statement>`, a different shape that is not +//! comparable to the rest of the history. +//! - sqlite3-parser: back to 0.9.0. Every release below 0.9 depends on +//! fallible-iterator 0.2 (the adapter uses 0.3), and 0.1 to 0.5 also use a +//! divergent generic `Parser::new(input: I)`, so reaching them would need a +//! second fallible-iterator major version and a separate constructor tier. +//! - qusql-parse: back to 0.2.1. 0.1.0 is excluded because its parser +//! effectively hangs on parts of the MySQL corpus at full-corpus scale. +//! - polyglot-sql (0.1), databend-common-ast (0.0), sqlglot-rust (0.9), +//! turso_parser (0.6), orql (0.1): already at their first published release. 
use crate::families::{databend, orql, polyglot, qusql, sqlglot, sqlite3, sqlparser, turso}; use sql_ast_benchmark::Parser; @@ -11,6 +26,30 @@ use sql_ast_benchmark::Parser; #[must_use] pub fn all() -> Vec> { vec![ + Box::new(sqlparser::SqlparserV0_6), + Box::new(sqlparser::SqlparserV0_7), + Box::new(sqlparser::SqlparserV0_8), + Box::new(sqlparser::SqlparserV0_9), + Box::new(sqlparser::SqlparserV0_10), + Box::new(sqlparser::SqlparserV0_11), + Box::new(sqlparser::SqlparserV0_12), + Box::new(sqlparser::SqlparserV0_13), + Box::new(sqlparser::SqlparserV0_14), + Box::new(sqlparser::SqlparserV0_15), + Box::new(sqlparser::SqlparserV0_16), + Box::new(sqlparser::SqlparserV0_17), + Box::new(sqlparser::SqlparserV0_18), + Box::new(sqlparser::SqlparserV0_19), + Box::new(sqlparser::SqlparserV0_20), + Box::new(sqlparser::SqlparserV0_21), + Box::new(sqlparser::SqlparserV0_22), + Box::new(sqlparser::SqlparserV0_23), + Box::new(sqlparser::SqlparserV0_24), + Box::new(sqlparser::SqlparserV0_25), + Box::new(sqlparser::SqlparserV0_26), + Box::new(sqlparser::SqlparserV0_27), + Box::new(sqlparser::SqlparserV0_28), + Box::new(sqlparser::SqlparserV0_29), Box::new(sqlparser::SqlparserV0_30), Box::new(sqlparser::SqlparserV0_31), Box::new(sqlparser::SqlparserV0_32), diff --git a/timemachine/src/run.rs b/timemachine/src/run.rs index c1439ed..6b9b24a 100644 --- a/timemachine/src/run.rs +++ b/timemachine/src/run.rs @@ -12,11 +12,13 @@ use sql_ast_benchmark::batch::{batch_eligible, evaluate_batches, reports_stateme use sql_ast_benchmark::datasets::Dialect; use sql_ast_benchmark::report::{self, load_dialect}; use sql_ast_benchmark::{stats, Parser}; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashSet}; use std::hint::black_box; use std::path::PathBuf; use std::time::Instant; -use viz::{DialectRun, FamilyHistory, ParserBatch, ParserMem, ParserMetrics, VersionRun}; +use viz::{ + DialectDelta, DialectRun, FamilyHistory, ParserBatch, ParserMem, ParserMetrics, VersionRun, +}; /// 
Dialects in display order (matches the rest of the benchmark). pub const DIALECTS: &[Dialect] = &[ @@ -154,21 +156,73 @@ fn metrics_of(report: &report::DialectReport) -> ParserMetrics { } else { pct(s.accepted_valid, report.valid_total) }, - // The time machine does not classify contentious constructs, so it reports - // no excluding-contentious recall and counts none accepted. - accepted_valid_contentious: 0, - recall_excl_contentious_pct: None, - // The time machine does not measure the empirical panic rate (only the - // current build does, via BenchParser's panic-detecting parse_outcome), so - // it is left unmeasured rather than reported as a misleading zero. + accepted_valid_contentious: s.accepted_valid_contentious, + // Recall over the non-contentious valid statements, the secondary metric + // the main snapshot reports, now tracked across releases too. + recall_excl_contentious_pct: if reference { + pct( + s.accepted_valid - s.accepted_valid_contentious, + report.valid_total - report.contentious_valid, + ) + } else { + None + }, + // Empirical panic rate: the adapters override `parse_outcome` to surface a + // caught panic, so `grade_chunk` counts it here too. attempted: s.attempted, - panicked: 0, - panic_pct: None, + panicked: s.panicked, + panic_pct: pct(s.panicked, s.attempted), + } +} + +/// Truncate `s` to at most `max` characters for a compact example, marking it. +fn truncate(s: &str, max: usize) -> String { + if s.chars().count() <= max { + s.to_string() + } else { + let head: String = s.chars().take(max).collect(); + format!("{head} ...") } } -/// Build the timing + batch + correctness part of one version's run (no memory). -fn timing_dialect_run(p: &dyn Parser, d: Dialect, stmts: &[String]) -> DialectRun { +/// The per-dialect change from the `prev` accepted set to `cur`: exact gained and +/// lost counts, plus a few sorted, truncated example statements for each. 
+fn coverage_delta(dir: &str, prev: &HashSet, cur: &HashSet) -> DialectDelta { + const EXAMPLES: usize = 3; + const MAX_LEN: usize = 200; + let mut gained: Vec<&String> = cur.difference(prev).collect(); + let mut lost: Vec<&String> = prev.difference(cur).collect(); + gained.sort(); + lost.sort(); + let sample = |v: &[&String]| -> Vec { + v.iter() + .take(EXAMPLES) + .map(|s| truncate(s, MAX_LEN)) + .collect() + }; + DialectDelta { + dir_name: dir.to_string(), + gained: gained.len(), + lost: lost.len(), + examples_gained: sample(&gained), + examples_lost: sample(&lost), + } +} + +/// The set of statements one version accepts in one dialect, without timing +/// (cheap: one parse per statement). Used to recompute deltas during a refresh. +fn accepted_set(p: &dyn Parser, d: Dialect, stmts: &[String]) -> HashSet { + stmts + .iter() + .filter(|s| p.accepts(s, d) == Some(true)) + .cloned() + .collect() +} + +/// Build the timing + batch + correctness part of one version's run (no memory), +/// plus the set of statements this version accepted in this dialect (for the +/// version-to-version coverage deltas). 
+fn timing_dialect_run(p: &dyn Parser, d: Dialect, stmts: &[String]) -> (DialectRun, Vec) { let accepted: Vec<&str> = stmts .iter() .filter(|s| p.accepts(s, d) == Some(true)) @@ -246,15 +300,19 @@ fn timing_dialect_run(p: &dyn Parser, d: Dialect, stmts: &[String]) -> DialectRu let report = report::grade_chunk(stmts, d, &[p]); let correctness = Some(metrics_of(&report)); - DialectRun { - dir_name: d.dir_name().to_string(), - display_name: d.display_name().to_string(), - has_reference: report.has_reference, - perf, - memory: None, - batch, - correctness, - } + let accepted_owned: Vec = accepted.iter().map(|s| (*s).to_string()).collect(); + ( + DialectRun { + dir_name: d.dir_name().to_string(), + display_name: d.display_name().to_string(), + has_reference: report.has_reference, + perf, + memory: None, + batch, + correctness, + }, + accepted_owned, + ) } /// Build the memory part of one version's run for one dialect (peak + retained). @@ -302,17 +360,35 @@ fn by_family(versions: &[Box]) -> Vec<(&'static str, Vec<&dyn Parser } /// Run the timing + batch + correctness pass, merge the memory sidecar (if any), -/// and write the final per-family history files. Returns the families written. +/// and write the final combined history. Returns the families written. +/// +/// On a full run each finished family is checkpointed to a `<family>.timing.json`, and a +/// family with a fresh checkpoint (no older than its memory sidecar) is loaded +/// rather than recomputed, so an interrupted run resumes family by family. Delete +/// `target/timemachine/` for a from-scratch run. pub fn run_timing(versions: &[Box], full: bool) -> Vec { let corpus = load_corpus(full); let mut histories = Vec::new(); let mut written = Vec::new(); for (family, vs) in by_family(versions) { // Resume: reuse a fresh checkpoint instead of recomputing the family. 
+ if full { + if let Some(cached) = cached_timing(family) { + eprintln!("time {family}: cached checkpoint, skipping"); + histories.push(cached); + written.push(family.to_string()); + continue; + } + } let sidecar = read_sidecar(family); let mut version_runs = Vec::new(); + // Accepted sets of the previous version, per dialect, for coverage deltas. + let mut prev_accepted: BTreeMap> = BTreeMap::new(); for p in vs { let id = p.id(); let mut dialects = Vec::new(); + let mut deltas = Vec::new(); + let mut cur_accepted: BTreeMap> = BTreeMap::new(); for &d in DIALECTS { if !p.supports(d) { continue; @@ -327,7 +403,7 @@ pub fn run_timing(versions: &[Box], full: bool) -> Vec { let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { timing_dialect_run(p, d, stmts) })); - let Ok(mut run) = outcome else { + let Ok((mut run, accepted)) = outcome else { eprintln!( " [warn] time {family} {} {} panicked, skipping", id.version, @@ -342,18 +418,31 @@ pub fn run_timing(versions: &[Box], full: bool) -> Vec { d.dir_name(), run.perf.as_ref().map_or(0, |x| x.n_accepted), ); + let dir = d.dir_name().to_string(); + let acc_set: HashSet = accepted.into_iter().collect(); + if let Some(prev) = prev_accepted.get(&dir) { + deltas.push(coverage_delta(&dir, prev, &acc_set)); + } + cur_accepted.insert(dir, acc_set); dialects.push(run); } version_runs.push(VersionRun { version: id.version.to_string(), released: id.released.to_string(), dialects, + deltas, }); + prev_accepted = cur_accepted; } - histories.push(FamilyHistory { + let history = FamilyHistory { family: family.to_string(), versions: version_runs, - }); + }; + // Checkpoint the finished family so a later interruption can resume here. 
+ if full { + write_timing(&history); + } + histories.push(history); written.push(family.to_string()); } write_combined(&histories); @@ -419,6 +508,9 @@ pub fn run_memory(versions: &[Box], full: bool) { version: id.version.to_string(), released: id.released.to_string(), dialects, + // The memory pass writes only the memory sidecar, never the deltas + // (the timing pass owns those), so leave them empty here. + deltas: Vec::new(), }); } let history = FamilyHistory { @@ -429,6 +521,191 @@ pub fn run_memory(versions: &[Box], full: bool) { } } +/// The registry's parsers for one family, in release order. +fn family_versions<'a>(versions: &'a [Box], family: &str) -> Vec<&'a dyn Parser> { + by_family(versions) + .into_iter() + .find(|(f, _)| *f == family) + .map(|(_, vs)| vs) + .unwrap_or_default() +} + +/// Incremental memory refresh: recompute the memory sidecar entries for only the +/// listed versions of `family`, splicing them into the existing sidecar. Run in +/// the memory binary (it installs the counting allocator). Other versions and +/// families are left untouched. 
+pub fn run_memory_refresh(versions: &[Box], family: &str, refresh: &[String]) { + let corpus = load_corpus(true); + let mut sidecar = read_sidecar(family).unwrap_or_else(|| FamilyHistory { + family: family.to_string(), + versions: Vec::new(), + }); + for p in family_versions(versions, family) { + let id = p.id(); + if !refresh.iter().any(|r| r == id.version) { + continue; + } + let mut dialects = Vec::new(); + for &d in DIALECTS { + if !p.supports(d) { + continue; + } + let stmts = &corpus[d.dir_name()]; + if stmts.is_empty() { + continue; + } + let memory = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + mem_dialect_run(p, d, stmts) + })) + .unwrap_or(None); + eprintln!( + "mem-refresh {family} {} {}: n={}", + id.version, + d.dir_name(), + memory.as_ref().map_or(0, |m| m.n) + ); + dialects.push(DialectRun { + dir_name: d.dir_name().to_string(), + display_name: d.display_name().to_string(), + has_reference: false, + perf: None, + memory, + batch: None, + correctness: None, + }); + } + let vr = VersionRun { + version: id.version.to_string(), + released: id.released.to_string(), + dialects, + deltas: Vec::new(), + }; + match sidecar + .versions + .iter_mut() + .find(|v| v.version == id.version) + { + Some(slot) => *slot = vr, + None => sidecar.versions.push(vr), + } + } + write_sidecar(&sidecar); +} + +/// Incremental timing refresh: recompute the timing, correctness, and batch for +/// only the listed versions of `family` (merging their refreshed memory from the +/// sidecar), reuse every other version's metrics from the committed history, and +/// recompute all of the family's deltas from freshly determined accepted sets. +/// The result is identical to a full re-run, without re-measuring unchanged +/// points. Returns an error if the committed history cannot be read. 
+///
+/// Conditions that would otherwise degrade the output silently are reported on
+/// stderr: a requested version the registry does not list, a missing memory
+/// sidecar, and a registry version that is neither in the committed history nor
+/// listed for refresh.
+///
+/// # Errors
+///
+/// Returns `Err` when the committed history cannot be read or does not contain
+/// `family`.
+pub fn run_refresh(
+    versions: &[Box],
+    family: &str,
+    refresh: &[String],
+) -> Result<(), String> {
+    let corpus = load_corpus(true);
+    let mut history = read_combined().ok_or_else(|| format!("cannot read {HISTORY_FILE}"))?;
+    let baseline = history
+        .iter()
+        .find(|h| h.family == family)
+        .cloned()
+        .ok_or_else(|| format!("{family} not present in {HISTORY_FILE}"))?;
+    let sidecar = read_sidecar(family);
+    let family_parsers = family_versions(versions, family);
+
+    // A mistyped version would otherwise match nothing and vanish silently.
+    for wanted in refresh {
+        let known = family_parsers.iter().any(|p| {
+            let id = p.id();
+            wanted == id.version
+        });
+        if !known {
+            eprintln!(" [warn] refresh {family}: no registered version {wanted}, ignoring it");
+        }
+    }
+    // Without a sidecar every lookup below yields no memory for refreshed points.
+    if sidecar.is_none() && !refresh.is_empty() {
+        eprintln!(
+            " [warn] refresh {family}: no memory sidecar found, refreshed versions will carry no memory data"
+        );
+    }
+
+    let mut new_versions = Vec::new();
+    let mut prev_accepted: BTreeMap> = BTreeMap::new();
+    for p in family_parsers {
+        let id = p.id();
+        let refreshing = refresh.iter().any(|r| r == id.version);
+        let mut cur_accepted: BTreeMap> = BTreeMap::new();
+
+        let dialects: Vec = if refreshing {
+            let mut ds = Vec::new();
+            for &d in DIALECTS {
+                if !p.supports(d) {
+                    continue;
+                }
+                let stmts = &corpus[d.dir_name()];
+                if stmts.is_empty() {
+                    continue;
+                }
+                let Ok((mut run, accepted)) =
+                    std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+                        timing_dialect_run(p, d, stmts)
+                    }))
+                else {
+                    eprintln!(
+                        " [warn] refresh {family} {} {} panicked, skipping",
+                        id.version,
+                        d.dir_name()
+                    );
+                    continue;
+                };
+                run.memory = sidecar_lookup(sidecar.as_ref(), id.version, d.dir_name());
+                eprintln!(
+                    "refresh {family} {} {}: n={}",
+                    id.version,
+                    d.dir_name(),
+                    run.perf.as_ref().map_or(0, |x| x.n_accepted)
+                );
+                cur_accepted.insert(d.dir_name().to_string(), accepted.into_iter().collect());
+                ds.push(run);
+            }
+            ds
+        } else {
+            // Unchanged version: reuse its committed metrics, but still determine
+            // its accepted sets (cheaply) so neighbouring deltas stay correct.
+            for &d in DIALECTS {
+                if !p.supports(d) {
+                    continue;
+                }
+                let stmts = &corpus[d.dir_name()];
+                if stmts.is_empty() {
+                    continue;
+                }
+                cur_accepted.insert(d.dir_name().to_string(), accepted_set(p, d, stmts));
+            }
+            match baseline.versions.iter().find(|v| v.version == id.version) {
+                Some(v) => v.dialects.clone(),
+                None => {
+                    // A registry version the committed history has never seen
+                    // has no metrics to reuse; say so instead of emitting an
+                    // empty entry without a trace.
+                    eprintln!(
+                        " [warn] refresh {family} {}: not in {HISTORY_FILE} and not listed for refresh, emitting it without metrics",
+                        id.version
+                    );
+                    Vec::new()
+                }
+            }
+        };
+
+        // A delta needs the accepted set of both this and the previous version
+        // in the same dialect; the first version therefore gets none.
+        let mut deltas = Vec::new();
+        for &d in DIALECTS {
+            let dir = d.dir_name().to_string();
+            if let (Some(prev), Some(cur)) = (prev_accepted.get(&dir), cur_accepted.get(&dir)) {
+                deltas.push(coverage_delta(&dir, prev, cur));
+            }
+        }
+        new_versions.push(VersionRun {
+            version: id.version.to_string(),
+            released: id.released.to_string(),
+            dialects,
+            deltas,
+        });
+        prev_accepted = cur_accepted;
+    }
+
+    // Replace only this family's versions; other families are written back as read.
+    if let Some(slot) = history.iter_mut().find(|h| h.family == family) {
+        slot.versions = new_versions;
+    }
+    write_combined(&history);
+    Ok(())
+}
+
+/// Parse a `--refresh family:version[,version...]` spec from the args, if
+/// present.
+///
+/// Returns `None` when `--refresh` is absent, has no following argument, or the
+/// argument contains no `:`. Surrounding whitespace is trimmed and empty entries
+/// (for example from a trailing comma) are dropped, so the version list holds
+/// only usable names.
+#[must_use]
+pub fn parse_refresh(args: &[String]) -> Option<(String, Vec)> {
+    let i = args.iter().position(|a| a == "--refresh")?;
+    let spec = args.get(i + 1)?;
+    let (family, vers) = spec.split_once(':')?;
+    let versions = vers
+        .split(',')
+        .map(str::trim)
+        .filter(|v| !v.is_empty())
+        .map(str::to_string)
+        .collect();
+    Some((family.trim().to_string(), versions))
+}
+
 /// Serialize all families to one JSON array and zstd-compress it for embedding.
 fn write_combined(histories: &[FamilyHistory]) {
     let path = PathBuf::from(HISTORY_FILE);
@@ -450,6 +727,13 @@ fn write_combined(histories: &[FamilyHistory]) {
     }
 }
 
+/// Read the committed combined history back (for the incremental refresh mode).
+fn read_combined() -> Option> {
+    // Every failure (unreadable file, zstd decode error, JSON that does not
+    // deserialize) collapses to `None`; the caller decides how to report it.
+    let raw = std::fs::read(HISTORY_FILE).ok()?;
+    let json = zstd::stream::decode_all(raw.as_slice()).ok()?;
+    serde_json::from_slice(&json).ok()
+}
+
 fn sidecar_path(family: &str) -> PathBuf {
     PathBuf::from(SIDECAR_DIR).join(format!("{}.mem.json", family_slug(family)))
 }
@@ -471,6 +755,37 @@ fn read_sidecar(family: &str) -> Option {
     serde_json::from_str(&raw).ok()
 }
 
+/// Per-family timing checkpoint path (distinct from the `.mem.json` sidecar), so
+/// the timing pass can resume after an interruption family by family.
+fn timing_path(family: &str) -> PathBuf {
+    PathBuf::from(SIDECAR_DIR).join(format!("{}.timing.json", family_slug(family)))
+}
+
+/// Write one family's finished timing result as a resume checkpoint.
+///
+/// A write failure is reported on stderr and not propagated. Panics if the
+/// history cannot be serialized.
+fn write_timing(history: &FamilyHistory) {
+    // The result of creating the directory is deliberately ignored; the write
+    // below is the step whose failure gets reported.
+    let _ = std::fs::create_dir_all(SIDECAR_DIR);
+    let path = timing_path(&history.family);
+    let json = serde_json::to_string(history).expect("serialize timing checkpoint");
+    if let Err(e) = std::fs::write(&path, json) {
+        eprintln!("ERROR: writing {}: {e}", path.display());
+    }
+}
+
+/// The cached timing result for `family`, if its checkpoint exists and is at
+/// least as new as the memory sidecar. A refreshed memory pass thus invalidates a
+/// stale timing checkpoint automatically (the memory is merged into the timing
+/// result, so an older checkpoint would carry outdated memory).
+fn cached_timing(family: &str) -> Option {
+    let path = timing_path(family);
+    // No checkpoint, or no readable modification time, means nothing to reuse.
+    let checkpoint_mtime = std::fs::metadata(&path).ok()?.modified().ok()?;
+    // Only a strictly newer sidecar invalidates the checkpoint; a sidecar whose
+    // metadata cannot be read leaves the checkpoint usable.
+    if let Ok(mem_mtime) = std::fs::metadata(sidecar_path(family)).and_then(|m| m.modified()) {
+        if mem_mtime > checkpoint_mtime {
+            return None;
+        }
+    }
+    // An unreadable or unparsable checkpoint also counts as "no cache".
+    serde_json::from_str(&std::fs::read_to_string(&path).ok()?).ok()
+}
+
 /// Find the memory entry for a (version, dialect) in the sidecar.
 fn sidecar_lookup(sidecar: Option<&FamilyHistory>, version: &str, dir: &str) -> Option {
     sidecar?
diff --git a/viz/src/chart.rs b/viz/src/chart.rs index 3a3f4b6..7f82ea9 100644 --- a/viz/src/chart.rs +++ b/viz/src/chart.rs @@ -531,8 +531,9 @@ pub fn trend_lines(title: &str, series: &[TrendSeries], w: u32, h: u32, y_desc: /// Percentage trend chart: x = release date, y = a rate in percent on a linear /// axis, one line per series. Each point's `median` slot carries the value (the /// p25/p75 slots are ignored, since a rate is a single number rather than a -/// distribution). Used for the accept/recall and false-positive trends. The y -/// range hugs the data so small changes between releases stay visible. +/// distribution). Used for the accept/recall, false-positive, panic, round-trip, +/// and contentious-recall trends. The y range hugs the data, clamped to 0..102, +/// so small changes between releases stay visible. #[must_use] pub fn pct_trend_lines( title: &str, @@ -540,6 +541,40 @@ pub fn pct_trend_lines( w: u32, h: u32, y_desc: &str, +) -> String { + linear_trend(title, series, w, h, y_desc, |ymin, ymax| { + let ylo = (ymin - 2.0).max(0.0); + let yhi = (ymax + 2.0).min(102.0).max(ylo + 1.0); + (ylo, yhi) + }) +} + +/// Count trend chart: like [`pct_trend_lines`] but on a linear axis anchored at +/// zero with no upper clamp, for absolute counts (such as accepted-statement +/// coverage) rather than rates. +#[must_use] +pub fn count_trend_lines( + title: &str, + series: &[TrendSeries], + w: u32, + h: u32, + y_desc: &str, +) -> String { + linear_trend(title, series, w, h, y_desc, |_ymin, ymax| { + (0.0, (ymax * 1.08).max(1.0)) + }) +} + +/// Shared linear-axis trend renderer. `y_bounds` maps the data's `(ymin, ymax)` +/// to the drawn `(ylo, yhi)`, the one axis difference between the rate and count +/// variants. Each point's `median` slot carries the value. 
+fn linear_trend( + title: &str, + series: &[TrendSeries], + w: u32, + h: u32, + y_desc: &str, + y_bounds: impl Fn(f64, f64) -> (f64, f64), ) -> String { let legend: Vec = series .iter() @@ -582,8 +617,7 @@ pub fn pct_trend_lines( } let xpad = ((xmax - xmin) * 0.08).max(0.08); let (xlo, xhi) = (xmin - xpad, xmax + xpad); - let ylo = (ymin - 2.0).max(0.0); - let yhi = (ymax + 2.0).min(102.0).max(ylo + 1.0); + let (ylo, yhi) = y_bounds(ymin, ymax); let mut chart = ChartBuilder::on(&plot) .caption(title, ("sans-serif", 16)) diff --git a/viz/src/lib.rs b/viz/src/lib.rs index 541d152..2bdb0cd 100644 --- a/viz/src/lib.rs +++ b/viz/src/lib.rs @@ -14,13 +14,13 @@ pub mod marker; pub mod schema; pub use chart::{ - box_lines, box_svg, ecdf_lines, ecdf_svg, mem_line, pct_trend_lines, trend_lines, year_frac, - Line, TrendSeries, + box_lines, box_svg, count_trend_lines, ecdf_lines, ecdf_svg, mem_line, pct_trend_lines, + trend_lines, year_frac, Line, TrendSeries, }; pub use color::{parser_hex, parser_rgb}; pub use marker::{marker_for, Marker}; pub use schema::{ - Bundle, CoverageFile, CoverageMatrix, DepthReport, DepthScan, DialectData, DialectRun, - FamilyHistory, FeatureCounts, FeatureScan, LintPolicy, MemDist, ParserBatch, ParserFailures, - ParserFeatures, ParserMem, ParserMetrics, ParserPerf, RuleMeta, VersionRun, + Bundle, CoverageFile, CoverageMatrix, DepthReport, DepthScan, DialectData, DialectDelta, + DialectRun, FamilyHistory, FeatureCounts, FeatureScan, LintPolicy, MemDist, ParserBatch, + ParserFailures, ParserFeatures, ParserMem, ParserMetrics, ParserPerf, RuleMeta, VersionRun, }; diff --git a/viz/src/schema.rs b/viz/src/schema.rs index dc39b63..1fef3ef 100644 --- a/viz/src/schema.rs +++ b/viz/src/schema.rs @@ -396,6 +396,28 @@ pub struct VersionRun { pub released: String, /// One entry per dialect this version models, in display order. pub dialects: Vec, + /// Per-dialect change in the accepted set versus the previous benchmarked + /// version of this family. 
Empty for the first version (no predecessor). + #[serde(default)] + pub deltas: Vec, +} + +/// How one version's accepted set changed from the previous version, in one +/// dialect. The counts are exact, the examples a small illustrative sample. +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct DialectDelta { + pub dir_name: String, + /// Statements this version accepts that the previous version rejected. + pub gained: usize, + /// Statements this version rejects that the previous version accepted (a + /// regression in raw coverage). + pub lost: usize, + /// A few newly accepted statements, for display. + #[serde(default)] + pub examples_gained: Vec, + /// A few newly rejected statements, for display. + #[serde(default)] + pub examples_lost: Vec, } /// One version's results in one dialect. The same per-parser shapes as the main diff --git a/web/assets/history.json.zst b/web/assets/history.json.zst index 7b9d789..85311a6 100644 Binary files a/web/assets/history.json.zst and b/web/assets/history.json.zst differ diff --git a/web/assets/main.css b/web/assets/main.css index 7ab6d6a..56c9b6e 100644 --- a/web/assets/main.css +++ b/web/assets/main.css @@ -534,6 +534,31 @@ main { display: block; } white-space: pre; } .badge-row .copy-btn { flex: none; } +/* Per-version coverage delta panel: which statements a release gained or lost. 
*/ +.version-delta { margin: 0.6rem 0 0.2rem; } +.version-delta-head { font-size: 0.95rem; margin: 0 0 0.2rem; } +.delta-row { padding: 0.5rem 0; border-top: 1px solid var(--line); } +.delta-head { display: flex; align-items: center; gap: 0.6rem; flex-wrap: wrap; } +.delta-dialect { font-weight: 600; } +.delta-gained { color: #1a7f37; font-variant-numeric: tabular-nums; } +.delta-lost { color: #c0202a; font-variant-numeric: tabular-nums; } +.delta-examples { display: flex; align-items: baseline; gap: 0.4rem; flex-wrap: wrap; margin-top: 0.35rem; } +.delta-tag { flex: none; font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.04em; color: var(--muted); } +.delta-tag-gained { color: #1a7f37; } +.delta-tag-lost { color: #c0202a; } +.delta-ex { + display: block; + width: 100%; + padding: 0.25rem 0.5rem; + border: 1px solid var(--line); + border-radius: 6px; + background: #fbfbfb; + color: #1f2328; + font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace; + font-size: 0.76rem; + overflow-x: auto; + white-space: pre; +} /* The parser's error message for a rejected statement, under its preview. 
*/ .fail-reason { margin: 0.2rem 0 0; diff --git a/web/src/components.rs b/web/src/components.rs index 2f85aa0..c19e7f3 100644 --- a/web/src/components.rs +++ b/web/src/components.rs @@ -351,7 +351,7 @@ pub fn Overview() -> Element { {rich_text(&format!("We evaluated nine parser libraries: [sqlparser-rs](https://github.com/sqlparser-rs/sqlparser-rs) (Apache DataFusion), [pg_query.rs](https://github.com/pganalyze/pg_query.rs) and its faster summary mode (Rust bindings to [libpg_query](https://github.com/pganalyze/libpg_query), PostgreSQL's own parser), [databend-common-ast](https://crates.io/crates/databend-common-ast), [polyglot-sql](https://github.com/tobilg/polyglot), [sqlglot-rust](https://crates.io/crates/sqlglot-rust), [qusql-parse](https://crates.io/crates/qusql-parse), [sqlite3-parser](https://crates.io/crates/sqlite3-parser) (lemon-rs), and [turso_parser](https://crates.io/crates/turso_parser) (the SQLite parser from Turso), plus [orql](https://codeberg.org/xitep/orql) on Oracle. We ran them against a corpus of 340,938 statements spanning these {} dialects, drawn from each engine's own regression suites and official samples and committed compressed so every run is reproducible.", b.dialects.len())).into_iter()} } p { class: "blurb", - {rich_text("We exercised each parser in the dialect that matches the corpus under test. Where a dialect has a runnable engine, we labelled each statement valid or invalid with the real database engine itself, run in Docker via [testcontainers](https://github.com/testcontainers/testcontainers-rs): a statement counts as valid unless the engine reports a syntax error, so a missing table or column still counts as parsed. Against that ground truth we scored the parsers on recall (valid statements accepted), false positives (invalid statements wrongly accepted), and display round-trip stability. The other dialects have no runnable engine, so their statements count as provenance-valid and the metric is simply the acceptance rate. 
Across all dialects, we captured speed as a per-statement parse-time distribution over every accepted statement, and memory as the peak and retained bytes per statement under a counting allocator. A batch axis additionally parses each parser's whole accepted set as a single script, showing what bulk parsing amortizes, and a time machine benchmarks the historical releases of every pure-Rust parser (59 versions in total, including every sqlparser-rs minor since January 2023), so each parser page also charts how coverage, speed, and memory evolved across releases.").into_iter()} + {rich_text("We exercised each parser in the dialect that matches the corpus under test. Where a dialect has a runnable engine, we labelled each statement valid or invalid with the real database engine itself, run in Docker via [testcontainers](https://github.com/testcontainers/testcontainers-rs): a statement counts as valid unless the engine reports a syntax error, so a missing table or column still counts as parsed. Against that ground truth we scored the parsers on recall (valid statements accepted), false positives (invalid statements wrongly accepted), and display round-trip stability. The other dialects have no runnable engine, so their statements count as provenance-valid and the metric is simply the acceptance rate. Across all dialects, we captured speed as a per-statement parse-time distribution over every accepted statement, and memory as the peak and retained bytes per statement under a counting allocator. 
A batch axis additionally parses each parser's whole accepted set as a single script, showing what bulk parsing amortizes, and a time machine benchmarks the historical releases of every pure-Rust parser (more than 80 versions in total, reaching back to sqlparser-rs 0.6 in mid-2020), so each parser page also charts how coverage, speed, memory, recall, round-trip, and panic rate evolved across releases, down to the exact statements each release gained or lost, and the section below compares the families against one another over calendar time.").into_iter()} } p { class: "blurb", {rich_text("On their home dialect the reference bindings are exact by construction, so the more telling comparison is among the pure-Rust parsers. There, [sqlparser-rs](https://github.com/sqlparser-rs/sqlparser-rs) is the most broadly capable, the permissive parsers such as [polyglot-sql](https://github.com/tobilg/polyglot) accept the most statements but pay for it with a high false-positive rate, and the stricter parsers reject more in exchange for precision. Speed spans more than an order of magnitude, from well under a microsecond per statement for the fastest parsers to the low single-digit microseconds for most, with [polyglot-sql](https://github.com/tobilg/polyglot) a clear outlier at roughly fifteen. No parser leads on every axis, so the right choice comes down to what a given project values most: broad coverage, few false positives, or raw speed.").into_iter()} @@ -370,6 +370,8 @@ pub fn Overview() -> Element { {score_leaderboard()} } + {cross_family_section()} + div { class: "section-head", h2 { Icon { width: 18, height: 18, fill: "currentColor".to_string(), class: "h2-ico".to_string(), icon: FaDatabase } @@ -437,6 +439,111 @@ pub fn Overview() -> Element { } } +/// Cross-family overlay: every family's trajectory on one chart, recall and +/// median parse time over calendar time, one line per family. Renders nothing +/// when the history is empty. 
+fn cross_family_section() -> Element { + let mut recall_series = Vec::new(); + let mut speed_series = Vec::new(); + for h in crate::data::all_histories() { + let rgb = parser_rgb(&h.family); + let mut recall_pts = Vec::new(); + let mut speed_pts = Vec::new(); + for v in &h.versions { + let Some(x) = viz::year_frac(&v.released) else { + continue; + }; + let recalls: Vec = v + .dialects + .iter() + .filter_map(|d| { + d.correctness.as_ref().and_then(|c| { + if d.has_reference { + c.recall_pct + } else { + c.accept_pct + } + }) + }) + .collect(); + if let Some(m) = mean_of(&recalls) { + recall_pts.push((x, m, m, m)); + } + let speeds: Vec = v + .dialects + .iter() + .filter_map(|d| d.perf.as_ref().map(|p| p.median)) + .collect(); + if let Some(md) = median_of(&speeds) { + speed_pts.push((x, md, md, md)); + } + } + if !recall_pts.is_empty() { + recall_series.push(viz::TrendSeries { + label: h.family.clone(), + rgb, + points: recall_pts, + }); + } + if !speed_pts.is_empty() { + speed_series.push(viz::TrendSeries { + label: h.family.clone(), + rgb, + points: speed_pts, + }); + } + } + if recall_series.is_empty() && speed_series.is_empty() { + return rsx! {}; + } + let recall_svg = viz::pct_trend_lines( + "Recall over time", + &recall_series, + 760, + 460, + "% accepted (mean over dialects)", + ); + let speed_svg = viz::trend_lines( + "Parse time over time", + &speed_series, + 760, + 460, + "ns / statement (median over dialects)", + ); + rsx! { + div { class: "section-head", + h2 { + Icon { width: 18, height: 18, fill: "currentColor".to_string(), class: "h2-ico".to_string(), icon: FaChartLine } + "Across parsers over time" + } + } + div { class: "block", + p { class: "table-cap", + "Each pure-Rust family's trajectory on one chart, placed at each release's date, one line per family. Recall is averaged over the dialects a family models (acceptance rate where there is no reference engine), and parse time is the median over those dialects on a log scale. 
This is a coarse cross-family view: the per-parser pages break each line down by dialect." + } + div { class: "charts", + {chart_figure("chart-xfam-recall", &recall_svg, "Recall over calendar time, one line per parser family.", "Mean recall by release date, one line per family. Higher is better.", "cross-family-recall")} + {chart_figure("chart-xfam-speed", &speed_svg, "Median parse time over calendar time, one line per parser family.", "Median parse time by release date, one line per family (log scale). Lower is faster.", "cross-family-speed")} + } + } + } +} + +/// Mean of a slice, or `None` when empty. +fn mean_of(xs: &[f64]) -> Option { + (!xs.is_empty()).then(|| xs.iter().sum::() / xs.len() as f64) +} + +/// Median of a slice (upper-middle for even lengths), or `None` when empty. +fn median_of(xs: &[f64]) -> Option { + if xs.is_empty() { + return None; + } + let mut v = xs.to_vec(); + v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + Some(v[v.len() / 2]) +} + /// Browser script that serializes the inline `` inside a figure and saves /// it verbatim as an `.svg` file. `__FIG__`/`__NAME__` are replaced with the /// figure id and download base name (both app-controlled slugs). @@ -797,6 +904,62 @@ pub fn ParserView(name: String) -> Element { } } +/// Panel showing exactly which statements the selected version gained or lost +/// versus the previous release, per dialect, with a few example statements. +fn version_delta_panel( + version: &str, + deltas: &[viz::DialectDelta], + dialects: &[(String, String)], +) -> Element { + let display = |dir: &str| { + dialects + .iter() + .find(|(d, _)| d == dir) + .map_or_else(|| dir.to_string(), |(_, n)| n.clone()) + }; + let rows: Vec<(String, &viz::DialectDelta)> = deltas + .iter() + .filter(|d| d.gained > 0 || d.lost > 0) + .map(|d| (display(&d.dir_name), d)) + .collect(); + if rows.is_empty() { + return rsx! {}; + } + rsx! 
{ + div { class: "version-delta", + h3 { class: "version-delta-head", "What {version} changed from the previous version" } + p { class: "table-cap", + "Statements this version newly accepts (gained) or no longer accepts (regressed) versus the previous benchmarked release, per dialect, with a few examples. Counts are exact." + } + for (name , d) in rows { + div { class: "delta-row", key: "{name}", + div { class: "delta-head", + span { class: "delta-dialect", "{name}" } + span { class: "delta-gained", "{d.gained} gained" } + span { class: "delta-lost", "{d.lost} lost" } + } + if !d.examples_gained.is_empty() { + div { class: "delta-examples", + span { class: "delta-tag delta-tag-gained", "newly parsed" } + for (i , ex) in d.examples_gained.iter().enumerate() { + code { key: "g{i}", class: "delta-ex", "{ex}" } + } + } + } + if !d.examples_lost.is_empty() { + div { class: "delta-examples", + span { class: "delta-tag delta-tag-lost", "regressed" } + for (i , ex) in d.examples_lost.iter().enumerate() { + code { key: "l{i}", class: "delta-ex", "{ex}" } + } + } + } + } + } + } + } +} + /// The "Across versions" section: time and memory trends over the benchmarked /// versions of this family (median with an interquartile bar, one line per /// dialect), plus a selector that shows the chosen version's full per-dialect @@ -829,12 +992,20 @@ fn VersionHistory(parser: String) -> Element { let mut peak_series = Vec::new(); let mut recall_series = Vec::new(); let mut fp_series = Vec::new(); + let mut coverage_series = Vec::new(); + let mut roundtrip_series = Vec::new(); + let mut panic_series = Vec::new(); + let mut contentious_series = Vec::new(); for (dir, name) in &dialects { let rgb = brand(dir).accent_rgb; let mut time_points = Vec::new(); let mut peak_points = Vec::new(); let mut recall_points = Vec::new(); let mut fp_points = Vec::new(); + let mut coverage_points = Vec::new(); + let mut roundtrip_points = Vec::new(); + let mut panic_points = Vec::new(); + let mut 
contentious_points = Vec::new(); for v in &hist.versions { let Some(x) = viz::year_frac(&v.released) else { continue; @@ -860,31 +1031,44 @@ fn VersionHistory(parser: String) -> Element { if let Some(fp) = c.false_positive_pct { fp_points.push((x, fp, fp, fp)); } + // Capability growth: the raw count of valid statements accepted. + let acc = c.accepted_valid as f64; + coverage_points.push((x, acc, acc, acc)); + if let Some(rt) = c.roundtrip_pct { + roundtrip_points.push((x, rt, rt, rt)); + } + if let Some(pp) = c.panic_pct { + panic_points.push((x, pp, pp, pp)); + } + if let Some(rc) = c.recall_excl_contentious_pct { + contentious_points.push((x, rc, rc, rc)); + } } } - time_series.push(viz::TrendSeries { - label: name.clone(), - rgb, - points: time_points, - }); - peak_series.push(viz::TrendSeries { + let series = |points| viz::TrendSeries { label: name.clone(), rgb, - points: peak_points, - }); + points, + }; + time_series.push(series(time_points)); + peak_series.push(series(peak_points)); if !recall_points.is_empty() { - recall_series.push(viz::TrendSeries { - label: name.clone(), - rgb, - points: recall_points, - }); + recall_series.push(series(recall_points)); } if !fp_points.is_empty() { - fp_series.push(viz::TrendSeries { - label: name.clone(), - rgb, - points: fp_points, - }); + fp_series.push(series(fp_points)); + } + if coverage_points.iter().any(|&(_, v, ..)| v > 0.0) { + coverage_series.push(series(coverage_points)); + } + if !roundtrip_points.is_empty() { + roundtrip_series.push(series(roundtrip_points)); + } + if panic_points.iter().any(|&(_, v, ..)| v > 0.0) { + panic_series.push(series(panic_points)); + } + if !contentious_points.is_empty() { + contentious_series.push(series(contentious_points)); } } let time_trend = viz::trend_lines( @@ -920,6 +1104,44 @@ fn VersionHistory(parser: String) -> Element { } else { String::new() }; + // Optional trends, rendered only where the snapshot carries the data. 
+ let opt_trend = |series: &[viz::TrendSeries], title: String, y: &str, pct: bool| { + if series.is_empty() { + return None; + } + Some(if pct { + viz::pct_trend_lines(&title, series, 760, 460, y) + } else { + viz::count_trend_lines(&title, series, 760, 460, y) + }) + }; + let coverage_trend = opt_trend( + &coverage_series, + format!("{parser} coverage"), + "valid accepted", + false, + ); + let roundtrip_trend = opt_trend( + &roundtrip_series, + format!("{parser} round-trip"), + "% round-trip ok", + true, + ); + let panic_trend = opt_trend( + &panic_series, + format!("{parser} panics"), + "% panicked", + true, + ); + let contentious_trend = opt_trend( + &contentious_series, + format!("{parser} recall excl. contentious"), + "% accepted", + true, + ); + + // Coverage delta for the selected version versus its predecessor. + let sel_delta = hist.versions[sel].deltas.clone(); // Selected version: full per-dialect charts and a results table. let run = &hist.versions[sel]; @@ -1041,15 +1263,27 @@ fn VersionHistory(parser: String) -> Element { "Across versions" } p { class: "table-cap", - "How {parser} changed across releases, each version placed at its release date. For time and memory each point is the median over a dialect's accepted statements with an interquartile (p25 to p75) bar on a log scale, so the heavily right-skewed tails do not distort it: lower is faster and leaner. The quality trends show the share of expected statements accepted (recall where a reference engine exists, acceptance rate elsewhere) and, on reference dialects, the share of invalid statements wrongly accepted (lower is better). Pick a version to see its full charts and results below." + "How {parser} changed across releases, each version placed at its release date. For time and memory each point is the median over a dialect's accepted statements with an interquartile (p25 to p75) bar on a log scale, so the heavily right-skewed tails do not distort it: lower is faster and leaner. 
Coverage is the raw count of valid statements accepted, and the quality trends show recall (acceptance rate where there is no reference engine), recall excluding contentious constructs, round-trip stability, the share of invalid statements wrongly accepted (lower is better), and the empirical panic rate. Pick a version to see its full charts, its results, and exactly which statements it gained or lost versus the previous release." } div { class: "charts", {chart_figure(&format!("chart-{pslug}-time-trend"), &time_trend, &format!("Parse-time trend for {parser} across releases, one line per dialect."), "Median parse time by release date, one line per dialect (log scale, interquartile bars).", &format!("{pslug}-time-trend"))} {chart_figure(&format!("chart-{pslug}-mem-trend"), &peak_trend, &format!("Peak-memory trend for {parser} across releases, one line per dialect."), "Median peak memory by release date, one line per dialect (log scale, interquartile bars).", &format!("{pslug}-mem-trend"))} + if let Some(t) = &coverage_trend { + {chart_figure(&format!("chart-{pslug}-coverage-trend"), t, &format!("Coverage trend for {parser} across releases, one line per dialect."), "Count of valid statements accepted by release date, one line per dialect (linear scale). Higher is broader coverage.", &format!("{pslug}-coverage-trend"))} + } {chart_figure(&format!("chart-{pslug}-recall-trend"), &recall_trend, &format!("Accept and recall trend for {parser} across releases, one line per dialect."), "Share of expected statements accepted by release date (recall on reference dialects, acceptance rate elsewhere). Higher is better.", &format!("{pslug}-recall-trend"))} + if let Some(t) = &contentious_trend { + {chart_figure(&format!("chart-{pslug}-contentious-trend"), t, &format!("Recall excluding contentious constructs for {parser} across releases, one line per reference dialect."), "Recall over the non-contentious valid statements, by release date. 
Higher is better.", &format!("{pslug}-contentious-trend"))} + } + if let Some(t) = &roundtrip_trend { + {chart_figure(&format!("chart-{pslug}-roundtrip-trend"), t, &format!("Round-trip stability for {parser} across releases, one line per dialect."), "Share of accepted statements that reprint stably, by release date. Higher is better.", &format!("{pslug}-roundtrip-trend"))} + } if has_fp { {chart_figure(&format!("chart-{pslug}-fp-trend"), &fp_trend, &format!("False-positive trend for {parser} across releases, one line per reference dialect."), "Share of reference-invalid statements wrongly accepted, by release date. Lower is better.", &format!("{pslug}-fp-trend"))} } + if let Some(t) = &panic_trend { + {chart_figure(&format!("chart-{pslug}-panic-trend"), t, &format!("Empirical panic rate for {parser} across releases, one line per dialect."), "Share of statements that panicked the parser, by release date. Lower is better.", &format!("{pslug}-panic-trend"))} + } } div { class: "version-picker", span { class: "version-picker-label", "version" } @@ -1061,6 +1295,7 @@ fn VersionHistory(parser: String) -> Element { } } } + {version_delta_panel(&sel_version, &sel_delta, &dialects)} div { class: "charts", if has_time { {chart_figure(&format!("chart-{pslug}-{vslug}-ecdf"), &sel_ecdf, &format!("Parse-time eCDF for {parser} {sel_version}, one curve per dialect."), "Per-statement parse time for the selected version, one curve per dialect (log scale).", &format!("{pslug}-{vslug}-ecdf"))} diff --git a/web/src/data.rs b/web/src/data.rs index c617e06..920d285 100644 --- a/web/src/data.rs +++ b/web/src/data.rs @@ -52,6 +52,12 @@ pub fn history(family: &str) -> Option<&'static FamilyHistory> { histories().iter().find(|h| h.family == family) } +/// Every family's version history, for the cross-family overlay. +#[must_use] +pub fn all_histories() -> &'static [FamilyHistory] { + histories() +} + /// The static source-feature scan (parsed once). 
fn featurescan() -> &'static FeatureScan { static CACHE: OnceLock = OnceLock::new();