Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ Every version implements the `sql_ast_benchmark::Parser` trait (the same trait `

A new family is a new `families/<name>.rs` with its own adapter (each library has a different parse API) plus its aliases and registry entries.

Both runner passes are resumable family by family on a `--full` run. The memory pass writes `target/timemachine/<family>.mem.json` and skips families that already have one. The timing pass writes `target/timemachine/<family>.timing.json` and reuses it when it is no older than the memory sidecar, so a refreshed memory pass invalidates a stale timing checkpoint automatically. An interruption resumes at the next family (a family interrupted partway is recomputed from its start). Delete `target/timemachine/` for a from-scratch run.

To correct only a few versions (for example after fixing the dialect mapping for some old releases) without redoing the whole history, use the incremental refresh. It reads the committed `history.json.zst`, recomputes only the listed versions, reuses every other version verbatim, and recomputes the family's deltas from freshly determined accepted sets, so the result matches a full re-run without re-measuring unchanged points:

```bash
cargo run --release -p timemachine --bin timemachine-mem -- --refresh sqlparser-rs:0.8.0,0.14.0
cargo run --release -p timemachine --bin timemachine -- --refresh sqlparser-rs:0.8.0,0.14.0
```

Run the memory refresh first (it rewrites only those versions' entries in the memory sidecar), then the timing refresh (which merges them and writes `history.json.zst`).

## Coverage

```bash
Expand Down
24 changes: 24 additions & 0 deletions timemachine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,30 @@ zstd = "0.13"
# sqlparser-rs, the latest patch of every minor the shared adapter compiles
# against. Each rename pins one release; cargo keeps them side by side because
# different 0.x minors are semver-incompatible.
sqlparser_v0_6 = { package = "sqlparser", version = "=0.6.1" }
sqlparser_v0_7 = { package = "sqlparser", version = "=0.7.0" }
sqlparser_v0_8 = { package = "sqlparser", version = "=0.8.0" }
sqlparser_v0_9 = { package = "sqlparser", version = "=0.9.0" }
sqlparser_v0_10 = { package = "sqlparser", version = "=0.10.0" }
sqlparser_v0_11 = { package = "sqlparser", version = "=0.11.0" }
sqlparser_v0_12 = { package = "sqlparser", version = "=0.12.0" }
sqlparser_v0_13 = { package = "sqlparser", version = "=0.13.0" }
sqlparser_v0_14 = { package = "sqlparser", version = "=0.14.0" }
sqlparser_v0_15 = { package = "sqlparser", version = "=0.15.0" }
sqlparser_v0_16 = { package = "sqlparser", version = "=0.16.0" }
sqlparser_v0_17 = { package = "sqlparser", version = "=0.17.0" }
sqlparser_v0_18 = { package = "sqlparser", version = "=0.18.0" }
sqlparser_v0_19 = { package = "sqlparser", version = "=0.19.0" }
sqlparser_v0_20 = { package = "sqlparser", version = "=0.20.0" }
sqlparser_v0_21 = { package = "sqlparser", version = "=0.21.0" }
sqlparser_v0_22 = { package = "sqlparser", version = "=0.22.0" }
sqlparser_v0_23 = { package = "sqlparser", version = "=0.23.0" }
sqlparser_v0_24 = { package = "sqlparser", version = "=0.24.0" }
sqlparser_v0_25 = { package = "sqlparser", version = "=0.25.0" }
sqlparser_v0_26 = { package = "sqlparser", version = "=0.26.0" }
sqlparser_v0_27 = { package = "sqlparser", version = "=0.27.0" }
sqlparser_v0_28 = { package = "sqlparser", version = "=0.28.0" }
sqlparser_v0_29 = { package = "sqlparser", version = "=0.29.0" }
sqlparser_v0_30 = { package = "sqlparser", version = "=0.30.0" }
sqlparser_v0_31 = { package = "sqlparser", version = "=0.31.0" }
sqlparser_v0_32 = { package = "sqlparser", version = "=0.32.0" }
Expand Down
19 changes: 15 additions & 4 deletions timemachine/src/bin/timemachine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ fn main() {
eprintln!("ERROR: could not prepare datasets/: {e}");
std::process::exit(1);
}
let full = std::env::args().any(|a| a == "--full");
if !full {
let args: Vec<String> = std::env::args().collect();
let refresh = timemachine::run::parse_refresh(&args);
let full = args.iter().any(|a| a == "--full");
if refresh.is_none() && !full {
eprintln!(
"(smoke run: first {} statements per dialect; pass --full for the whole corpus)",
timemachine::run::SMOKE_LIMIT
Expand All @@ -32,8 +34,17 @@ fn main() {
.stack_size(WORKER_STACK)
.spawn(move || {
let versions = timemachine::registry::all();
let written = timemachine::run::run_timing(&versions, full);
eprintln!("history written for: {written:?}");
if let Some((family, vers)) = refresh {
eprintln!("refreshing {family} versions {vers:?}");
if let Err(e) = timemachine::run::run_refresh(&versions, &family, &vers) {
eprintln!("ERROR: {e}");
std::process::exit(1);
}
eprintln!("refreshed {family}");
} else {
let written = timemachine::run::run_timing(&versions, full);
eprintln!("history written for: {written:?}");
}
})
.expect("spawn worker")
.join()
Expand Down
14 changes: 11 additions & 3 deletions timemachine/src/bin/timemachine_mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@ fn main() {
eprintln!("ERROR: could not prepare datasets/: {e}");
std::process::exit(1);
}
let full = std::env::args().any(|a| a == "--full");
if !full {
let args: Vec<String> = std::env::args().collect();
let refresh = timemachine::run::parse_refresh(&args);
let full = args.iter().any(|a| a == "--full");
if refresh.is_none() && !full {
eprintln!(
"(smoke run: first {} statements per dialect; pass --full for the whole corpus)",
timemachine::run::SMOKE_LIMIT
Expand All @@ -60,7 +62,13 @@ fn main() {
.stack_size(WORKER_STACK)
.spawn(move || {
let versions = timemachine::registry::all();
timemachine::run::run_memory(&versions, full);
if let Some((family, vers)) = refresh {
eprintln!("refreshing memory for {family} versions {vers:?}");
timemachine::run::run_memory_refresh(&versions, &family, &vers);
eprintln!("memory refreshed for {family}");
} else {
timemachine::run::run_memory(&versions, full);
}
})
.expect("spawn worker")
.join()
Expand Down
16 changes: 16 additions & 0 deletions timemachine/src/families/databend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,22 @@ macro_rules! databend_version {
}

impl Parser for $name {
// A caught panic reaches this method as `Err("panicked")` (that is how the
// adapters report one); give it its own outcome, distinct from an ordinary
// rejection, so `grade_chunk` can count panics for each release.
fn parse_outcome(
    &self,
    sql: &str,
    dialect: Dialect,
) -> sql_ast_benchmark::ParseOutcome {
    use sql_ast_benchmark::ParseOutcome;
    // `None` from `try_parse` is reported as `Unsupported`.
    self.try_parse(sql, dialect)
        .map_or(ParseOutcome::Unsupported, |parsed| match parsed {
            Ok(()) => ParseOutcome::Accepted,
            Err(reason) if reason == "panicked" => ParseOutcome::Panicked(reason),
            Err(reason) => ParseOutcome::Rejected(reason),
        })
}

fn id(&self) -> ParserId {
ParserId {
family: "databend-common-ast",
Expand Down
16 changes: 16 additions & 0 deletions timemachine/src/families/orql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@ macro_rules! orql_version {
pub struct $name;

impl Parser for $name {
// A caught panic reaches this method as `Err("panicked")` (that is how the
// adapters report one); give it its own outcome, distinct from an ordinary
// rejection, so `grade_chunk` can count panics for each release.
fn parse_outcome(
    &self,
    sql: &str,
    dialect: Dialect,
) -> sql_ast_benchmark::ParseOutcome {
    use sql_ast_benchmark::ParseOutcome;
    // `None` from `try_parse` is reported as `Unsupported`.
    self.try_parse(sql, dialect)
        .map_or(ParseOutcome::Unsupported, |parsed| match parsed {
            Ok(()) => ParseOutcome::Accepted,
            Err(reason) if reason == "panicked" => ParseOutcome::Panicked(reason),
            Err(reason) => ParseOutcome::Rejected(reason),
        })
}

fn id(&self) -> ParserId {
ParserId {
family: "orql",
Expand Down
16 changes: 16 additions & 0 deletions timemachine/src/families/polyglot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,22 @@ macro_rules! polyglot_version {
}

impl Parser for $name {
// A caught panic reaches this method as `Err("panicked")` (that is how the
// adapters report one); give it its own outcome, distinct from an ordinary
// rejection, so `grade_chunk` can count panics for each release.
fn parse_outcome(
    &self,
    sql: &str,
    dialect: Dialect,
) -> sql_ast_benchmark::ParseOutcome {
    use sql_ast_benchmark::ParseOutcome;
    // `None` from `try_parse` is reported as `Unsupported`.
    self.try_parse(sql, dialect)
        .map_or(ParseOutcome::Unsupported, |parsed| match parsed {
            Ok(()) => ParseOutcome::Accepted,
            Err(reason) if reason == "panicked" => ParseOutcome::Panicked(reason),
            Err(reason) => ParseOutcome::Rejected(reason),
        })
}

fn id(&self) -> ParserId {
ParserId {
family: "polyglot-sql",
Expand Down
16 changes: 16 additions & 0 deletions timemachine/src/families/qusql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,22 @@ macro_rules! qusql_version {
}

impl Parser for $name {
// A caught panic reaches this method as `Err("panicked")` (that is how the
// adapters report one); give it its own outcome, distinct from an ordinary
// rejection, so `grade_chunk` can count panics for each release.
fn parse_outcome(
    &self,
    sql: &str,
    dialect: Dialect,
) -> sql_ast_benchmark::ParseOutcome {
    use sql_ast_benchmark::ParseOutcome;
    // `None` from `try_parse` is reported as `Unsupported`.
    self.try_parse(sql, dialect)
        .map_or(ParseOutcome::Unsupported, |parsed| match parsed {
            Ok(()) => ParseOutcome::Accepted,
            Err(reason) if reason == "panicked" => ParseOutcome::Panicked(reason),
            Err(reason) => ParseOutcome::Rejected(reason),
        })
}

fn id(&self) -> ParserId {
ParserId {
family: "qusql-parse",
Expand Down
16 changes: 16 additions & 0 deletions timemachine/src/families/sqlglot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,22 @@ macro_rules! sqlglot_version {
}

impl Parser for $name {
// A caught panic reaches this method as `Err("panicked")` (that is how the
// adapters report one); give it its own outcome, distinct from an ordinary
// rejection, so `grade_chunk` can count panics for each release.
fn parse_outcome(
    &self,
    sql: &str,
    dialect: Dialect,
) -> sql_ast_benchmark::ParseOutcome {
    use sql_ast_benchmark::ParseOutcome;
    // `None` from `try_parse` is reported as `Unsupported`.
    self.try_parse(sql, dialect)
        .map_or(ParseOutcome::Unsupported, |parsed| match parsed {
            Ok(()) => ParseOutcome::Accepted,
            Err(reason) if reason == "panicked" => ParseOutcome::Panicked(reason),
            Err(reason) => ParseOutcome::Rejected(reason),
        })
}

fn id(&self) -> ParserId {
ParserId {
family: "sqlglot-rust",
Expand Down
16 changes: 16 additions & 0 deletions timemachine/src/families/sqlite3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@ macro_rules! sqlite3_version {
pub struct $name;

impl Parser for $name {
// A caught panic reaches this method as `Err("panicked")` (that is how the
// adapters report one); give it its own outcome, distinct from an ordinary
// rejection, so `grade_chunk` can count panics for each release.
fn parse_outcome(
    &self,
    sql: &str,
    dialect: Dialect,
) -> sql_ast_benchmark::ParseOutcome {
    use sql_ast_benchmark::ParseOutcome;
    // `None` from `try_parse` is reported as `Unsupported`.
    self.try_parse(sql, dialect)
        .map_or(ParseOutcome::Unsupported, |parsed| match parsed {
            Ok(()) => ParseOutcome::Accepted,
            Err(reason) if reason == "panicked" => ParseOutcome::Panicked(reason),
            Err(reason) => ParseOutcome::Rejected(reason),
        })
}

fn id(&self) -> ParserId {
ParserId {
family: "sqlite3-parser",
Expand Down
93 changes: 83 additions & 10 deletions timemachine/src/families/sqlparser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,47 @@ use sql_ast_benchmark::{Parser, ParserId};
/// back to `GenericDialect` for the rest, so the same code compiles against each
/// version and the trend stays internally consistent.
macro_rules! sqlparser_version {
// Full dialect set, for releases that model every dialect we map (0.18+).
($name:ident, $cr:ident, $ver:literal, $released:literal) => {
sqlparser_version!($name, $cr, $ver, $released, [
Postgresql => PostgreSqlDialect,
Mysql => MySqlDialect,
Sqlite => SQLiteDialect,
Clickhouse => ClickHouseDialect,
Hive => HiveDialect,
Tsql => MsSqlDialect,
Bigquery => BigQueryDialect,
]);
};
// Explicit dialect arms, for older releases that predate some dialects (SQLite
// arrived in 0.7, Hive in 0.8, ClickHouse in 0.14, BigQuery in 0.18). Any
// dialect not listed falls back to the generic dialect, the same approach the
// newest versions use for dialects they do not model.
($name:ident, $cr:ident, $ver:literal, $released:literal, [$($variant:ident => $dia:ident),* $(,)?]) => {
pub struct $name;

impl $name {
fn dialect(d: Dialect) -> Box<dyn $cr::dialect::Dialect> {
match d {
Dialect::Postgresql => Box::new($cr::dialect::PostgreSqlDialect {}),
Dialect::Mysql => Box::new($cr::dialect::MySqlDialect {}),
Dialect::Sqlite => Box::new($cr::dialect::SQLiteDialect {}),
Dialect::Clickhouse => Box::new($cr::dialect::ClickHouseDialect {}),
Dialect::Hive => Box::new($cr::dialect::HiveDialect {}),
Dialect::Tsql => Box::new($cr::dialect::MsSqlDialect {}),
Dialect::Bigquery => Box::new($cr::dialect::BigQueryDialect {}),
// Oracle, DuckDB, Redshift, Spark, Trino and Multi did not all
// exist as dedicated dialects across these releases, so use the
// generic dialect uniformly for them.
$( Dialect::$variant => Box::new($cr::dialect::$dia {}), )*
_ => Box::new($cr::dialect::GenericDialect {}),
}
}
}

impl Parser for $name {
// A caught panic reaches this method as `Err("panicked")` (that is how the
// adapters report one); give it its own outcome, distinct from an ordinary
// rejection, so `grade_chunk` can count panics for each release.
fn parse_outcome(&self, sql: &str, dialect: Dialect) -> sql_ast_benchmark::ParseOutcome {
    use sql_ast_benchmark::ParseOutcome;
    // `None` from `try_parse` is reported as `Unsupported`.
    self.try_parse(sql, dialect)
        .map_or(ParseOutcome::Unsupported, |parsed| match parsed {
            Ok(()) => ParseOutcome::Accepted,
            Err(reason) if reason == "panicked" => ParseOutcome::Panicked(reason),
            Err(reason) => ParseOutcome::Rejected(reason),
        })
}

fn id(&self) -> ParserId {
ParserId {
family: "sqlparser-rs",
Expand Down Expand Up @@ -114,6 +133,33 @@ macro_rules! sqlparser_version {
};
}

// Older releases, with the reduced dialect sets of their era.
sqlparser_version!(SqlparserV0_6, sqlparser_v0_6, "0.6.1", "2020-07-20", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_7, sqlparser_v0_7, "0.7.0", "2020-12-28", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_8, sqlparser_v0_8, "0.8.0", "2021-02-09", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_9, sqlparser_v0_9, "0.9.0", "2021-03-21", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_10, sqlparser_v0_10, "0.10.0", "2021-08-23", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_11, sqlparser_v0_11, "0.11.0", "2021-09-25", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_12, sqlparser_v0_12, "0.12.0", "2021-10-14", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_13, sqlparser_v0_13, "0.13.0", "2021-12-10", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_14, sqlparser_v0_14, "0.14.0", "2022-02-09", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Clickhouse => ClickHouseDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_15, sqlparser_v0_15, "0.15.0", "2022-03-08", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Clickhouse => ClickHouseDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_16, sqlparser_v0_16, "0.16.0", "2022-04-03", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Clickhouse => ClickHouseDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
sqlparser_version!(SqlparserV0_17, sqlparser_v0_17, "0.17.0", "2022-05-10", [Postgresql => PostgreSqlDialect, Mysql => MySqlDialect, Sqlite => SQLiteDialect, Clickhouse => ClickHouseDialect, Hive => HiveDialect, Tsql => MsSqlDialect]);
// 0.18 onward model every dialect we map (BigQuery landed in 0.18), so they use
// the full-set form.
sqlparser_version!(SqlparserV0_18, sqlparser_v0_18, "0.18.0", "2022-06-06");
sqlparser_version!(SqlparserV0_19, sqlparser_v0_19, "0.19.0", "2022-07-28");
sqlparser_version!(SqlparserV0_20, sqlparser_v0_20, "0.20.0", "2022-08-05");
sqlparser_version!(SqlparserV0_21, sqlparser_v0_21, "0.21.0", "2022-08-18");
sqlparser_version!(SqlparserV0_22, sqlparser_v0_22, "0.22.0", "2022-08-26");
sqlparser_version!(SqlparserV0_23, sqlparser_v0_23, "0.23.0", "2022-09-08");
sqlparser_version!(SqlparserV0_24, sqlparser_v0_24, "0.24.0", "2022-09-28");
sqlparser_version!(SqlparserV0_25, sqlparser_v0_25, "0.25.0", "2022-10-03");
sqlparser_version!(SqlparserV0_26, sqlparser_v0_26, "0.26.0", "2022-10-19");
sqlparser_version!(SqlparserV0_27, sqlparser_v0_27, "0.27.0", "2022-11-11");
sqlparser_version!(SqlparserV0_28, sqlparser_v0_28, "0.28.0", "2022-12-05");
sqlparser_version!(SqlparserV0_29, sqlparser_v0_29, "0.29.0", "2022-12-29");
sqlparser_version!(SqlparserV0_30, sqlparser_v0_30, "0.30.0", "2023-01-02");
sqlparser_version!(SqlparserV0_31, sqlparser_v0_31, "0.31.0", "2023-03-01");
sqlparser_version!(SqlparserV0_32, sqlparser_v0_32, "0.32.0", "2023-03-06");
Expand Down Expand Up @@ -147,3 +193,30 @@ sqlparser_version!(SqlparserV0_59, sqlparser_v0_59, "0.59.0", "2025-09-24");
sqlparser_version!(SqlparserV0_60, sqlparser_v0_60, "0.60.0", "2025-12-07");
sqlparser_version!(SqlparserV0_61, sqlparser_v0_61, "0.61.0", "2026-02-10");
sqlparser_version!(SqlparserV0_62, sqlparser_v0_62, "0.62.0", "2026-05-07");

#[cfg(test)]
mod tests {
    use super::*;

    /// Compiling is not enough: the adapters generated with explicit (reduced)
    /// dialect lists, plus one full-set adapter, must accept a trivial query at
    /// run time. The sample covers each distinct dialect list used above
    /// (0.6, 0.7, 0.8-0.13, 0.14-0.17) and one full-set release (0.29).
    #[test]
    fn old_adapters_parse_basic_select() {
        const BASIC_SELECT: &str = "SELECT a, b FROM t WHERE a > 1";

        let adapters: Vec<Box<dyn Parser>> = vec![
            Box::new(SqlparserV0_6),
            Box::new(SqlparserV0_7),
            Box::new(SqlparserV0_9),
            Box::new(SqlparserV0_16),
            Box::new(SqlparserV0_29),
        ];

        for adapter in adapters {
            let version = adapter.id().version;
            let outcome = adapter.try_parse(BASIC_SELECT, Dialect::Postgresql);
            assert_eq!(
                outcome,
                Some(Ok(())),
                "sqlparser {version} should parse a basic SELECT"
            );
        }
    }
}
Loading
Loading