From 5257b0d3bdad5dda06fce73d3b429e79bb854762 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 02:02:34 +0000 Subject: [PATCH 1/5] expr: mark timestamp/date + interval as non-monotone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `Interval` is lex-ordered by (months, days, micros), but adding an interval to a timestamp or date adds *calendar* months with day-clamping and then days as 24-hour periods. That arithmetic does not respect the lex order: t = 2024-01-31 i1 = {0 months, 31 days} → t + i1 = 2024-03-02 i2 = {1 month, 0 days} → t + i2 = 2024-02-29 In lex order `i1 < i2`, yet `t + i1 > t + i2`. For timestamps, the day-clamping also collapses near-boundary inputs into the same date while preserving sub-day time, so the first argument is non-monotone too: t1 = 2024-01-30 23:59:59, i = {1 month} → 2024-02-29 23:59:59 t2 = 2024-01-31 00:00:00, i = {1 month} → 2024-02-29 00:00:00 For dates the first argument *is* monotone (no sub-day precision means clamping only collapses, never reverses), but the interval argument has the same problem. These annotations are consumed by the abstract interpreter that drives persist filter pushdown. Marking these functions monotone meant the interpreter computed the output range by evaluating the function only at the endpoints of the input interval range — and a stats range like `[{0m,31d}, {1m,0d}]` would yield the narrow output range `[2024-02-29, 2024-03-02]` even though interior intervals (e.g. `{0m, 60d}`) actually produce timestamps far outside that window. Filter pushdown could then incorrectly conclude a part had no matching rows, tripping the `persist filter pushdown correctness violation!` audit in `persist_source.rs`. Companion fix to b6079939c1, which corrected the analogous annotations for `add_time_interval`, `sub_time_interval`, `mul_interval`, and `div_interval`. Fixes database-issues#9656. --- src/expr/src/interpret.rs | 72 +++++++++++++++++++++++++++++++++++++ src/expr/src/scalar/func.rs | 25 +++++++++---- 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/src/expr/src/interpret.rs b/src/expr/src/interpret.rs index 737464a9407a6..ca1752c4e1ab8 100644 --- a/src/expr/src/interpret.rs +++ b/src/expr/src/interpret.rs @@ -1763,6 +1763,78 @@ mod tests { assert!(range_out.may_contain(Datum::Null)); } + /// Regression test for database-issues#9656. + /// + /// Adding an `Interval` to a `Timestamp` is non-monotone in the interval + /// argument: the lex order of intervals (months, days, micros) does not + /// respect calendar-month arithmetic with day-clamping. The interpreter + /// must therefore not assume monotonicity, otherwise persist filter + /// pushdown can incorrectly conclude that a part has no matching rows. + #[mz_ore::test] + #[cfg_attr(miri, ignore)] + fn test_add_timestamp_interval_non_monotone() { + use chrono::NaiveDateTime; + use mz_repr::adt::interval::Interval; + use mz_repr::adt::timestamp::CheckedTimestamp; + use mz_repr::{Datum, Row}; + + let arena = RowArena::new(); + + // The setup: a timestamp literal `t = 2024-01-31 00:00:00`, and an + // interval column whose stats-range spans + // `[{0 months, 31 days, 0 us}, {1 month, 0 days, 0 us}]`. In lex order, + // the 31-day interval is the lower bound and the 1-month interval is + // the upper bound. The function values at the endpoints are: + // t + {0,31,0} = 2024-03-02 + // t + {1, 0,0} = 2024-02-29 + // But an *interior* interval like {0, 60, 0} maps to 2024-03-31, which + // lies far outside `[Feb 29, Mar 2]`. Under the (incorrect) monotone + // assumption, the interpreter would conclude the output is in that + // narrow window, and rule out predicates like `>= 2024-03-15`. + let ts_lit = |s: &str| { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(), + )); + MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into()) + }; + let interval = |months: i32, days: i32, micros: i64| { + Datum::Interval(Interval { + months, + days, + micros, + }) + }; + + // Expression: `(timestamp_lit + interval_col) >= 2024-03-15`. + let expr = ts_lit("2024-01-31T00:00:00") + .call_binary(MirScalarExpr::column(0), AddTimestampInterval) + .call_binary(ts_lit("2024-03-15T00:00:00"), Gte); + + let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]); + let mut interpreter = ColumnSpecs::new(&relation, &arena); + interpreter.push_column( + 0, + ResultSpec::value_between(interval(0, 31, 0), interval(1, 0, 0)), + ); + + let range_out = interpreter.expr(&expr).range; + // The actual data may include e.g. `{0, 60, 0}` → 2024-03-31, which + // satisfies `>= 2024-03-15`. The interpreter must admit `True` so that + // filter pushdown does not skip the part. Under the buggy + // `(true, true)` annotation, the output range would be + // `[Feb 29, Mar 2]`, all of which is `< Mar 15`, and the interpreter + // would (wrongly) admit only `False`. + assert!( + range_out.may_contain(Datum::True), + "interpreter incorrectly ruled out matching rows; \ + add_timestamp_interval is not monotone in the interval argument", + ); + } + #[mz_ore::test] fn test_trace() { use super::Trace; diff --git a/src/expr/src/scalar/func.rs b/src/expr/src/scalar/func.rs index 71c9c7bf6f5ec..0a126542d382f 100644 --- a/src/expr/src/scalar/func.rs +++ b/src/expr/src/scalar/func.rs @@ -181,7 +181,13 @@ fn add_float64(a: f64, b: f64) -> Result { } } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "+")] +// `Interval` is lex-ordered (months, days, micros), but adding an interval to a +// timestamp adds *calendar* months (with day-clamping) which does not respect +// that ordering: e.g. `i1 = {0 months, 31 days}` is lex-less than +// `i2 = {1 month, 0 days}`, but `2024-01-31 + i1 = 2024-03-02` is greater than +// `2024-01-31 + i2 = 2024-02-29`. Day-clamping plus preserved sub-day time also +// breaks monotonicity in the first argument near month boundaries. +#[sqlfunc(is_monotone = "(false, false)", is_infix_op = true, sqlname = "+")] fn add_timestamp_interval( a: CheckedTimestamp, b: Interval, @@ -189,7 +195,7 @@ fn add_timestamp_interval( add_timestamplike_interval(a, b) } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "+")] +#[sqlfunc(is_monotone = "(false, false)", is_infix_op = true, sqlname = "+")] fn add_timestamp_tz_interval( a: CheckedTimestamp>, b: Interval, @@ -212,7 +218,8 @@ where Ok(CheckedTimestamp::from_timestamplike(T::from_date_time(dt))?) } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "-")] +// See `add_timestamp_interval` for why this is not monotone. +#[sqlfunc(is_monotone = "(false, false)", is_infix_op = true, sqlname = "-")] fn sub_timestamp_interval( a: CheckedTimestamp, b: Interval, @@ -220,7 +227,7 @@ fn sub_timestamp_interval( sub_timestamplike_interval(a, b) } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "-")] +#[sqlfunc(is_monotone = "(false, false)", is_infix_op = true, sqlname = "-")] fn sub_timestamp_tz_interval( a: CheckedTimestamp>, b: Interval, @@ -249,7 +256,12 @@ fn add_date_time( Ok(CheckedTimestamp::from_timestamplike(dt)?) } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "+")] +// Monotone in `date` (dates have no sub-day component, so day-clamping at month +// boundaries only causes results to collapse, never to reverse), but not in +// `interval`: e.g. `{0 months, 31 days}` is lex-less than `{1 month, 0 days}`, +// but adding the former to `2024-01-31` gives `2024-03-02` while the latter +// gives `2024-02-29`. +#[sqlfunc(is_monotone = "(true, false)", is_infix_op = true, sqlname = "+")] fn add_date_interval( date: Date, interval: Interval, @@ -852,8 +864,9 @@ fn sub_interval(a: Interval, b: Interval) -> Result { .ok_or_else(|| EvalError::IntervalOutOfRange(format!("{a} - {b}").into())) } +// See `add_date_interval` for why this is not monotone in `interval`. #[sqlfunc( - is_monotone = "(true, true)", + is_monotone = "(true, false)", is_infix_op = true, sqlname = "-", propagates_nulls = true From 8833f13b0bcbf1b2a33f0c960f382a27ab13ddac Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 02:07:01 +0000 Subject: [PATCH 2/5] parallel-workload: re-enable persist_stats_filter_enabled The override was added to suppress the audit panic from database-issues#9656; with the monotonicity annotations on timestamp/date + interval corrected, filter pushdown should be sound again. Removing this lets CI re-exercise the audit and surface any remaining latent causes. --- misc/python/materialize/parallel_workload/settings.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/misc/python/materialize/parallel_workload/settings.py b/misc/python/materialize/parallel_workload/settings.py index 39894cd0dc536..fe50ba74fc8fe 100644 --- a/misc/python/materialize/parallel_workload/settings.py +++ b/misc/python/materialize/parallel_workload/settings.py @@ -41,8 +41,6 @@ def _missing_(cls, value): ADDITIONAL_SYSTEM_PARAMETER_DEFAULTS = { # Uses a lot of memory, hard to predict how much "memory_limiter_interval": "0", - # TODO: Remove when https://github.com/MaterializeInc/database-issues/issues/9656 is fixed - "persist_stats_filter_enabled": "false", # See https://materializeinc.slack.com/archives/CTESPM7FU/p1758195280629909, should reenable when it performs better "enable_compute_logical_backpressure": "false", # Allows the `Scenario.RepeatRow` scenario to call `repeat_row`. Having From 1eebb851a519f071b428b695b35fdba082645249 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 02:36:07 +0000 Subject: [PATCH 3/5] fixup: address CI feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop useless .into() in the new interpret.rs regression test (clippy). - Update filter-pushdown.slt: sub_timestamp_interval is no longer pushdownable now that it's marked non-monotone, so the expected pushdown= lines for 'timestamp - INTERVAL day' queries are gone. This is the known tradeoff for soundness — even day-only intervals share the non-monotonic annotation since the abstract interpreter can't distinguish them statically. --- src/expr/src/interpret.rs | 2 +- test/sqllogictest/filter-pushdown.slt | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/expr/src/interpret.rs b/src/expr/src/interpret.rs index ca1752c4e1ab8..51ad31bc9ae0b 100644 --- a/src/expr/src/interpret.rs +++ b/src/expr/src/interpret.rs @@ -1799,7 +1799,7 @@ mod tests { ) .unwrap(), )); - MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into()) + MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false)) }; let interval = |months: i32, days: i32, micros: i64| { Datum::Interval(Interval { diff --git a/test/sqllogictest/filter-pushdown.slt b/test/sqllogictest/filter-pushdown.slt index 6020ef88894ac..17418f65b5f0b 100644 --- a/test/sqllogictest/filter-pushdown.slt +++ b/test/sqllogictest/filter-pushdown.slt @@ -326,7 +326,6 @@ Explained Query: Source materialize.public.t filter=(((#1{t} - case when (#0{x} = 0) then 1 day else 2 days end) < 2023-10-02 15:55:31.918)) - pushdown=(((#1{t} - case when (#0{x} = 0) then 1 day else 2 days end) < 2023-10-02 15:55:31.918)) Target cluster: quickstart @@ -349,7 +348,6 @@ materialize.public.mv9: Source materialize.public.t filter=((mz_now() > timestamp_to_mz_timestamp((#1{t} - case when (#0{x} = 0) then 1 day else 2 days end)))) - pushdown=((mz_now() > timestamp_to_mz_timestamp((#1{t} - case when (#0{x} = 0) then 1 day else 2 days end)))) Target cluster: quickstart From 51e86ed008e1fce692e9008263db9b109b17dd4a Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 03:01:43 +0000 Subject: [PATCH 4/5] expr: date_bin is non-monotone in stride MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit date_bin(stride, source) = origin + floor((source - origin) / stride) * stride. For a fixed source like 2024-01-01 12:00:00, a 1-day stride bins to 2024-01-01 00:00:00 but a 2-day stride bins to 2023-12-31 00:00:00 — i.e. the lex-larger stride produces an earlier output. Same class of bug as the timestamp/date + interval monotonicity issues. Demotes date_bin_timestamp and date_bin_timestamp_tz from (true, true) to (false, true). Still monotone in source. --- src/expr/src/interpret.rs | 67 +++++++++++++++++++++++++++++++++++++ src/expr/src/scalar/func.rs | 10 ++++-- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/src/expr/src/interpret.rs b/src/expr/src/interpret.rs index 51ad31bc9ae0b..4857090c82bc0 100644 --- a/src/expr/src/interpret.rs +++ b/src/expr/src/interpret.rs @@ -1835,6 +1835,73 @@ mod tests { ); } + /// Regression test for `date_bin_timestamp`, which is non-monotone in the + /// `stride` argument: a larger stride can bin a source timestamp to an + /// *earlier* result than a smaller stride, because the bin alignment to + /// the unix epoch depends on the stride magnitude rather than on lex order. + #[mz_ore::test] + #[cfg_attr(miri, ignore)] + fn test_date_bin_timestamp_non_monotone() { + use chrono::NaiveDateTime; + use mz_repr::adt::interval::Interval; + use mz_repr::adt::timestamp::CheckedTimestamp; + use mz_repr::{Datum, Row}; + + let arena = RowArena::new(); + + let ts_lit = |s: &str| { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(), + )); + MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into()) + }; + let interval = |months: i32, days: i32, micros: i64| { + Datum::Interval(Interval { + months, + days, + micros, + }) + }; + + // Expression: `date_bin(stride_col, 2024-01-01 12:00:00) >= 2024-01-01`. + // stride_col ranges over `[1 day, 2 days]`. + // - 1 day stride → bins to 2024-01-01 00:00:00 (satisfies `>=`). + // - 2 day stride → bins to 2023-12-31 00:00:00 (does not). + // Under the buggy `(true, true)` annotation, both endpoint evaluations + // give the same boolean answer (`>=` is monotone), so depending on which + // way the comparisons fall the interpreter could end up wrongly ruling + // out one outcome. The non-monotone fix forces a conservative + // `anything()` for the binned value. + let expr = MirScalarExpr::column(0) + .call_binary(ts_lit("2024-01-01T12:00:00"), DateBinTimestamp) + .call_binary(ts_lit("2024-01-01T00:00:00"), Gte); + + let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]); + let mut interpreter = ColumnSpecs::new(&relation, &arena); + interpreter.push_column( + 0, + ResultSpec::value_between(interval(0, 1, 0), interval(0, 2, 0)), + ); + + let range_out = interpreter.expr(&expr).range; + // The actual data may include a 1-day stride for which the predicate + // holds, and a 2-day stride for which it doesn't, so both outcomes are + // reachable. + assert!( + range_out.may_contain(Datum::True), + "date_bin is not monotone in the stride argument; \ + interpreter must not rule out matching rows", + ); + assert!( + range_out.may_contain(Datum::False), + "interpreter must not rule out non-matching rows either", + ); + } + #[mz_ore::test] fn test_trace() { use super::Trace; diff --git a/src/expr/src/scalar/func.rs b/src/expr/src/scalar/func.rs index 0a126542d382f..09705eff50d15 100644 --- a/src/expr/src/scalar/func.rs +++ b/src/expr/src/scalar/func.rs @@ -2049,7 +2049,12 @@ where Ok(CheckedTimestamp::from_timestamplike(res)?) } -#[sqlfunc(is_monotone = "(true, true)", sqlname = "bin_unix_epoch_timestamp")] +// Non-monotone in `stride`: the result is `origin + floor((source - origin) / +// stride) * stride`. For a fixed source like `2024-01-01 12:00:00`, a 1-day +// stride bins to `2024-01-01 00:00:00`, but a 2-day stride bins to +// `2023-12-31 00:00:00` — i.e. the lex-larger interval produces an earlier +// timestamp. Monotone in `source`. +#[sqlfunc(is_monotone = "(false, true)", sqlname = "bin_unix_epoch_timestamp")] fn date_bin_timestamp( stride: Interval, source: CheckedTimestamp, @@ -2060,7 +2065,8 @@ fn date_bin_timestamp( date_bin(stride, source, origin) } -#[sqlfunc(is_monotone = "(true, true)", sqlname = "bin_unix_epoch_timestamptz")] +// See `date_bin_timestamp` for why this is not monotone in `stride`. +#[sqlfunc(is_monotone = "(false, true)", sqlname = "bin_unix_epoch_timestamptz")] fn date_bin_timestamp_tz( stride: Interval, source: CheckedTimestamp>, From 2c3116178a67a03d922f29868a7e1f7b5f779875 Mon Sep 17 00:00:00 2001 From: Dennis Felsing Date: Sun, 24 May 2026 12:59:08 +0000 Subject: [PATCH 5/5] expr: age is non-monotone in both arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Postgres-style `age(a, b)` is annotated `is_monotone = "(true, true)"`, but it is non-monotone in either argument. First argument: the carry logic in the calendar-aware difference borrows a whole month's worth of days when the day field goes negative, which breaks the lex order of `Interval` (months, days, micros) at month boundaries: age(2024-03-31, 2024-02-15) = {1 month, 16 days} age(2024-04-01, 2024-02-15) = {1 month, 15 days} ← lex-smaller age(2024-05-01, 2024-02-15) = {2 months, 15 days} Second argument: after the algorithm's final sign-revert, the same day-borrow makes the result dip non-monotonically as `b` crosses a month boundary: age(2024-02-29, 2024-03-30) = {-1 month, -1 day} age(2024-02-29, 2024-03-31) = {-1 month, -2 days} ← lex-smaller age(2024-02-29, 2024-04-01) = {-1 month, -1 day} Demotes both `age_timestamp` and `age_timestamp_tz` to `(false, false)`. Adds regression tests in `interpret.rs` exercising the abstract interpreter via a `>=` predicate, mirroring the existing tests for the analogous `add_timestamp_interval` and `date_bin_timestamp` bugs. Same bug class as 5257b0d3bd and 51e86ed008. --- src/expr/src/interpret.rs | 212 ++++++++++++++++++++++++++++++++++++ src/expr/src/scalar/func.rs | 26 ++++- 2 files changed, 236 insertions(+), 2 deletions(-) diff --git a/src/expr/src/interpret.rs b/src/expr/src/interpret.rs index 4857090c82bc0..8ce2f61411586 100644 --- a/src/expr/src/interpret.rs +++ b/src/expr/src/interpret.rs @@ -1902,6 +1902,218 @@ mod tests { ); } + /// Regression test for `age_timestamp`, which is non-monotone in the first + /// argument: the Postgres-style calendar-aware age computation does not + /// respect the lex order of `Interval` results when the input crosses a + /// month boundary. + #[mz_ore::test] + #[cfg_attr(miri, ignore)] + fn test_age_timestamp_non_monotone_in_first_arg() { + use chrono::NaiveDateTime; + use mz_repr::adt::interval::Interval; + use mz_repr::adt::timestamp::CheckedTimestamp; + use mz_repr::{Datum, Row}; + + let arena = RowArena::new(); + + let ts_lit = |s: &str| { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(), + )); + MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into()) + }; + let interval = |months: i32, days: i32, micros: i64| { + Datum::Interval(Interval { + months, + days, + micros, + }) + }; + + // Expression: `age(a_col, 2024-02-15) >= '1 month 16 days'`. + // a_col stats: [2024-03-31, 2024-05-01]. + // age(2024-03-31, 2024-02-15) = {1 month, 16 days} ← satisfies `>=` + // age(2024-05-01, 2024-02-15) = {2 months, 15 days} ← satisfies `>=` + // Interior a = 2024-04-01: + // age(2024-04-01, 2024-02-15) = {1 month, 15 days} ← does NOT satisfy. + // Under the buggy `(true, true)` annotation, the interpreter would + // bound the output of `age` by its endpoint values — lex-range + // `[(1m,16d), (2m,15d)]` — and conclude the `>=` predicate is always + // True, ruling out parts whose true rows would evaluate False. + let expr = MirScalarExpr::column(0) + .call_binary(ts_lit("2024-02-15T00:00:00"), AgeTimestamp) + .call_binary( + MirScalarExpr::Literal( + Ok({ + let mut row = Row::default(); + row.packer().push(interval(1, 16, 0)); + row + }), + ReprScalarType::Interval.nullable(false).into(), + ), + Gte, + ); + + let relation = ReprRelationType::new(vec![ReprScalarType::Timestamp.nullable(false)]); + let mut interpreter = ColumnSpecs::new(&relation, &arena); + let lo = { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str("2024-03-31T00:00:00", "%Y-%m-%dT%H:%M:%S") + .unwrap(), + ) + .unwrap(), + )); + row + }; + let hi = { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str("2024-05-01T00:00:00", "%Y-%m-%dT%H:%M:%S") + .unwrap(), + ) + .unwrap(), + )); + row + }; + interpreter.push_column( + 0, + ResultSpec::value_between(lo.unpack_first(), hi.unpack_first()), + ); + + let range_out = interpreter.expr(&expr).range; + assert!( + range_out.may_contain(Datum::False), + "age_timestamp is not monotone in the first argument; \ + interpreter must not rule out non-matching rows", + ); + } + + /// Regression test for `age_timestamp`, which is non-monotone in the + /// second argument: the result has a V-shape at `a == b` because the + /// algorithm flips sign when `a < b`, so the same interval can be produced + /// by `b` values on either side of `a`. + #[mz_ore::test] + #[cfg_attr(miri, ignore)] + fn test_age_timestamp_non_monotone_in_second_arg() { + use chrono::NaiveDateTime; + use mz_repr::adt::interval::Interval; + use mz_repr::adt::timestamp::CheckedTimestamp; + use mz_repr::{Datum, Row}; + + let arena = RowArena::new(); + + let ts_lit = |s: &str| { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(), + )); + MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into()) + }; + let interval = |months: i32, days: i32, micros: i64| { + Datum::Interval(Interval { + months, + days, + micros, + }) + }; + + // Expression: `age(2024-02-29, b_col) >= '-1 month, -1 day'`. + // b_col stats: [2024-03-30, 2024-04-01]. + // age(2024-02-29, 2024-03-30) = {-1 month, -1 day, 0} ← satisfies `>=` + // age(2024-02-29, 2024-04-01) = {-1 month, -1 day, 0} ← satisfies `>=` + // Interior b = 2024-03-31: + // age(2024-02-29, 2024-03-31) = {-1 month, -2 days, 0} ← does NOT satisfy. + // The carry logic in `age` borrows a *whole month worth of days* when + // the day field goes negative, which (after the sign-revert at the + // end) makes the result dip non-monotonically as `b` crosses a month + // boundary. Both endpoints lex-coincide above the interior dip, so + // under the buggy `(true, true)` annotation the interpreter would + // conclude the predicate is always True. + let expr = ts_lit("2024-02-29T00:00:00") + .call_binary(MirScalarExpr::column(0), AgeTimestamp) + .call_binary( + MirScalarExpr::Literal( + Ok({ + let mut row = Row::default(); + row.packer().push(interval(-1, -1, 0)); + row + }), + ReprScalarType::Interval.nullable(false).into(), + ), + Gte, + ); + + let relation = ReprRelationType::new(vec![ReprScalarType::Timestamp.nullable(false)]); + let mut interpreter = ColumnSpecs::new(&relation, &arena); + let lo = { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str("2024-03-30T00:00:00", "%Y-%m-%dT%H:%M:%S") + .unwrap(), + ) + .unwrap(), + )); + row + }; + let hi = { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str("2024-04-01T00:00:00", "%Y-%m-%dT%H:%M:%S") + .unwrap(), + ) + .unwrap(), + )); + row + }; + interpreter.push_column( + 0, + ResultSpec::value_between(lo.unpack_first(), hi.unpack_first()), + ); + + // Sanity-check the counterexample: endpoint and interior values. + let a = CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str("2024-02-29T00:00:00", "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(); + let b_lo = CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str("2024-03-30T00:00:00", "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(); + let b_mid = CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str("2024-03-31T00:00:00", "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(); + let b_hi = CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str("2024-04-01T00:00:00", "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(); + let age_lo = a.age(&b_lo).unwrap(); + let age_mid = a.age(&b_mid).unwrap(); + let age_hi = a.age(&b_hi).unwrap(); + assert_eq!((age_lo.months, age_lo.days, age_lo.micros), (-1, -1, 0)); + assert_eq!((age_mid.months, age_mid.days, age_mid.micros), (-1, -2, 0)); + assert_eq!((age_hi.months, age_hi.days, age_hi.micros), (-1, -1, 0)); + + let range_out = interpreter.expr(&expr).range; + assert!( + range_out.may_contain(Datum::False), + "age_timestamp is not monotone in the second argument; \ + interpreter must not rule out non-matching rows", + ); + } + #[mz_ore::test] fn test_trace() { use super::Trace; diff --git a/src/expr/src/scalar/func.rs b/src/expr/src/scalar/func.rs index 09705eff50d15..2b00f497adb79 100644 --- a/src/expr/src/scalar/func.rs +++ b/src/expr/src/scalar/func.rs @@ -796,8 +796,25 @@ fn sub_numeric( } } +// `age(a, b)` is non-monotone in *both* arguments: +// +// * Lex order on `Interval` is `(months, days, micros)`, but the Postgres +// `age` algorithm independently subtracts year/month/day/... fields and +// then *borrows* across boundaries when a lower field goes negative. With +// `b = 2024-02-15` fixed: +// a = 2024-03-31 → age = {1 month, 16 days} +// a = 2024-04-01 → age = {1 month, 15 days} +// a = 2024-05-01 → age = {2 months, 15 days} +// As `a` increases past a month boundary, `months` jumps by 1 and `days` +// drops, producing a lex-smaller interval than the previous step. +// +// * Holding `a` fixed and varying `b`, the result has a V-shape at `a == b` +// (sign is flipped when `a < b`): +// a = 2024-02-15, b = 2024-02-14 → age = {0 months, 1 day} +// a = 2024-02-15, b = 2024-02-15 → age = {0 months, 0 days} +// a = 2024-02-15, b = 2024-02-16 → age = {0 months, 1 day} #[sqlfunc( - is_monotone = "(true, true)", + is_monotone = "(false, false)", output_type = "Interval", sqlname = "age", propagates_nulls = true @@ -809,7 +826,12 @@ fn age_timestamp( Ok(a.age(&b)?) } -#[sqlfunc(is_monotone = "(true, true)", sqlname = "age", propagates_nulls = true)] +// See `age_timestamp` for why this is not monotone in either argument. +#[sqlfunc( + is_monotone = "(false, false)", + sqlname = "age", + propagates_nulls = true +)] fn age_timestamp_tz( a: CheckedTimestamp>, b: CheckedTimestamp>,