From 5257b0d3bdad5dda06fce73d3b429e79bb854762 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 02:02:34 +0000 Subject: [PATCH 1/4] expr: mark timestamp/date + interval as non-monotone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `Interval` is lex-ordered by (months, days, micros), but adding an interval to a timestamp or date adds *calendar* months with day-clamping and then days as 24-hour periods. That arithmetic does not respect the lex order: t = 2024-01-31 i1 = {0 months, 31 days} → t + i1 = 2024-03-02 i2 = {1 month, 0 days} → t + i2 = 2024-02-29 In lex order `i1 < i2`, yet `t + i1 > t + i2`. For timestamps, the day-clamping also collapses near-boundary inputs into the same date while preserving sub-day time, so the first argument is non-monotone too: t1 = 2024-01-30 23:59:59, i = {1 month} → 2024-02-29 23:59:59 t2 = 2024-01-31 00:00:00, i = {1 month} → 2024-02-29 00:00:00 For dates the first argument *is* monotone (no sub-day precision means clamping only collapses, never reverses), but the interval argument has the same problem. These annotations are consumed by the abstract interpreter that drives persist filter pushdown. Marking these functions monotone meant the interpreter computed the output range by evaluating the function only at the endpoints of the input interval range — and a stats range like `[{0m,31d}, {1m,0d}]` would yield the narrow output range `[2024-02-29, 2024-03-02]` even though interior intervals (e.g. `{0m, 60d}`) actually produce timestamps far outside that window. Filter pushdown could then incorrectly conclude a part had no matching rows, tripping the `persist filter pushdown correctness violation!` audit in `persist_source.rs`. Companion fix to b6079939c1, which corrected the analogous annotations for `add_time_interval`, `sub_time_interval`, `mul_interval`, and `div_interval`. Fixes database-issues#9656. --- src/expr/src/interpret.rs | 72 +++++++++++++++++++++++++++++++++++++ src/expr/src/scalar/func.rs | 25 +++++++++---- 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/src/expr/src/interpret.rs b/src/expr/src/interpret.rs index 737464a9407a6..ca1752c4e1ab8 100644 --- a/src/expr/src/interpret.rs +++ b/src/expr/src/interpret.rs @@ -1763,6 +1763,78 @@ mod tests { assert!(range_out.may_contain(Datum::Null)); } + /// Regression test for database-issues#9656. + /// + /// Adding an `Interval` to a `Timestamp` is non-monotone in the interval + /// argument: the lex order of intervals (months, days, micros) does not + /// respect calendar-month arithmetic with day-clamping. The interpreter + /// must therefore not assume monotonicity, otherwise persist filter + /// pushdown can incorrectly conclude that a part has no matching rows. + #[mz_ore::test] + #[cfg_attr(miri, ignore)] + fn test_add_timestamp_interval_non_monotone() { + use chrono::NaiveDateTime; + use mz_repr::adt::interval::Interval; + use mz_repr::adt::timestamp::CheckedTimestamp; + use mz_repr::{Datum, Row}; + + let arena = RowArena::new(); + + // The setup: a timestamp literal `t = 2024-01-31 00:00:00`, and an + // interval column whose stats-range spans + // `[{0 months, 31 days, 0 us}, {1 month, 0 days, 0 us}]`. In lex order, + // the 31-day interval is the lower bound and the 1-month interval is + // the upper bound. The function values at the endpoints are: + // t + {0,31,0} = 2024-03-02 + // t + {1, 0,0} = 2024-02-29 + // But an *interior* interval like {0, 60, 0} maps to 2024-03-31, which + // lies far outside `[Feb 29, Mar 2]`. Under the (incorrect) monotone + // assumption, the interpreter would conclude the output is in that + // narrow window, and rule out predicates like `>= 2024-03-15`. + let ts_lit = |s: &str| { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(), + )); + MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into()) + }; + let interval = |months: i32, days: i32, micros: i64| { + Datum::Interval(Interval { + months, + days, + micros, + }) + }; + + // Expression: `(timestamp_lit + interval_col) >= 2024-03-15`. + let expr = ts_lit("2024-01-31T00:00:00") + .call_binary(MirScalarExpr::column(0), AddTimestampInterval) + .call_binary(ts_lit("2024-03-15T00:00:00"), Gte); + + let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]); + let mut interpreter = ColumnSpecs::new(&relation, &arena); + interpreter.push_column( + 0, + ResultSpec::value_between(interval(0, 31, 0), interval(1, 0, 0)), + ); + + let range_out = interpreter.expr(&expr).range; + // The actual data may include e.g. `{0, 60, 0}` → 2024-03-31, which + // satisfies `>= 2024-03-15`. The interpreter must admit `True` so that + // filter pushdown does not skip the part. Under the buggy + // `(true, true)` annotation, the output range would be + // `[Feb 29, Mar 2]`, all of which is `< Mar 15`, and the interpreter + // would (wrongly) admit only `False`. + assert!( + range_out.may_contain(Datum::True), + "interpreter incorrectly ruled out matching rows; \ + add_timestamp_interval is not monotone in the interval argument", + ); + } + #[mz_ore::test] fn test_trace() { use super::Trace; diff --git a/src/expr/src/scalar/func.rs b/src/expr/src/scalar/func.rs index 71c9c7bf6f5ec..0a126542d382f 100644 --- a/src/expr/src/scalar/func.rs +++ b/src/expr/src/scalar/func.rs @@ -181,7 +181,13 @@ fn add_float64(a: f64, b: f64) -> Result { } } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "+")] +// `Interval` is lex-ordered (months, days, micros), but adding an interval to a +// timestamp adds *calendar* months (with day-clamping) which does not respect +// that ordering: e.g. `i1 = {0 months, 31 days}` is lex-less than +// `i2 = {1 month, 0 days}`, but `2024-01-31 + i1 = 2024-03-02` is greater than +// `2024-01-31 + i2 = 2024-02-29`. Day-clamping plus preserved sub-day time also +// breaks monotonicity in the first argument near month boundaries. +#[sqlfunc(is_monotone = "(false, false)", is_infix_op = true, sqlname = "+")] fn add_timestamp_interval( a: CheckedTimestamp, b: Interval, @@ -189,7 +195,7 @@ fn add_timestamp_interval( add_timestamplike_interval(a, b) } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "+")] +#[sqlfunc(is_monotone = "(false, false)", is_infix_op = true, sqlname = "+")] fn add_timestamp_tz_interval( a: CheckedTimestamp>, b: Interval, @@ -212,7 +218,8 @@ where Ok(CheckedTimestamp::from_timestamplike(T::from_date_time(dt))?) } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "-")] +// See `add_timestamp_interval` for why this is not monotone. +#[sqlfunc(is_monotone = "(false, false)", is_infix_op = true, sqlname = "-")] fn sub_timestamp_interval( a: CheckedTimestamp, b: Interval, @@ -220,7 +227,7 @@ fn sub_timestamp_interval( sub_timestamplike_interval(a, b) } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "-")] +#[sqlfunc(is_monotone = "(false, false)", is_infix_op = true, sqlname = "-")] fn sub_timestamp_tz_interval( a: CheckedTimestamp>, b: Interval, @@ -249,7 +256,12 @@ fn add_date_time( Ok(CheckedTimestamp::from_timestamplike(dt)?) } -#[sqlfunc(is_monotone = "(true, true)", is_infix_op = true, sqlname = "+")] +// Monotone in `date` (dates have no sub-day component, so day-clamping at month +// boundaries only causes results to collapse, never to reverse), but not in +// `interval`: e.g. `{0 months, 31 days}` is lex-less than `{1 month, 0 days}`, +// but adding the former to `2024-01-31` gives `2024-03-02` while the latter +// gives `2024-02-29`. +#[sqlfunc(is_monotone = "(true, false)", is_infix_op = true, sqlname = "+")] fn add_date_interval( date: Date, interval: Interval, @@ -852,8 +864,9 @@ fn sub_interval(a: Interval, b: Interval) -> Result { .ok_or_else(|| EvalError::IntervalOutOfRange(format!("{a} - {b}").into())) } +// See `add_date_interval` for why this is not monotone in `interval`. #[sqlfunc( - is_monotone = "(true, true)", + is_monotone = "(true, false)", is_infix_op = true, sqlname = "-", propagates_nulls = true From 8833f13b0bcbf1b2a33f0c960f382a27ab13ddac Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 02:07:01 +0000 Subject: [PATCH 2/4] parallel-workload: re-enable persist_stats_filter_enabled The override was added to suppress the audit panic from database-issues#9656; with the monotonicity annotations on timestamp/date + interval corrected, filter pushdown should be sound again. Removing this lets CI re-exercise the audit and surface any remaining latent causes. --- misc/python/materialize/parallel_workload/settings.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/misc/python/materialize/parallel_workload/settings.py b/misc/python/materialize/parallel_workload/settings.py index 39894cd0dc536..fe50ba74fc8fe 100644 --- a/misc/python/materialize/parallel_workload/settings.py +++ b/misc/python/materialize/parallel_workload/settings.py @@ -41,8 +41,6 @@ def _missing_(cls, value): ADDITIONAL_SYSTEM_PARAMETER_DEFAULTS = { # Uses a lot of memory, hard to predict how much "memory_limiter_interval": "0", - # TODO: Remove when https://github.com/MaterializeInc/database-issues/issues/9656 is fixed - "persist_stats_filter_enabled": "false", # See https://materializeinc.slack.com/archives/CTESPM7FU/p1758195280629909, should reenable when it performs better "enable_compute_logical_backpressure": "false", # Allows the `Scenario.RepeatRow` scenario to call `repeat_row`. Having From 1eebb851a519f071b428b695b35fdba082645249 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 02:36:07 +0000 Subject: [PATCH 3/4] fixup: address CI feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop useless .into() in the new interpret.rs regression test (clippy). - Update filter-pushdown.slt: sub_timestamp_interval is no longer pushdownable now that it's marked non-monotone, so the expected pushdown= lines for 'timestamp - INTERVAL day' queries are gone. This is the known tradeoff for soundness — even day-only intervals share the non-monotonic annotation since the abstract interpreter can't distinguish them statically. --- src/expr/src/interpret.rs | 2 +- test/sqllogictest/filter-pushdown.slt | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/expr/src/interpret.rs b/src/expr/src/interpret.rs index ca1752c4e1ab8..51ad31bc9ae0b 100644 --- a/src/expr/src/interpret.rs +++ b/src/expr/src/interpret.rs @@ -1799,7 +1799,7 @@ mod tests { ) .unwrap(), )); - MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into()) + MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false)) }; let interval = |months: i32, days: i32, micros: i64| { Datum::Interval(Interval { diff --git a/test/sqllogictest/filter-pushdown.slt b/test/sqllogictest/filter-pushdown.slt index 6020ef88894ac..17418f65b5f0b 100644 --- a/test/sqllogictest/filter-pushdown.slt +++ b/test/sqllogictest/filter-pushdown.slt @@ -326,7 +326,6 @@ Explained Query: Source materialize.public.t filter=(((#1{t} - case when (#0{x} = 0) then 1 day else 2 days end) < 2023-10-02 15:55:31.918)) - pushdown=(((#1{t} - case when (#0{x} = 0) then 1 day else 2 days end) < 2023-10-02 15:55:31.918)) Target cluster: quickstart @@ -349,7 +348,6 @@ materialize.public.mv9: Source materialize.public.t filter=((mz_now() > timestamp_to_mz_timestamp((#1{t} - case when (#0{x} = 0) then 1 day else 2 days end)))) - pushdown=((mz_now() > timestamp_to_mz_timestamp((#1{t} - case when (#0{x} = 0) then 1 day else 2 days end)))) Target cluster: quickstart From eb53ce5954ad3c87d7346c35f09c4bde7b50cf02 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 03:01:43 +0000 Subject: [PATCH 4/4] expr: date_bin is non-monotone in stride MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit date_bin(stride, source) = origin + floor((source - origin) / stride) * stride. For a fixed source like 2024-01-01 12:00:00, a 1-day stride bins to 2024-01-01 00:00:00 but a 2-day stride bins to 2023-12-31 00:00:00 — i.e. the lex-larger stride produces an earlier output. Same class of bug as the timestamp/date + interval monotonicity issues. Demotes date_bin_timestamp and date_bin_timestamp_tz from (true, true) to (false, true). Still monotone in source. --- src/expr/src/interpret.rs | 68 +++++++++++++++++++++++++++++++++++++ src/expr/src/scalar/func.rs | 10 ++++-- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/expr/src/interpret.rs b/src/expr/src/interpret.rs index 51ad31bc9ae0b..7e9480500586b 100644 --- a/src/expr/src/interpret.rs +++ b/src/expr/src/interpret.rs @@ -1835,6 +1835,74 @@ mod tests { ); } + /// Regression test for `date_bin_timestamp`, which is non-monotone in the + /// `stride` argument: a larger stride can bin a source timestamp to an + /// *earlier* result than a smaller stride, because the bin alignment to + /// the unix epoch depends on the stride magnitude rather than on lex order. + #[mz_ore::test] + #[cfg_attr(miri, ignore)] + fn test_date_bin_timestamp_non_monotone() { + use chrono::NaiveDateTime; + use mz_repr::adt::interval::Interval; + use mz_repr::adt::timestamp::CheckedTimestamp; + use mz_repr::{Datum, Row}; + + let arena = RowArena::new(); + + let ts_lit = |s: &str| { + let mut row = Row::default(); + row.packer().push(Datum::Timestamp( + CheckedTimestamp::from_timestamplike( + NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(), + ) + .unwrap(), + )); + MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false)) + }; + let interval = |months: i32, days: i32, micros: i64| { + Datum::Interval(Interval { + months, + days, + micros, + }) + }; + + // Expression: `date_bin(stride_col, 2024-01-01 12:00:00) > 2024-01-01 06:00:00`. + // stride_col ranges over `[1 day, 2 days]`. + // + // Endpoint evaluations: + // 1 day stride → bins to 2024-01-01 00:00:00 + // 2 day stride → bins to 2023-12-31 00:00:00 + // + // Interior strides produce results *outside* that endpoint box. For + // example, a 1.5-day stride (i.e. `{0 months, 1 day, 12 h micros}`, + // which sorts between the two endpoints in lex order) bins + // 2024-01-01 12:00:00 to exactly 2024-01-01 12:00:00 — well above the + // endpoint maximum of 2024-01-01 00:00:00. With the buggy + // `(true, true)` annotation, the interpreter narrows the output to + // `[Dec 31 00:00, Jan 1 00:00]`, both of which are `<= Jan 1 06:00`, + // so the predicate is wrongly proved `False`. With the non-monotone + // fix the output is `anything()`, so `True` is correctly admitted. + let expr = MirScalarExpr::column(0) + .call_binary(ts_lit("2024-01-01T12:00:00"), DateBinTimestamp) + .call_binary(ts_lit("2024-01-01T06:00:00"), Gt); + + let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]); + let mut interpreter = ColumnSpecs::new(&relation, &arena); + interpreter.push_column( + 0, + ResultSpec::value_between(interval(0, 1, 0), interval(0, 2, 0)), + ); + + let range_out = interpreter.expr(&expr).range; + assert!( + range_out.may_contain(Datum::True), + "date_bin is not monotone in the stride argument; \ + interior strides can produce outputs outside the endpoint-bounded \ + box, so the interpreter must admit True for `>`-style predicates", + ); + } + #[mz_ore::test] fn test_trace() { use super::Trace; diff --git a/src/expr/src/scalar/func.rs b/src/expr/src/scalar/func.rs index 0a126542d382f..09705eff50d15 100644 --- a/src/expr/src/scalar/func.rs +++ b/src/expr/src/scalar/func.rs @@ -2049,7 +2049,12 @@ where Ok(CheckedTimestamp::from_timestamplike(res)?) } -#[sqlfunc(is_monotone = "(true, true)", sqlname = "bin_unix_epoch_timestamp")] +// Non-monotone in `stride`: the result is `origin + floor((source - origin) / +// stride) * stride`. For a fixed source like `2024-01-01 12:00:00`, a 1-day +// stride bins to `2024-01-01 00:00:00`, but a 2-day stride bins to +// `2023-12-31 00:00:00` — i.e. the lex-larger interval produces an earlier +// timestamp. Monotone in `source`. +#[sqlfunc(is_monotone = "(false, true)", sqlname = "bin_unix_epoch_timestamp")] fn date_bin_timestamp( stride: Interval, source: CheckedTimestamp, @@ -2060,7 +2065,8 @@ fn date_bin_timestamp( date_bin(stride, source, origin) } -#[sqlfunc(is_monotone = "(true, true)", sqlname = "bin_unix_epoch_timestamptz")] +// See `date_bin_timestamp` for why this is not monotone in `stride`. +#[sqlfunc(is_monotone = "(false, true)", sqlname = "bin_unix_epoch_timestamptz")] fn date_bin_timestamp_tz( stride: Interval, source: CheckedTimestamp>,