Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions misc/python/materialize/parallel_workload/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ def _missing_(cls, value):
ADDITIONAL_SYSTEM_PARAMETER_DEFAULTS = {
# Uses a lot of memory, hard to predict how much
"memory_limiter_interval": "0",
# TODO: Remove when https://github.com/MaterializeInc/database-issues/issues/9656 is fixed
"persist_stats_filter_enabled": "false",
# See https://materializeinc.slack.com/archives/CTESPM7FU/p1758195280629909, should reenable when it performs better
"enable_compute_logical_backpressure": "false",
# Allows the `Scenario.RepeatRow` scenario to call `repeat_row`. Having
Expand Down
351 changes: 351 additions & 0 deletions src/expr/src/interpret.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1763,6 +1763,357 @@ mod tests {
assert!(range_out.may_contain(Datum::Null));
}

/// Regression test for database-issues#9656.
///
/// Adding an `Interval` to a `Timestamp` is non-monotone in the interval
/// argument: the lex order of intervals (months, days, micros) does not
/// respect calendar-month arithmetic with day-clamping. The interpreter
/// must therefore not assume monotonicity, otherwise persist filter
/// pushdown can incorrectly conclude that a part has no matching rows.
#[mz_ore::test]
#[cfg_attr(miri, ignore)]
fn test_add_timestamp_interval_non_monotone() {
use chrono::NaiveDateTime;
use mz_repr::adt::interval::Interval;
use mz_repr::adt::timestamp::CheckedTimestamp;
use mz_repr::{Datum, Row};

let arena = RowArena::new();

// The setup: a timestamp literal `t = 2024-01-31 00:00:00`, and an
// interval column whose stats-range spans
// `[{0 months, 31 days, 0 us}, {1 month, 0 days, 0 us}]`. In lex order,
// the 31-day interval is the lower bound and the 1-month interval is
// the upper bound. The function values at the endpoints are:
// t + {0,31,0} = 2024-03-02
// t + {1, 0,0} = 2024-02-29
// But an *interior* interval like {0, 60, 0} maps to 2024-03-31, which
// lies far outside `[Feb 29, Mar 2]`. Under the (incorrect) monotone
// assumption, the interpreter would conclude the output is in that
// narrow window, and rule out predicates like `>= 2024-03-15`.
let ts_lit = |s: &str| {
let mut row = Row::default();
row.packer().push(Datum::Timestamp(
CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(),
)
.unwrap(),
));
MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false))
};
let interval = |months: i32, days: i32, micros: i64| {
Datum::Interval(Interval {
months,
days,
micros,
})
};

// Expression: `(timestamp_lit + interval_col) >= 2024-03-15`.
let expr = ts_lit("2024-01-31T00:00:00")
.call_binary(MirScalarExpr::column(0), AddTimestampInterval)
.call_binary(ts_lit("2024-03-15T00:00:00"), Gte);

let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]);
let mut interpreter = ColumnSpecs::new(&relation, &arena);
interpreter.push_column(
0,
ResultSpec::value_between(interval(0, 31, 0), interval(1, 0, 0)),
);

let range_out = interpreter.expr(&expr).range;
// The actual data may include e.g. `{0, 60, 0}` → 2024-03-31, which
// satisfies `>= 2024-03-15`. The interpreter must admit `True` so that
// filter pushdown does not skip the part. Under the buggy
// `(true, true)` annotation, the output range would be
// `[Feb 29, Mar 2]`, all of which is `< Mar 15`, and the interpreter
// would (wrongly) admit only `False`.
assert!(
range_out.may_contain(Datum::True),
"interpreter incorrectly ruled out matching rows; \
add_timestamp_interval is not monotone in the interval argument",
);
}

/// Regression test for `date_bin_timestamp`, which is non-monotone in the
/// `stride` argument: a larger stride can bin a source timestamp to an
/// *earlier* result than a smaller stride, because the bin alignment to
/// the unix epoch depends on the stride magnitude rather than on lex order.
#[mz_ore::test]
#[cfg_attr(miri, ignore)]
fn test_date_bin_timestamp_non_monotone() {
use chrono::NaiveDateTime;
use mz_repr::adt::interval::Interval;
use mz_repr::adt::timestamp::CheckedTimestamp;
use mz_repr::{Datum, Row};

let arena = RowArena::new();

let ts_lit = |s: &str| {
let mut row = Row::default();
row.packer().push(Datum::Timestamp(
CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(),
)
.unwrap(),
));
MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into())
};
let interval = |months: i32, days: i32, micros: i64| {
Datum::Interval(Interval {
months,
days,
micros,
})
};

// Expression: `date_bin(stride_col, 2024-01-01 12:00:00) >= 2024-01-01`.
// stride_col ranges over `[1 day, 2 days]`.
// - 1 day stride → bins to 2024-01-01 00:00:00 (satisfies `>=`).
// - 2 day stride → bins to 2023-12-31 00:00:00 (does not).
// Under the buggy `(true, true)` annotation, both endpoint evaluations
// give the same boolean answer (`>=` is monotone), so depending on which
// way the comparisons fall the interpreter could end up wrongly ruling
// out one outcome. The non-monotone fix forces a conservative
// `anything()` for the binned value.
let expr = MirScalarExpr::column(0)
.call_binary(ts_lit("2024-01-01T12:00:00"), DateBinTimestamp)
.call_binary(ts_lit("2024-01-01T00:00:00"), Gte);

let relation = ReprRelationType::new(vec![ReprScalarType::Interval.nullable(false)]);
let mut interpreter = ColumnSpecs::new(&relation, &arena);
interpreter.push_column(
0,
ResultSpec::value_between(interval(0, 1, 0), interval(0, 2, 0)),
);

let range_out = interpreter.expr(&expr).range;
// The actual data may include a 1-day stride for which the predicate
// holds, and a 2-day stride for which it doesn't, so both outcomes are
// reachable.
assert!(
range_out.may_contain(Datum::True),
"date_bin is not monotone in the stride argument; \
interpreter must not rule out matching rows",
);
assert!(
range_out.may_contain(Datum::False),
"interpreter must not rule out non-matching rows either",
);
}

/// Regression test for `age_timestamp`, which is non-monotone in the first
/// argument: the Postgres-style calendar-aware age computation does not
/// respect the lex order of `Interval` results when the input crosses a
/// month boundary.
#[mz_ore::test]
#[cfg_attr(miri, ignore)]
fn test_age_timestamp_non_monotone_in_first_arg() {
use chrono::NaiveDateTime;
use mz_repr::adt::interval::Interval;
use mz_repr::adt::timestamp::CheckedTimestamp;
use mz_repr::{Datum, Row};

let arena = RowArena::new();

let ts_lit = |s: &str| {
let mut row = Row::default();
row.packer().push(Datum::Timestamp(
CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(),
)
.unwrap(),
));
MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into())
};
let interval = |months: i32, days: i32, micros: i64| {
Datum::Interval(Interval {
months,
days,
micros,
})
};

// Expression: `age(a_col, 2024-02-15) >= '1 month 16 days'`.
// a_col stats: [2024-03-31, 2024-05-01].
// age(2024-03-31, 2024-02-15) = {1 month, 16 days} ← satisfies `>=`
// age(2024-05-01, 2024-02-15) = {2 months, 15 days} ← satisfies `>=`
// Interior a = 2024-04-01:
// age(2024-04-01, 2024-02-15) = {1 month, 15 days} ← does NOT satisfy.
// Under the buggy `(true, true)` annotation, the interpreter would
// bound the output of `age` by its endpoint values — lex-range
// `[(1m,16d), (2m,15d)]` — and conclude the `>=` predicate is always
// True, ruling out parts whose true rows would evaluate False.
let expr = MirScalarExpr::column(0)
.call_binary(ts_lit("2024-02-15T00:00:00"), AgeTimestamp)
.call_binary(
MirScalarExpr::Literal(
Ok({
let mut row = Row::default();
row.packer().push(interval(1, 16, 0));
row
}),
ReprScalarType::Interval.nullable(false).into(),
),
Gte,
);

let relation = ReprRelationType::new(vec![ReprScalarType::Timestamp.nullable(false)]);
let mut interpreter = ColumnSpecs::new(&relation, &arena);
let lo = {
let mut row = Row::default();
row.packer().push(Datum::Timestamp(
CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str("2024-03-31T00:00:00", "%Y-%m-%dT%H:%M:%S")
.unwrap(),
)
.unwrap(),
));
row
};
let hi = {
let mut row = Row::default();
row.packer().push(Datum::Timestamp(
CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str("2024-05-01T00:00:00", "%Y-%m-%dT%H:%M:%S")
.unwrap(),
)
.unwrap(),
));
row
};
interpreter.push_column(
0,
ResultSpec::value_between(lo.unpack_first(), hi.unpack_first()),
);

let range_out = interpreter.expr(&expr).range;
assert!(
range_out.may_contain(Datum::False),
"age_timestamp is not monotone in the first argument; \
interpreter must not rule out non-matching rows",
);
}

/// Regression test for `age_timestamp`, which is non-monotone in the
/// second argument: the result has a V-shape at `a == b` because the
/// algorithm flips sign when `a < b`, so the same interval can be produced
/// by `b` values on either side of `a`.
#[mz_ore::test]
#[cfg_attr(miri, ignore)]
fn test_age_timestamp_non_monotone_in_second_arg() {
use chrono::NaiveDateTime;
use mz_repr::adt::interval::Interval;
use mz_repr::adt::timestamp::CheckedTimestamp;
use mz_repr::{Datum, Row};

let arena = RowArena::new();

let ts_lit = |s: &str| {
let mut row = Row::default();
row.packer().push(Datum::Timestamp(
CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").unwrap(),
)
.unwrap(),
));
MirScalarExpr::Literal(Ok(row), ReprScalarType::Timestamp.nullable(false).into())
};
let interval = |months: i32, days: i32, micros: i64| {
Datum::Interval(Interval {
months,
days,
micros,
})
};

// Expression: `age(2024-02-29, b_col) >= '-1 month, -1 day'`.
// b_col stats: [2024-03-30, 2024-04-01].
// age(2024-02-29, 2024-03-30) = {-1 month, -1 day, 0} ← satisfies `>=`
// age(2024-02-29, 2024-04-01) = {-1 month, -1 day, 0} ← satisfies `>=`
// Interior b = 2024-03-31:
// age(2024-02-29, 2024-03-31) = {-1 month, -2 days, 0} ← does NOT satisfy.
// The carry logic in `age` borrows a *whole month worth of days* when
// the day field goes negative, which (after the sign-revert at the
// end) makes the result dip non-monotonically as `b` crosses a month
// boundary. Both endpoints lex-coincide above the interior dip, so
// under the buggy `(true, true)` annotation the interpreter would
// conclude the predicate is always True.
let expr = ts_lit("2024-02-29T00:00:00")
.call_binary(MirScalarExpr::column(0), AgeTimestamp)
.call_binary(
MirScalarExpr::Literal(
Ok({
let mut row = Row::default();
row.packer().push(interval(-1, -1, 0));
row
}),
ReprScalarType::Interval.nullable(false).into(),
),
Gte,
);

let relation = ReprRelationType::new(vec![ReprScalarType::Timestamp.nullable(false)]);
let mut interpreter = ColumnSpecs::new(&relation, &arena);
let lo = {
let mut row = Row::default();
row.packer().push(Datum::Timestamp(
CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str("2024-03-30T00:00:00", "%Y-%m-%dT%H:%M:%S")
.unwrap(),
)
.unwrap(),
));
row
};
let hi = {
let mut row = Row::default();
row.packer().push(Datum::Timestamp(
CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str("2024-04-01T00:00:00", "%Y-%m-%dT%H:%M:%S")
.unwrap(),
)
.unwrap(),
));
row
};
interpreter.push_column(
0,
ResultSpec::value_between(lo.unpack_first(), hi.unpack_first()),
);

// Sanity-check the counterexample: endpoint and interior values.
let a = CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str("2024-02-29T00:00:00", "%Y-%m-%dT%H:%M:%S").unwrap(),
)
.unwrap();
let b_lo = CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str("2024-03-30T00:00:00", "%Y-%m-%dT%H:%M:%S").unwrap(),
)
.unwrap();
let b_mid = CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str("2024-03-31T00:00:00", "%Y-%m-%dT%H:%M:%S").unwrap(),
)
.unwrap();
let b_hi = CheckedTimestamp::from_timestamplike(
NaiveDateTime::parse_from_str("2024-04-01T00:00:00", "%Y-%m-%dT%H:%M:%S").unwrap(),
)
.unwrap();
let age_lo = a.age(&b_lo).unwrap();
let age_mid = a.age(&b_mid).unwrap();
let age_hi = a.age(&b_hi).unwrap();
assert_eq!((age_lo.months, age_lo.days, age_lo.micros), (-1, -1, 0));
assert_eq!((age_mid.months, age_mid.days, age_mid.micros), (-1, -2, 0));
assert_eq!((age_hi.months, age_hi.days, age_hi.micros), (-1, -1, 0));

let range_out = interpreter.expr(&expr).range;
assert!(
range_out.may_contain(Datum::False),
"age_timestamp is not monotone in the second argument; \
interpreter must not rule out non-matching rows",
);
}

#[mz_ore::test]
fn test_trace() {
use super::Trace;
Expand Down
Loading
Loading