From cc8246e7e93610dfa2918091b7a12937e01c2368 Mon Sep 17 00:00:00 2001 From: Thomas Santerre Date: Thu, 11 Jun 2026 11:16:56 -0400 Subject: [PATCH 1/2] fix[file]: read the one-row pruning result in can_prune instead of requiring a constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #7575 removed bottom-up constant-folding, so composite falsifications — and/or trees, and the or(min > lit, lit > max) that eq expands to — now execute to a one-row Canonical instead of folding to a Columnar::Constant. can_prune only read the folded case and mapped Canonical to false, so file-level pruning silently stopped for every and/or and eq predicate; bare gt/lt still fold, so they were unaffected. Nothing caught it because can_prune has no in-repo callers — the scan path prunes through FileStatsLayoutReader::evaluate_file_stats. Mirror that reader's read-out (execute to Canonical, take row 0) and add a regression test for bare, and/or, and eq predicates. Signed-off-by: Thomas Santerre --- vortex-file/src/file.rs | 12 +++++++----- vortex-file/src/tests.rs | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/vortex-file/src/file.rs b/vortex-file/src/file.rs index ded986f6210..b1d97391750 100644 --- a/vortex-file/src/file.rs +++ b/vortex-file/src/file.rs @@ -12,7 +12,7 @@ use std::sync::OnceLock; use itertools::Itertools; use vortex_array::ArrayRef; -use vortex_array::Columnar; +use vortex_array::Canonical; use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; use vortex_array::arrays::ConstantArray; @@ -245,10 +245,12 @@ impl VortexFile { let applied = substitute_row_count(applied, &row_count_replacement)?; let mut ctx = self.session.create_execution_ctx(); - Ok(match applied.execute::<Columnar>(&mut ctx)? { - Columnar::Constant(s) => s.scalar().as_bool().value() == Some(true), - Columnar::Canonical(_) => false, - }) + let result = applied + .execute::<Canonical>(&mut ctx)? 
+ .into_bool() + .into_array() + .execute_scalar(0, &mut ctx)?; + Ok(result.as_bool().value() == Some(true)) } pub fn splits(&self) -> VortexResult<Vec<Range<u64>>> { diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 2ba10d96684..e320cf2e9d9 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -35,6 +35,7 @@ use vortex_array::dtype::PType::I32; use vortex_array::dtype::StructFields; use vortex_array::expr::and; use vortex_array::expr::cast; +use vortex_array::expr::col; use vortex_array::expr::eq; use vortex_array::expr::get_item; use vortex_array::expr::gt; @@ -1953,3 +1954,39 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> { Ok(()) } + +#[tokio::test] +#[cfg_attr(miri, ignore)] +async fn test_can_prune_composite_predicates() -> VortexResult<()> { + // Regression test for `can_prune` after `ScalarFnConstantRule` was removed + // (#7575): composite falsification trees no longer constant-fold during + // execution, so `can_prune` must read the one-row evaluated result instead + // of requiring a `Columnar::Constant`. `Eq` is affected too: its + // falsification is internally `or(min > lit, lit > max)`. + let st = StructArray::from_fields(&[ + ("age", buffer![15i32, 18, 22, 25].into_array()), + ("price", buffer![120i32, 130, 140, 150].into_array()), + ])?; + let mut buf = ByteBufferMut::empty(); + SESSION + .write_options() + .write(&mut buf, st.into_array().to_array_stream()) + .await?; + let file = SESSION.open_options().open_buffer(buf)?; + + // Bare comparisons: falsified directly by min/max stats. + assert!(file.can_prune(&gt(col("age"), lit(30)))?); + assert!(file.can_prune(&lt(col("price"), lit(100)))?); + + // Composite predicates whose falsifications are boolean trees. 
+ assert!(file.can_prune(&and(gt(col("age"), lit(30)), lt(col("price"), lit(100))))?); + assert!(file.can_prune(&or(gt(col("age"), lit(30)), lt(col("age"), lit(10))))?); + assert!(file.can_prune(&eq(col("age"), lit(5)))?); + + // Non-falsifiable controls: rows may match, so pruning must refuse. + assert!(!file.can_prune(&gt(col("age"), lit(20)))?); + assert!(!file.can_prune(&eq(col("age"), lit(18)))?); + assert!(!file.can_prune(&and(gt(col("age"), lit(20)), gt(col("price"), lit(100))))?); + + Ok(()) +} From 46623180ea914226eedbbbc00d2a8e8cce7fe56a Mon Sep 17 00:00:00 2001 From: Thomas Santerre Date: Thu, 11 Jun 2026 12:08:36 -0400 Subject: [PATCH 2/2] fix[file]: keep the Columnar constant fast path, read row 0 in the Canonical arm Address review from #8369: retain the Columnar::Constant fast path and read the row-0 scalar directly in the Canonical arm, dropping the redundant into_bool round-trip. Signed-off-by: Thomas Santerre --- vortex-file/src/file.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/vortex-file/src/file.rs b/vortex-file/src/file.rs index b1d97391750..23e2114b1c3 100644 --- a/vortex-file/src/file.rs +++ b/vortex-file/src/file.rs @@ -12,7 +12,7 @@ use std::sync::OnceLock; use itertools::Itertools; use vortex_array::ArrayRef; -use vortex_array::Canonical; +use vortex_array::Columnar; use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; use vortex_array::arrays::ConstantArray; @@ -245,12 +245,16 @@ impl VortexFile { let applied = substitute_row_count(applied, &row_count_replacement)?; let mut ctx = self.session.create_execution_ctx(); - let result = applied - .execute::<Canonical>(&mut ctx)? - .into_bool() - .into_array() - .execute_scalar(0, &mut ctx)?; - Ok(result.as_bool().value() == Some(true)) + Ok(match applied.execute::<Columnar>(&mut ctx)? { + Columnar::Constant(s) => s.scalar().as_bool().value() == Some(true), + Columnar::Canonical(c) => { + c.into_array() + .execute_scalar(0, &mut ctx)? 
+ .as_bool() + .value() + == Some(true) + } + }) } pub fn splits(&self) -> VortexResult<Vec<Range<u64>>> {