diff --git a/vortex-file/src/file.rs b/vortex-file/src/file.rs index ded986f6210..b7158f00658 100644 --- a/vortex-file/src/file.rs +++ b/vortex-file/src/file.rs @@ -12,7 +12,6 @@ use std::sync::OnceLock; use itertools::Itertools; use vortex_array::ArrayRef; -use vortex_array::Columnar; use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; use vortex_array::arrays::ConstantArray; @@ -244,11 +243,11 @@ impl VortexFile { ConstantArray::new(self.footer.row_count(), applied.len()).into_array(); let applied = substitute_row_count(applied, &row_count_replacement)?; + // The stats array has exactly one row, so evaluate the predicate at row 0. A null + // result means the stats cannot prove anything, so only an exact `true` prunes. let mut ctx = self.session.create_execution_ctx(); - Ok(match applied.execute::<Columnar>(&mut ctx)? { - Columnar::Constant(s) => s.scalar().as_bool().value() == Some(true), - Columnar::Canonical(_) => false, - }) + let pruned = applied.execute_scalar(0, &mut ctx)?; + Ok(pruned.as_bool().value() == Some(true)) } pub fn splits(&self) -> VortexResult>> { diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 2ba10d96684..05320f3a836 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -35,6 +35,7 @@ use vortex_array::dtype::PType::I32; use vortex_array::dtype::StructFields; use vortex_array::expr::and; use vortex_array::expr::cast; +use vortex_array::expr::col; use vortex_array::expr::eq; use vortex_array::expr::get_item; use vortex_array::expr::gt; @@ -1953,3 +1954,52 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> { Ok(()) } + +#[tokio::test] +async fn test_can_prune_composite_predicates() -> VortexResult<()> { + let array = StructArray::from_fields(&[ + ("age", buffer![15i32, 18, 22, 25].into_array()), + ("price", buffer![120i32, 130, 140, 150].into_array()), + ])? 
+ .into_array(); + + let mut buf = ByteBufferMut::empty(); + let mut writer = SESSION + .write_options() + .with_file_statistics(PRUNING_STATS.to_vec()) + .writer(&mut buf, array.dtype().clone()); + writer.push(array).await?; + writer.finish().await?; + + let file = SESSION.open_options().open_buffer(buf)?; + + // Control: a bare comparison. + assert!( + file.can_prune(&gt(col("age"), lit(30)))?, + "bare gt should prune" + ); + + // eq falsification is internally or(min > lit, lit > max). + assert!( + file.can_prune(&eq(col("age"), lit(5)))?, + "eq outside min/max should prune" + ); + + // Composite and/or trees. + assert!( + file.can_prune(&and(gt(col("age"), lit(30)), lt(col("price"), lit(100))))?, + "and of falsified branches should prune" + ); + assert!( + file.can_prune(&or(gt(col("age"), lit(30)), lt(col("age"), lit(10))))?, + "or of falsified branches should prune" + ); + + // Non-prunable predicate: matches data within bounds. + assert!( + !file.can_prune(&eq(col("age"), lit(18)))?, + "eq within min/max must not prune" + ); + + Ok(()) +}