From 0bba84e30010988048e2a33af17269319b5eb2e5 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 16:37:20 +0000 Subject: [PATCH 1/3] perf: branchless boolean zip kernel Add a dedicated `ZipKernel for Bool` that blends the two value bitmaps with the mask in a single bitwise pass -- `(true & mask) | (false & !mask)` -- instead of the generic per-run builder, so boolean zips are branch-free and mask-shape independent. Also add a shared `zip_validity` helper to the zip module that builds the result validity as a (lazy) zip over the two boolean validity bitmaps, reusing this kernel. This gives the per-encoding zip kernels one shared validity-selection path; the recursion terminates immediately because validity bitmaps are non-nullable. Adds a small `bool_zip` divan benchmark (nonnull ~7us, nullable ~14us). Signed-off-by: Joe Isaacs --- vortex-array/Cargo.toml | 4 + vortex-array/benches/bool_zip.rs | 65 +++++++++ vortex-array/src/arrays/bool/compute/mod.rs | 1 + vortex-array/src/arrays/bool/compute/zip.rs | 131 ++++++++++++++++++ vortex-array/src/arrays/bool/vtable/kernel.rs | 2 + vortex-array/src/scalar_fn/fns/zip/mod.rs | 29 ++++ 6 files changed, 232 insertions(+) create mode 100644 vortex-array/benches/bool_zip.rs create mode 100644 vortex-array/src/arrays/bool/compute/zip.rs diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index d5abd7bef44..ac0024c2631 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -188,6 +188,10 @@ harness = false name = "varbinview_zip" harness = false +[[bench]] +name = "bool_zip" +harness = false + [[bench]] name = "take_primitive" harness = false diff --git a/vortex-array/benches/bool_zip.rs b/vortex-array/benches/bool_zip.rs new file mode 100644 index 00000000000..c537d7b074f --- /dev/null +++ b/vortex-array/benches/bool_zip.rs @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +#![expect(clippy::unwrap_used)] + +use divan::Bencher; +use vortex_array::ArrayRef; +use vortex_array::IntoArray; +use vortex_array::LEGACY_SESSION; +use vortex_array::RecursiveCanonical; +use vortex_array::VortexSessionExecute; +use vortex_array::arrays::BoolArray; +use vortex_array::builtins::ArrayBuiltins; +use vortex_mask::Mask; + +fn main() { + divan::main(); +} + +const LEN: usize = 65_536; + +/// Fragmented (alternating) mask: the worst case for the generic per-run builder this kernel +/// replaces. The branchless bitmap blend is mask-shape-independent, so one shape suffices. +fn mask() -> Mask { + Mask::from_iter((0..LEN).map(|i| i.is_multiple_of(2))) +} + +#[divan::bench] +fn nonnull(bencher: Bencher) { + let if_true = BoolArray::from_iter((0..LEN).map(|i| i.is_multiple_of(2))).into_array(); + let if_false = BoolArray::from_iter((0..LEN).map(|i| i.is_multiple_of(3))).into_array(); + run(bencher, if_true, if_false); +} + +#[divan::bench] +fn nullable(bencher: Bencher) { + let if_true = BoolArray::from_iter( + (0..LEN).map(|i| (!i.is_multiple_of(7)).then_some(i.is_multiple_of(2))), + ) + .into_array(); + let if_false = BoolArray::from_iter( + (0..LEN).map(|i| (!i.is_multiple_of(5)).then_some(i.is_multiple_of(3))), + ) + .into_array(); + run(bencher, if_true, if_false); +} + +fn run(bencher: Bencher, if_true: ArrayRef, if_false: ArrayRef) { + let mask = mask(); + bencher + .with_inputs(|| { + ( + if_true.clone(), + if_false.clone(), + mask.clone().into_array(), + LEGACY_SESSION.create_execution_ctx(), + ) + }) + .bench_refs(|(t, f, m, ctx)| { + m.zip(t.clone(), f.clone()) + .unwrap() + .execute::(ctx) + .unwrap(); + }); +} diff --git a/vortex-array/src/arrays/bool/compute/mod.rs b/vortex-array/src/arrays/bool/compute/mod.rs index 9649bd31d6c..9b71578cd84 100644 --- a/vortex-array/src/arrays/bool/compute/mod.rs +++ b/vortex-array/src/arrays/bool/compute/mod.rs @@ -8,6 +8,7 @@ mod mask; pub mod rules; mod slice; mod take; +mod zip; #[cfg(test)] mod tests { diff --git a/vortex-array/src/arrays/bool/compute/zip.rs b/vortex-array/src/arrays/bool/compute/zip.rs new file mode 100644 index 00000000000..68d18d392ea --- /dev/null +++ b/vortex-array/src/arrays/bool/compute/zip.rs @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; +use vortex_mask::Mask; + +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::array::ArrayView; +use crate::arrays::Bool; +use crate::arrays::BoolArray; +use crate::arrays::bool::BoolArrayExt; +use crate::scalar_fn::fns::zip::ZipKernel; +use crate::scalar_fn::fns::zip::zip_validity; + +/// A branchless boolean zip kernel that blends the two value bitmaps with the mask in one pass. +/// +/// Booleans are bit-packed, so selecting `if_true` where the mask is set and `if_false` where it is +/// not is a single bitwise blend over the packed words — `(true & mask) | (false & !mask)` — instead +/// of the generic per-run builder. Validity is combined with [`zip_validity`], which itself reuses +/// this kernel (terminating immediately, since validity bitmaps are non-nullable). +impl ZipKernel for Bool { + fn zip( + if_true: ArrayView<'_, Bool>, + if_false: &ArrayRef, + mask: &ArrayRef, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let Some(if_false) = if_false.as_opt::() else { + return Ok(None); + }; + + // Null mask entries select `if_false`, matching `Zip`'s SQL ELSE semantics. + let mask = mask.try_to_mask_fill_null_false(ctx)?; + let mask_values = match &mask { + // Defer trivial masks to the generic zip, which just casts the surviving side. + Mask::AllTrue(_) | Mask::AllFalse(_) => return Ok(None), + Mask::Values(values) => values, + }; + let mask_bits = mask_values.bit_buffer(); + + // Branchless blend of the packed value bits: `(true & mask) | (false & !mask)`. + let true_bits = if_true.to_bit_buffer(); + let false_bits = if_false.to_bit_buffer(); + let values = (&true_bits & mask_bits) | false_bits.bitand_not(mask_bits); + + let validity = zip_validity(if_true.validity()?, if_false.validity()?, &mask)?; + + Ok(Some(BoolArray::new(values, validity).into_array())) + } +} + +#[cfg(test)] +mod tests { + use vortex_error::VortexResult; + use vortex_mask::Mask; + + use crate::ArrayRef; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::VortexSessionExecute; + use crate::arrays::Bool; + use crate::arrays::BoolArray; + use crate::assert_arrays_eq; + use crate::builtins::ArrayBuiltins; + + /// Blend two non-nullable bool arrays across the 64-bit mask chunk boundary + remainder. + #[test] + fn zip_nonnull_spans_mask_chunks() -> VortexResult<()> { + let len = 150usize; + let if_true = BoolArray::from_iter((0..len).map(|i| i.is_multiple_of(2))).into_array(); + let if_false = BoolArray::from_iter((0..len).map(|i| i.is_multiple_of(3))).into_array(); + + let bits: Vec = (0..len).map(|i| i.is_multiple_of(5) || i == 64).collect(); + let mask = Mask::from_iter(bits.iter().copied()); + + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let result = mask + .into_array() + .zip(if_true, if_false)? + .execute::(&mut ctx)?; + assert!(result.is::()); + + let expected = BoolArray::from_iter((0..len).map(|i| { + if bits[i] { + i.is_multiple_of(2) + } else { + i.is_multiple_of(3) + } + })) + .into_array(); + assert_arrays_eq!(result, expected); + Ok(()) + } + + /// With `Validity::Array` on both sides, select values and validity from the chosen side. + #[test] + fn zip_nullable_selects_values_and_validity() -> VortexResult<()> { + let len = 130usize; + let if_true = BoolArray::from_iter( + (0..len).map(|i| (!i.is_multiple_of(4)).then_some(i.is_multiple_of(2))), + ) + .into_array(); + let if_false = BoolArray::from_iter( + (0..len).map(|i| (!i.is_multiple_of(5)).then_some(i.is_multiple_of(3))), + ) + .into_array(); + + let bits: Vec = (0..len).map(|i| i.is_multiple_of(2)).collect(); + let mask = Mask::from_iter(bits.iter().copied()); + + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let result = mask + .into_array() + .zip(if_true, if_false)? + .execute::(&mut ctx)?; + assert!(result.is::()); + + let expected = BoolArray::from_iter((0..len).map(|i| { + if bits[i] { + (!i.is_multiple_of(4)).then_some(i.is_multiple_of(2)) + } else { + (!i.is_multiple_of(5)).then_some(i.is_multiple_of(3)) + } + })) + .into_array(); + assert_arrays_eq!(result, expected); + Ok(()) + } +} diff --git a/vortex-array/src/arrays/bool/vtable/kernel.rs b/vortex-array/src/arrays/bool/vtable/kernel.rs index 4f1047a5132..2fe2d974a8f 100644 --- a/vortex-array/src/arrays/bool/vtable/kernel.rs +++ b/vortex-array/src/arrays/bool/vtable/kernel.rs @@ -6,9 +6,11 @@ use crate::arrays::dict::TakeExecuteAdaptor; use crate::kernel::ParentKernelSet; use crate::scalar_fn::fns::cast::CastExecuteAdaptor; use crate::scalar_fn::fns::fill_null::FillNullExecuteAdaptor; +use crate::scalar_fn::fns::zip::ZipExecuteAdaptor; pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ ParentKernelSet::lift(&CastExecuteAdaptor(Bool)), ParentKernelSet::lift(&FillNullExecuteAdaptor(Bool)), ParentKernelSet::lift(&TakeExecuteAdaptor(Bool)), + ParentKernelSet::lift(&ZipExecuteAdaptor(Bool)), ]); diff --git a/vortex-array/src/scalar_fn/fns/zip/mod.rs b/vortex-array/src/scalar_fn/fns/zip/mod.rs index 3a558ea44f8..8a3e1b5bbc6 100644 --- a/vortex-array/src/scalar_fn/fns/zip/mod.rs +++ b/vortex-array/src/scalar_fn/fns/zip/mod.rs @@ -32,6 +32,7 @@ use crate::scalar_fn::ScalarFnId; use crate::scalar_fn::ScalarFnVTable; use crate::scalar_fn::SimplifyCtx; use crate::scalar_fn::fns::literal::Literal; +use crate::validity::Validity; /// An expression that conditionally selects between two arrays based on a boolean mask. /// @@ -236,6 +237,34 @@ fn zip_impl_with_builder( Ok(builder.finish()) } +/// Combine two validities for a row-wise zip: take `if_true`'s validity where `mask` is set and +/// `if_false`'s where it is not. +/// +/// That selection is itself a zip over the two boolean validity bitmaps, so it is built as a (lazy) +/// zip array — reusing the zip machinery rather than re-deriving the mask algebra. Trivial cases +/// where both sides' validity already agrees skip the zip. `mask` must already be null-filled so the +/// selection matches the accompanying value selection. Shared by the per-encoding zip kernels (e.g. +/// `Bool`, `Primitive`) that build their result directly. +pub(crate) fn zip_validity( + if_true: Validity, + if_false: Validity, + mask: &Mask, +) -> VortexResult { + match (&if_true, &if_false) { + (Validity::NonNullable, Validity::NonNullable) => return Ok(Validity::NonNullable), + (Validity::AllValid, Validity::AllValid) => return Ok(Validity::AllValid), + (Validity::AllInvalid, Validity::AllInvalid) => return Ok(Validity::AllInvalid), + _ => {} + } + + let len = mask.len(); + let validity = mask + .clone() + .into_array() + .zip(if_true.to_array(len), if_false.to_array(len))?; + Ok(Validity::Array(validity)) +} + #[cfg(test)] mod tests { use arrow_array::cast::AsArray; From 7ad4b18a60a71cb03ea79853f6ad9265ef6dfafa Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 16:41:41 +0000 Subject: [PATCH 2/3] Avoid private intra-doc link to zip_validity in bool zip kernel The doc comment on the public ZipKernel impl linked to the pub(crate) zip_validity, which -D rustdoc::private-intra-doc-links rejects. Use a plain code span instead. Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/bool/compute/zip.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/bool/compute/zip.rs b/vortex-array/src/arrays/bool/compute/zip.rs index 68d18d392ea..8a9e28dc69e 100644 --- a/vortex-array/src/arrays/bool/compute/zip.rs +++ b/vortex-array/src/arrays/bool/compute/zip.rs @@ -18,8 +18,8 @@ use crate::scalar_fn::fns::zip::zip_validity; /// /// Booleans are bit-packed, so selecting `if_true` where the mask is set and `if_false` where it is /// not is a single bitwise blend over the packed words — `(true & mask) | (false & !mask)` — instead -/// of the generic per-run builder. Validity is combined with [`zip_validity`], which itself reuses -/// this kernel (terminating immediately, since validity bitmaps are non-nullable). +/// of the generic per-run builder. Validity is combined with the shared `zip_validity`, which itself +/// reuses this kernel (terminating immediately, since validity bitmaps are non-nullable). impl ZipKernel for Bool { fn zip( if_true: ArrayView<'_, Bool>, From 86f2413f900814b49b32f673c40b4b8bc27e327c Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 9 Jun 2026 14:02:45 +0100 Subject: [PATCH 3/3] fix Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/bool/compute/zip.rs | 65 +++++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/vortex-array/src/arrays/bool/compute/zip.rs b/vortex-array/src/arrays/bool/compute/zip.rs index 8a9e28dc69e..2628b457a85 100644 --- a/vortex-array/src/arrays/bool/compute/zip.rs +++ b/vortex-array/src/arrays/bool/compute/zip.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex_buffer::BitBuffer; +use vortex_buffer::BufferMut; use vortex_error::VortexResult; use vortex_mask::Mask; @@ -40,10 +42,11 @@ impl ZipKernel for Bool { }; let mask_bits = mask_values.bit_buffer(); - // Branchless blend of the packed value bits: `(true & mask) | (false & !mask)`. - let true_bits = if_true.to_bit_buffer(); - let false_bits = if_false.to_bit_buffer(); - let values = (&true_bits & mask_bits) | false_bits.bitand_not(mask_bits); + let values = zip_value_bits( + &if_true.to_bit_buffer(), + &if_false.to_bit_buffer(), + mask_bits, + ); let validity = zip_validity(if_true.validity()?, if_false.validity()?, &mask)?; @@ -51,11 +54,39 @@ impl ZipKernel for Bool { } } +fn zip_value_bits(if_true: &BitBuffer, if_false: &BitBuffer, mask: &BitBuffer) -> BitBuffer { + assert_eq!(if_true.len(), if_false.len()); + assert_eq!(if_true.len(), mask.len()); + + let true_chunks = if_true.chunks(); + let false_chunks = if_false.chunks(); + let mask_chunks = mask.chunks(); + + let mut values = BufferMut::::with_capacity(true_chunks.num_u64s()); + for ((true_bits, false_bits), mask_bits) in true_chunks + .iter() + .zip(false_chunks.iter()) + .zip(mask_chunks.iter()) + { + values.push((true_bits & mask_bits) | (false_bits & !mask_bits)); + } + + if true_chunks.remainder_len() != 0 { + let true_bits = true_chunks.remainder_bits(); + let false_bits = false_chunks.remainder_bits(); + let mask_bits = mask_chunks.remainder_bits(); + values.push((true_bits & mask_bits) | (false_bits & !mask_bits)); + } + + BitBuffer::new(values.freeze().into_byte_buffer(), if_true.len()) +} + #[cfg(test)] mod tests { use vortex_error::VortexResult; use vortex_mask::Mask; + use super::zip_value_bits; use crate::ArrayRef; use crate::IntoArray; use crate::LEGACY_SESSION; @@ -65,6 +96,32 @@ mod tests { use crate::assert_arrays_eq; use crate::builtins::ArrayBuiltins; + #[test] + fn blend_value_bits_boundaries() { + for len in [0usize, 1, 2, 7, 8, 9, 63, 64, 65, 127, 128] { + let if_true = (0..len).map(|i| i.is_multiple_of(2)).collect(); + let if_false = (0..len).map(|i| i.is_multiple_of(3)).collect(); + let mask = (0..len).map(|i| i % 3 != 1).collect(); + + let values = zip_value_bits(&if_true, &if_false, &mask); + + assert_eq!(values.len(), len); + assert_eq!( + values.iter().collect::>(), + (0..len) + .map(|i| { + if i % 3 != 1 { + i.is_multiple_of(2) + } else { + i.is_multiple_of(3) + } + }) + .collect::>(), + "failed for len {len}", + ); + } + } + /// Blend two non-nullable bool arrays across the 64-bit mask chunk boundary + remainder. #[test] fn zip_nonnull_spans_mask_chunks() -> VortexResult<()> {