diff --git a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs index 2e517819059..eba5be1b749 100644 --- a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs +++ b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs @@ -273,15 +273,13 @@ where let total_chunks = data.len().div_ceil(1024); let mut chunk_offsets: BufferMut = BufferMut::with_capacity(total_chunks); - for (idx, value) in data.iter().enumerate() { + for ((idx, value), valid) in data.iter().enumerate().zip(validity_mask.iter()) { if (idx % 1024) == 0 { // Record the patch index offset for each chunk. chunk_offsets.push(values.len() as u64); } - if (value.leading_zeros() as usize) < T::PTYPE.bit_width() - bit_width as usize - && validity_mask.value(idx) - { + if (value.leading_zeros() as usize) < T::PTYPE.bit_width() - bit_width as usize && valid { indices.push(P::from(idx).vortex_expect("cast index from usize")); values.push(*value); } diff --git a/encodings/sparse/src/canonical.rs b/encodings/sparse/src/canonical.rs index ac8c072e7b1..dd84123851a 100644 --- a/encodings/sparse/src/canonical.rs +++ b/encodings/sparse/src/canonical.rs @@ -226,7 +226,11 @@ fn execute_sparse_lists_inner( let mut next_index = 0; - for (patch_idx, sparse_idx) in patch_indices.iter().enumerate() { + for ((patch_idx, sparse_idx), patch_valid) in patch_indices + .iter() + .enumerate() + .zip(patch_values_validity.iter()) + { let sparse_idx = sparse_idx .to_usize() .vortex_expect("patch index must fit in usize"); @@ -237,7 +241,7 @@ fn execute_sparse_lists_inner( sparse_idx - next_index, ); - if patch_values_validity.value(patch_idx) { + if patch_valid { let patch_list = patch_values .list_elements_at(patch_idx) .vortex_expect("list_elements_at"); @@ -318,7 +322,7 @@ fn execute_sparse_fixed_size_list_inner( .iter() .map(|x| (*x).to_usize().vortex_expect("index must fit in usize")); - for (patch_idx, sparse_idx) in indices.enumerate() { + for ((patch_idx, sparse_idx), patch_valid) in indices.enumerate().zip(values_validity.iter()) { // Fill gap before this patch with fill values. append_fixed_size_list_fill( &mut builder, @@ -327,7 +331,7 @@ fn execute_sparse_fixed_size_list_inner( ); // Append the patch value, handling null patches by appending defaults. - if values_validity.value(patch_idx) { + if patch_valid { let patch_list = values .fixed_size_list_elements_at(patch_idx) .vortex_expect("fixed_size_list_elements_at"); diff --git a/vortex-array/src/aggregate_fn/accumulator_grouped.rs b/vortex-array/src/aggregate_fn/accumulator_grouped.rs index ed651ba3d75..4b94159127b 100644 --- a/vortex-array/src/aggregate_fn/accumulator_grouped.rs +++ b/vortex-array/src/aggregate_fn/accumulator_grouped.rs @@ -229,12 +229,13 @@ impl GroupedAccumulator { )?; let mut states = builder_with_capacity(&self.partial_dtype, offsets.len()); - for (i, (offset, size)) in offsets.iter().zip(sizes.iter()).enumerate() { + // `validity` is the per-group list-view validity, so it is zipped element-wise with the + // offsets and sizes (one entry per group). + for ((offset, size), valid) in offsets.iter().zip(sizes.iter()).zip(validity.iter()) { let offset = offset.to_usize().vortex_expect("Offset value is not usize"); let size = size.to_usize().vortex_expect("Size value is not usize"); - // validity is for the outer list view, so it must be indexed with `i` - if validity.value(i) { + if valid { let group = elements.slice(offset..offset + size)?; accumulator.accumulate(&group, ctx)?; states.append_scalar(&accumulator.flush()?)?; @@ -304,8 +305,8 @@ impl GroupedAccumulator { .to_usize() .vortex_expect("List size is not usize"); - for i in 0..groups.len() { - if validity.value(i) { + for valid in validity.iter() { + if valid { let group = elements.slice(offset..offset + size)?; accumulator.accumulate(&group, ctx)?; states.append_scalar(&accumulator.flush()?)?; diff --git a/vortex-array/src/arrays/fixed_size_list/compute/take.rs b/vortex-array/src/arrays/fixed_size_list/compute/take.rs index 8df3bae0b0c..193730f62c4 100644 --- a/vortex-array/src/arrays/fixed_size_list/compute/take.rs +++ b/vortex-array/src/arrays/fixed_size_list/compute/take.rs @@ -175,13 +175,11 @@ fn take_nullable_fsl( let mut new_validity_builder = BitBufferMut::with_capacity(new_len); // Build the element indices while tracking which lists are null. - for (i, data_idx) in indices.iter().enumerate() { + for (data_idx, is_index_valid) in indices.iter().zip(indices_validity.iter()) { let data_idx = data_idx .to_usize() .unwrap_or_else(|| vortex_panic!("Failed to convert index to usize: {}", data_idx)); - let is_index_valid = indices_validity.value(i); - // The list is null if the index is null or the indexed element is null. if !is_index_valid || !array_validity.value(data_idx) { // Append placeholder zeros for null lists. These will be masked by the validity array. diff --git a/vortex-array/src/arrays/list/compute/take.rs b/vortex-array/src/arrays/list/compute/take.rs index 43117b1b834..f96cac5cf32 100644 --- a/vortex-array/src/arrays/list/compute/take.rs +++ b/vortex-array/src/arrays/list/compute/take.rs @@ -164,8 +164,8 @@ fn _take_nullable::with_capacity(len); let mut sizes = BufferMut::::with_capacity(len); - for (idx, (out_offsets, out_sizes)) in offsets + for ((idx, (out_offsets, out_sizes)), selected) in offsets .spare_capacity_mut() .iter_mut() .zip(sizes.spare_capacity_mut().iter_mut()) .take(len) .enumerate() + .zip(mask.iter()) { - if mask.value(idx) { + if selected { out_offsets.write(true_offsets[idx]); out_sizes.write(true_sizes[idx]); } else { diff --git a/vortex-array/src/arrays/varbin/compute/take.rs b/vortex-array/src/arrays/varbin/compute/take.rs index cb4b4031cd4..5826633aea3 100644 --- a/vortex-array/src/arrays/varbin/compute/take.rs +++ b/vortex-array/src/arrays/varbin/compute/take.rs @@ -202,8 +202,8 @@ fn take_nullable MaskBoolIter<'_> { + match self { + Mask::AllTrue(len) => MaskBoolIter::Repeat { + value: true, + remaining: *len, + }, + Mask::AllFalse(len) => MaskBoolIter::Repeat { + value: false, + remaining: *len, + }, + Mask::Values(values) => MaskBoolIter::Bits(values.bit_buffer().iter()), + } + } + /// Returns the first true index in the mask. pub fn first(&self) -> Option { match &self { @@ -814,6 +835,48 @@ pub enum MaskIter<'a> { Slices(&'a [(usize, usize)]), } +/// Iterator yielding one `bool` per element of a [`Mask`], in order. +/// +/// Created by [`Mask::iter`]. +pub enum MaskBoolIter<'a> { + /// An all-true or all-false run. + Repeat { + /// The constant value yielded by every element of the run. + value: bool, + /// The number of elements still to yield. + remaining: usize, + }, + /// Per-element bits of a [`Mask::Values`] mask. + Bits(BitIterator<'a>), +} + +impl Iterator for MaskBoolIter<'_> { + type Item = bool; + + #[inline] + fn next(&mut self) -> Option { + match self { + Self::Repeat { remaining: 0, .. } => None, + Self::Repeat { value, remaining } => { + *remaining -= 1; + Some(*value) + } + Self::Bits(bits) => bits.next(), + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let remaining = match self { + Self::Repeat { remaining, .. } => *remaining, + Self::Bits(bits) => bits.len(), + }; + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator for MaskBoolIter<'_> {} + impl From for Mask { fn from(value: BitBuffer) -> Self { Self::from_buffer(value) diff --git a/vortex-mask/src/tests.rs b/vortex-mask/src/tests.rs index 65687afef15..c1a4928f548 100644 --- a/vortex-mask/src/tests.rs +++ b/vortex-mask/src/tests.rs @@ -725,3 +725,29 @@ fn test_mask_concat_mixed_types() { assert!(!result.value(6)); // from all_false assert!(!result.value(7)); // from all_false } + +// `Mask::iter` per-element bool iteration + +#[rstest] +#[case::all_true(Mask::new_true(4), vec![true, true, true, true])] +#[case::all_false(Mask::new_false(3), vec![false, false, false])] +#[case::values( + Mask::from_buffer(BitBuffer::from_iter([true, false, true, true, false])), + vec![true, false, true, true, false] +)] +#[case::empty(Mask::new_true(0), vec![])] +fn mask_iter_matches_value(#[case] mask: Mask, #[case] expected: Vec) { + // Iterator yields exactly one bool per element, matching `value`. + let collected: Vec = mask.iter().collect(); + assert_eq!(collected, expected); + + let by_value: Vec = (0..mask.len()).map(|i| mask.value(i)).collect(); + assert_eq!(collected, by_value); + + // ExactSizeIterator reports the right length, including after partial consumption. + let mut it = mask.iter(); + assert_eq!(it.len(), mask.len()); + if it.next().is_some() { + assert_eq!(it.len(), mask.len() - 1); + } +}