diff --git a/Cargo.lock b/Cargo.lock index 52aaaebefa1..d6db894c022 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8561,7 +8561,6 @@ dependencies = [ name = "vortex-btrblocks" version = "0.1.0" dependencies = [ - "arrow-buffer", "codspeed-divan-compat", "env_logger", "getrandom 0.3.3", @@ -8611,7 +8610,6 @@ dependencies = [ name = "vortex-bytebool" version = "0.1.0" dependencies = [ - "arrow-buffer", "num-traits", "rstest", "vortex-array", @@ -8953,12 +8951,10 @@ dependencies = [ name = "vortex-fuzz" version = "0.1.0" dependencies = [ - "arrow-buffer", "arrow-ord", "itertools 0.14.0", "libfuzzer-sys", "strum 0.27.2", - "tokio", "vortex-array", "vortex-btrblocks", "vortex-buffer", @@ -9122,9 +9118,9 @@ dependencies = [ name = "vortex-mask" version = "0.1.0" dependencies = [ - "arrow-buffer", "itertools 0.14.0", "rstest", + "vortex-buffer", "vortex-error", ] @@ -9337,7 +9333,6 @@ dependencies = [ "rstest", "vortex-array", "vortex-buffer", - "vortex-dict", "vortex-dtype", "vortex-error", "vortex-mask", diff --git a/encodings/alp/src/alp/compute/between.rs b/encodings/alp/src/alp/compute/between.rs index adf323029f9..67c8a466c6f 100644 --- a/encodings/alp/src/alp/compute/between.rs +++ b/encodings/alp/src/alp/compute/between.rs @@ -104,7 +104,7 @@ mod tests { let res = between_impl(arr, lower, upper, Nullability::Nullable, options) .unwrap() .to_bool() - .boolean_buffer() + .bit_buffer() .iter() .collect_vec(); assert_eq!(res.len(), 1); diff --git a/encodings/alp/src/alp/compute/compare.rs b/encodings/alp/src/alp/compute/compare.rs index 405774233b5..7e8e754c362 100644 --- a/encodings/alp/src/alp/compute/compare.rs +++ b/encodings/alp/src/alp/compute/compare.rs @@ -148,7 +148,7 @@ mod tests { { alp_scalar_compare(alp, value, operator) .unwrap() - .map(|a| a.to_bool().boolean_buffer().iter().collect()) + .map(|a| a.to_bool().bit_buffer().iter().collect()) } #[test] @@ -166,7 +166,7 @@ mod tests { .unwrap() .to_bool(); - for v in r.boolean_buffer().iter() { + for v in 
r.bit_buffer().iter() { assert!(!v); } @@ -175,7 +175,7 @@ mod tests { .unwrap() .to_bool(); - for v in r.boolean_buffer().iter() { + for v in r.bit_buffer().iter() { assert!(v); } } @@ -196,7 +196,7 @@ mod tests { .unwrap() .to_bool(); - assert!(r_eq.boolean_buffer().iter().all(|v| !v)); + assert!(r_eq.bit_buffer().iter().all(|v| !v)); #[allow(clippy::excessive_precision)] let r_neq = alp_scalar_compare(&encoded, 1.234444f32, Operator::NotEq) @@ -204,7 +204,7 @@ mod tests { .unwrap() .to_bool(); - assert!(r_neq.boolean_buffer().iter().all(|v| v)); + assert!(r_neq.bit_buffer().iter().all(|v| v)); } #[test] @@ -223,7 +223,7 @@ mod tests { .to_bool(); // !(0.0605_f32 >= 0.06051_f32); - assert!(r_gte.boolean_buffer().iter().all(|v| !v)); + assert!(r_gte.bit_buffer().iter().all(|v| !v)); let r_gt = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Gt) .unwrap() @@ -231,7 +231,7 @@ mod tests { .to_bool(); // (0.0605_f32 > 0.06051_f32); - assert!(r_gt.boolean_buffer().iter().all(|v| !v)); + assert!(r_gt.bit_buffer().iter().all(|v| !v)); let r_lte = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Lte) .unwrap() @@ -239,7 +239,7 @@ mod tests { .to_bool(); // 0.0605_f32 <= 0.06051_f32; - assert!(r_lte.boolean_buffer().iter().all(|v| v)); + assert!(r_lte.bit_buffer().iter().all(|v| v)); let r_lt = alp_scalar_compare(&encoded, 0.06051_f32, Operator::Lt) .unwrap() @@ -247,7 +247,7 @@ mod tests { .to_bool(); //0.0605_f32 < 0.06051_f32; - assert!(r_lt.boolean_buffer().iter().all(|v| v)); + assert!(r_lt.bit_buffer().iter().all(|v| v)); } #[test] @@ -311,7 +311,7 @@ mod tests { .unwrap() .to_bool(); - for v in r.boolean_buffer().iter() { + for v in r.bit_buffer().iter() { assert!(!v); } } diff --git a/encodings/alp/src/alp/compute/filter.rs b/encodings/alp/src/alp/compute/filter.rs index bffa334f4cc..21269adfe75 100644 --- a/encodings/alp/src/alp/compute/filter.rs +++ b/encodings/alp/src/alp/compute/filter.rs @@ -34,9 +34,9 @@ 
register_kernel!(FilterKernelAdapter(ALPVTable).lift()); #[cfg(test)] mod test { use rstest::rstest; - use vortex_array::IntoArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::compute::conformance::filter::test_filter_conformance; + use vortex_array::{ArrayRef, IntoArray}; use vortex_buffer::buffer; use crate::ALPEncoding; @@ -50,7 +50,7 @@ mod test { 1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0 ].into_array())] - fn test_filter_alp_conformance(#[case] array: vortex_array::ArrayRef) { + fn test_filter_alp_conformance(#[case] array: ArrayRef) { let alp = ALPEncoding .encode(&array.to_canonical(), None) .unwrap() diff --git a/encodings/alp/src/alp/compute/mod.rs b/encodings/alp/src/alp/compute/mod.rs index f3bf00c1d78..11805c56101 100644 --- a/encodings/alp/src/alp/compute/mod.rs +++ b/encodings/alp/src/alp/compute/mod.rs @@ -33,7 +33,6 @@ mod tests { // Arrays with patterns #[case::repeating_pattern(alp_encode(&PrimitiveArray::from_iter([1.1f32, 2.2, 3.3, 1.1, 2.2, 3.3, 1.1, 2.2, 3.3]), None).unwrap())] #[case::close_values(alp_encode(&PrimitiveArray::from_iter([100.001f64, 100.002, 100.003, 100.004, 100.005]), None).unwrap())] - fn test_alp_consistency(#[case] array: ALPArray) { test_array_consistency(array.as_ref()); } diff --git a/encodings/alp/src/alp_rd/compute/mod.rs b/encodings/alp/src/alp_rd/compute/mod.rs index 2a9b328f169..c04cda8687c 100644 --- a/encodings/alp/src/alp_rd/compute/mod.rs +++ b/encodings/alp/src/alp_rd/compute/mod.rs @@ -64,7 +64,6 @@ mod tests { let encoder = RDEncoder::new(&values); encoder.encode(&arr) })] - fn test_alp_rd_consistency(#[case] array: ALPRDArray) { test_array_consistency(array.as_ref()); } diff --git a/encodings/bytebool/Cargo.toml b/encodings/bytebool/Cargo.toml index f33c7f779f2..682b40235c7 100644 --- a/encodings/bytebool/Cargo.toml +++ b/encodings/bytebool/Cargo.toml @@ -17,7 +17,6 @@ version = { workspace = true } workspace = true 
[dependencies] -arrow-buffer = { workspace = true } num-traits = { workspace = true } vortex-array = { workspace = true } vortex-buffer = { workspace = true } diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index 833d67f4e46..e33ba498dff 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -4,7 +4,6 @@ use std::fmt::Debug; use std::ops::Range; -use arrow_buffer::BooleanBuffer; use vortex_array::arrays::BoolArray; use vortex_array::stats::{ArrayStats, StatsSetRef}; use vortex_array::validity::Validity; @@ -13,7 +12,7 @@ use vortex_array::vtable::{ ValidityVTableFromValidityHelper, }; use vortex_array::{ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable}; -use vortex_buffer::ByteBuffer; +use vortex_buffer::{BitBuffer, ByteBuffer}; use vortex_dtype::DType; use vortex_error::vortex_panic; use vortex_scalar::Scalar; @@ -114,9 +113,9 @@ impl ArrayVTable for ByteBoolVTable { impl CanonicalVTable for ByteBoolVTable { fn canonicalize(array: &ByteBoolArray) -> Canonical { - let boolean_buffer = BooleanBuffer::from(array.as_slice()); + let boolean_buffer = BitBuffer::from(array.as_slice()); let validity = array.validity().clone(); - Canonical::Bool(BoolArray::from_bool_buffer(boolean_buffer, validity)) + Canonical::Bool(BoolArray::from_bit_buffer(boolean_buffer, validity)) } } diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs index 78838ed6d66..074c0848eb7 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/compare.rs @@ -149,7 +149,7 @@ mod tests { let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); assert_eq!( - res.to_bool().boolean_buffer().iter().collect::>(), + res.to_bool().bit_buffer().iter().collect::>(), vec![false, false, true] ); } @@ -206,13 +206,13 @@ mod tests { let 
res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); - assert_eq!(res.to_bool().bool_vec().unwrap(), vec![false, false, false]); + assert_eq!(res.to_bool().bool_vec(), vec![false, false, false]); let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap(); - assert_eq!(res.to_bool().bool_vec().unwrap(), vec![true, true, true]); + assert_eq!(res.to_bool().bool_vec(), vec![true, true, true]); let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); - assert_eq!(res.to_bool().bool_vec().unwrap(), vec![false, false, false]); + assert_eq!(res.to_bool().bool_vec(), vec![false, false, false]); // This cannot be converted to a i32. let rhs = ConstantArray::new( @@ -222,12 +222,12 @@ mod tests { let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); - assert_eq!(res.to_bool().bool_vec().unwrap(), vec![false, false, false]); + assert_eq!(res.to_bool().bool_vec(), vec![false, false, false]); let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Gt).unwrap(); - assert_eq!(res.to_bool().bool_vec().unwrap(), vec![false, false, false]); + assert_eq!(res.to_bool().bool_vec(), vec![false, false, false]); let res = compare(lhs.as_ref(), rhs.as_ref(), Operator::Lt).unwrap(); - assert_eq!(res.to_bool().bool_vec().unwrap(), vec![true, true, true]); + assert_eq!(res.to_bool().bool_vec(), vec![true, true, true]); } } diff --git a/encodings/dict/src/array.rs b/encodings/dict/src/array.rs index 8604cfe93f4..f96443b4390 100644 --- a/encodings/dict/src/array.rs +++ b/encodings/dict/src/array.rs @@ -3,10 +3,10 @@ use std::fmt::Debug; -use arrow_buffer::BooleanBuffer; use vortex_array::stats::{ArrayStats, StatsSetRef}; use vortex_array::vtable::{ArrayVTable, NotSupported, VTable, ValidityVTable}; use vortex_array::{Array, ArrayRef, EncodingId, EncodingRef, ToCanonical, vtable}; +use vortex_buffer::BitBuffer; use vortex_dtype::{DType, match_each_integer_ptype}; use vortex_error::{VortexExpect as _, VortexResult, vortex_bail}; use 
vortex_mask::{AllOr, Mask}; @@ -142,13 +142,13 @@ impl ValidityVTable for DictVTable { fn validity_mask(array: &DictArray) -> Mask { let codes_validity = array.codes().validity_mask(); - match codes_validity.boolean_buffer() { + match codes_validity.bit_buffer() { AllOr::All => { let primitive_codes = array.codes().to_primitive(); let values_mask = array.values().validity_mask(); let is_valid_buffer = match_each_integer_ptype!(primitive_codes.ptype(), |P| { let codes_slice = primitive_codes.as_slice::

(); - BooleanBuffer::collect_bool(array.len(), |idx| { + BitBuffer::collect_bool(array.len(), |idx| { #[allow(clippy::cast_possible_truncation)] values_mask.value(codes_slice[idx] as usize) }) @@ -162,7 +162,7 @@ impl ValidityVTable for DictVTable { let is_valid_buffer = match_each_integer_ptype!(primitive_codes.ptype(), |P| { let codes_slice = primitive_codes.as_slice::

(); #[allow(clippy::cast_possible_truncation)] - BooleanBuffer::collect_bool(array.len(), |idx| { + BitBuffer::collect_bool(array.len(), |idx| { validity_buff.value(idx) && values_mask.value(codes_slice[idx] as usize) }) }); @@ -174,7 +174,6 @@ impl ValidityVTable for DictVTable { #[cfg(test)] mod test { - use arrow_buffer::BooleanBuffer; use rand::distr::{Distribution, StandardUniform}; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; @@ -182,7 +181,7 @@ mod test { use vortex_array::builders::builder_with_capacity; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical}; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use vortex_dtype::Nullability::NonNullable; use vortex_dtype::{DType, NativePType, PType, UnsignedPType}; use vortex_error::{VortexExpect, VortexUnwrap, vortex_panic}; @@ -195,7 +194,7 @@ mod test { let dict = DictArray::try_new( PrimitiveArray::new( buffer![0u32, 1, 2, 2, 1], - Validity::from(BooleanBuffer::from(vec![true, false, true, false, true])), + Validity::from(BitBuffer::from(vec![true, false, true, false, true])), ) .into_array(), PrimitiveArray::new(buffer![3, 6, 9], Validity::AllValid).into_array(), @@ -214,7 +213,7 @@ mod test { buffer![0u32, 1, 2, 2, 1].into_array(), PrimitiveArray::new( buffer![3, 6, 9], - Validity::from(BooleanBuffer::from(vec![true, false, false])), + Validity::from(BitBuffer::from(vec![true, false, false])), ) .into_array(), ) @@ -231,12 +230,12 @@ mod test { let dict = DictArray::try_new( PrimitiveArray::new( buffer![0u32, 1, 2, 2, 1], - Validity::from(BooleanBuffer::from(vec![true, false, true, false, true])), + Validity::from(BitBuffer::from(vec![true, false, true, false, true])), ) .into_array(), PrimitiveArray::new( buffer![3, 6, 9], - Validity::from(BooleanBuffer::from(vec![false, true, true])), + Validity::from(BitBuffer::from(vec![false, true, true])), ) .into_array(), ) @@ -253,7 +252,7 @@ mod test { let dict = DictArray::try_new( 
PrimitiveArray::new( buffer![0u32, 1, 2, 2, 1], - Validity::from(BooleanBuffer::from(vec![true, false, true, false, true])), + Validity::from(BitBuffer::from(vec![true, false, true, false, true])), ) .into_array(), PrimitiveArray::new(buffer![3, 6, 9], Validity::NonNullable).into_array(), @@ -312,8 +311,8 @@ mod test { assert_eq!(into_prim.as_slice::(), prim_into.as_slice::()); assert_eq!( - into_prim.validity_mask().boolean_buffer(), - prim_into.validity_mask().boolean_buffer() + into_prim.validity_mask().bit_buffer(), + prim_into.validity_mask().bit_buffer() ) } } diff --git a/encodings/dict/src/builders/bytes.rs b/encodings/dict/src/builders/bytes.rs index 3037f8bbf10..961d8b7ae6b 100644 --- a/encodings/dict/src/builders/bytes.rs +++ b/encodings/dict/src/builders/bytes.rs @@ -2,15 +2,15 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::BuildHasher; +use std::mem; use std::sync::Arc; -use arrow_buffer::NullBufferBuilder; use vortex_array::accessor::ArrayAccessor; use vortex_array::arrays::binary_view::BinaryView; use vortex_array::arrays::{PrimitiveArray, VarBinVTable, VarBinViewArray, VarBinViewVTable}; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayRef, IntoArray}; -use vortex_buffer::{BufferMut, ByteBufferMut}; +use vortex_buffer::{BitBufferMut, BufferMut, ByteBufferMut}; use vortex_dtype::{DType, UnsignedPType}; use vortex_error::{VortexExpect, VortexResult, VortexUnwrap, vortex_bail, vortex_panic}; use vortex_utils::aliases::hash_map::{DefaultHashBuilder, HashTable, HashTableEntry, RandomState}; @@ -23,7 +23,7 @@ pub struct BytesDictBuilder { lookup: Option>, views: BufferMut, values: ByteBufferMut, - values_nulls: NullBufferBuilder, + values_nulls: BitBufferMut, hasher: RandomState, dtype: DType, max_dict_bytes: usize, @@ -45,7 +45,7 @@ impl BytesDictBuilder { lookup: Some(HashTable::new()), views: BufferMut::::empty(), values: BufferMut::empty(), - values_nulls: NullBufferBuilder::new(0), + values_nulls: 
BitBufferMut::empty(), hasher: DefaultHashBuilder::default(), dtype, max_dict_bytes: constraints.max_bytes, @@ -59,7 +59,7 @@ impl BytesDictBuilder { #[inline] fn lookup_bytes(&self, idx: usize) -> Option<&[u8]> { - self.values_nulls.is_valid(idx).then(|| { + self.values_nulls.value(idx).then(|| { let bin_view = &self.views[idx]; if bin_view.is_inlined() { bin_view.as_inlined().value() @@ -87,7 +87,7 @@ impl BytesDictBuilder { None => { // Null value self.views.push(BinaryView::default()); - self.values_nulls.append_null(); + self.values_nulls.append_false(); } Some(val) => { let view = BinaryView::make_view( @@ -106,7 +106,7 @@ impl BytesDictBuilder { } self.views.push(view); - self.values_nulls.append_non_null(); + self.values_nulls.append_true(); if !view.is_inlined() { self.values.extend_from_slice(val); } @@ -173,8 +173,8 @@ impl DictEncoder for BytesDictBuilder { self.views.clone().freeze(), Arc::from([self.values.clone().freeze()]), self.dtype.clone(), - Validity::from_null_buffer( - self.values_nulls.finish_cloned(), + Validity::from_bit_buffer( + mem::take(&mut self.values_nulls).freeze(), self.dtype.nullability(), ), ) diff --git a/encodings/dict/src/builders/primitive.rs b/encodings/dict/src/builders/primitive.rs index fe581a2a059..409f8a7487f 100644 --- a/encodings/dict/src/builders/primitive.rs +++ b/encodings/dict/src/builders/primitive.rs @@ -2,14 +2,14 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::Hash; +use std::mem; -use arrow_buffer::NullBufferBuilder; use rustc_hash::FxBuildHasher; use vortex_array::accessor::ArrayAccessor; use vortex_array::arrays::{NativeValue, PrimitiveArray}; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical}; -use vortex_buffer::BufferMut; +use vortex_buffer::{BitBufferMut, BufferMut}; use vortex_dtype::{NativePType, Nullability, PType, UnsignedPType}; use vortex_error::{VortexResult, vortex_bail, vortex_panic}; use 
vortex_utils::aliases::hash_map::{Entry, HashMap}; @@ -64,7 +64,7 @@ where Self { lookup: HashMap::with_hasher(FxBuildHasher), values: BufferMut::::empty(), - values_nulls: NullBufferBuilder::new(0), + values_nulls: BitBufferMut::empty(), nullability, max_dict_len, } @@ -85,11 +85,11 @@ where match v { None => { self.values.push(T::default()); - self.values_nulls.append_null(); + self.values_nulls.append_false(); } Some(v) => { self.values.push(v); - self.values_nulls.append_non_null(); + self.values_nulls.append_true(); } } Some(next_code) @@ -104,7 +104,7 @@ where pub struct PrimitiveDictBuilder { lookup: HashMap>, Code, FxBuildHasher>, values: BufferMut, - values_nulls: NullBufferBuilder, + values_nulls: BitBufferMut, nullability: Nullability, max_dict_len: usize, } @@ -136,7 +136,7 @@ where fn values(&mut self) -> VortexResult { Ok(PrimitiveArray::new( self.values.clone(), - Validity::from_null_buffer(self.values_nulls.finish_cloned(), self.nullability), + Validity::from_bit_buffer(mem::take(&mut self.values_nulls).freeze(), self.nullability), ) .into_array()) } diff --git a/encodings/dict/src/canonical.rs b/encodings/dict/src/canonical.rs index 8569d3b86e0..78276e4178d 100644 --- a/encodings/dict/src/canonical.rs +++ b/encodings/dict/src/canonical.rs @@ -3,12 +3,12 @@ use std::ops::Not; -use arrow_buffer::BooleanBuffer; use vortex_array::arrays::{BoolArray, ConstantArray}; use vortex_array::compute::{Operator, cast, compare, mask, take}; use vortex_array::validity::Validity; use vortex_array::vtable::CanonicalVTable; use vortex_array::{Array, ArrayRef, Canonical, IntoArray, ToCanonical}; +use vortex_buffer::BitBuffer; use vortex_dtype::{DType, Nullability}; use vortex_error::{VortexExpect, VortexResult}; use vortex_mask::{AllOr, Mask}; @@ -46,8 +46,8 @@ fn dict_bool_take(dict_array: &DictArray) -> VortexResult { let bool_values = values.to_bool(); let result_validity = bool_values.validity_mask(); - let bool_buffer = bool_values.boolean_buffer(); - let 
(first_match, second_match) = match result_validity.boolean_buffer() { + let bool_buffer = bool_values.bit_buffer(); + let (first_match, second_match) = match result_validity.bit_buffer() { AllOr::All => { let mut indices_iter = bool_buffer.set_indices(); (indices_iter.next(), indices_iter.next()) @@ -62,8 +62,8 @@ fn dict_bool_take(dict_array: &DictArray) -> VortexResult { Ok(match (first_match, second_match) { // Couldn't find a value match, so the result is all false (None, _) => match result_validity { - Mask::AllTrue(_) => BoolArray::from_bool_buffer( - BooleanBuffer::new_unset(codes.len()), + Mask::AllTrue(_) => BoolArray::from_bit_buffer( + BitBuffer::new_unset(codes.len()), Validity::copy_from_array(codes).union_nullability(result_nullability), ) .to_canonical(), @@ -72,8 +72,8 @@ fn dict_bool_take(dict_array: &DictArray) -> VortexResult { codes.len(), ) .to_canonical(), - Mask::Values(_) => BoolArray::from_bool_buffer( - BooleanBuffer::new_unset(codes.len()), + Mask::Values(_) => BoolArray::from_bit_buffer( + BitBuffer::new_unset(codes.len()), Validity::from_mask(result_validity, result_nullability).take(codes)?, ) .to_canonical(), @@ -107,9 +107,9 @@ fn dict_bool_take(dict_array: &DictArray) -> VortexResult { Operator::Eq, )?, &Mask::from_buffer( - take(BoolArray::from(rv.boolean_buffer().clone()).as_ref(), codes)? + take(BoolArray::from(rv.bit_buffer().clone()).as_ref(), codes)? .to_bool() - .boolean_buffer() + .bit_buffer() .not(), ), )? 
diff --git a/encodings/dict/src/compute/compare.rs b/encodings/dict/src/compute/compare.rs index 4ec63da6fdc..d3e96b0b87e 100644 --- a/encodings/dict/src/compute/compare.rs +++ b/encodings/dict/src/compute/compare.rs @@ -44,7 +44,6 @@ impl CompareKernel for DictVTable { } register_kernel!(CompareKernelAdapter(DictVTable).lift()); - #[cfg(test)] mod tests { use vortex_array::arrays::{ConstantArray, PrimitiveArray}; @@ -74,7 +73,7 @@ mod tests { .unwrap(); let res = res.to_bool(); assert_eq!( - res.boolean_buffer().iter().collect::>(), + res.bit_buffer().iter().collect::>(), vec![true, false, false] ); } @@ -95,7 +94,7 @@ mod tests { .unwrap(); let res = res.to_bool(); assert_eq!( - res.boolean_buffer().iter().collect::>(), + res.bit_buffer().iter().collect::>(), vec![false, true, true] ); } @@ -120,7 +119,7 @@ mod tests { .unwrap(); let res = res.to_bool(); assert_eq!( - res.boolean_buffer().iter().collect::>(), + res.bit_buffer().iter().collect::>(), vec![false, false, false] ); assert_eq!(res.dtype().nullability(), Nullability::Nullable); @@ -147,7 +146,7 @@ mod tests { .unwrap(); let res = res.to_bool(); assert_eq!( - res.boolean_buffer().iter().collect::>(), + res.bit_buffer().iter().collect::>(), vec![false, false, false] ); assert_eq!(res.dtype().nullability(), Nullability::Nullable); diff --git a/encodings/dict/src/compute/fill_null.rs b/encodings/dict/src/compute/fill_null.rs index 6bade487970..8ebbe080a72 100644 --- a/encodings/dict/src/compute/fill_null.rs +++ b/encodings/dict/src/compute/fill_null.rs @@ -20,7 +20,7 @@ impl FillNullKernel for DictVTable { )? .to_bool(); - let Some(first_fill_value) = found_fill_values.boolean_buffer().set_indices().next() else { + let Some(first_fill_value) = found_fill_values.bit_buffer().set_indices().next() else { // No fill values found, so we must canonicalize and fill_null. // TODO(ngates): compute kernels should all return Option to support this // fall back. 
@@ -50,12 +50,11 @@ register_kernel!(FillNullKernelAdapter(DictVTable).lift()); #[cfg(test)] mod tests { - use arrow_buffer::BooleanBuffer; use vortex_array::arrays::PrimitiveArray; use vortex_array::compute::fill_null; use vortex_array::validity::Validity; use vortex_array::{IntoArray, ToCanonical}; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use vortex_dtype::Nullability; use vortex_error::VortexUnwrap; use vortex_scalar::Scalar; @@ -67,7 +66,7 @@ mod tests { let dict = DictArray::try_new( PrimitiveArray::new( buffer![0u32, 1, 2], - Validity::from(BooleanBuffer::from(vec![true, false, true])), + Validity::from(BitBuffer::from(vec![true, false, true])), ) .into_array(), PrimitiveArray::new(buffer![10, 20, 20], Validity::AllValid).into_array(), diff --git a/encodings/fastlanes/benches/bitpacking_decompress_selection.rs b/encodings/fastlanes/benches/bitpacking_decompress_selection.rs index 2779227ffe2..6e172fee26e 100644 --- a/encodings/fastlanes/benches/bitpacking_decompress_selection.rs +++ b/encodings/fastlanes/benches/bitpacking_decompress_selection.rs @@ -11,7 +11,6 @@ use divan::Bencher; use rand::rngs::StdRng; use rand::{Rng as _, SeedableRng as _}; -use vortex_array::arrays::BooleanBuffer; use vortex_array::compute::{filter, warm_up_vtables}; use vortex_array::{Array, IntoArray as _, ToCanonical}; use vortex_buffer::BufferMut; @@ -38,7 +37,7 @@ fn decompress_bitpacking_early_filter(bencher: Bencher, fractio let mask = (0..100_000) .map(|_| rng.random_bool(fraction_kept)) - .collect::(); + .collect(); let mask = &Mask::from_buffer(mask); bencher.bench(|| filter(array.as_ref(), mask).unwrap().to_canonical()); @@ -58,7 +57,7 @@ fn decompress_bitpacking_late_filter(bencher: Bencher, fraction let mask = (0..100_000) .map(|_| rng.random_bool(fraction_kept)) - .collect::(); + .collect(); let mask = &Mask::from_buffer(mask); bencher diff --git a/encodings/fastlanes/benches/pipeline_bitpacking.rs 
b/encodings/fastlanes/benches/pipeline_bitpacking.rs index 0011cb332b9..7106fa9b342 100644 --- a/encodings/fastlanes/benches/pipeline_bitpacking.rs +++ b/encodings/fastlanes/benches/pipeline_bitpacking.rs @@ -4,14 +4,13 @@ #![allow(clippy::unwrap_used)] #![allow(unexpected_cfgs)] -use arrow_buffer::BooleanBuffer; use divan::Bencher; use mimalloc::MiMalloc; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; use vortex_array::compute::{filter, warm_up_vtables}; use vortex_array::{IntoArray, ToCanonical}; -use vortex_buffer::BufferMut; +use vortex_buffer::{BitBuffer, BufferMut}; use vortex_dtype::NativePType; use vortex_fastlanes::bitpack_to_best_bit_width; use vortex_mask::Mask; @@ -39,7 +38,7 @@ pub fn decompress_bitpacking_early_filter(bencher: Bencher, frac let mask = (0..LENGTH) .map(|_| rng.random_bool(fraction_kept)) - .collect::(); + .collect::(); bencher // Be sure to reconstruct the mask to avoid cached set_indices @@ -60,7 +59,7 @@ pub fn decompress_bitpacking_late_filter(bencher: Bencher, fract let mask = (0..LENGTH) .map(|_| rng.random_bool(fraction_kept)) - .collect::(); + .collect::(); bencher .with_inputs(|| Mask::from_buffer(mask.clone())) diff --git a/encodings/fastlanes/benches/pipeline_bitpacking_compare_scalar.rs b/encodings/fastlanes/benches/pipeline_bitpacking_compare_scalar.rs index de635802a94..0ade54ba1e6 100644 --- a/encodings/fastlanes/benches/pipeline_bitpacking_compare_scalar.rs +++ b/encodings/fastlanes/benches/pipeline_bitpacking_compare_scalar.rs @@ -4,14 +4,13 @@ #![allow(clippy::unwrap_used)] #![allow(unexpected_cfgs)] -use arrow_buffer::BooleanBuffer; use divan::Bencher; use mimalloc::MiMalloc; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; use vortex_array::compute::{filter, warm_up_vtables}; use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical}; -use vortex_buffer::BufferMut; +use vortex_buffer::{BitBuffer, BufferMut}; use vortex_dtype::NativePType; use vortex_error::VortexResult; use vortex_expr::{Scope, 
lit, lt, root}; @@ -58,7 +57,7 @@ pub fn eval>(bencher: Bencher, fraction_kept: f64) let mask = (0..100_000) .map(|_| rng.random_bool(fraction_kept)) - .collect::(); + .collect::(); let expr = lt(root(), lit(T::from_i32(2).unwrap())); diff --git a/encodings/fastlanes/benches/pipeline_bitpacking_kernel.rs b/encodings/fastlanes/benches/pipeline_bitpacking_kernel.rs index 4d87f6a16f5..8a52619c447 100644 --- a/encodings/fastlanes/benches/pipeline_bitpacking_kernel.rs +++ b/encodings/fastlanes/benches/pipeline_bitpacking_kernel.rs @@ -94,7 +94,7 @@ pub fn main() { // .to_primitive(); // // let mut mask_data = [0usize; N_WORDS]; -// for (i, chunk) in mask.to_boolean_buffer().bit_chunks().iter().enumerate() { +// for (i, chunk) in mask.to_bit_buffer().chunks().iter().enumerate() { // if i < N_WORDS { // mask_data[i] = usize::try_from(chunk).unwrap(); // } diff --git a/encodings/fastlanes/benches/pipeline_for.rs b/encodings/fastlanes/benches/pipeline_for.rs index 087ca864240..9c1261dbb91 100644 --- a/encodings/fastlanes/benches/pipeline_for.rs +++ b/encodings/fastlanes/benches/pipeline_for.rs @@ -4,14 +4,13 @@ #![allow(clippy::unwrap_used)] #![allow(unexpected_cfgs)] -use arrow_buffer::BooleanBuffer; use divan::Bencher; use mimalloc::MiMalloc; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; use vortex_array::compute::{filter, warm_up_vtables}; use vortex_array::{IntoArray, ToCanonical}; -use vortex_buffer::BufferMut; +use vortex_buffer::{BitBuffer, BufferMut}; use vortex_dtype::NativePType; use vortex_fastlanes::{FoRArray, bitpack_to_best_bit_width}; use vortex_mask::Mask; @@ -60,7 +59,7 @@ pub fn decompress_for_early_filter(bencher: Bencher, fraction_ke // let mask = generate_mask_with_runs(102_400, fraction_kept, &mut rng); let mask = (0..LENGTH) .map(|_| rng.random_bool(fraction_kept)) - .collect::(); + .collect::(); bencher .with_inputs(|| Mask::from_buffer(mask.clone())) diff --git a/encodings/fastlanes/src/bitpacking/compress.rs 
b/encodings/fastlanes/src/bitpacking/compress.rs index faf256925b9..976f71d7d5c 100644 --- a/encodings/fastlanes/src/bitpacking/compress.rs +++ b/encodings/fastlanes/src/bitpacking/compress.rs @@ -471,7 +471,7 @@ fn bit_width_histogram_typed( |v: T| (8 * size_of::()) - (PrimInt::leading_zeros(v) as usize); let mut bit_widths = vec![0usize; size_of::() * 8 + 1]; - match array.validity_mask().boolean_buffer() { + match array.validity_mask().bit_buffer() { AllOr::All => { // All values are valid. for v in array.as_slice::() { @@ -672,9 +672,7 @@ mod test { (0..(1 << 4)).collect::>(), compressed .validity_mask() - .to_null_buffer() - .unwrap() - .into_inner() + .to_bit_buffer() .set_indices() .collect::>() ) diff --git a/encodings/fastlanes/src/bitpacking/pipeline/mod.rs b/encodings/fastlanes/src/bitpacking/pipeline/mod.rs new file mode 100644 index 00000000000..5d17a360c28 --- /dev/null +++ b/encodings/fastlanes/src/bitpacking/pipeline/mod.rs @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod kernel; +mod unaligned_kernel; + +use std::any::Any; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; + +use fastlanes::FastLanes; +pub use kernel::BitPackedKernel; +pub use unaligned_kernel::BitPackedUnalignedKernel; +use vortex_array::pipeline::operators::{BindContext, Operator, OperatorRef}; +use vortex_array::pipeline::{Kernel, PipelineVTable, VType}; +use vortex_buffer::Buffer; +use vortex_dtype::{PhysicalPType, match_each_integer_ptype}; +use vortex_error::VortexResult; + +use crate::{BitPackedArray, BitPackedVTable}; + +impl PipelineVTable for BitPackedVTable { + fn to_operator(array: &BitPackedArray) -> VortexResult> { + if array.dtype.is_nullable() { + log::trace!("BitPackedVTable does not support nullable arrays"); + return Ok(None); + } + if array.patches.is_some() { + log::trace!("BitPackedVTable does not support nullable arrays"); + return Ok(None); + } + + 
Ok(Some(Arc::new(array.clone()))) + } +} + +impl Operator for BitPackedArray { + fn as_any(&self) -> &dyn Any { + self + } + + fn vtype(&self) -> VType { + VType::Primitive(self.ptype()) + } + + fn children(&self) -> &[OperatorRef] { + &[] + } + + fn with_children(&self, _children: Vec) -> OperatorRef { + Arc::new(self.clone()) + } + + fn bind(&self, _ctx: &dyn BindContext) -> VortexResult> { + assert!(self.bit_width > 0); + match_each_integer_ptype!(self.ptype(), |T| { + let packed_stride = + self.bit_width as usize * <::Physical as FastLanes>::LANES; + let buffer = Buffer::<::Physical>::from_byte_buffer( + self.packed.clone().into_byte_buffer(), + ); + if self.offset == 0 { + Ok(Box::new(BitPackedKernel::::new( + self.bit_width as usize, + packed_stride, + buffer, + 0, + )) as Box) + } else { + Ok(Box::new(BitPackedUnalignedKernel::::new( + self.bit_width as usize, + packed_stride, + buffer, + 0, + self.offset, + )) as Box) + } + }) + } +} + +impl Hash for BitPackedArray { + fn hash(&self, state: &mut H) { + self.packed.as_ptr().addr().hash(state); + self.bit_width.hash(state); + self.dtype.hash(state); + } +} + +#[cfg(test)] +mod tests { + use rand::prelude::StdRng; + use rand::{Rng, SeedableRng}; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::compute::filter; + use vortex_array::pipeline::{N, export_canonical_pipeline_expr}; + use vortex_array::{IntoArray, ToCanonical}; + use vortex_buffer::{BitBuffer, BufferMut}; + use vortex_mask::Mask; + use vortex_scalar::Scalar; + + use crate::{FoRArray, bitpack_to_best_bit_width}; + + #[test] + fn test_bitpacking_pipeline() { + let frac = 0.5; + let len = 10; + let mut rng = StdRng::seed_from_u64(0); + let values = (0i16..len) + .map(|_| rng.random_range(0..100)) + .collect::>(); + + let primitive_array = values.into_array().to_primitive(); + let bitpacked = bitpack_to_best_bit_width(&primitive_array).unwrap(); + + let mask = (0..len) + .map(|_| rng.random_bool(frac)) + .collect::(); + let mask = 
Mask::from_buffer(mask); + + let result = export_canonical_pipeline_expr( + bitpacked.dtype(), + bitpacked.len(), + bitpacked.to_operator().unwrap().unwrap().as_ref(), + &mask, + ) + .unwrap() + .into_array(); + + let expect = filter(bitpacked.to_canonical().as_ref(), &mask).unwrap(); + + assert_eq!(result.len(), expect.len()); + + for i in 0..mask.true_count() { + assert_eq!( + result.scalar_at(i), + expect.scalar_at(i), + "mismatch at index {}", + i, + ); + } + } + + #[test] + fn test_bitpacking_offset_simple() { + // Test a simple case: 1024 + 10 elements, offset by 5 + let len = 1034usize; + let offset = 5usize; + + let values = (0..len).map(|i| i as i32).collect::>(); + let primitive_array = values.into_array().to_primitive(); + let bitpacked = bitpack_to_best_bit_width(&primitive_array).unwrap(); + + let sliced = bitpacked.slice(offset..offset + N); + + // Just test first few elements manually + let val0: i32 = sliced.scalar_at(0).try_into().unwrap(); + let val1: i32 = sliced.scalar_at(1).try_into().unwrap(); + let val1019: i32 = sliced.scalar_at(1019).try_into().unwrap(); + assert_eq!(val0, 5i32); + assert_eq!(val1, 6i32); + assert_eq!(val1019, 1024i32); // This should be from second chunk + } + + #[test] + fn test_bitpacking_offset_with_partial_last_chunk() { + // Test case: offset + partial last chunk + let len = 1030usize; // 1024 + 6 elements + let offset = 5usize; + + let values = (0..len).map(|i| i as i32).collect::>(); + let primitive_array = values.into_array().to_primitive(); + let bitpacked = bitpack_to_best_bit_width(&primitive_array).unwrap(); + + let sliced = bitpacked.slice(offset..offset + N); + + assert_eq!(i32::try_from(sliced.scalar_at(0)).unwrap(), 5i32); // First element + assert_eq!(i32::try_from(sliced.scalar_at(1019)).unwrap(), 1024i32); // Element at chunk boundary + assert_eq!(i32::try_from(sliced.scalar_at(1020)).unwrap(), 1025i32); // Element at chunk boundary + assert_eq!(i32::try_from(sliced.scalar_at(1023)).unwrap(), 1028i32); 
// Last element in partial chunk + } + + #[test] + fn test_bitpacking_parent_pipeline() { + let len = 10; + let prim = (0i32..len).map(|x| x % 32).collect::(); + let mask = (0..len).map(|i| i % 32 != 0).collect::(); + let bitpack = bitpack_to_best_bit_width(&prim).unwrap(); + let array = FoRArray::try_new(bitpack.to_array(), Scalar::from(100i32)).unwrap(); + + let res = export_canonical_pipeline_expr( + array.dtype(), + array.len(), + array.to_operator().unwrap().unwrap().as_ref(), + &mask, + ) + .unwrap() + .into_array(); + + let expect = filter(array.as_ref(), &mask).unwrap(); + + for i in 0..mask.true_count() { + assert_eq!(res.scalar_at(i), expect.scalar_at(i), "{i}",); + } + } + + #[test] + fn test_bitpacking_pipeline_sparse_selection() { + // Test with very sparse selection (< 8 elements selected) + let len = 2048usize; + + let values = (0..len) + .map(|i| (i as i32) * 3 + 17) + .collect::>(); + + let primitive_array = values.into_array().to_primitive(); + let bitpacked = bitpack_to_best_bit_width(&primitive_array).unwrap(); + + // Test with offset + let offset = 7; + let sliced = bitpacked.slice(offset..len); + let sliced_mask = Mask::from_buffer(BitBuffer::from( + (0..sliced.len()) + .map(|i| { + let orig_idx = i + offset; + orig_idx == 10 + || orig_idx == 500 + || orig_idx == 1024 + || orig_idx == 1500 + || orig_idx == 2047 + }) + .collect::>(), + )); + + let result = export_canonical_pipeline_expr( + sliced.dtype(), + sliced.len(), + sliced.to_operator().unwrap().unwrap().as_ref(), + &sliced_mask, + ) + .unwrap() + .into_array(); + + let expect = filter(sliced.to_canonical().as_ref(), &sliced_mask).unwrap(); + + assert_eq!(result.len(), 5, "Should have exactly 5 selected elements"); + + for i in 0..5 { + assert_eq!( + result.scalar_at(i), + expect.scalar_at(i), + "Sparse selection mismatch at index {}", + i + ); + } + } +} diff --git a/encodings/fastlanes/src/for/compute/compare.rs b/encodings/fastlanes/src/for/compute/compare.rs index 
e16c744fccc..3a9ed646715 100644 --- a/encodings/fastlanes/src/for/compute/compare.rs +++ b/encodings/fastlanes/src/for/compute/compare.rs @@ -80,11 +80,10 @@ where #[cfg(test)] mod tests { - use arrow_buffer::BooleanBuffer; use vortex_array::arrays::PrimitiveArray; use vortex_array::validity::Validity; use vortex_array::{IntoArray, ToCanonical}; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use vortex_dtype::DType; use super::*; @@ -201,6 +200,6 @@ mod tests { expected: T, ) { let result = result.unwrap().unwrap().to_bool(); - assert_eq!(result.boolean_buffer(), &BooleanBuffer::from_iter(expected)); + assert_eq!(result.bit_buffer(), &BitBuffer::from_iter(expected)); } } diff --git a/encodings/fastlanes/src/rle/compress.rs b/encodings/fastlanes/src/rle/compress.rs index 08142e56b1c..a148f0ff076 100644 --- a/encodings/fastlanes/src/rle/compress.rs +++ b/encodings/fastlanes/src/rle/compress.rs @@ -8,7 +8,7 @@ use vortex_array::arrays::PrimitiveArray; use vortex_array::validity::Validity; use vortex_array::vtable::ValidityHelper; use vortex_array::{IntoArray, ToCanonical}; -use vortex_buffer::BufferMut; +use vortex_buffer::{BitBufferMut, BufferMut}; use vortex_dtype::{NativePType, match_each_native_ptype, match_each_unsigned_integer_ptype}; use vortex_error::{VortexResult, vortex_panic}; @@ -128,14 +128,14 @@ fn padded_validity(array: &PrimitiveArray) -> Validity { return Validity::Array(validity_array.clone()); } - let mut builder = arrow_buffer::BooleanBufferBuilder::new(padded_len); + let mut builder = BitBufferMut::with_capacity(padded_len); let bool_array = validity_array.to_bool(); - let bool_buffer = bool_array.boolean_buffer(); - builder.append_buffer(&bool_buffer.slice(0, len)); - builder.append_n(padded_len - len, false); + let bool_buffer = bool_array.bit_buffer(); + builder.append_buffer(&bool_buffer.slice(0..len)); + builder.append_n(false, padded_len - len); - Validity::from(builder.finish()) + Validity::from(builder.freeze()) } } 
} diff --git a/encodings/fsst/src/compute/compare.rs b/encodings/fsst/src/compute/compare.rs index ca020d9b4ff..e5bd5f0bc53 100644 --- a/encodings/fsst/src/compute/compare.rs +++ b/encodings/fsst/src/compute/compare.rs @@ -1,13 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_array::arrays::{BoolArray, BooleanBuffer, ConstantArray}; +use vortex_array::arrays::{BoolArray, ConstantArray}; use vortex_array::compute::{ CompareKernel, CompareKernelAdapter, Operator, compare, compare_lengths_to_empty, }; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical, register_kernel}; -use vortex_buffer::ByteBuffer; +use vortex_buffer::{BitBuffer, ByteBuffer}; use vortex_dtype::{DType, match_each_integer_ptype}; use vortex_error::{VortexExpect, VortexResult, vortex_bail}; use vortex_scalar::Scalar; @@ -51,9 +51,9 @@ fn compare_fsst_constant( if is_rhs_empty { let buffer = match operator { // Every possible value is gte "" - Operator::Gte => BooleanBuffer::new_set(left.len()), + Operator::Gte => BitBuffer::new_set(left.len()), // No value is lt "" - Operator::Lt => BooleanBuffer::new_unset(left.len()), + Operator::Lt => BitBuffer::new_unset(left.len()), _ => { let uncompressed_lengths = left.uncompressed_lengths().to_primitive(); match_each_integer_ptype!(uncompressed_lengths.ptype(), |P| { @@ -66,7 +66,7 @@ fn compare_fsst_constant( }; return Ok(Some( - BoolArray::from_bool_buffer( + BoolArray::from_bit_buffer( buffer, Validity::copy_from_array(left.as_ref()) .union_nullability(right.dtype().nullability()), @@ -144,7 +144,7 @@ mod tests { assert_eq!(equals.dtype(), &DType::Bool(Nullability::Nullable)); assert_eq!( - equals.boolean_buffer().into_iter().collect::>(), + equals.bit_buffer().into_iter().collect::>(), vec![false, false, true, false, false] ); @@ -155,7 +155,7 @@ mod tests { assert_eq!(not_equals.dtype(), &DType::Bool(Nullability::Nullable)); assert_eq!( - 
not_equals.boolean_buffer().into_iter().collect::>(), + not_equals.bit_buffer().into_iter().collect::>(), vec![true, true, false, true, true] ); diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 765b296a7c0..3da6b06bebd 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -328,7 +328,7 @@ impl ValidityVTable for RunEndVTable { ) .into_array() }; - Mask::from_buffer(ree_validity.to_bool().boolean_buffer().clone()) + Mask::from_buffer(ree_validity.to_bool().bit_buffer().clone()) } } } diff --git a/encodings/runend/src/compress.rs b/encodings/runend/src/compress.rs index 84c1de2683a..e00bf5620e6 100644 --- a/encodings/runend/src/compress.rs +++ b/encodings/runend/src/compress.rs @@ -1,13 +1,12 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBufferBuilder; use itertools::Itertools; -use vortex_array::arrays::{BoolArray, BooleanBuffer, ConstantArray, PrimitiveArray}; +use vortex_array::arrays::{BoolArray, ConstantArray, PrimitiveArray}; use vortex_array::validity::Validity; use vortex_array::vtable::ValidityHelper; use vortex_array::{ArrayRef, IntoArray, ToCanonical}; -use vortex_buffer::{Buffer, BufferMut, buffer}; +use vortex_buffer::{BitBuffer, BitBufferMut, Buffer, BufferMut, buffer}; use vortex_dtype::{ NativePType, Nullability, match_each_native_ptype, match_each_unsigned_integer_ptype, }; @@ -29,7 +28,7 @@ pub fn runend_encode(array: &PrimitiveArray) -> (PrimitiveArray, ArrayRef) { ConstantArray::new(Scalar::null(array.dtype().clone()), 1).into_array(), ); } - Validity::Array(a) => Some(a.to_bool().boolean_buffer().clone()), + Validity::Array(a) => Some(a.to_bool().bit_buffer().clone()), }; let (ends, values) = match validity { @@ -89,18 +88,18 @@ fn runend_encode_primitive(elements: &[T]) -> (Buffer, Buff fn runend_encode_nullable_primitive( elements: &[T], - element_validity: BooleanBuffer, + element_validity: BitBuffer, ) 
-> (Buffer, PrimitiveArray) { let mut ends = BufferMut::empty(); let mut values = BufferMut::empty(); - let mut validity = BooleanBufferBuilder::new(values.capacity()); + let mut validity = BitBufferMut::with_capacity(values.capacity()); if elements.is_empty() { return ( ends.freeze(), PrimitiveArray::new( values, - Validity::Array(BoolArray::from(validity.finish()).into_array()), + Validity::Array(BoolArray::from(validity.freeze()).into_array()), ), ); } @@ -145,7 +144,7 @@ fn runend_encode_nullable_primitive( ( ends.freeze(), - PrimitiveArray::new(values, Validity::from(validity.finish())), + PrimitiveArray::new(values, Validity::from(validity.freeze())), ) } @@ -177,7 +176,7 @@ pub fn runend_decode_bools( match_each_unsigned_integer_ptype!(ends.ptype(), |E| { runend_decode_typed_bool( trimmed_ends_iter(ends.as_slice::(), offset, length), - values.boolean_buffer().clone(), + values.bit_buffer(), values.validity_mask(), values.dtype().nullability(), length, @@ -206,84 +205,83 @@ pub fn runend_decode_typed_primitive( Mask::AllFalse(_) => PrimitiveArray::new(Buffer::::zeroed(length), Validity::AllInvalid), Mask::Values(mask) => { let mut decoded = BufferMut::with_capacity(length); - let mut decoded_validity = BooleanBufferBuilder::new(length); + let mut decoded_validity = BitBufferMut::with_capacity(length); for (end, value) in run_ends.zip_eq( values .iter() - .zip(mask.boolean_buffer().iter()) + .zip(mask.bit_buffer().iter()) .map(|(&v, is_valid)| is_valid.then_some(v)), ) { assert!(end <= length, "Runend end must be less than overall length"); match value { None => { - decoded_validity.append_n(end - decoded.len(), false); + decoded_validity.append_n(false, end - decoded.len()); // SAFETY: // We preallocate enough capacity because we know the total length unsafe { decoded.push_n_unchecked(T::default(), end - decoded.len()) }; } Some(value) => { - decoded_validity.append_n(end - decoded.len(), true); + decoded_validity.append_n(true, end - decoded.len()); // 
SAFETY: // We preallocate enough capacity because we know the total length unsafe { decoded.push_n_unchecked(value, end - decoded.len()) }; } } } - PrimitiveArray::new(decoded, Validity::from(decoded_validity.finish())) + PrimitiveArray::new(decoded, Validity::from(decoded_validity.freeze())) } } } pub fn runend_decode_typed_bool( run_ends: impl Iterator, - values: BooleanBuffer, + values: &BitBuffer, values_validity: Mask, values_nullability: Nullability, length: usize, ) -> BoolArray { match values_validity { Mask::AllTrue(_) => { - let mut decoded = BooleanBufferBuilder::new(length); + let mut decoded = BitBufferMut::with_capacity(length); for (end, value) in run_ends.zip_eq(values.iter()) { - decoded.append_n(end - decoded.len(), value); + decoded.append_n(value, end - decoded.len()); } - BoolArray::from_bool_buffer(decoded.finish(), values_nullability.into()) + BoolArray::from_bit_buffer(decoded.freeze(), values_nullability.into()) } Mask::AllFalse(_) => { - BoolArray::from_bool_buffer(BooleanBuffer::new_unset(length), Validity::AllInvalid) + BoolArray::from_bit_buffer(BitBuffer::new_unset(length), Validity::AllInvalid) } Mask::Values(mask) => { - let mut decoded = BooleanBufferBuilder::new(length); - let mut decoded_validity = BooleanBufferBuilder::new(length); + let mut decoded = BitBufferMut::with_capacity(length); + let mut decoded_validity = BitBufferMut::with_capacity(length); for (end, value) in run_ends.zip_eq( values .iter() - .zip(mask.boolean_buffer().iter()) + .zip(mask.bit_buffer().iter()) .map(|(v, is_valid)| is_valid.then_some(v)), ) { match value { None => { - decoded_validity.append_n(end - decoded.len(), false); - decoded.append_n(end - decoded.len(), false); + decoded_validity.append_n(false, end - decoded.len()); + decoded.append_n(false, end - decoded.len()); } Some(value) => { - decoded_validity.append_n(end - decoded.len(), true); - decoded.append_n(end - decoded.len(), value); + decoded_validity.append_n(true, end - decoded.len()); + 
decoded.append_n(value, end - decoded.len()); } } } - BoolArray::from_bool_buffer(decoded.finish(), Validity::from(decoded_validity.finish())) + BoolArray::from_bit_buffer(decoded.freeze(), Validity::from(decoded_validity.freeze())) } } } #[cfg(test)] mod test { - use arrow_buffer::BooleanBuffer; use vortex_array::ToCanonical; use vortex_array::arrays::PrimitiveArray; use vortex_array::validity::Validity; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use crate::compress::{runend_decode_primitive, runend_encode}; @@ -301,7 +299,7 @@ mod test { fn encode_nullable() { let arr = PrimitiveArray::new( buffer![1i32, 1, 2, 2, 2, 3, 3, 3, 3, 3], - Validity::from(BooleanBuffer::from(vec![ + Validity::from(BitBuffer::from(vec![ true, true, false, false, true, true, true, true, false, false, ])), ); @@ -316,7 +314,7 @@ mod test { fn encode_all_null() { let arr = PrimitiveArray::new( buffer![0, 0, 0, 0, 0], - Validity::from(BooleanBuffer::new_unset(5)), + Validity::from(BitBuffer::new_unset(5)), ); let (ends, values) = runend_encode(&arr); let values = values.to_primitive(); diff --git a/encodings/runend/src/compute/compare.rs b/encodings/runend/src/compute/compare.rs index cf988c59298..fd59473f159 100644 --- a/encodings/runend/src/compute/compare.rs +++ b/encodings/runend/src/compute/compare.rs @@ -44,15 +44,18 @@ register_kernel!(CompareKernelAdapter(RunEndVTable).lift()); #[cfg(test)] mod test { - use vortex_array::arrays::{BooleanBuffer, ConstantArray}; + use vortex_array::arrays::{ConstantArray, PrimitiveArray}; use vortex_array::compute::{Operator, compare}; use vortex_array::{IntoArray, ToCanonical}; - use vortex_buffer::buffer; + use vortex_buffer::BitBuffer; use crate::RunEndArray; fn ree_array() -> RunEndArray { - RunEndArray::encode(buffer![1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5, 5].into_array()).unwrap() + RunEndArray::encode( + PrimitiveArray::from_iter([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5, 5]).into_array(), + ) + .unwrap() } #[test] @@ -66,8 +69,8 @@ 
mod test { .unwrap(); let res_canon = res.to_bool(); assert_eq!( - res_canon.boolean_buffer(), - &BooleanBuffer::from(vec![ + res_canon.bit_buffer(), + &BitBuffer::from(vec![ false, false, false, false, false, false, false, false, true, true, true, true ]) ); diff --git a/encodings/runend/src/compute/filter.rs b/encodings/runend/src/compute/filter.rs index 477700cb5e6..df16bf26fa6 100644 --- a/encodings/runend/src/compute/filter.rs +++ b/encodings/runend/src/compute/filter.rs @@ -2,15 +2,16 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::cmp::min; +use std::ops::AddAssign; -use arrow_buffer::BooleanBuffer; +use num_traits::AsPrimitive; use vortex_array::arrays::PrimitiveArray; use vortex_array::compute::{FilterKernel, FilterKernelAdapter, filter}; use vortex_array::validity::Validity; -use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical, register_kernel}; -use vortex_buffer::buffer_mut; -use vortex_dtype::{IntegerPType, match_each_unsigned_integer_ptype}; -use vortex_error::{VortexExpect, VortexResult, VortexUnwrap, vortex_panic}; +use vortex_array::{Array, ArrayRef, Canonical, IntoArray, ToCanonical, register_kernel}; +use vortex_buffer::{BitBuffer, buffer_mut}; +use vortex_dtype::{NativePType, match_each_unsigned_integer_ptype}; +use vortex_error::{VortexExpect, VortexResult, VortexUnwrap}; use vortex_mask::Mask; use crate::compute::take::take_indices_unchecked; @@ -20,39 +21,41 @@ const FILTER_TAKE_THRESHOLD: f64 = 0.1; impl FilterKernel for RunEndVTable { fn filter(&self, array: &RunEndArray, mask: &Mask) -> VortexResult { - let Mask::Values(mask_values) = mask else { - vortex_panic!("FilterKernel invariant was incorrect"); - }; - - let runs_ratio = mask_values.true_count() as f64 / array.ends().len() as f64; - - if runs_ratio < FILTER_TAKE_THRESHOLD || mask_values.true_count() < 25 { - // This strategy is directly proportional to the number of indices. 
- take_indices_unchecked(array, mask_values.indices(), &Validity::NonNullable) - } else { - // This strategy ends up being close to fixed cost based on the number of runs, rather - // than the number of indices. - let primitive_run_ends = array.ends().to_primitive(); - let (run_ends, values_mask) = - match_each_unsigned_integer_ptype!(primitive_run_ends.ptype(), |P| { - filter_run_end_primitive( - primitive_run_ends.as_slice::

(), - array.offset() as u64, - array.len() as u64, - mask_values.boolean_buffer(), - )? - }); - let values = filter(array.values(), &values_mask)?; - - // SAFETY: guaranteed by implementation of filter_run_end_primitive - unsafe { - Ok(RunEndArray::new_unchecked( - run_ends.into_array(), - values, - 0, - mask_values.true_count(), - ) - .into_array()) + match mask { + Mask::AllTrue(_) => Ok(array.to_array()), + Mask::AllFalse(_) => Ok(Canonical::empty(array.dtype()).into()), + Mask::Values(mask_values) => { + let runs_ratio = mask_values.true_count() as f64 / array.ends().len() as f64; + + if runs_ratio < FILTER_TAKE_THRESHOLD || mask_values.true_count() < 25 { + // This strategy is directly proportional to the number of indices. + take_indices_unchecked(array, mask_values.indices(), &Validity::NonNullable) + } else { + // This strategy ends up being close to fixed cost based on the number of runs, + // rather than the number of indices. + let primitive_run_ends = array.ends().to_primitive(); + let (run_ends, values_mask) = + match_each_unsigned_integer_ptype!(primitive_run_ends.ptype(), |P| { + filter_run_end_primitive( + primitive_run_ends.as_slice::

(), + array.offset() as u64, + array.len() as u64, + mask_values.bit_buffer(), + )? + }); + let values = filter(array.values(), &values_mask)?; + + // SAFETY: guaranteed by implementation of filter_run_end_primitive + unsafe { + Ok(RunEndArray::new_unchecked( + run_ends.into_array(), + values, + 0, + mask_values.true_count(), + ) + .into_array()) + } + } } } } @@ -71,7 +74,7 @@ pub fn filter_run_end(array: &RunEndArray, mask: &Mask) -> VortexResult VortexResult>( +fn filter_run_end_primitive + AsPrimitive>( run_ends: &[R], offset: u64, length: u64, - mask: &BooleanBuffer, + mask: &BitBuffer, ) -> VortexResult<(PrimitiveArray, Mask)> { let mut new_run_ends = buffer_mut![R::zero(); run_ends.len()]; @@ -98,9 +101,9 @@ fn filter_run_end_primitive>( let mut j = 0; let mut count = R::zero(); - let new_mask: Mask = BooleanBuffer::collect_bool(run_ends.len(), |i| { + let new_mask: Mask = BitBuffer::collect_bool(run_ends.len(), |i| { let mut keep = false; - let end = min(run_ends[i].as_() as u64 - offset, length); + let end = min(run_ends[i].as_() - offset, length); // Safety: predicate must be the same length as the array the ends have been taken from for pred in @@ -127,16 +130,18 @@ fn filter_run_end_primitive>( #[cfg(test)] mod tests { - + use vortex_array::arrays::PrimitiveArray; use vortex_array::{IntoArray, ToCanonical}; - use vortex_buffer::buffer; use vortex_mask::Mask; use super::filter_run_end; use crate::{RunEndArray, RunEndVTable}; fn ree_array() -> RunEndArray { - RunEndArray::encode(buffer![1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5, 5].into_array()).unwrap() + RunEndArray::encode( + PrimitiveArray::from_iter([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5, 5]).into_array(), + ) + .unwrap() } #[test] diff --git a/encodings/sequence/src/compute/compare.rs b/encodings/sequence/src/compute/compare.rs index 9e5e0130d6e..70c4746fac6 100644 --- a/encodings/sequence/src/compute/compare.rs +++ b/encodings/sequence/src/compute/compare.rs @@ -1,10 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 
// SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_array::arrays::{BoolArray, BooleanBuffer, ConstantArray}; +use vortex_array::arrays::{BoolArray, ConstantArray}; use vortex_array::compute::{CompareKernel, Operator}; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayRef}; +use vortex_buffer::BitBuffer; use vortex_dtype::{DType, NativePType, Nullability, match_each_integer_ptype}; use vortex_error::{VortexExpect, VortexResult}; use vortex_scalar::{PValue, Scalar}; @@ -45,9 +46,9 @@ impl CompareKernel for SequenceVTable { }; if let Some(set_idx) = set_idx { - let buffer = BooleanBuffer::from_iter((0..lhs.len()).map(|idx| idx == set_idx)); + let buffer = BitBuffer::from_iter((0..lhs.len()).map(|idx| idx == set_idx)); Ok(Some( - BoolArray::from_bool_buffer(buffer, validity).to_array(), + BoolArray::from_bit_buffer(buffer, validity).to_array(), )) } else { Ok(Some( @@ -113,8 +114,8 @@ mod tests { let result = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); assert_eq!( - result.to_bool().boolean_buffer(), - BoolArray::from_iter(vec![false, false, true, false]).boolean_buffer(), + result.to_bool().bit_buffer(), + BoolArray::from_iter(vec![false, false, true, false]).bit_buffer(), ) } @@ -127,8 +128,8 @@ mod tests { let result = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); assert_eq!( - result.to_bool().boolean_buffer(), - BoolArray::from_iter(vec![false, false, true, false]).boolean_buffer(), + result.to_bool().bit_buffer(), + BoolArray::from_iter(vec![false, false, true, false]).bit_buffer(), ) } @@ -141,8 +142,8 @@ mod tests { let result = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); assert_eq!( - result.to_bool().boolean_buffer(), - BoolArray::from_iter(vec![false, false, false, false]).boolean_buffer(), + result.to_bool().bit_buffer(), + BoolArray::from_iter(vec![false, false, false, false]).bit_buffer(), ) } } diff --git a/encodings/sequence/src/compute/filter.rs 
b/encodings/sequence/src/compute/filter.rs index 757ce093807..c93fca58142 100644 --- a/encodings/sequence/src/compute/filter.rs +++ b/encodings/sequence/src/compute/filter.rs @@ -26,10 +26,10 @@ impl FilterKernel for SequenceVTable { register_kernel!(FilterKernelAdapter(SequenceVTable).lift()); fn filter_impl(mul: T, base: T, mask: &Mask, validity: Validity) -> ArrayRef { - match mask.boolean_buffer() { + match mask.bit_buffer() { AllOr::All | AllOr::None => unreachable!("Handled by entrypoint function"), AllOr::Some(mask) => { - let mut buffer = BufferMut::::with_capacity(mask.count_set_bits()); + let mut buffer = BufferMut::::with_capacity(mask.true_count()); buffer.extend(mask.set_indices().map(|idx| { let i = T::from_usize(idx).vortex_expect("all valid indices fit"); base + i * mul diff --git a/encodings/sequence/src/compute/list_contains.rs b/encodings/sequence/src/compute/list_contains.rs index de6445be60b..727663854c5 100644 --- a/encodings/sequence/src/compute/list_contains.rs +++ b/encodings/sequence/src/compute/list_contains.rs @@ -81,8 +81,7 @@ mod tests { let res = list_contains(elements.as_ref(), array.as_ref()) .unwrap() .to_bool() - .bool_vec() - .unwrap(); + .bool_vec(); assert_eq!(res, vec![true, false, true]); } @@ -96,8 +95,7 @@ mod tests { let res = list_contains(elements.as_ref(), array.as_ref()) .unwrap() .to_bool() - .bool_vec() - .unwrap(); + .bool_vec(); assert_eq!(res, vec![true, true, false]); } diff --git a/encodings/sequence/src/compute/take.rs b/encodings/sequence/src/compute/take.rs index d8bb7413cd8..e53d9cea130 100644 --- a/encodings/sequence/src/compute/take.rs +++ b/encodings/sequence/src/compute/take.rs @@ -41,7 +41,7 @@ fn take( indices_mask: Mask, result_nullability: Nullability, ) -> ArrayRef { - match indices_mask.boolean_buffer() { + match indices_mask.bit_buffer() { AllOr::All => PrimitiveArray::new( Buffer::from_trusted_len_iter(indices.iter().map(|i| { let i = ::from::(*i).vortex_expect("all indices fit"); diff --git 
a/encodings/sparse/src/canonical.rs b/encodings/sparse/src/canonical.rs index d6865c66abd..de830635e4b 100644 --- a/encodings/sparse/src/canonical.rs +++ b/encodings/sparse/src/canonical.rs @@ -7,20 +7,20 @@ use itertools::Itertools; use num_traits::NumCast; use vortex_array::arrays::binary_view::BinaryView; use vortex_array::arrays::{ - BoolArray, BooleanBuffer, ConstantArray, FixedSizeListArray, ListArray, NullArray, - PrimitiveArray, StructArray, VarBinViewArray, smallest_decimal_value_type, + BoolArray, ConstantArray, FixedSizeListArray, ListArray, NullArray, PrimitiveArray, + StructArray, VarBinViewArray, smallest_decimal_value_type, }; use vortex_array::builders::{ArrayBuilder, DecimalBuilder, ListBuilder, builder_with_capacity}; use vortex_array::patches::Patches; use vortex_array::validity::Validity; use vortex_array::vtable::{CanonicalVTable, ValidityHelper}; -use vortex_array::{Array, ArrayRef, Canonical, IntoArray, ToCanonical}; -use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut}; +use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _}; +use vortex_buffer::{BitBuffer, Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut}; use vortex_dtype::{ DType, DecimalDType, IntegerPType, NativePType, Nullability, StructFields, match_each_integer_ptype, match_each_native_ptype, }; -use vortex_error::{VortexError, VortexExpect, vortex_panic}; +use vortex_error::{VortexError, VortexExpect as _, vortex_panic}; use vortex_scalar::{ DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar, match_each_decimal_value_type, @@ -390,11 +390,11 @@ fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> Canonica ) }; - let bools = BoolArray::from_bool_buffer( + let bools = BoolArray::from_bit_buffer( if fill_bool { - BooleanBuffer::new_set(patches.array_len()) + BitBuffer::new_set(patches.array_len()) } else { - BooleanBuffer::new_unset(patches.array_len()) + 
BitBuffer::new_unset(patches.array_len()) }, validity, ); @@ -530,7 +530,7 @@ fn canonicalize_varbin( }) } -fn canonicalize_varbin_inner( +fn canonicalize_varbin_inner( fill_value: Option, indices: Buffer, values: VarBinViewArray, @@ -572,18 +572,16 @@ fn canonicalize_varbin_inner( #[cfg(test)] mod test { - use std::sync::Arc; - use rstest::rstest; use vortex_array::arrays::{ - BoolArray, BooleanBufferBuilder, DecimalArray, FixedSizeListArray, ListArray, - PrimitiveArray, StructArray, VarBinArray, VarBinViewArray, + BoolArray, DecimalArray, ListArray, PrimitiveArray, StructArray, VarBinArray, + VarBinViewArray, }; use vortex_array::arrow::IntoArrowArray as _; use vortex_array::validity::Validity; use vortex_array::vtable::ValidityHelper; use vortex_array::{IntoArray, ToCanonical}; - use vortex_buffer::{ByteBuffer, buffer, buffer_mut}; + use vortex_buffer::{BitBufferMut, ByteBuffer, buffer, buffer_mut}; use vortex_dtype::Nullability::{NonNullable, Nullable}; use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields}; use vortex_mask::Mask; @@ -621,18 +619,18 @@ mod test { fill_value, ); - assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer()); + assert_eq!(flat_bools.bit_buffer(), expected.bit_buffer()); assert_eq!(flat_bools.validity(), expected.validity()); - assert!(flat_bools.boolean_buffer().value(0)); + assert!(flat_bools.bit_buffer().value(0)); assert!(flat_bools.validity().is_valid(0)); assert_eq!( - flat_bools.boolean_buffer().value(1), + flat_bools.bit_buffer().value(1), fill_value.unwrap_or_default() ); assert!(!flat_bools.validity().is_valid(1)); assert_eq!(flat_bools.validity().is_valid(2), fill_value.is_some()); - assert!(!flat_bools.boolean_buffer().value(7)); + assert!(!flat_bools.bit_buffer().value(7)); assert!(flat_bools.validity().is_valid(7)); } @@ -640,13 +638,13 @@ mod test { bools: Vec>, fill_value: Option, ) -> BoolArray { - let mut buffer = BooleanBufferBuilder::new(bools.len()); - let mut validity = 
BooleanBufferBuilder::new(bools.len()); + let mut buffer = BitBufferMut::with_capacity(bools.len()); + let mut validity = BitBufferMut::with_capacity(bools.len()); for maybe_bool in bools { buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default())); validity.append(maybe_bool.is_some()); } - BoolArray::from_bool_buffer(buffer.finish(), Validity::from(validity.finish())) + BoolArray::from_bit_buffer(buffer.freeze(), Validity::from(validity.freeze())) } #[rstest] @@ -1259,232 +1257,6 @@ mod test { assert_eq!(&actual, &expected); } - #[test] - fn test_sparse_fixed_size_list_null_fill() { - // Create a FixedSizeListArray with 3 lists of size 3. - let elements = buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9].into_array(); - let fsl = FixedSizeListArray::try_new(elements, 3, Validity::AllValid, 3) - .unwrap() - .into_array(); - - let indices = buffer![0u8, 2u8, 3u8].into_array(); - let fill_value = Scalar::null(DType::FixedSizeList( - Arc::new(DType::Primitive(PType::I32, NonNullable)), - 3, - Nullable, - )); - let sparse = SparseArray::try_new(indices, fsl, 5, fill_value) - .unwrap() - .into_array(); - - let actual = sparse.to_canonical().into_array(); - - // Expected: [1,2,3], null, [4,5,6], [7,8,9], null. 
- let expected_elements = - buffer![1i32, 2, 3, 0, 0, 0, 4, 5, 6, 7, 8, 9, 0, 0, 0].into_array(); - let expected = FixedSizeListArray::try_new( - expected_elements, - 3, - Validity::Array(BoolArray::from_iter([true, false, true, true, false]).into_array()), - 5, - ) - .unwrap() - .into_array(); - - let actual = actual.into_arrow_preferred().unwrap(); - let expected = expected.into_arrow_preferred().unwrap(); - - assert_eq!(actual.data_type(), expected.data_type()); - assert_eq!(&actual, &expected); - } - - #[test] - fn test_sparse_fixed_size_list_non_null_fill() { - let elements = buffer![1i32, 2, 3, 4, 5, 6].into_array(); - let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3) - .unwrap() - .into_array(); - - let indices = buffer![0u8, 2u8, 4u8].into_array(); - let fill_value = Scalar::fixed_size_list( - Arc::new(DType::Primitive(PType::I32, NonNullable)), - vec![ - Scalar::primitive(99i32, NonNullable), - Scalar::primitive(88i32, NonNullable), - ], - NonNullable, - ); - let sparse = SparseArray::try_new(indices, fsl, 6, fill_value) - .unwrap() - .into_array(); - - let actual = sparse.to_canonical().into_array(); - - // Expected: [1,2], [99,88], [3,4], [99,88], [5,6], [99,88]. - let expected_elements = buffer![1i32, 2, 99, 88, 3, 4, 99, 88, 5, 6, 99, 88].into_array(); - let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 6) - .unwrap() - .into_array(); - - let actual = actual.into_arrow_preferred().unwrap(); - let expected = expected.into_arrow_preferred().unwrap(); - - assert_eq!(actual.data_type(), expected.data_type()); - assert_eq!(&actual, &expected); - } - - #[test] - fn test_sparse_fixed_size_list_with_validity() { - // Create FSL values with some nulls. 
- let elements = buffer![10i32, 20, 30, 40, 50, 60].into_array(); - let fsl = FixedSizeListArray::try_new( - elements, - 2, - Validity::Array(BoolArray::from_iter([true, false, true]).into_array()), - 3, - ) - .unwrap() - .into_array(); - - let indices = buffer![1u16, 3u16, 4u16].into_array(); - let fill_value = Scalar::fixed_size_list( - Arc::new(DType::Primitive(PType::I32, NonNullable)), - vec![ - Scalar::primitive(7i32, NonNullable), - Scalar::primitive(8i32, NonNullable), - ], - Nullable, - ); - let sparse = SparseArray::try_new(indices, fsl, 6, fill_value) - .unwrap() - .into_array(); - - let actual = sparse.to_canonical().into_array(); - - // Expected validity: [true, true, true, false, true, true]. - // Expected elements: [7,8], [10,20], [7,8], [30,40], [50,60], [7,8]. - let expected_elements = buffer![7i32, 8, 10, 20, 7, 8, 30, 40, 50, 60, 7, 8].into_array(); - let expected = FixedSizeListArray::try_new( - expected_elements, - 2, - Validity::Array( - BoolArray::from_iter([true, true, true, false, true, true]).into_array(), - ), - 6, - ) - .unwrap() - .into_array(); - - let actual = actual.into_arrow_preferred().unwrap(); - let expected = expected.into_arrow_preferred().unwrap(); - - assert_eq!(actual.data_type(), expected.data_type()); - assert_eq!(&actual, &expected); - } - - #[test] - fn test_sparse_fixed_size_list_truly_sparse() { - // Test with a truly sparse array where most values are the fill value. - // This demonstrates the compression benefit of sparse encoding. - - // Create patch values: only 3 distinct lists out of 100 total positions. - let elements = buffer![10i32, 11, 20, 21, 30, 31].into_array(); - let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3) - .unwrap() - .into_array(); - - // Patches at positions 5, 50, and 95 out of 100. - let indices = buffer![5u32, 50, 95].into_array(); - - // Fill value [99, 99] will appear 97 times but stored only once. 
- let fill_value = Scalar::fixed_size_list( - Arc::new(DType::Primitive(PType::I32, NonNullable)), - vec![ - Scalar::primitive(99i32, NonNullable), - Scalar::primitive(99i32, NonNullable), - ], - NonNullable, - ); - - let sparse = SparseArray::try_new(indices, fsl, 100, fill_value) - .unwrap() - .into_array(); - - let actual = sparse.to_canonical().into_array(); - - // Build expected: 97 copies of [99,99] with patches at positions 5, 50, 95. - let mut expected_elements_vec = Vec::with_capacity(200); - // Positions 0-4: fill values - for _ in 0..5 { - expected_elements_vec.extend([99i32, 99]); - } - // Position 5: first patch [10, 11] - expected_elements_vec.extend([10, 11]); - // Positions 6-49: fill values - for _ in 6..50 { - expected_elements_vec.extend([99, 99]); - } - // Position 50: second patch [20, 21] - expected_elements_vec.extend([20, 21]); - // Positions 51-94: fill values - for _ in 51..95 { - expected_elements_vec.extend([99, 99]); - } - // Position 95: third patch [30, 31] - expected_elements_vec.extend([30, 31]); - // Positions 96-99: fill values - for _ in 96..100 { - expected_elements_vec.extend([99, 99]); - } - let expected_elements = PrimitiveArray::from_iter(expected_elements_vec).into_array(); - let expected = - FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 100) - .unwrap() - .into_array(); - - let actual = actual.into_arrow_preferred().unwrap(); - let expected = expected.into_arrow_preferred().unwrap(); - - assert_eq!(actual.data_type(), expected.data_type()); - assert_eq!(&actual, &expected); - } - - #[test] - fn test_sparse_fixed_size_list_single_element() { - // Test with a single element FSL array. 
- let elements = buffer![42i32, 43].into_array(); - let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 1) - .unwrap() - .into_array(); - - let indices = buffer![0u32].into_array(); - let fill_value = Scalar::fixed_size_list( - Arc::new(DType::Primitive(PType::I32, NonNullable)), - vec![ - Scalar::primitive(1i32, NonNullable), - Scalar::primitive(2i32, NonNullable), - ], - NonNullable, - ); - let sparse = SparseArray::try_new(indices, fsl, 1, fill_value) - .unwrap() - .into_array(); - - let actual = sparse.to_canonical().into_array(); - - // Expected: just [42, 43]. - let expected_elements = buffer![42i32, 43].into_array(); - let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 1) - .unwrap() - .into_array(); - - let actual = actual.into_arrow_preferred().unwrap(); - let expected = expected.into_arrow_preferred().unwrap(); - - assert_eq!(actual.data_type(), expected.data_type()); - assert_eq!(&actual, &expected); - } - #[test] fn test_sparse_list_grows_offset_type() { let elements = buffer![1i32, 2, 1, 2].into_array(); diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 35063cb7cfa..1367d938073 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -4,15 +4,15 @@ use std::fmt::Debug; use itertools::Itertools as _; -use num_traits::NumCast; -use vortex_array::arrays::{BooleanBufferBuilder, ConstantArray}; +use num_traits::AsPrimitive; +use vortex_array::arrays::ConstantArray; use vortex_array::compute::{Operator, compare, fill_null, filter, sub_scalar}; use vortex_array::patches::Patches; use vortex_array::stats::{ArrayStats, StatsSetRef}; use vortex_array::vtable::{ArrayVTable, NotSupported, VTable, ValidityVTable}; use vortex_array::{Array, ArrayRef, EncodingId, EncodingRef, IntoArray, ToCanonical, vtable}; -use vortex_buffer::Buffer; -use vortex_dtype::{DType, IntegerPType, Nullability, match_each_integer_ptype}; +use vortex_buffer::{BitBufferMut, Buffer}; +use 
vortex_dtype::{DType, NativePType, Nullability, match_each_integer_ptype}; use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_ensure}; use vortex_mask::{AllOr, Mask}; use vortex_scalar::Scalar; @@ -218,7 +218,7 @@ impl SparseArray { &Scalar::bool(true, Nullability::NonNullable), )? .to_bool() - .boolean_buffer() + .bit_buffer() .clone(), ); @@ -296,9 +296,11 @@ impl ValidityVTable for SparseVTable { return Mask::AllFalse(len); } - // TODO(ngates): use vortex-buffer::BitBufferMut when it exists. - let mut is_valid_buffer = BooleanBufferBuilder::new(len); - is_valid_buffer.append_n(len, fill_is_valid); + let mut is_valid_buffer = if fill_is_valid { + BitBufferMut::new_set(len) + } else { + BitBufferMut::new_unset(len) + }; let indices = array.patches().indices().to_primitive(); let index_offset = array.patches().offset(); @@ -308,35 +310,32 @@ impl ValidityVTable for SparseVTable { patch_validity(&mut is_valid_buffer, indices, index_offset, values_validity); }); - Mask::from_buffer(is_valid_buffer.finish()) + Mask::from_buffer(is_valid_buffer.freeze()) } } -fn patch_validity( - is_valid_buffer: &mut BooleanBufferBuilder, +fn patch_validity>( + is_valid_buffer: &mut BitBufferMut, indices: &[I], index_offset: usize, values_validity: Mask, ) { - let indices = indices.iter().map(|index| { - let index = ::from(*index).vortex_expect("Failed to cast to usize"); - index - index_offset - }); + let indices = indices.iter().map(|index| index.as_() - index_offset); match values_validity { Mask::AllTrue(_) => { for index in indices { - is_valid_buffer.set_bit(index, true); + is_valid_buffer.set(index); } } Mask::AllFalse(_) => { for index in indices { - is_valid_buffer.set_bit(index, false); + is_valid_buffer.unset(index); } } Mask::Values(mask_values) => { - let is_valid = mask_values.boolean_buffer().iter(); + let is_valid = mask_values.bit_buffer().iter(); for (index, is_valid) in indices.zip_eq(is_valid) { - is_valid_buffer.set_bit(index, is_valid); + 
is_valid_buffer.set_to(index, is_valid); } } } @@ -459,11 +458,7 @@ mod test { pub fn sparse_validity_mask() { let array = sparse_array(nullable_fill()); assert_eq!( - array - .validity_mask() - .to_boolean_buffer() - .iter() - .collect_vec(), + array.validity_mask().to_bit_buffer().iter().collect_vec(), [ false, false, true, false, false, true, false, false, true, false ] diff --git a/encodings/zigzag/src/compute/mod.rs b/encodings/zigzag/src/compute/mod.rs index f4ad7978f0c..ac84db59810 100644 --- a/encodings/zigzag/src/compute/mod.rs +++ b/encodings/zigzag/src/compute/mod.rs @@ -63,13 +63,13 @@ impl ZigZagEncoded for u64 { #[cfg(test)] mod tests { use rstest::rstest; - use vortex_array::arrays::{BooleanBuffer, PrimitiveArray}; + use vortex_array::arrays::PrimitiveArray; use vortex_array::compute::conformance::binary_numeric::test_binary_numeric_array; use vortex_array::compute::conformance::consistency::test_array_consistency; use vortex_array::compute::{filter, take}; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical}; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use vortex_dtype::Nullability; use vortex_scalar::Scalar; @@ -113,7 +113,7 @@ mod tests { .encode(&buffer![-189, -160, 1].into_array().to_canonical(), None) .unwrap() .unwrap(); - let filter_mask = BooleanBuffer::from(vec![true, false, true]).into(); + let filter_mask = BitBuffer::from(vec![true, false, true]).into(); let actual = filter(&zigzag, &filter_mask).unwrap().to_primitive(); let expected = ZigZagEncoding .encode(&buffer![-189, 1].into_array().to_canonical(), None) diff --git a/encodings/zstd/Cargo.toml b/encodings/zstd/Cargo.toml index 8e7e9cc2c8d..a0fa0a4cc1a 100644 --- a/encodings/zstd/Cargo.toml +++ b/encodings/zstd/Cargo.toml @@ -31,6 +31,3 @@ zstd = { workspace = true } [dev-dependencies] rstest = { workspace = true } vortex-array = { workspace = true, features = ["test-harness"] } - -[dev-dependencies.vortex-dict] 
-workspace = true diff --git a/encodings/zstd/src/array.rs b/encodings/zstd/src/array.rs index 29c35a6eba5..a9666e8160f 100644 --- a/encodings/zstd/src/array.rs +++ b/encodings/zstd/src/array.rs @@ -109,7 +109,7 @@ fn collect_valid_primitive(parray: &PrimitiveArray) -> VortexResult VortexResult<(ByteBuffer, Vec)> { let mask = vbv.validity_mask(); - let buffer_and_value_byte_indices = match mask.boolean_buffer() { + let buffer_and_value_byte_indices = match mask.bit_buffer() { AllOr::None => (Buffer::empty(), Vec::new()), _ => { let mut buffer = BufferMut::with_capacity( diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 1f7417984b7..20bd76ebb4d 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -18,12 +18,10 @@ version = { workspace = true } cargo-fuzz = true [dependencies] -arrow-buffer = { workspace = true } arrow-ord = { workspace = true } itertools = { workspace = true } libfuzzer-sys = { workspace = true } strum = { workspace = true, features = ["derive"] } -tokio = { workspace = true, features = ["full"] } vortex-array = { workspace = true, features = ["arbitrary"] } vortex-btrblocks = { workspace = true } vortex-buffer = { workspace = true } diff --git a/fuzz/fuzz_targets/file_io.rs b/fuzz/fuzz_targets/file_io.rs index 8fce37d432d..03b217b741e 100644 --- a/fuzz/fuzz_targets/file_io.rs +++ b/fuzz/fuzz_targets/file_io.rs @@ -4,7 +4,6 @@ #![no_main] #![allow(clippy::result_large_err)] -use arrow_buffer::BooleanBuffer; use arrow_ord::ord::make_comparator; use arrow_ord::sort::SortOptions; use itertools::Itertools; @@ -13,7 +12,7 @@ use vortex_array::arrays::ChunkedArray; use vortex_array::arrow::IntoArrowArray; use vortex_array::compute::{Operator, compare, filter}; use vortex_array::{Array, ArrayRef, Canonical, IntoArray, ToCanonical}; -use vortex_buffer::ByteBufferMut; +use vortex_buffer::{BitBuffer, ByteBufferMut}; use vortex_dtype::{DType, StructFields}; use vortex_error::{VortexExpect, VortexUnwrap, vortex_panic}; use vortex_expr::{Scope, lit, root}; 
@@ -96,7 +95,7 @@ fuzz_target!(|fuzz: FuzzFileAction| -> Corpus { let bool_result = compare(&expected_array, &output_array, Operator::Eq) .vortex_unwrap() .to_bool(); - let true_count = bool_result.boolean_buffer().count_set_bits(); + let true_count = bool_result.bit_buffer().true_count(); if true_count != expected_array.len() && (bool_result.all_valid() || expected_array.all_valid()) { @@ -119,10 +118,10 @@ fn compare_struct(expected: ArrayRef, actual: ArrayRef) { make_comparator(&arrow_expected, &arrow_actual, SortOptions::default()).vortex_unwrap(); let comparison_result = - BooleanBuffer::collect_bool(arrow_expected.len(), |idx| cmp_fn(idx, idx).is_eq()); + BitBuffer::collect_bool(arrow_expected.len(), |idx| cmp_fn(idx, idx).is_eq()); assert_eq!( - comparison_result.count_set_bits(), + comparison_result.true_count(), arrow_expected.len(), "\nEXPECTED: {}ACTUAL: {}", expected.display_tree(), diff --git a/fuzz/src/array/compare.rs b/fuzz/src/array/compare.rs index ad5b84e6e8d..04435a48e53 100644 --- a/fuzz/src/array/compare.rs +++ b/fuzz/src/array/compare.rs @@ -4,12 +4,12 @@ use std::fmt::Debug; use std::ops::Deref; -use arrow_buffer::BooleanBuffer; use vortex_array::accessor::ArrayAccessor; use vortex_array::arrays::BoolArray; use vortex_array::compute::{Operator, scalar_cmp}; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical}; +use vortex_buffer::BitBuffer; use vortex_dtype::{DType, NativePType, match_each_native_ptype}; use vortex_error::{VortexExpect, VortexResult, vortex_err}; use vortex_scalar::{NativeDecimalType, Scalar, match_each_decimal_value_type}; @@ -20,8 +20,8 @@ pub fn compare_canonical_array( operator: Operator, ) -> VortexResult { if value.is_null() { - return Ok(BoolArray::from_bool_buffer( - BooleanBuffer::new_unset(array.len()), + return Ok(BoolArray::from_bit_buffer( + BitBuffer::new_unset(array.len()), Validity::AllInvalid, ) .into_array()); @@ -36,9 +36,9 @@ pub fn compare_canonical_array( 
Ok(compare_to( array .to_bool() - .boolean_buffer() + .bit_buffer() .iter() - .zip(array.validity_mask().to_boolean_buffer().iter()) + .zip(array.validity_mask().to_bit_buffer().iter()) .map(|(b, v)| v.then_some(b)), bool, operator, @@ -56,7 +56,7 @@ pub fn compare_canonical_array( .as_slice::

() .iter() .copied() - .zip(array.validity_mask().to_boolean_buffer().iter()) + .zip(array.validity_mask().to_bit_buffer().iter()) .map(|(b, v)| v.then_some(b)), pval, operator, @@ -77,7 +77,7 @@ pub fn compare_canonical_array( buf.as_slice() .iter() .copied() - .zip(array.validity_mask().to_boolean_buffer().iter()) + .zip(array.validity_mask().to_bit_buffer().iter()) .map(|(b, v)| v.then_some(b)), dval, operator, diff --git a/fuzz/src/array/filter.rs b/fuzz/src/array/filter.rs index 3225898d19a..7c46ea1c662 100644 --- a/fuzz/src/array/filter.rs +++ b/fuzz/src/array/filter.rs @@ -2,9 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_array::accessor::ArrayAccessor; -use vortex_array::arrays::{ - BoolArray, BooleanBuffer, DecimalArray, PrimitiveArray, StructArray, VarBinViewArray, -}; +use vortex_array::arrays::{BoolArray, DecimalArray, PrimitiveArray, StructArray, VarBinViewArray}; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical}; use vortex_buffer::Buffer; @@ -16,7 +14,7 @@ use crate::array::take_canonical_array_non_nullable_indices; pub fn filter_canonical_array(array: &dyn Array, filter: &[bool]) -> VortexResult { let validity = if array.dtype().is_nullable() { - let validity_buff = array.validity_mask().to_boolean_buffer(); + let validity_buff = array.validity_mask().to_bit_buffer(); Validity::from_iter( filter .iter() @@ -31,14 +29,13 @@ pub fn filter_canonical_array(array: &dyn Array, filter: &[bool]) -> VortexResul match array.dtype() { DType::Bool(_) => { let bool_array = array.to_bool(); - Ok(BoolArray::from_bool_buffer( - BooleanBuffer::from_iter( - filter - .iter() - .zip(bool_array.boolean_buffer().iter()) - .filter(|(f, _)| **f) - .map(|(_, v)| v), - ), + Ok(BoolArray::from_bit_buffer( + filter + .iter() + .zip(bool_array.bit_buffer().iter()) + .filter(|(f, _)| **f) + .map(|(_, v)| v) + .collect(), validity, ) .into_array()) diff --git a/fuzz/src/array/search_sorted.rs 
b/fuzz/src/array/search_sorted.rs index eaf404649a5..ca334927697 100644 --- a/fuzz/src/array/search_sorted.rs +++ b/fuzz/src/array/search_sorted.rs @@ -55,9 +55,9 @@ pub fn search_sorted_canonical_array( match array.dtype() { DType::Bool(_) => { let bool_array = array.to_bool(); - let validity = bool_array.validity_mask().to_boolean_buffer(); + let validity = bool_array.validity_mask().to_bit_buffer(); let opt_values = bool_array - .boolean_buffer() + .bit_buffer() .iter() .zip(validity.iter()) .map(|(b, v)| v.then_some(b)) @@ -67,7 +67,7 @@ pub fn search_sorted_canonical_array( } DType::Primitive(p, _) => { let primitive_array = array.to_primitive(); - let validity = primitive_array.validity_mask().to_boolean_buffer(); + let validity = primitive_array.validity_mask().to_bit_buffer(); match_each_native_ptype!(p, |P| { let opt_values = primitive_array .as_slice::

() @@ -82,7 +82,7 @@ pub fn search_sorted_canonical_array( } DType::Decimal(d, _) => { let decimal_array = array.to_decimal(); - let validity = decimal_array.validity_mask().to_boolean_buffer(); + let validity = decimal_array.validity_mask().to_bit_buffer(); match_each_decimal_value_type!(decimal_array.values_type(), |D| { let buf = decimal_array.buffer::(); let opt_values = buf diff --git a/fuzz/src/array/slice.rs b/fuzz/src/array/slice.rs index ee2860a2f09..8575560af51 100644 --- a/fuzz/src/array/slice.rs +++ b/fuzz/src/array/slice.rs @@ -21,8 +21,8 @@ pub fn slice_canonical_array( stop: usize, ) -> VortexResult { let validity = if array.dtype().is_nullable() { - let bool_buff = array.validity_mask().to_boolean_buffer(); - Validity::from(bool_buff.slice(start, stop - start)) + let bool_buff = array.validity_mask().to_bit_buffer(); + Validity::from(bool_buff.slice(start..stop)) } else { Validity::NonNullable }; @@ -30,8 +30,8 @@ pub fn slice_canonical_array( match array.dtype() { DType::Bool(_) => { let bool_array = array.to_bool(); - let sliced_bools = bool_array.boolean_buffer().slice(start, stop - start); - Ok(BoolArray::from_bool_buffer(sliced_bools, validity).into_array()) + let sliced_bools = bool_array.bit_buffer().slice(start..stop); + Ok(BoolArray::from_bit_buffer(sliced_bools, validity).into_array()) } DType::Primitive(p, _) => { let primitive_array = array.to_primitive(); diff --git a/fuzz/src/array/sort.rs b/fuzz/src/array/sort.rs index 2c7a1a70fbd..38c130d411d 100644 --- a/fuzz/src/array/sort.rs +++ b/fuzz/src/array/sort.rs @@ -17,9 +17,9 @@ pub fn sort_canonical_array(array: &dyn Array) -> VortexResult { DType::Bool(_) => { let bool_array = array.to_bool(); let mut opt_values = bool_array - .boolean_buffer() + .bit_buffer() .iter() - .zip(bool_array.validity_mask().to_boolean_buffer().iter()) + .zip(bool_array.validity_mask().to_bit_buffer().iter()) .map(|(b, v)| v.then_some(b)) .collect::>(); opt_values.sort(); @@ -32,7 +32,7 @@ pub fn 
sort_canonical_array(array: &dyn Array) -> VortexResult { .as_slice::

() .iter() .copied() - .zip(primitive_array.validity_mask().to_boolean_buffer().iter()) + .zip(primitive_array.validity_mask().to_bit_buffer().iter()) .map(|(p, v)| v.then_some(p)) .collect::>(); sort_primitive_slice(&mut opt_values); @@ -47,7 +47,7 @@ pub fn sort_canonical_array(array: &dyn Array) -> VortexResult { .as_slice() .iter() .copied() - .zip(decimal_array.validity_mask().to_boolean_buffer().iter()) + .zip(decimal_array.validity_mask().to_bit_buffer().iter()) .map(|(p, v)| v.then_some(p)) .collect::>(); opt_values.sort(); diff --git a/fuzz/src/array/take.rs b/fuzz/src/array/take.rs index 9a29c82e257..9251175155c 100644 --- a/fuzz/src/array/take.rs +++ b/fuzz/src/array/take.rs @@ -36,7 +36,7 @@ pub fn take_canonical_array( }; let validity = if array.dtype().is_nullable() || nullable == Nullability::Nullable { - let validity_idx = array.validity_mask().to_boolean_buffer(); + let validity_idx = array.validity_mask().to_bit_buffer(); Validity::from_iter( indices @@ -53,8 +53,8 @@ pub fn take_canonical_array( match array.dtype() { DType::Bool(_) => { let bool_array = array.to_bool(); - let vec_values = bool_array.boolean_buffer().iter().collect::>(); - Ok(BoolArray::from_bool_buffer( + let vec_values = bool_array.bit_buffer().iter().collect::>(); + Ok(BoolArray::from_bit_buffer( indices_slice_non_opt .iter() .map(|i| vec_values[*i]) diff --git a/java/testfiles/Cargo.lock b/java/testfiles/Cargo.lock index 46a321d8e73..d12a9c534bc 100644 --- a/java/testfiles/Cargo.lock +++ b/java/testfiles/Cargo.lock @@ -2350,7 +2350,6 @@ dependencies = [ name = "vortex-btrblocks" version = "0.1.0" dependencies = [ - "arrow-buffer", "getrandom 0.3.3", "itertools", "log", @@ -2381,6 +2380,7 @@ name = "vortex-buffer" version = "0.1.0" dependencies = [ "arrow-buffer", + "bitvec", "bytes", "itertools", "num-traits", @@ -2392,7 +2392,6 @@ dependencies = [ name = "vortex-bytebool" version = "0.1.0" dependencies = [ - "arrow-buffer", "num-traits", "vortex-array", "vortex-buffer", @@ 
-2674,8 +2673,8 @@ dependencies = [ name = "vortex-mask" version = "0.1.0" dependencies = [ - "arrow-buffer", "itertools", + "vortex-buffer", "vortex-error", ] diff --git a/vortex-array/src/arrays/arbitrary.rs b/vortex-array/src/arrays/arbitrary.rs index 66fe02a782c..3f25636e6bf 100644 --- a/vortex-array/src/arrays/arbitrary.rs +++ b/vortex-array/src/arrays/arbitrary.rs @@ -5,16 +5,17 @@ use std::iter; use std::sync::Arc; use arbitrary::{Arbitrary, Result, Unstructured}; -use arrow_buffer::BooleanBuffer; use builders::ListBuilder; -use vortex_buffer::Buffer; +use vortex_buffer::{BitBuffer, Buffer}; use vortex_dtype::{DType, IntegerPType, NativePType, Nullability, PType}; use vortex_error::{VortexExpect, VortexUnwrap}; use vortex_scalar::arbitrary::random_scalar; use vortex_scalar::{Scalar, match_each_decimal_value_type}; -use super::{BoolArray, ChunkedArray, NullArray, PrimitiveArray, StructArray}; -use crate::arrays::{VarBinArray, VarBinViewArray, smallest_decimal_value_type}; +use super::{ + BoolArray, ChunkedArray, NullArray, PrimitiveArray, StructArray, smallest_decimal_value_type, +}; +use crate::arrays::{VarBinArray, VarBinViewArray}; use crate::builders::{ArrayBuilder, DecimalBuilder, FixedSizeListBuilder}; use crate::validity::Validity; use crate::{Array, ArrayRef, IntoArray, ToCanonical, builders}; @@ -306,7 +307,7 @@ fn random_bool( ) -> Result { let v = arbitrary_vec_of_len(u, len)?; let validity = random_validity(u, nullability, v.len())?; - Ok(BoolArray::from_bool_buffer(BooleanBuffer::from(v), validity).into_array()) + Ok(BoolArray::from_bit_buffer(BitBuffer::from(v), validity).into_array()) } fn random_validity(u: &mut Unstructured, nullability: Nullability, len: usize) -> Result { diff --git a/vortex-array/src/arrays/bool/array.rs b/vortex-array/src/arrays/bool/array.rs index c7f0e23fbde..75b0a51311e 100644 --- a/vortex-array/src/arrays/bool/array.rs +++ b/vortex-array/src/arrays/bool/array.rs @@ -1,39 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 
// SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::BitAnd; - use arrow_array::BooleanArray; -use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer}; -use itertools::Itertools; -use vortex_buffer::ByteBuffer; -use vortex_dtype::{DType, match_each_integer_ptype}; +use vortex_buffer::{BitBuffer, BitBufferMut, ByteBuffer}; +use vortex_dtype::DType; use vortex_error::{VortexExpect, VortexResult, vortex_ensure}; use vortex_mask::Mask; -use crate::ToCanonical; use crate::arrays::bool; -use crate::patches::Patches; use crate::stats::ArrayStats; use crate::validity::Validity; -use crate::vtable::ValidityHelper; - -pub trait BooleanBufferExt { - /// Slice any full bytes from the buffer, leaving the offset < 8. - fn shrink_offset(self) -> Self; -} - -impl BooleanBufferExt for BooleanBuffer { - fn shrink_offset(self) -> Self { - let byte_offset = self.offset() / 8; - let bit_offset = self.offset() % 8; - let len = self.len(); - let buffer = self - .into_inner() - .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8)); - BooleanBuffer::new(buffer, bit_offset, len) - } -} /// A boolean array that stores true/false values in a compact bit-packed format. /// @@ -67,7 +43,7 @@ impl BooleanBufferExt for BooleanBuffer { #[derive(Clone, Debug)] pub struct BoolArray { pub(super) dtype: DType, - pub(super) buffer: BooleanBuffer, + pub(super) buffer: BitBuffer, pub(super) validity: Validity, pub(super) stats_set: ArrayStats, } @@ -118,7 +94,7 @@ impl BoolArray { Self::validate(&buffer, offset, len, &validity) .vortex_expect("[Debug Assertion]: Invalid `BoolArray` parameters"); - let buffer = BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len); + let buffer = BitBuffer::new_with_offset(buffer, len, offset); let buffer = buffer.shrink_offset(); Self { dtype: DType::Bool(validity.nullability()), @@ -161,12 +137,12 @@ impl BoolArray { Ok(()) } - /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`] directly. 
+ /// Creates a new [`BoolArray`] from a [`BitBuffer`] and [`Validity`] directly. /// /// # Panics /// /// Panics if the validity is [`Validity::Array`] and the length is not the same as the buffer. - pub fn from_bool_buffer(buffer: BooleanBuffer, validity: Validity) -> Self { + pub fn from_bit_buffer(buffer: BitBuffer, validity: Validity) -> Self { if let Some(validity_len) = validity.maybe_len() { assert_eq!(buffer.len(), validity_len); } @@ -189,19 +165,13 @@ impl BoolArray { indices: I, validity: Validity, ) -> Self { - let mut buffer = MutableBuffer::new_null(length); - let buffer_slice = buffer.as_slice_mut(); - indices - .into_iter() - .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx)); - Self::from_bool_buffer( - BooleanBufferBuilder::new_from_buffer(buffer, length).finish(), - validity, - ) + let mut buffer = BitBufferMut::new_unset(length); + indices.into_iter().for_each(|idx| buffer.set(idx)); + Self::from_bit_buffer(buffer.freeze(), validity) } - /// Returns the underlying [`BooleanBuffer`] of the array. - pub fn boolean_buffer(&self) -> &BooleanBuffer { + /// Returns the underlying [`BitBuffer`] of the array. + pub fn bit_buffer(&self) -> &BitBuffer { assert!( self.buffer.offset() < 8, "Offset must be <8, did we forget to call shrink_offset? Found {}", @@ -210,32 +180,9 @@ impl BoolArray { &self.buffer } - /// Get a mutable version of this array. - /// - /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned - /// otherwise a copy is created. 
- /// - /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder - pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) { - let offset = self.buffer.offset(); - let len = self.buffer.len(); - let arrow_buffer = self.buffer.into_inner(); - let mutable_buf = if arrow_buffer.ptr_offset() == 0 { - arrow_buffer.into_mutable().unwrap_or_else(|b| { - let mut buf = MutableBuffer::with_capacity(b.len()); - buf.extend_from_slice(b.as_slice()); - buf - }) - } else { - let mut buf = MutableBuffer::with_capacity(arrow_buffer.len()); - buf.extend_from_slice(arrow_buffer.as_slice()); - buf - }; - - ( - BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len), - offset, - ) + /// Returns the underlying [`BitBuffer`] of the array + pub fn into_bit_buffer(self) -> BitBuffer { + self.buffer } pub fn to_mask(&self) -> Mask { @@ -245,7 +192,7 @@ impl BoolArray { pub fn maybe_to_mask(&self) -> Option { self.all_valid() - .then(|| Mask::from_buffer(self.boolean_buffer().clone())) + .then(|| Mask::from_buffer(self.bit_buffer().clone())) } pub fn to_mask_fill_null_false(&self) -> Mask { @@ -259,49 +206,23 @@ impl BoolArray { } // Extract a boolean buffer, treating null values to false let buffer = match self.validity_mask() { - Mask::AllTrue(_) => self.boolean_buffer().clone(), + Mask::AllTrue(_) => self.bit_buffer().clone(), Mask::AllFalse(_) => return Mask::new_false(self.len()), - Mask::Values(validity) => validity.boolean_buffer().bitand(self.boolean_buffer()), + Mask::Values(validity) => validity.bit_buffer() & self.bit_buffer(), }; Mask::from_buffer(buffer) } - - pub fn patch(self, patches: &Patches) -> Self { - let len = self.len(); - let offset = patches.offset(); - let indices = patches.indices().to_primitive(); - let values = patches.values().to_bool(); - - let patched_validity = - self.validity() - .clone() - .patch(len, offset, indices.as_ref(), values.validity()); - - let (mut own_values, bit_offset) =
self.into_boolean_builder(); - match_each_integer_ptype!(indices.ptype(), |I| { - for (idx, value) in indices - .as_slice::() - .iter() - .zip_eq(values.boolean_buffer().iter()) - { - #[allow(clippy::cast_possible_truncation)] - own_values.set_bit(*idx as usize - offset + bit_offset, value); - } - }); - - Self::from_bool_buffer(own_values.finish().slice(bit_offset, len), patched_validity) - } } -impl From for BoolArray { - fn from(value: BooleanBuffer) -> Self { - Self::from_bool_buffer(value, Validity::NonNullable) +impl From for BoolArray { + fn from(value: BitBuffer) -> Self { + Self::from_bit_buffer(value, Validity::NonNullable) } } impl FromIterator for BoolArray { fn from_iter>(iter: T) -> Self { - Self::from_bool_buffer(BooleanBuffer::from_iter(iter), Validity::NonNullable) + Self::from(BitBuffer::from_iter(iter)) } } @@ -309,17 +230,18 @@ impl FromIterator> for BoolArray { fn from_iter>>(iter: I) -> Self { let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts(); - Self::from_bool_buffer( - buffer, - nulls.map(Validity::from).unwrap_or(Validity::AllValid), + Self::from_bit_buffer( + BitBuffer::from(buffer), + nulls + .map(|n| Validity::from(BitBuffer::from(n.into_inner()))) + .unwrap_or(Validity::AllValid), ) } } #[cfg(test)] mod tests { - use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder}; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, BitBufferMut, buffer}; use crate::arrays::{BoolArray, PrimitiveArray}; use crate::patches::Patches; @@ -368,22 +290,21 @@ mod tests { #[test] fn patch_sliced_bools() { - let arr = BoolArray::from(BooleanBuffer::new_set(12)); + let arr = BoolArray::from(BitBuffer::new_set(12)); let sliced = arr.slice(4..12); - let (values, offset) = sliced.to_bool().into_boolean_builder(); + let (values, offset) = sliced.to_bool().into_bit_buffer().into_mut(); assert_eq!(offset, 4); assert_eq!(values.len(), 12); assert_eq!(values.as_slice(), &[255, 15]); let arr = { - let mut builder = 
BooleanBufferBuilder::new(12); - builder.append(false); - builder.append_n(11, true); - BoolArray::from(builder.finish()) + let mut builder = BitBufferMut::new_unset(12); + (1..12).for_each(|i| builder.set(i)); + BoolArray::from(builder.freeze()) }; let sliced = arr.slice(4..12); let sliced_len = sliced.len(); - let (values, offset) = sliced.to_bool().into_boolean_builder(); + let (values, offset) = sliced.to_bool().into_bit_buffer().into_mut(); assert_eq!(offset, 4); assert_eq!(values.as_slice(), &[254, 15]); @@ -392,61 +313,58 @@ mod tests { arr.len(), 0, buffer![4u32].into_array(), // This creates a non-nullable array - BoolArray::from(BooleanBuffer::new_unset(1)).into_array(), + BoolArray::from(BitBuffer::new_unset(1)).into_array(), None, ); let arr = arr.patch(&patches); let arr_len = arr.len(); - let (values, offset) = arr.to_bool().into_boolean_builder(); + let (values, offset) = arr.to_bool().into_bit_buffer().into_mut(); assert_eq!(offset, 0); - assert_eq!(values.len(), arr_len + offset); + assert_eq!(values.len(), arr_len); assert_eq!(values.as_slice(), &[238, 15]); // the slice should be unchanged - let (values, offset) = sliced.to_bool().into_boolean_builder(); + let (values, offset) = sliced.to_bool().into_bit_buffer().into_mut(); assert_eq!(offset, 4); - assert_eq!(values.len(), sliced_len + offset); + assert_eq!(values.len(), sliced_len); assert_eq!(values.as_slice(), &[254, 15]); // unchanged } #[test] fn slice_array_in_middle() { - let arr = BoolArray::from(BooleanBuffer::new_set(16)); + let arr = BoolArray::from(BitBuffer::new_set(16)); let sliced = arr.slice(4..12); let sliced_len = sliced.len(); - let (values, offset) = sliced.to_bool().into_boolean_builder(); + let (values, offset) = sliced.to_bool().into_bit_buffer().into_mut(); assert_eq!(offset, 4); - assert_eq!(values.len(), sliced_len + offset); - assert_eq!(values.as_slice(), &[255, 15]); + assert_eq!(values.len(), sliced_len); + assert_eq!(values.as_slice(), &[255, 255]); } #[test] - 
#[should_panic] fn patch_bools_owned() { - let buffer = buffer![255u8; 2]; - let buf = BooleanBuffer::new(buffer.into_arrow_buffer(), 0, 15); - let arr = BoolArray::from_bool_buffer(buf, Validity::NonNullable); - let buf_ptr = arr.boolean_buffer().sliced().as_ptr(); + let arr = BoolArray::from(BitBuffer::new_set(16)); + let buf_ptr = arr.bit_buffer().inner().as_ptr(); let patches = Patches::new( arr.len(), 0, - PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(), - BoolArray::from(BooleanBuffer::new_unset(1)).into_array(), + PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array(), + BoolArray::from(BitBuffer::new_unset(1)).into_array(), None, ); let arr = arr.patch(&patches); - assert_eq!(arr.boolean_buffer().sliced().as_ptr(), buf_ptr); + assert_eq!(arr.bit_buffer().inner().as_ptr(), buf_ptr); - let (values, _byte_bit_offset) = arr.to_bool().into_boolean_builder(); - assert_eq!(values.as_slice(), &[254, 127]); + let values = arr.into_bit_buffer(); + assert_eq!(values.inner().as_slice(), &[254, 255]); } #[test] fn patch_sliced_bools_offset() { - let arr = BoolArray::from(BooleanBuffer::new_set(15)); + let arr = BoolArray::from(BitBuffer::new_set(15)); let sliced = arr.slice(4..15); - let (values, offset) = sliced.to_bool().into_boolean_builder(); + let (values, offset) = sliced.to_bool().into_bit_buffer().into_mut(); assert_eq!(offset, 4); assert_eq!(values.as_slice(), &[255, 127]); } diff --git a/vortex-array/src/arrays/bool/compute/cast.rs b/vortex-array/src/arrays/bool/compute/cast.rs index 57047f15387..48a36e61d72 100644 --- a/vortex-array/src/arrays/bool/compute/cast.rs +++ b/vortex-array/src/arrays/bool/compute/cast.rs @@ -22,7 +22,7 @@ impl CastKernel for BoolVTable { .clone() .cast_nullability(new_nullability, array.len())?; Ok(Some( - BoolArray::from_bool_buffer(array.boolean_buffer().clone(), new_validity).to_array(), + BoolArray::from_bit_buffer(array.bit_buffer().clone(), new_validity).to_array(), )) } } diff --git 
a/vortex-array/src/arrays/bool/compute/fill_null.rs b/vortex-array/src/arrays/bool/compute/fill_null.rs index 2f1bd9a1ff3..60838f2deae 100644 --- a/vortex-array/src/arrays/bool/compute/fill_null.rs +++ b/vortex-array/src/arrays/bool/compute/fill_null.rs @@ -18,8 +18,8 @@ impl FillNullKernel for BoolVTable { .ok_or_else(|| vortex_err!("Fill value must be non null"))?; Ok(match array.validity() { - Validity::NonNullable | Validity::AllValid => BoolArray::from_bool_buffer( - array.boolean_buffer().clone(), + Validity::NonNullable | Validity::AllValid => BoolArray::from_bit_buffer( + array.bit_buffer().clone(), fill_value.dtype().nullability().into(), ) .into_array(), @@ -28,11 +28,11 @@ impl FillNullKernel for BoolVTable { } Validity::Array(v) => { let bool_buffer = if fill { - array.boolean_buffer() | &!v.to_bool().boolean_buffer() + array.bit_buffer() | &!v.to_bool().bit_buffer() } else { - array.boolean_buffer() & v.to_bool().boolean_buffer() + array.bit_buffer() & v.to_bool().bit_buffer() }; - BoolArray::from_bool_buffer(bool_buffer, fill_value.dtype().nullability().into()) + BoolArray::from_bit_buffer(bool_buffer, fill_value.dtype().nullability().into()) .into_array() } }) @@ -43,8 +43,8 @@ register_kernel!(FillNullKernelAdapter(BoolVTable).lift()); #[cfg(test)] mod tests { - use arrow_buffer::BooleanBuffer; use rstest::rstest; + use vortex_buffer::BitBuffer; use vortex_dtype::{DType, Nullability}; use crate::arrays::BoolArray; @@ -56,15 +56,15 @@ mod tests { #[case(true, vec![true, true, false, true])] #[case(false, vec![true, false, false, false])] fn bool_fill_null(#[case] fill_value: bool, #[case] expected: Vec) { - let bool_array = BoolArray::from_bool_buffer( - BooleanBuffer::from_iter([true, true, false, false]), + let bool_array = BoolArray::from_bit_buffer( + BitBuffer::from_iter([true, true, false, false]), Validity::from_iter([true, false, true, false]), ); let non_null_array = fill_null(bool_array.as_ref(), &fill_value.into()) .unwrap() .to_bool(); 
assert_eq!( - non_null_array.boolean_buffer().iter().collect::>(), + non_null_array.bit_buffer().iter().collect::>(), expected ); assert_eq!( diff --git a/vortex-array/src/arrays/bool/compute/filter.rs b/vortex-array/src/arrays/bool/compute/filter.rs index a78688b5b76..4ff10471035 100644 --- a/vortex-array/src/arrays/bool/compute/filter.rs +++ b/vortex-array/src/arrays/bool/compute/filter.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, bit_util}; +use vortex_buffer::{BitBuffer, BitBufferMut}; use vortex_error::{VortexExpect, VortexResult}; use vortex_mask::{Mask, MaskIter}; @@ -23,18 +23,18 @@ impl FilterKernel for BoolVTable { let buffer = match mask_values.threshold_iter(FILTER_SLICES_DENSITY_THRESHOLD) { MaskIter::Indices(indices) => filter_indices( - array.boolean_buffer(), + array.bit_buffer(), mask.true_count(), indices.iter().copied(), ), MaskIter::Slices(slices) => filter_slices( - array.boolean_buffer(), + array.bit_buffer(), mask.true_count(), slices.iter().copied(), ), }; - Ok(BoolArray::from_bool_buffer(buffer, validity).into_array()) + Ok(BoolArray::from_bit_buffer(buffer, validity).into_array()) } } @@ -44,34 +44,31 @@ register_kernel!(FilterKernelAdapter(BoolVTable).lift()); /// NOTE: it was benchmarked to be faster using collect_bool to index into a slice than to /// pass the indices as an iterator of usize. So we keep this alternate implementation. 
pub fn filter_indices( - buffer: &BooleanBuffer, + buffer: &BitBuffer, indices_len: usize, mut indices: impl Iterator, -) -> BooleanBuffer { - let src = buffer.values().as_ptr(); - let offset = buffer.offset(); - - BooleanBuffer::collect_bool(indices_len, |_idx| { +) -> BitBuffer { + BitBuffer::collect_bool(indices_len, |_idx| { let idx = indices .next() .vortex_expect("iterator is guaranteed to be within the length of the array."); - unsafe { bit_util::get_bit_raw(src, idx + offset) } + buffer.value(idx) }) } pub fn filter_slices( - buffer: &BooleanBuffer, + buffer: &BitBuffer, indices_len: usize, slices: impl Iterator, -) -> BooleanBuffer { - let src = buffer.values(); +) -> BitBuffer { let offset = buffer.offset(); + let src = buffer.inner(); - let mut builder = BooleanBufferBuilder::new(indices_len); + let mut builder = BitBufferMut::with_capacity(indices_len); for (start, end) in slices { builder.append_packed_range(start + offset..end + offset, src) } - builder.into() + builder.freeze() } #[cfg(test)] @@ -95,7 +92,7 @@ mod test { assert_eq!( vec![true, false], - filtered.boolean_buffer().iter().collect_vec() + filtered.bit_buffer().iter().collect_vec() ) } @@ -103,7 +100,7 @@ mod test { fn filter_bool_by_slice_test() { let arr = BoolArray::from_iter([true, true, false]); - let filtered = filter_slices(arr.boolean_buffer(), 2, [(0, 1), (2, 3)].into_iter()); + let filtered = filter_slices(arr.bit_buffer(), 2, [(0, 1), (2, 3)].into_iter()); assert_eq!(2, filtered.len()); assert_eq!(vec![true, false], filtered.iter().collect_vec()) @@ -113,7 +110,7 @@ mod test { fn filter_bool_by_index_test() { let arr = BoolArray::from_iter([true, true, false]); - let filtered = filter_indices(arr.boolean_buffer(), 2, [0, 2].into_iter()); + let filtered = filter_indices(arr.bit_buffer(), 2, [0, 2].into_iter()); assert_eq!(2, filtered.len()); assert_eq!(vec![true, false], filtered.iter().collect_vec()) diff --git a/vortex-array/src/arrays/bool/compute/invert.rs 
b/vortex-array/src/arrays/bool/compute/invert.rs index ddda7376ce2..377ca51195c 100644 --- a/vortex-array/src/arrays/bool/compute/invert.rs +++ b/vortex-array/src/arrays/bool/compute/invert.rs @@ -13,7 +13,7 @@ use crate::{ArrayRef, IntoArray, register_kernel}; impl InvertKernel for BoolVTable { fn invert(&self, array: &BoolArray) -> VortexResult { Ok( - BoolArray::from_bool_buffer(array.boolean_buffer().not(), array.validity().clone()) + BoolArray::from_bit_buffer(array.bit_buffer().not(), array.validity().clone()) .into_array(), ) } diff --git a/vortex-array/src/arrays/bool/compute/is_constant.rs b/vortex-array/src/arrays/bool/compute/is_constant.rs index 42fe7ddb273..0b7bb518889 100644 --- a/vortex-array/src/arrays/bool/compute/is_constant.rs +++ b/vortex-array/src/arrays/bool/compute/is_constant.rs @@ -14,20 +14,8 @@ impl IsConstantKernel for BoolVTable { return Ok(None); } - let buffer = array.boolean_buffer(); - - // Safety: - // We must have at least one value at this point - let first_value = unsafe { buffer.value_unchecked(0) }; - let value_block = if first_value { u64::MAX } else { 0_u64 }; - - let bit_chunks = buffer.bit_chunks(); - let packed = bit_chunks.iter().all(|chunk| chunk == value_block); - let reminder = bit_chunks.remainder_bits().count_ones() as usize - == bit_chunks.remainder_len() * (first_value as usize); - - // We iterate on blocks of u64 - Ok(Some(packed & reminder)) + let true_count = array.bit_buffer().true_count(); + Ok(Some(true_count == array.len() || true_count == 0)) } } diff --git a/vortex-array/src/arrays/bool/compute/is_sorted.rs b/vortex-array/src/arrays/bool/compute/is_sorted.rs index 47430f9fb0c..3e9d39f25c9 100644 --- a/vortex-array/src/arrays/bool/compute/is_sorted.rs +++ b/vortex-array/src/arrays/bool/compute/is_sorted.rs @@ -12,10 +12,10 @@ impl IsSortedKernel for BoolVTable { fn is_sorted(&self, array: &BoolArray) -> VortexResult> { match array.validity_mask() { Mask::AllFalse(_) => Ok(Some(true)), - Mask::AllTrue(_) => 
Ok(Some(array.boolean_buffer().iter().is_sorted())), + Mask::AllTrue(_) => Ok(Some(array.bit_buffer().iter().is_sorted())), Mask::Values(mask_values) => { - let set_indices = mask_values.boolean_buffer().set_indices(); - let values = array.boolean_buffer(); + let set_indices = mask_values.bit_buffer().set_indices(); + let values = array.bit_buffer(); let values_iter = set_indices.map(|idx| // Safety: // All idxs are in-bounds for the array. @@ -31,10 +31,10 @@ impl IsSortedKernel for BoolVTable { fn is_strict_sorted(&self, array: &BoolArray) -> VortexResult> { match array.validity_mask() { Mask::AllFalse(_) => Ok(Some(false)), - Mask::AllTrue(_) => Ok(Some(array.boolean_buffer().iter().is_strict_sorted())), + Mask::AllTrue(_) => Ok(Some(array.bit_buffer().iter().is_strict_sorted())), Mask::Values(mask_values) => { - let validity_buffer = mask_values.boolean_buffer(); - let values = array.boolean_buffer(); + let validity_buffer = mask_values.bit_buffer(); + let values = array.bit_buffer(); Ok(Some( validity_buffer diff --git a/vortex-array/src/arrays/bool/compute/mask.rs b/vortex-array/src/arrays/bool/compute/mask.rs index 85de7dba223..cd67b243d0c 100644 --- a/vortex-array/src/arrays/bool/compute/mask.rs +++ b/vortex-array/src/arrays/bool/compute/mask.rs @@ -12,11 +12,8 @@ use crate::{ArrayRef, IntoArray, register_kernel}; impl MaskKernel for BoolVTable { fn mask(&self, array: &BoolArray, mask: &Mask) -> VortexResult { Ok( - BoolArray::from_bool_buffer( - array.boolean_buffer().clone(), - array.validity().mask(mask), - ) - .into_array(), + BoolArray::from_bit_buffer(array.bit_buffer().clone(), array.validity().mask(mask)) + .into_array(), ) } } diff --git a/vortex-array/src/arrays/bool/compute/min_max.rs b/vortex-array/src/arrays/bool/compute/min_max.rs index 24f160a412a..3752fed811d 100644 --- a/vortex-array/src/arrays/bool/compute/min_max.rs +++ b/vortex-array/src/arrays/bool/compute/min_max.rs @@ -14,9 +14,9 @@ use crate::register_kernel; impl MinMaxKernel for 
BoolVTable { fn min_max(&self, array: &BoolArray) -> VortexResult> { let x = match array.validity_mask() { - Mask::AllTrue(_) => array.boolean_buffer().clone(), + Mask::AllTrue(_) => array.bit_buffer().clone(), Mask::AllFalse(_) => return Ok(None), - Mask::Values(v) => array.boolean_buffer().bitand(v.boolean_buffer()), + Mask::Values(v) => array.bit_buffer().bitand(v.bit_buffer()), }; // TODO(ngates): we should be able to bail out earlier as soon as we have one true and diff --git a/vortex-array/src/arrays/bool/compute/sum.rs b/vortex-array/src/arrays/bool/compute/sum.rs index 8187e9d740f..8d404ed195b 100644 --- a/vortex-array/src/arrays/bool/compute/sum.rs +++ b/vortex-array/src/arrays/bool/compute/sum.rs @@ -13,21 +13,18 @@ use crate::register_kernel; impl SumKernel for BoolVTable { fn sum(&self, array: &BoolArray) -> VortexResult { - let true_count: Option = match array.validity_mask().boolean_buffer() { + let true_count: Option = match array.validity_mask().bit_buffer() { AllOr::All => { // All-valid - Some(array.boolean_buffer().count_set_bits() as u64) + Some(array.bit_buffer().true_count() as u64) } AllOr::None => { // All-invalid unreachable!("All-invalid boolean array should have been handled by entry-point") } - AllOr::Some(validity_mask) => Some( - array - .boolean_buffer() - .bitand(validity_mask) - .count_set_bits() as u64, - ), + AllOr::Some(validity_mask) => { + Some(array.bit_buffer().bitand(validity_mask).true_count() as u64) + } }; Ok(Scalar::from(true_count)) } diff --git a/vortex-array/src/arrays/bool/compute/take.rs b/vortex-array/src/arrays/bool/compute/take.rs index 379b023ecb6..999071e707b 100644 --- a/vortex-array/src/arrays/bool/compute/take.rs +++ b/vortex-array/src/arrays/bool/compute/take.rs @@ -1,9 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBuffer; use itertools::Itertools as _; use num_traits::AsPrimitive; +use vortex_buffer::BitBuffer; use 
vortex_dtype::match_each_integer_ptype; use vortex_error::VortexResult; use vortex_mask::Mask; @@ -29,37 +29,34 @@ impl TakeKernel for BoolVTable { }; let indices_nulls_zeroed = indices_nulls_zeroed.to_primitive(); let buffer = match_each_integer_ptype!(indices_nulls_zeroed.ptype(), |I| { - take_valid_indices(array.boolean_buffer(), indices_nulls_zeroed.as_slice::()) + take_valid_indices(array.bit_buffer(), indices_nulls_zeroed.as_slice::()) }); - Ok(BoolArray::from_bool_buffer(buffer, array.validity().take(indices)?).to_array()) + Ok(BoolArray::from_bit_buffer(buffer, array.validity().take(indices)?).to_array()) } } register_kernel!(TakeKernelAdapter(BoolVTable).lift()); -fn take_valid_indices>( - bools: &BooleanBuffer, - indices: &[I], -) -> BooleanBuffer { +fn take_valid_indices>(bools: &BitBuffer, indices: &[I]) -> BitBuffer { // For boolean arrays that roughly fit into a single page (at least, on Linux), it's worth // the overhead to convert to a Vec. if bools.len() <= 4096 { - let bools = bools.into_iter().collect_vec(); + let bools = bools.iter().collect_vec(); take_byte_bool(bools, indices) } else { take_bool(bools, indices) } } -fn take_byte_bool>(bools: Vec, indices: &[I]) -> BooleanBuffer { - BooleanBuffer::collect_bool(indices.len(), |idx| { +fn take_byte_bool>(bools: Vec, indices: &[I]) -> BitBuffer { + BitBuffer::collect_bool(indices.len(), |idx| { bools[unsafe { indices.get_unchecked(idx).as_() }] }) } -fn take_bool>(bools: &BooleanBuffer, indices: &[I]) -> BooleanBuffer { - BooleanBuffer::collect_bool(indices.len(), |idx| { +fn take_bool>(bools: &BitBuffer, indices: &[I]) -> BitBuffer { + BitBuffer::collect_bool(indices.len(), |idx| { // We can always take from the indices unchecked since collect_bool just iterates len. 
bools.value(unsafe { indices.get_unchecked(idx).as_() }) }) @@ -93,8 +90,8 @@ mod test { .unwrap() .to_bool(); assert_eq!( - b.boolean_buffer(), - BoolArray::from_iter([Some(false), None, Some(false)]).boolean_buffer() + b.bit_buffer(), + BoolArray::from_iter([Some(false), None, Some(false)]).bit_buffer() ); let nullable_bool_dtype = DType::Bool(Nullability::Nullable); diff --git a/vortex-array/src/arrays/bool/mod.rs b/vortex-array/src/arrays/bool/mod.rs index 7c168a1bfb3..36ea433cf97 100644 --- a/vortex-array/src/arrays/bool/mod.rs +++ b/vortex-array/src/arrays/bool/mod.rs @@ -2,9 +2,9 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod array; -pub use array::{BoolArray, BooleanBufferExt}; -// Re-export Arrow's `BooleanBuffer` type on our API surface. -pub use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder}; +mod patch; + +pub use array::*; pub mod compute; diff --git a/vortex-array/src/arrays/bool/patch.rs b/vortex-array/src/arrays/bool/patch.rs new file mode 100644 index 00000000000..657081290b1 --- /dev/null +++ b/vortex-array/src/arrays/bool/patch.rs @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use itertools::Itertools; +use vortex_dtype::match_each_unsigned_integer_ptype; + +use crate::ToCanonical; +use crate::arrays::BoolArray; +use crate::patches::Patches; +use crate::vtable::ValidityHelper; + +impl BoolArray { + pub fn patch(self, patches: &Patches) -> Self { + let len = self.len(); + let offset = patches.offset(); + let indices = patches.indices().to_primitive(); + let values = patches.values().to_bool(); + + let patched_validity = + self.validity() + .clone() + .patch(len, offset, indices.as_ref(), values.validity()); + + let (mut own_values, bit_offset) = self.into_bit_buffer().into_mut(); + match_each_unsigned_integer_ptype!(indices.ptype(), |I| { + for (idx, value) in indices + .as_slice::() + .iter() + .zip_eq(values.bit_buffer().iter()) + { + 
#[allow(clippy::cast_possible_truncation)] + own_values.set_to(*idx as usize - offset + bit_offset, value); + } + }); + + Self::from_bit_buffer( + own_values.freeze().slice(bit_offset..(bit_offset + len)), + patched_validity, + ) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::BitBuffer; + + use crate::ToCanonical; + use crate::arrays::BoolArray; + + #[test] + fn patch_sliced_bools() { + let arr = BoolArray::from(BitBuffer::new_set(12)); + let sliced = arr.slice(4..12); + let (values, offset) = sliced.to_bool().into_bit_buffer().into_mut(); + assert_eq!(offset, 4); + assert_eq!(values.len(), 8); + assert_eq!(values.as_slice(), &[255, 255]); + } + + #[test] + fn patch_sliced_bools_offset() { + let arr = BoolArray::from(BitBuffer::new_set(15)); + let sliced = arr.slice(4..15); + let (values, offset) = sliced.to_bool().into_bit_buffer().into_mut(); + assert_eq!(offset, 4); + assert_eq!(values.len(), 11); + assert_eq!(values.as_slice(), &[255, 255]); + } +} diff --git a/vortex-array/src/arrays/bool/test_harness.rs b/vortex-array/src/arrays/bool/test_harness.rs index c8022c3125b..f1df7f8f2d2 100644 --- a/vortex-array/src/arrays/bool/test_harness.rs +++ b/vortex-array/src/arrays/bool/test_harness.rs @@ -1,34 +1,32 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use itertools::Itertools; -use vortex_error::{VortexResult, vortex_bail}; +use vortex_error::vortex_panic; use crate::arrays::BoolArray; impl BoolArray { - pub fn opt_bool_vec(&self) -> VortexResult>> { - Ok(self - .validity_mask() - .to_boolean_buffer() - .into_iter() - .zip(self.boolean_buffer().iter()) - .map(move |(valid, value)| valid.then_some(value)) - .collect_vec()) + pub fn opt_bool_vec(&self) -> Vec> { + self.validity_mask() + .to_bit_buffer() + .iter() + .zip(self.bit_buffer().iter()) + .map(|(valid, value)| valid.then_some(value)) + .collect() } - pub fn bool_vec(&self) -> VortexResult> { + pub fn bool_vec(&self) -> Vec { 
self.validity_mask() - .to_boolean_buffer() - .into_iter() - .zip(self.boolean_buffer().iter()) - .map(move |(valid, value)| { + .to_bit_buffer() + .iter() + .zip(self.bit_buffer().iter()) + .map(|(valid, value)| { if !valid { - vortex_bail!("trying to get bool values from an array with null elements") + vortex_panic!("trying to get bool values from an array with null elements") } - Ok(value) + value }) - .try_collect() + .collect() } } diff --git a/vortex-array/src/arrays/bool/vtable/operations.rs b/vortex-array/src/arrays/bool/vtable/operations.rs index d5543117452..615c736fe43 100644 --- a/vortex-array/src/arrays/bool/vtable/operations.rs +++ b/vortex-array/src/arrays/bool/vtable/operations.rs @@ -11,18 +11,15 @@ use crate::{ArrayRef, IntoArray}; impl OperationsVTable for BoolVTable { fn slice(array: &BoolArray, range: Range) -> ArrayRef { - BoolArray::from_bool_buffer( - array.boolean_buffer().slice(range.start, range.len()), + BoolArray::from_bit_buffer( + array.bit_buffer().slice(range.clone()), array.validity().slice(range), ) .into_array() } fn scalar_at(array: &BoolArray, index: usize) -> Scalar { - Scalar::bool( - array.boolean_buffer().value(index), - array.dtype().nullability(), - ) + Scalar::bool(array.bit_buffer().value(index), array.dtype().nullability()) } } @@ -38,8 +35,8 @@ mod tests { let arr = BoolArray::from_iter(iter::repeat_n(Some(true), 100)); let sliced_arr = arr.slice(8..16).to_bool(); assert_eq!(sliced_arr.len(), 8); - assert_eq!(sliced_arr.boolean_buffer().len(), 8); - assert_eq!(sliced_arr.boolean_buffer().offset(), 0); + assert_eq!(sliced_arr.bit_buffer().len(), 8); + assert_eq!(sliced_arr.bit_buffer().offset(), 0); } #[test] diff --git a/vortex-array/src/arrays/bool/vtable/serde.rs b/vortex-array/src/arrays/bool/vtable/serde.rs index b2e284976e6..af817be9a18 100644 --- a/vortex-array/src/arrays/bool/vtable/serde.rs +++ b/vortex-array/src/arrays/bool/vtable/serde.rs @@ -3,7 +3,7 @@ use vortex_buffer::ByteBuffer; use 
vortex_dtype::DType; -use vortex_error::{VortexResult, vortex_bail, vortex_err}; +use vortex_error::{VortexExpect, VortexResult, vortex_bail}; use super::BoolArray; use crate::ProstMetadata; @@ -23,10 +23,11 @@ impl SerdeVTable for BoolVTable { type Metadata = ProstMetadata; fn metadata(array: &BoolArray) -> VortexResult> { - let bit_offset = array.boolean_buffer().offset(); - let bit_offset = u32::try_from(bit_offset) - .map_err(|_| vortex_err!("bit_offset {bit_offset} overflows u32"))?; - Ok(Some(ProstMetadata(BoolMetadata { offset: bit_offset }))) + let bit_offset = array.bit_buffer().offset(); + assert!(bit_offset < 8, "Offset must be <8, got {bit_offset}"); + Ok(Some(ProstMetadata(BoolMetadata { + offset: u32::try_from(bit_offset).vortex_expect("checked"), + }))) } fn build( diff --git a/vortex-array/src/arrays/bool/vtable/visitor.rs b/vortex-array/src/arrays/bool/vtable/visitor.rs index 6ca6b309bd0..63371ecbb55 100644 --- a/vortex-array/src/arrays/bool/vtable/visitor.rs +++ b/vortex-array/src/arrays/bool/vtable/visitor.rs @@ -1,18 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_buffer::{Alignment, ByteBuffer}; - use crate::arrays::{BoolArray, BoolVTable}; use crate::vtable::VisitorVTable; use crate::{ArrayBufferVisitor, ArrayChildVisitor}; impl VisitorVTable for BoolVTable { fn visit_buffers(array: &BoolArray, visitor: &mut dyn ArrayBufferVisitor) { - visitor.visit_buffer(&ByteBuffer::from_arrow_buffer( - array.boolean_buffer().clone().into_inner(), - Alignment::none(), - )) + visitor.visit_buffer(array.bit_buffer().inner()) } fn visit_children(array: &BoolArray, visitor: &mut dyn ArrayChildVisitor) { diff --git a/vortex-array/src/arrays/chunked/compute/fill_null.rs b/vortex-array/src/arrays/chunked/compute/fill_null.rs index 5cacfc2c09c..ba2e60ef004 100644 --- a/vortex-array/src/arrays/chunked/compute/fill_null.rs +++ b/vortex-array/src/arrays/chunked/compute/fill_null.rs @@ -29,7 +29,7 
@@ register_kernel!(FillNullKernelAdapter(ChunkedVTable).lift()); #[cfg(test)] mod tests { - use arrow_buffer::BooleanBuffer; + use vortex_buffer::BitBuffer; use vortex_dtype::{DType, Nullability}; use crate::array::Array; @@ -41,10 +41,8 @@ mod tests { fn fill_null_chunks() { let chunked = ChunkedArray::try_new( vec![ - BoolArray::from_bool_buffer(BooleanBuffer::new_set(5), Validity::AllInvalid) - .to_array(), - BoolArray::from_bool_buffer(BooleanBuffer::new_set(5), Validity::AllValid) - .to_array(), + BoolArray::from_bit_buffer(BitBuffer::new_set(5), Validity::AllInvalid).to_array(), + BoolArray::from_bit_buffer(BitBuffer::new_set(5), Validity::AllValid).to_array(), ], DType::Bool(Nullability::Nullable), ) diff --git a/vortex-array/src/arrays/chunked/vtable/compute.rs b/vortex-array/src/arrays/chunked/vtable/compute.rs index 63ba36feb40..61cb55802c3 100644 --- a/vortex-array/src/arrays/chunked/vtable/compute.rs +++ b/vortex-array/src/arrays/chunked/vtable/compute.rs @@ -79,9 +79,10 @@ fn invoke_elementwise( #[cfg(test)] mod tests { + use vortex_buffer::BitBuffer; use vortex_dtype::{DType, Nullability}; - use crate::arrays::{BoolArray, BooleanBuffer, ChunkedArray}; + use crate::arrays::{BoolArray, ChunkedArray}; use crate::canonical::ToCanonical; use crate::compute::{BooleanOperator, boolean}; @@ -101,8 +102,8 @@ mod tests { .unwrap() .to_bool(); assert_eq!( - result.boolean_buffer(), - &BooleanBuffer::from_iter([true, true, false, false, true]) + result.bit_buffer(), + &BitBuffer::from_iter([true, true, false, false, true]) ); } } diff --git a/vortex-array/src/arrays/constant/compute/take.rs b/vortex-array/src/arrays/constant/compute/take.rs index f0f97d4bbc5..023785d078a 100644 --- a/vortex-array/src/arrays/constant/compute/take.rs +++ b/vortex-array/src/arrays/constant/compute/take.rs @@ -12,7 +12,7 @@ use crate::{Array, ArrayRef, IntoArray, register_kernel}; impl TakeKernel for ConstantVTable { fn take(&self, array: &ConstantArray, indices: &dyn Array) -> 
VortexResult { - match indices.validity_mask().boolean_buffer() { + match indices.validity_mask().bit_buffer() { AllOr::All => { let scalar = Scalar::new( array diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs index c01fdf92c7d..744ac2cdfdd 100644 --- a/vortex-array/src/arrays/constant/vtable/canonical.rs +++ b/vortex-array/src/arrays/constant/vtable/canonical.rs @@ -3,8 +3,7 @@ use std::sync::Arc; -use arrow_buffer::BooleanBuffer; -use vortex_buffer::{Buffer, buffer}; +use vortex_buffer::{BitBuffer, Buffer, buffer}; use vortex_dtype::{DType, Nullability, PType, match_each_native_ptype}; use vortex_error::VortexExpect; use vortex_scalar::{ @@ -38,15 +37,15 @@ impl CanonicalVTable for ConstantVTable { match array.dtype() { DType::Null => Canonical::Null(NullArray::new(array.len())), - DType::Bool(..) => Canonical::Bool(BoolArray::from_bool_buffer( + DType::Bool(..) => Canonical::Bool(BoolArray::from_bit_buffer( if BoolScalar::try_from(scalar) .vortex_expect("must be bool") .value() .unwrap_or_default() { - BooleanBuffer::new_set(array.len()) + BitBuffer::new_set(array.len()) } else { - BooleanBuffer::new_unset(array.len()) + BitBuffer::new_unset(array.len()) }, validity, )), diff --git a/vortex-array/src/arrays/decimal/array.rs b/vortex-array/src/arrays/decimal/array.rs index befdaa9a72d..aba60628f7d 100644 --- a/vortex-array/src/arrays/decimal/array.rs +++ b/vortex-array/src/arrays/decimal/array.rs @@ -1,9 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBufferBuilder; use itertools::Itertools; -use vortex_buffer::{Buffer, BufferMut, ByteBuffer}; +use vortex_buffer::{BitBufferMut, Buffer, BufferMut, ByteBuffer}; use vortex_dtype::{DType, DecimalDType, IntegerPType, match_each_integer_ptype}; use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic}; use vortex_scalar::{BigCast, 
DecimalValueType, NativeDecimalType, match_each_decimal_value_type}; @@ -209,7 +208,7 @@ impl DecimalArray { ) -> Self { let iter = iter.into_iter(); let mut values = BufferMut::with_capacity(iter.size_hint().0); - let mut validity = BooleanBufferBuilder::new(values.capacity()); + let mut validity = BitBufferMut::with_capacity(values.capacity()); for i in iter { match i { @@ -226,7 +225,7 @@ impl DecimalArray { Self::new( values.freeze(), decimal_dtype, - Validity::from(validity.finish()), + Validity::from(validity.freeze()), ) } diff --git a/vortex-array/src/arrays/decimal/compute/between.rs b/vortex-array/src/arrays/decimal/compute/between.rs index 71cbf39f0f5..0d26423eb81 100644 --- a/vortex-array/src/arrays/decimal/compute/between.rs +++ b/vortex-array/src/arrays/decimal/compute/between.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBuffer; +use vortex_buffer::BitBuffer; use vortex_dtype::Nullability; use vortex_error::{VortexResult, vortex_bail}; use vortex_scalar::{NativeDecimalType, Scalar, match_each_decimal_value_type}; @@ -96,8 +96,8 @@ fn between_impl( upper_op: impl Fn(T, T) -> bool, ) -> ArrayRef { let buffer = arr.buffer::(); - BoolArray::from_bool_buffer( - BooleanBuffer::collect_bool(buffer.len(), |idx| { + BoolArray::from_bit_buffer( + BitBuffer::collect_bool(buffer.len(), |idx| { let value = buffer[idx]; lower_op(lower, value) & upper_op(value, upper) }), @@ -168,6 +168,6 @@ mod tests { } fn bool_to_vec(array: &dyn Array) -> Vec { - array.to_bool().boolean_buffer().iter().collect() + array.to_bool().bit_buffer().iter().collect() } } diff --git a/vortex-array/src/arrays/decimal/compute/is_sorted.rs b/vortex-array/src/arrays/decimal/compute/is_sorted.rs index e7e99be0eb4..6602bb7102a 100644 --- a/vortex-array/src/arrays/decimal/compute/is_sorted.rs +++ b/vortex-array/src/arrays/decimal/compute/is_sorted.rs @@ -47,7 +47,7 @@ where 
Mask::Values(mask_values) => { let values = array.buffer::(); let iter = mask_values - .boolean_buffer() + .bit_buffer() .iter() .zip_eq(values) .map(|(is_valid, v)| is_valid.then_some(v)); diff --git a/vortex-array/src/arrays/decimal/compute/min_max.rs b/vortex-array/src/arrays/decimal/compute/min_max.rs index fbe76294050..d68584908d9 100644 --- a/vortex-array/src/arrays/decimal/compute/min_max.rs +++ b/vortex-array/src/arrays/decimal/compute/min_max.rs @@ -35,7 +35,7 @@ where array .buffer::() .iter() - .zip(v.boolean_buffer().iter()) + .zip(v.bit_buffer().iter()) .filter_map(|(v, m)| m.then_some(v)), array.dtype(), ), diff --git a/vortex-array/src/arrays/decimal/compute/sum.rs b/vortex-array/src/arrays/decimal/compute/sum.rs index 0af39456343..b28b5480b3a 100644 --- a/vortex-array/src/arrays/decimal/compute/sum.rs +++ b/vortex-array/src/arrays/decimal/compute/sum.rs @@ -22,7 +22,7 @@ macro_rules! sum_decimal { use itertools::Itertools; let mut sum: $ty = <$ty>::default(); - for (v, valid) in $values.iter().zip_eq($validity.iter()) { + for (v, valid) in $values.iter().zip_eq($validity) { if valid { sum = num_traits::CheckedAdd::checked_add(&sum, v) .ok_or_else(|| vortex_err!("Overflow when summing decimal {sum:?} + {v:?}"))? 
@@ -56,7 +56,7 @@ impl SumKernel for DecimalVTable { DecimalValue::from(sum_decimal!( D, array.buffer::(), - mask_values.boolean_buffer() + mask_values.bit_buffer() )), decimal_dtype, nullability, diff --git a/vortex-array/src/arrays/fixed_size_list/compute/take.rs b/vortex-array/src/arrays/fixed_size_list/compute/take.rs index caa34375d3a..b7e8ebd9fd1 100644 --- a/vortex-array/src/arrays/fixed_size_list/compute/take.rs +++ b/vortex-array/src/arrays/fixed_size_list/compute/take.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBufferBuilder; +use vortex_buffer::BitBufferMut; use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype}; use vortex_error::{VortexExpect, VortexResult, vortex_panic}; @@ -131,7 +131,7 @@ fn take_nullable_fsl( // propagating nullability to the element array's take operation. let mut elements_indices = PrimitiveBuilder::::with_capacity(Nullability::NonNullable, new_len * list_size); - let mut new_validity_builder = BooleanBufferBuilder::new(new_len); + let mut new_validity_builder = BitBufferMut::with_capacity(new_len); // Build the element indices while tracking which lists are null. for (i, data_idx) in indices.iter().enumerate() { @@ -168,7 +168,7 @@ fn take_nullable_fsl( debug_assert_eq!(new_elements.len(), new_len * list_size); // At least one input was nullable, so the result is nullable. 
- let new_validity = Validity::from(new_validity_builder.finish()); + let new_validity = Validity::from(new_validity_builder.freeze()); debug_assert!(new_validity.maybe_len().is_none_or(|vl| vl == new_len)); Ok(unsafe { diff --git a/vortex-array/src/arrays/fixed_size_list/tests/filter.rs b/vortex-array/src/arrays/fixed_size_list/tests/filter.rs index c0a9c0b100c..3ede0e99c91 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/filter.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/filter.rs @@ -1,9 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBuffer; use rstest::rstest; -use vortex_buffer::buffer; +use vortex_buffer::{BitBuffer, buffer}; use vortex_dtype::Nullability; use vortex_mask::Mask; @@ -63,7 +62,7 @@ fn test_filter_degenerate_list_size_zero( let elements = PrimitiveArray::empty::(Nullability::NonNullable); let fsl = FixedSizeListArray::new(elements.into_array(), 0, validity, num_lists); - let mask = Mask::from(BooleanBuffer::from(mask_values)); + let mask = Mask::from(BitBuffer::from(mask_values)); let filtered = filter(fsl.as_ref(), &mask).unwrap(); assert_eq!(filtered.len(), expected_len, "Degenerate FSL filter failed"); @@ -92,7 +91,7 @@ fn test_filter_with_nulls() { let validity = Validity::from_iter([true, false, true]); let fsl = FixedSizeListArray::new(elements.into_array(), 2, validity, 3); - let mask = Mask::from(BooleanBuffer::from(vec![true, false, true])); + let mask = Mask::from(BitBuffer::from(vec![true, false, true])); let filtered = filter(fsl.as_ref(), &mask).unwrap(); let filtered_fsl = filtered.as_::(); @@ -121,7 +120,7 @@ fn test_filter_all_null_array() { let validity = Validity::AllInvalid; let fsl = FixedSizeListArray::new(elements.into_array(), 2, validity, 3); - let mask = Mask::from(BooleanBuffer::from(vec![true, false, true])); + let mask = Mask::from(BitBuffer::from(vec![true, false, true])); let filtered = filter(fsl.as_ref(), 
&mask).unwrap(); // This should return a ConstantArray of nulls. @@ -171,7 +170,7 @@ fn test_filter_nested_fixed_size_lists() { ); // Filter to keep only the second outer list. - let mask = Mask::from(BooleanBuffer::from(vec![false, true])); + let mask = Mask::from(BitBuffer::from(vec![false, true])); let filtered = filter(outer_fsl.as_ref(), &mask).unwrap(); let filtered_outer = filtered.as_::(); @@ -287,7 +286,7 @@ fn test_filter_all_null_various_list_sizes() { // Case 1: list_size == 0 let elements0 = PrimitiveArray::empty::(Nullability::NonNullable); let fsl0 = FixedSizeListArray::new(elements0.into_array(), 0, Validity::AllInvalid, 3); - let mask0 = Mask::from(BooleanBuffer::from(vec![true, false, true])); + let mask0 = Mask::from(BitBuffer::from(vec![true, false, true])); let filtered0 = filter(fsl0.as_ref(), &mask0).unwrap(); assert_eq!(filtered0.len(), 2); // Check that all elements are null (might be ConstantArray or FixedSizeListArray) @@ -297,7 +296,7 @@ fn test_filter_all_null_various_list_sizes() { // Case 2: list_size == 1 let elements1 = buffer![1i32, 2, 3].into_array(); let fsl1 = FixedSizeListArray::new(elements1.into_array(), 1, Validity::AllInvalid, 3); - let mask1 = Mask::from(BooleanBuffer::from(vec![false, true, true])); + let mask1 = Mask::from(BitBuffer::from(vec![false, true, true])); let filtered1 = filter(fsl1.as_ref(), &mask1).unwrap(); assert_eq!(filtered1.len(), 2); // Check that all elements are null @@ -338,7 +337,7 @@ fn test_mask_expansion_threshold_boundary() { sparse_mask[5] = true; sparse_mask[25] = true; sparse_mask[75] = true; - let mask = Mask::from(BooleanBuffer::from(sparse_mask)); + let mask = Mask::from(BitBuffer::from(sparse_mask)); let filtered = filter(fsl.as_ref(), &mask).unwrap(); let filtered_fsl = filtered.as_::(); diff --git a/vortex-array/src/arrays/list/compute/filter.rs b/vortex-array/src/arrays/list/compute/filter.rs index 807cfa28454..6520b3320e6 100644 --- a/vortex-array/src/arrays/list/compute/filter.rs +++ 
b/vortex-array/src/arrays/list/compute/filter.rs @@ -1,8 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBufferBuilder; -use vortex_buffer::BufferMut; +use vortex_buffer::{BitBufferMut, BufferMut}; use vortex_dtype::{IntegerPType, match_each_integer_ptype}; use vortex_error::{VortexExpect, VortexResult}; use vortex_mask::{Mask, MaskIter}; @@ -70,7 +69,7 @@ fn compute_filtered_elements_and_offsets( let true_count = selection_mask.true_count(); let mut new_offsets = BufferMut::::with_capacity(true_count + 1); - let mut new_mask_builder = BooleanBufferBuilder::new(elements.len()); + let mut new_mask_builder = BitBufferMut::with_capacity(elements.len()); let mut next_offset: O = O::zero(); // Offsets always start at zero. new_offsets.push(next_offset); @@ -114,12 +113,12 @@ fn compute_filtered_elements_and_offsets( // Fill any trailing elements. if new_mask_builder.len() < elements.len() { - new_mask_builder.append_n(elements.len() - new_mask_builder.len(), false); + new_mask_builder.append_n(false, elements.len() - new_mask_builder.len()); } // Allow the child array to filter themselves. // The `Mask` can determine the best representation based on the buffer's density in the future. - let new_elements = filter(elements, &Mask::from_buffer(new_mask_builder.finish()))?; + let new_elements = filter(elements, &Mask::from_buffer(new_mask_builder.freeze()))?; let new_offsets = PrimitiveArray::new(new_offsets, Validity::NonNullable); @@ -130,7 +129,7 @@ fn compute_filtered_elements_and_offsets( fn process_element_range( elems_start: usize, elems_end: usize, - new_mask_builder: &mut BooleanBufferBuilder, + new_mask_builder: &mut BitBufferMut, ) { let elems_len = elems_end - elems_start; @@ -138,9 +137,9 @@ fn process_element_range( if elems_len > 0 { // Fill any gaps before this range. 
if elems_start > new_mask_builder.len() { - new_mask_builder.append_n(elems_start - new_mask_builder.len(), false); + new_mask_builder.append_n(false, elems_start - new_mask_builder.len()); } // Keep all elements in this range. - new_mask_builder.append_n(elems_len, true); + new_mask_builder.append_n(true, elems_len); } } diff --git a/vortex-array/src/arrays/list/compute/take.rs b/vortex-array/src/arrays/list/compute/take.rs index 89311380663..5657e66aea7 100644 --- a/vortex-array/src/arrays/list/compute/take.rs +++ b/vortex-array/src/arrays/list/compute/take.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBufferBuilder; +use vortex_buffer::BitBufferMut; use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype}; use vortex_error::{VortexExpect, VortexResult, vortex_panic}; use vortex_mask::Mask; @@ -13,11 +13,6 @@ use crate::validity::Validity; use crate::vtable::ValidityHelper; use crate::{Array, ArrayRef, ToCanonical, register_kernel}; -/// Take implementation for [`ListArray`]. -/// -/// Unlike `ListView`, `ListArray` must rebuild the elements array to maintain its invariant -/// that lists are stored contiguously and in-order (`offset[i+1] >= offset[i]`). Taking -/// non-contiguous indices would violate this requirement. impl TakeKernel for ListVTable { fn take(&self, array: &ListArray, indices: &dyn Array) -> VortexResult { let indices = indices.to_primitive(); @@ -114,21 +109,12 @@ fn _take_nullable( indices_validity: Mask, ) -> VortexResult { let mut new_offsets = PrimitiveBuilder::with_capacity(Nullability::NonNullable, indices.len()); - - // This will be the indices we push down to the child array to call `take` with. - // - // There are 2 things to note here: - // - We do not know how many elements we need to take from our child since lists are variable - // size: thus we arbitrarily choose a capacity of `2 * # of indices`. 
- // - The type of the primitive builder needs to fit the largest offset of the (parent) - // `ListArray`, so we make this `PrimitiveBuilder` generic over `O` (instead of `I`). let mut elements_to_take = - PrimitiveBuilder::::with_capacity(Nullability::NonNullable, 2 * indices.len()); + PrimitiveBuilder::with_capacity(Nullability::NonNullable, 2 * indices.len()); let mut current_offset = O::zero(); new_offsets.append_zero(); - - let mut new_validity = BooleanBufferBuilder::new(indices.len()); + let mut new_validity = BitBufferMut::with_capacity(2 * indices.len()); for (idx, data_idx) in indices.iter().enumerate() { if !indices_validity.value(idx) { @@ -168,7 +154,7 @@ fn _take_nullable( let new_offsets = new_offsets.finish(); let new_elements = take(array.elements(), elements_to_take.as_ref())?; - let new_validity: Validity = Validity::from(new_validity.finish()); + let new_validity: Validity = Validity::from(new_validity.freeze()); // data are indexes are nullable, so the final result is also nullable. 
Ok(ListArray::try_new(new_elements, new_offsets, new_validity)?.to_array()) @@ -179,7 +165,6 @@ mod test { use std::sync::Arc; use rstest::rstest; - use vortex_buffer::buffer; use vortex_dtype::PType::I32; use vortex_dtype::{DType, Nullability}; use vortex_scalar::Scalar; @@ -189,13 +174,13 @@ mod test { use crate::compute::conformance::take::test_take_conformance; use crate::compute::take; use crate::validity::Validity; - use crate::{Array, IntoArray as _, ToCanonical}; + use crate::{Array, ToCanonical}; #[test] fn nullable_take() { let list = ListArray::try_new( - buffer![0i32, 5, 3, 4].into_array(), - buffer![0, 2, 3, 4, 4].into_array(), + PrimitiveArray::from_iter([0i32, 5, 3, 4]).to_array(), + PrimitiveArray::from_iter([0, 2, 3, 4, 4]).to_array(), Validity::Array(BoolArray::from_iter(vec![true, true, false, true]).to_array()), ) .unwrap() @@ -252,8 +237,8 @@ mod test { #[test] fn change_validity() { let list = ListArray::try_new( - buffer![0i32, 5, 3, 4].into_array(), - buffer![0, 2, 3].into_array(), + PrimitiveArray::from_iter([0i32, 5, 3, 4]).to_array(), + PrimitiveArray::from_iter([0, 2, 3]).to_array(), Validity::NonNullable, ) .unwrap() @@ -275,14 +260,14 @@ mod test { #[test] fn non_nullable_take() { let list = ListArray::try_new( - buffer![0i32, 5, 3, 4].into_array(), - buffer![0, 2, 3, 3, 4].into_array(), + PrimitiveArray::from_iter([0i32, 5, 3, 4]).to_array(), + PrimitiveArray::from_iter([0, 2, 3, 3, 4]).to_array(), Validity::NonNullable, ) .unwrap() .to_array(); - let idx = buffer![1, 0, 2].into_array(); + let idx = PrimitiveArray::from_iter([1, 0, 2]).to_array(); let result = take(&list, &idx).unwrap(); @@ -330,8 +315,8 @@ mod test { #[test] fn test_take_empty_array() { let list = ListArray::try_new( - buffer![0i32, 5, 3, 4].into_array(), - buffer![0].into_array(), + PrimitiveArray::from_iter([0i32, 5, 3, 4]).to_array(), + PrimitiveArray::from_iter([0]).to_array(), Validity::NonNullable, ) .unwrap() @@ -352,27 +337,27 @@ mod test { #[rstest] 
#[case(ListArray::try_new( - buffer![0i32, 1, 2, 3, 4, 5].into_array(), - buffer![0, 2, 3, 5, 5, 6].into_array(), + PrimitiveArray::from_iter([0i32, 1, 2, 3, 4, 5]).to_array(), + PrimitiveArray::from_iter([0, 2, 3, 5, 5, 6]).to_array(), Validity::NonNullable, ).unwrap())] #[case(ListArray::try_new( - buffer![10i32, 20, 30, 40, 50].into_array(), - buffer![0, 2, 3, 4, 5].into_array(), + PrimitiveArray::from_iter([10i32, 20, 30, 40, 50]).to_array(), + PrimitiveArray::from_iter([0, 2, 3, 4, 5]).to_array(), Validity::Array(BoolArray::from_iter(vec![true, false, true, true]).to_array()), ).unwrap())] #[case(ListArray::try_new( - buffer![1i32, 2, 3].into_array(), - buffer![0, 0, 2, 2, 3].into_array(), // First and third are empty + PrimitiveArray::from_iter([1i32, 2, 3]).to_array(), + PrimitiveArray::from_iter([0, 0, 2, 2, 3]).to_array(), // First and third are empty Validity::NonNullable, ).unwrap())] #[case(ListArray::try_new( - buffer![42i32, 43].into_array(), - buffer![0, 2].into_array(), + PrimitiveArray::from_iter([42i32, 43]).to_array(), + PrimitiveArray::from_iter([0, 2]).to_array(), Validity::NonNullable, ).unwrap())] #[case({ - let elements = buffer![0i32..200].into_array(); + let elements = PrimitiveArray::from_iter(0i32..200).to_array(); let mut offsets = vec![0u64]; for i in 1..=50 { offsets.push(offsets[i - 1] + (i as u64 % 5)); // Variable length lists @@ -385,7 +370,7 @@ mod test { })] #[case(ListArray::try_new( PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), Some(4), None]).to_array(), - buffer![0, 2, 3, 5].into_array(), + PrimitiveArray::from_iter([0, 2, 3, 5]).to_array(), Validity::NonNullable, ).unwrap())] fn test_take_list_conformance(#[case] list: ListArray) { diff --git a/vortex-array/src/arrays/list/tests.rs b/vortex-array/src/arrays/list/tests.rs index 651369576b0..9ddb2f2e9f5 100644 --- a/vortex-array/src/arrays/list/tests.rs +++ b/vortex-array/src/arrays/list/tests.rs @@ -3,8 +3,7 @@ use std::sync::Arc; -use 
arrow_buffer::BooleanBuffer; -use vortex_buffer::buffer; +use vortex_buffer::{BitBuffer, buffer}; use vortex_dtype::PType::I32; use vortex_dtype::{DType, Nullability}; use vortex_error::VortexUnwrap; @@ -86,10 +85,7 @@ fn test_simple_list_filter() { .unwrap() .into_array(); - let filtered = filter( - &list, - &Mask::from(BooleanBuffer::from(vec![false, true, true])), - ); + let filtered = filter(&list, &Mask::from(BitBuffer::from(vec![false, true, true]))); assert!(filtered.is_ok()) } @@ -106,9 +102,7 @@ fn test_list_filter_dense_mask() { .into_array(); // Dense mask: keep most elements (indices 1, 2, 3, 4, 5). - let mask = Mask::from(BooleanBuffer::from(vec![ - false, true, true, true, true, true, - ])); + let mask = Mask::from(BitBuffer::from(vec![false, true, true, true, true, true])); let filtered = filter(&list, &mask).unwrap(); let filtered_list = filtered.as_::(); @@ -135,7 +129,7 @@ fn test_list_filter_sparse_mask() { .into_array(); // Sparse mask: keep only a few elements (indices 0 and 5). 
- let mask = Mask::from(BooleanBuffer::from(vec![ + let mask = Mask::from(BitBuffer::from(vec![ true, false, false, false, false, true, ])); @@ -169,9 +163,7 @@ fn test_list_filter_empty_lists() { .unwrap() .into_array(); - let mask = Mask::from(BooleanBuffer::from(vec![ - true, true, true, false, false, true, - ])); + let mask = Mask::from(BitBuffer::from(vec![true, true, true, false, false, true])); let filtered = filter(&list, &mask).unwrap(); let filtered_list = filtered.as_::(); @@ -199,7 +191,7 @@ fn test_list_filter_with_nulls() { let elements = buffer![0..15].into_array(); let offsets = buffer![0, 3, 7, 10, 12, 15].into_array(); let validity = Validity::from_mask( - Mask::from(BooleanBuffer::from(vec![true, false, true, false, true])), + Mask::from(BitBuffer::from(vec![true, false, true, false, true])), Nullability::Nullable, ); @@ -207,7 +199,7 @@ fn test_list_filter_with_nulls() { .unwrap() .into_array(); - let mask = Mask::from(BooleanBuffer::from(vec![true, true, false, true, true])); + let mask = Mask::from(BitBuffer::from(vec![true, true, false, true, true])); let filtered = filter(&list, &mask).unwrap(); let filtered_list = filtered.as_::(); @@ -279,7 +271,7 @@ fn test_list_filter_single_element() { .unwrap() .into_array(); - let mask = Mask::from(BooleanBuffer::from(vec![false, false, true, false, false])); + let mask = Mask::from(BitBuffer::from(vec![false, false, true, false, false])); let filtered = filter(&list, &mask).unwrap(); let filtered_list = filtered.as_::(); @@ -304,7 +296,7 @@ fn test_list_filter_alternating_pattern() { .into_array(); // Keep every other list. 
- let mask = Mask::from(BooleanBuffer::from(vec![ + let mask = Mask::from(BitBuffer::from(vec![ true, false, true, false, true, false, true, false, true, false, true, false, ])); @@ -332,7 +324,7 @@ fn test_list_filter_variable_sizes() { .unwrap() .into_array(); - let mask = Mask::from(BooleanBuffer::from(vec![ + let mask = Mask::from(BitBuffer::from(vec![ true, false, true, true, false, true, true, true, ])); @@ -476,7 +468,7 @@ fn create_list_of_lists_nullable(data: OptVec>>) -> ListArray // Create inner validity if needed. let inner_list_validity = if has_null_inner { Validity::from_mask( - Mask::from(BooleanBuffer::from(inner_validity)), + Mask::from(BitBuffer::from(inner_validity)), Nullability::Nullable, ) } else { @@ -506,7 +498,7 @@ fn create_list_of_lists_nullable(data: OptVec>>) -> ListArray // Create outer validity if needed. let outer_list_validity = if has_null_outer { Validity::from_mask( - Mask::from(BooleanBuffer::from(outer_validity)), + Mask::from(BitBuffer::from(outer_validity)), Nullability::Nullable, ) } else { @@ -804,7 +796,7 @@ fn test_validity_length_mismatch() { let elements = buffer![1i32, 2, 3, 4, 5].into_array(); let offsets = buffer![0u32, 2, 4, 5, 5].into_array(); let validity = Validity::from_mask( - Mask::from(BooleanBuffer::from(vec![true, false])), + Mask::from(BitBuffer::from(vec![true, false])), Nullability::Nullable, ); diff --git a/vortex-array/src/arrays/listview/rebuild.rs b/vortex-array/src/arrays/listview/rebuild.rs index 29ec6e76114..2581e8d5cab 100644 --- a/vortex-array/src/arrays/listview/rebuild.rs +++ b/vortex-array/src/arrays/listview/rebuild.rs @@ -174,6 +174,7 @@ impl ListViewArray { #[cfg(test)] mod tests { + use vortex_buffer::BitBuffer; use vortex_dtype::Nullability; use crate::arrays::{ListViewArray, PrimitiveArray}; @@ -366,8 +367,6 @@ mod tests { #[ignore = "TODO(connor)[ListView]: Reenable when `ListView` becomes canonical"] #[test] fn test_rebuild_flatten_with_nullable() { - use arrow_buffer::BooleanBuffer; 
- use crate::arrays::BoolArray; // Create a nullable list view with a null list @@ -375,7 +374,11 @@ mod tests { let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array(); let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array(); let validity = Validity::Array( - BoolArray::from(BooleanBuffer::from(vec![true, false, true])).into_array(), + BoolArray::from_bit_buffer( + BitBuffer::from(vec![true, false, true]), + Validity::NonNullable, + ) + .into_array(), ); let listview = ListViewArray::try_new(elements, offsets, sizes, validity).unwrap(); diff --git a/vortex-array/src/arrays/masked/compute/compare.rs b/vortex-array/src/arrays/masked/compute/compare.rs index a0ff0435081..30ab610a05a 100644 --- a/vortex-array/src/arrays/masked/compute/compare.rs +++ b/vortex-array/src/arrays/masked/compute/compare.rs @@ -25,7 +25,7 @@ impl CompareKernel for MaskedVTable { // Return a plain BoolArray with the combined validity Ok(Some( - BoolArray::from_bool_buffer(bool_array.boolean_buffer().clone(), combined_validity) + BoolArray::from_bit_buffer(bool_array.bit_buffer().clone(), combined_validity) .into_array(), )) } @@ -60,7 +60,7 @@ mod tests { .unwrap(); let res = res.to_bool(); assert_eq!( - res.boolean_buffer().iter().collect::>(), + res.bit_buffer().iter().collect::>(), vec![false, true, false] ); } @@ -81,7 +81,7 @@ mod tests { .unwrap(); let res = res.to_bool(); assert_eq!( - res.boolean_buffer().iter().collect::>(), + res.bit_buffer().iter().collect::>(), vec![false, false, true] ); } @@ -103,7 +103,7 @@ mod tests { .unwrap(); let res = res.to_bool(); assert_eq!( - res.boolean_buffer().iter().collect::>(), + res.bit_buffer().iter().collect::>(), vec![false, true, false] ); assert_eq!(res.dtype().nullability(), Nullability::Nullable); @@ -125,7 +125,7 @@ mod tests { let res = compare(masked.as_ref(), rhs.as_ref(), Operator::Eq).unwrap(); let res = res.to_bool(); assert_eq!( - res.boolean_buffer().iter().collect::>(), + 
res.bit_buffer().iter().collect::>(), vec![true, false, true] ); assert_eq!(res.dtype().nullability(), Nullability::Nullable); diff --git a/vortex-array/src/arrays/primitive/array/accessor.rs b/vortex-array/src/arrays/primitive/array/accessor.rs index 720d751966d..5be2a2ba5cd 100644 --- a/vortex-array/src/arrays/primitive/array/accessor.rs +++ b/vortex-array/src/arrays/primitive/array/accessor.rs @@ -28,7 +28,7 @@ impl ArrayAccessor for PrimitiveArray { let mut iter = self .as_slice::() .iter() - .zip(validity.boolean_buffer().iter()) + .zip(validity.bit_buffer().iter()) .map(|(value, valid)| valid.then_some(value)); Ok(f(&mut iter)) } diff --git a/vortex-array/src/arrays/primitive/array/conversion.rs b/vortex-array/src/arrays/primitive/array/conversion.rs index d542938e6eb..44f3634afa7 100644 --- a/vortex-array/src/arrays/primitive/array/conversion.rs +++ b/vortex-array/src/arrays/primitive/array/conversion.rs @@ -3,8 +3,7 @@ //! Conversion methods and trait implementations of [`From`] and [`Into`] for [`PrimitiveArray`]. 
-use arrow_buffer::BooleanBufferBuilder; -use vortex_buffer::{Buffer, BufferMut}; +use vortex_buffer::{BitBufferMut, Buffer, BufferMut}; use vortex_dtype::NativePType; use vortex_error::vortex_panic; @@ -19,7 +18,7 @@ impl PrimitiveArray { pub fn from_option_iter>>(iter: I) -> Self { let iter = iter.into_iter(); let mut values = BufferMut::with_capacity(iter.size_hint().0); - let mut validity = BooleanBufferBuilder::new(values.capacity()); + let mut validity = BitBufferMut::with_capacity(values.capacity()); for i in iter { match i { @@ -33,7 +32,7 @@ impl PrimitiveArray { } } } - Self::new(values.freeze(), Validity::from(validity.finish())) + Self::new(values.freeze(), Validity::from(validity.freeze())) } pub fn buffer(&self) -> Buffer { diff --git a/vortex-array/src/arrays/primitive/array/mod.rs b/vortex-array/src/arrays/primitive/array/mod.rs index ab4dc555f80..9fdda9b0547 100644 --- a/vortex-array/src/arrays/primitive/array/mod.rs +++ b/vortex-array/src/arrays/primitive/array/mod.rs @@ -165,7 +165,7 @@ impl PrimitiveArray { Validity::AllInvalid => ByteBuffer::zeroed_aligned(n_rows * byte_width, alignment), Validity::Array(is_valid) => { let bool_array = is_valid.to_bool(); - let bool_buffer = bool_array.boolean_buffer(); + let bool_buffer = bool_array.bit_buffer(); let mut bytes = ByteBufferMut::zeroed_aligned(n_rows * byte_width, alignment); for (i, valid_i) in bool_buffer.set_indices().enumerate() { bytes[valid_i * byte_width..(valid_i + 1) * byte_width] @@ -221,7 +221,7 @@ impl PrimitiveArray { } Validity::Array(val) => { let val = val.to_bool(); - BufferMut::::from_iter(buf_iter.zip(val.boolean_buffer()).map(f)) + BufferMut::::from_iter(buf_iter.zip(val.bit_buffer()).map(f)) } }; Ok(PrimitiveArray::new(buffer.freeze(), validity.clone())) diff --git a/vortex-array/src/arrays/primitive/compute/between.rs b/vortex-array/src/arrays/primitive/compute/between.rs index 08a8b83fa88..658e272df6d 100644 --- a/vortex-array/src/arrays/primitive/compute/between.rs +++ 
b/vortex-array/src/arrays/primitive/compute/between.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBuffer; +use vortex_buffer::BitBuffer; use vortex_dtype::{NativePType, Nullability, match_each_native_ptype}; use vortex_error::VortexResult; @@ -98,8 +98,8 @@ where T: NativePType + Copy, { let slice = arr.as_slice::(); - BoolArray::from_bool_buffer( - BooleanBuffer::collect_bool(slice.len(), |idx| { + BoolArray::from_bit_buffer( + BitBuffer::collect_bool(slice.len(), |idx| { // We only iterate upto arr len and |arr| == |slice|. let i = unsafe { *slice.get_unchecked(idx) }; lower_fn(lower, i) & upper_fn(i, upper) diff --git a/vortex-array/src/arrays/primitive/compute/cast.rs b/vortex-array/src/arrays/primitive/compute/cast.rs index 2d8f4acaf53..f038eaaabfe 100644 --- a/vortex-array/src/arrays/primitive/compute/cast.rs +++ b/vortex-array/src/arrays/primitive/compute/cast.rs @@ -52,7 +52,7 @@ impl CastKernel for PrimitiveVTable { register_kernel!(CastKernelAdapter(PrimitiveVTable).lift()); fn cast(array: &[F], mask: Mask) -> VortexResult> { - match mask.boolean_buffer() { + match mask.bit_buffer() { AllOr::All => { let mut buffer = BufferMut::with_capacity(array.len()); for item in array { @@ -87,9 +87,8 @@ fn cast(array: &[F], mask: Mask) -> VortexResult #[cfg(test)] mod test { - use arrow_buffer::BooleanBuffer; use rstest::rstest; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use vortex_dtype::{DType, Nullability, PType}; use vortex_error::VortexError; use vortex_mask::Mask; @@ -197,7 +196,7 @@ mod test { assert_eq!(p.as_slice::(), vec![0, 0, 10]); assert_eq!( p.validity_mask(), - Mask::from(BooleanBuffer::from(vec![false, true, true])) + Mask::from(BitBuffer::from(vec![false, true, true])) ); } diff --git a/vortex-array/src/arrays/primitive/compute/fill_null.rs b/vortex-array/src/arrays/primitive/compute/fill_null.rs index 
48f66e0f581..40cdea71cdd 100644 --- a/vortex-array/src/arrays/primitive/compute/fill_null.rs +++ b/vortex-array/src/arrays/primitive/compute/fill_null.rs @@ -33,7 +33,7 @@ impl FillNullKernel for PrimitiveVTable { } Validity::Array(is_valid) => { // TODO(danking): when we take PrimitiveArray by value, we should mutate in-place - let is_invalid = is_valid.to_bool().boolean_buffer().not(); + let is_invalid = is_valid.to_bool().bit_buffer().not(); match_each_native_ptype!(array.ptype(), |T| { let mut buffer = BufferMut::copy_from(array.as_slice::()); let fill_value = fill_value diff --git a/vortex-array/src/arrays/primitive/compute/is_sorted.rs b/vortex-array/src/arrays/primitive/compute/is_sorted.rs index 40a1b8a7a23..2b2f31fcf69 100644 --- a/vortex-array/src/arrays/primitive/compute/is_sorted.rs +++ b/vortex-array/src/arrays/primitive/compute/is_sorted.rs @@ -71,7 +71,7 @@ fn compute_is_sorted(array: &PrimitiveArray, strict: bool) -> Vo } Mask::Values(mask_values) => { let iter = mask_values - .boolean_buffer() + .bit_buffer() .iter() .zip_eq(array.as_slice::()) .map(|(is_valid, value)| is_valid.then_some(ComparablePrimitive::from(value))); diff --git a/vortex-array/src/arrays/primitive/compute/min_max.rs b/vortex-array/src/arrays/primitive/compute/min_max.rs index 88ab68a209d..6c6007a8215 100644 --- a/vortex-array/src/arrays/primitive/compute/min_max.rs +++ b/vortex-array/src/arrays/primitive/compute/min_max.rs @@ -33,7 +33,7 @@ where array .as_slice::() .iter() - .zip(v.boolean_buffer().iter()) + .zip(v.bit_buffer().iter()) .filter_map(|(v, m)| m.then_some(v)), array.dtype(), ), diff --git a/vortex-array/src/arrays/primitive/compute/nan_count.rs b/vortex-array/src/arrays/primitive/compute/nan_count.rs index bddb8de8be3..b9f45ae4fca 100644 --- a/vortex-array/src/arrays/primitive/compute/nan_count.rs +++ b/vortex-array/src/arrays/primitive/compute/nan_count.rs @@ -26,7 +26,7 @@ fn compute_nan_count_with_validity(values: &[T], validity: Mask) Mask::AllFalse(_) => 0, 
Mask::Values(v) => values .iter() - .zip(v.boolean_buffer().iter()) + .zip(v.bit_buffer().iter()) .filter_map(|(v, m)| m.then_some(v)) .filter(|v| v.is_nan()) .count(), diff --git a/vortex-array/src/arrays/primitive/compute/sum.rs b/vortex-array/src/arrays/primitive/compute/sum.rs index 924a3e6b883..00595cd40ad 100644 --- a/vortex-array/src/arrays/primitive/compute/sum.rs +++ b/vortex-array/src/arrays/primitive/compute/sum.rs @@ -1,9 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBuffer; use itertools::Itertools; use num_traits::{CheckedAdd, Float, ToPrimitive}; +use vortex_buffer::BitBuffer; use vortex_dtype::{NativePType, match_each_native_ptype}; use vortex_error::{VortexExpect, VortexResult}; use vortex_mask::AllOr; @@ -16,7 +16,7 @@ use crate::stats::Stat; impl SumKernel for PrimitiveVTable { fn sum(&self, array: &PrimitiveArray) -> VortexResult { - Ok(match array.validity_mask().boolean_buffer() { + Ok(match array.validity_mask().bit_buffer() { AllOr::All => { // All-valid match_each_native_ptype!( @@ -67,7 +67,7 @@ fn sum_integer( fn sum_integer_with_validity( values: &[T], - validity: &BooleanBuffer, + validity: &BitBuffer, ) -> Option { let mut sum = R::zero(); for (&x, valid) in values.iter().zip_eq(validity.iter()) { @@ -86,7 +86,7 @@ fn sum_float(values: &[T]) -> f64 { sum } -fn sum_float_with_validity(array: &[T], validity: &BooleanBuffer) -> f64 { +fn sum_float_with_validity(array: &[T], validity: &BitBuffer) -> f64 { let mut sum = 0.0; for (&x, valid) in array.iter().zip_eq(validity.iter()) { if valid { diff --git a/vortex-array/src/arrays/struct_/compute/mod.rs b/vortex-array/src/arrays/struct_/compute/mod.rs index facfd23ffe3..97f39554307 100644 --- a/vortex-array/src/arrays/struct_/compute/mod.rs +++ b/vortex-array/src/arrays/struct_/compute/mod.rs @@ -4,7 +4,8 @@ mod cast; mod filter; mod mask; -mod zip; + +use std::sync::Arc; use 
vortex_dtype::Nullability::NonNullable; use vortex_error::VortexResult; @@ -26,7 +27,7 @@ impl TakeKernel for StructVTable { // an out of bounds element if array.is_empty() { return StructArray::try_new_with_dtype( - array.fields(), + array.fields().to_vec(), array.struct_fields().clone(), indices.len(), Validity::AllInvalid, @@ -43,7 +44,7 @@ impl TakeKernel for StructVTable { .fields() .iter() .map(|field| take(field, inner_indices)) - .collect::, _>>()?, + .collect::>>()?, array.struct_fields().clone(), indices.len(), array.validity().take(indices)?, @@ -93,13 +94,13 @@ register_kernel!(IsConstantKernelAdapter(StructVTable).lift()); mod tests { use Nullability::{NonNullable, Nullable}; use rstest::rstest; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use vortex_dtype::{DType, FieldNames, Nullability, PType, StructFields}; use vortex_error::VortexUnwrap; use vortex_mask::Mask; use vortex_scalar::Scalar; - use crate::arrays::{BoolArray, BooleanBuffer, PrimitiveArray, StructArray, VarBinArray}; + use crate::arrays::{BoolArray, PrimitiveArray, StructArray, VarBinArray}; use crate::compute::conformance::consistency::test_array_consistency; use crate::compute::conformance::filter::test_filter_conformance; use crate::compute::conformance::mask::test_mask_conformance; @@ -145,7 +146,9 @@ mod tests { #[test] fn take_field_struct() { - let struct_arr = StructArray::from_fields(&[("a", buffer![0..10].into_array())]).unwrap(); + let struct_arr = + StructArray::from_fields(&[("a", PrimitiveArray::from_iter(0..10).to_array())]) + .unwrap(); let indices = PrimitiveArray::from_option_iter([Some(1), None]); let taken = take(struct_arr.as_ref(), indices.as_ref()).unwrap(); assert_eq!(taken.len(), 2); @@ -314,8 +317,8 @@ mod tests { fn test_cast_complex_struct() { let xs = PrimitiveArray::from_option_iter([Some(0i64), Some(1), Some(2), Some(3), Some(4)]); let ys = VarBinArray::from_vec(vec!["a", "b", "c", "d", "e"], DType::Utf8(Nullable)); - let zs = 
BoolArray::from_bool_buffer( - BooleanBuffer::from_iter([true, true, false, false, true]), + let zs = BoolArray::from_bit_buffer( + BitBuffer::from_iter([true, true, false, false, true]), Validity::AllValid, ); let fully_nullable_array = StructArray::try_new( @@ -485,7 +488,7 @@ mod tests { #[test] fn test_take_large_struct_conformance() { // Test with larger array for additional edge cases - let xs = buffer![0i64..100].into_array(); + let xs = PrimitiveArray::from_iter(0i64..100).into_array(); let ys = VarBinArray::from_iter( (0..100).map(|i| format!("str_{i}")).map(Some), DType::Utf8(NonNullable), @@ -509,7 +512,7 @@ mod tests { #[rstest] // From test_all_consistency #[case::struct_simple({ - let xs = buffer![1i32, 2, 3, 4, 5].into_array(); + let xs = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5]); let ys = VarBinArray::from_iter( ["a", "b", "c", "d", "e"].map(Some), DType::Utf8(NonNullable), @@ -543,7 +546,7 @@ mod tests { StructArray::try_new(["xs"].into(), vec![xs], 1, Validity::NonNullable).unwrap() })] #[case::large_struct({ - let xs = buffer![0..100i64].into_array(); + let xs = PrimitiveArray::from_iter(0..100i64).into_array(); let ys = VarBinArray::from_iter( (0..100).map(|i| format!("value_{i}")).map(Some), DType::Utf8(NonNullable), diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index c03fe5a725c..bc13fa38399 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -40,7 +40,7 @@ fn test_project() { let bools = &struct_b.fields[0]; assert_eq!( - bools.to_bool().boolean_buffer().iter().collect::>(), + bools.to_bool().bit_buffer().iter().collect::>(), vec![true, true, true, false, false] ); diff --git a/vortex-array/src/arrays/varbin/accessor.rs b/vortex-array/src/arrays/varbin/accessor.rs index f3ec316cd93..0bbf8b104da 100644 --- a/vortex-array/src/arrays/varbin/accessor.rs +++ b/vortex-array/src/arrays/varbin/accessor.rs @@ -39,7 +39,7 @@ impl 
ArrayAccessor<[u8]> for VarBinArray { let validity = v.to_bool(); let mut iter = offsets .windows(2) - .zip(validity.boolean_buffer()) + .zip(validity.bit_buffer()) .map(|(w, valid)| valid.then(|| &bytes[w[0] as usize..w[1] as usize])); Ok(f(&mut iter)) } diff --git a/vortex-array/src/arrays/varbin/builder.rs b/vortex-array/src/arrays/varbin/builder.rs index 9a64a0c8552..d230f9bcb6e 100644 --- a/vortex-array/src/arrays/varbin/builder.rs +++ b/vortex-array/src/arrays/varbin/builder.rs @@ -1,9 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::NullBufferBuilder; use num_traits::AsPrimitive; -use vortex_buffer::BufferMut; +use vortex_buffer::{BitBufferMut, BufferMut}; use vortex_dtype::{DType, IntegerPType}; use vortex_error::vortex_panic; @@ -15,7 +14,7 @@ use crate::validity::Validity; pub struct VarBinBuilder { offsets: BufferMut, data: BufferMut, - validity: NullBufferBuilder, + validity: BitBufferMut, } impl Default for VarBinBuilder { @@ -35,7 +34,7 @@ impl VarBinBuilder { Self { offsets, data: BufferMut::empty(), - validity: NullBufferBuilder::new(len), + validity: BitBufferMut::with_capacity(len), } } @@ -60,19 +59,19 @@ impl VarBinBuilder { ) })); self.data.extend_from_slice(slice); - self.validity.append_non_null(); + self.validity.append_true(); } #[inline] pub fn append_null(&mut self) { self.offsets.push(self.offsets[self.offsets.len() - 1]); - self.validity.append_null(); + self.validity.append_false(); } #[inline] pub fn append_n_nulls(&mut self, n: usize) { self.offsets.push_n(self.offsets[self.offsets.len() - 1], n); - self.validity.append_n_nulls(n); + self.validity.append_n(false, n); } #[inline] @@ -84,19 +83,14 @@ impl VarBinBuilder { self.offsets .extend(end_offsets.map(|offset| offset + self.data.len().as_())); self.data.extend_from_slice(values); - self.validity.append_n_non_nulls(num); + self.validity.append_n(true, num); } - pub fn finish(mut self, dtype: DType) -> 
VarBinArray { + pub fn finish(self, dtype: DType) -> VarBinArray { let offsets = PrimitiveArray::new(self.offsets.freeze(), Validity::NonNullable); - let nulls = self.validity.finish(); - - let validity = if dtype.is_nullable() { - nulls.map(Validity::from).unwrap_or(Validity::AllValid) - } else { - assert!(nulls.is_none(), "dtype and validity mismatch"); - Validity::NonNullable - }; + let nulls = self.validity.freeze(); + + let validity = Validity::from_bit_buffer(nulls, dtype.nullability()); // SAFETY: The builder maintains all invariants: // - Offsets are monotonically increasing starting from 0 (guaranteed by builder logic). diff --git a/vortex-array/src/arrays/varbin/compute/compare.rs b/vortex-array/src/arrays/varbin/compute/compare.rs index 010086a4f92..18956ed36db 100644 --- a/vortex-array/src/arrays/varbin/compute/compare.rs +++ b/vortex-array/src/arrays/varbin/compute/compare.rs @@ -2,9 +2,9 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use arrow_array::{BinaryArray, StringArray}; -use arrow_buffer::BooleanBuffer; use arrow_ord::cmp; use itertools::Itertools; +use vortex_buffer::BitBuffer; use vortex_dtype::{DType, IntegerPType, match_each_integer_ptype}; use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_err}; @@ -42,8 +42,8 @@ impl CompareKernel for VarBinVTable { if rhs_is_empty { let buffer = match operator { - Operator::Gte => BooleanBuffer::new_set(len), // Every possible value is >= "" - Operator::Lt => BooleanBuffer::new_unset(len), // No value is < "" + Operator::Gte => BitBuffer::new_set(len), // Every possible value is >= "" + Operator::Lt => BitBuffer::new_unset(len), // No value is < "" Operator::Eq | Operator::NotEq | Operator::Gt | Operator::Lte => { let lhs_offsets = lhs.offsets().to_primitive(); match_each_integer_ptype!(lhs_offsets.ptype(), |P| { @@ -53,7 +53,7 @@ impl CompareKernel for VarBinVTable { }; return Ok(Some( - BoolArray::from_bool_buffer( + BoolArray::from_bit_buffer( buffer, 
lhs.validity() .clone() @@ -110,7 +110,7 @@ register_kernel!(CompareKernelAdapter(VarBinVTable).lift()); fn compare_offsets_to_empty( offsets: PrimitiveArray, operator: Operator, -) -> BooleanBuffer { +) -> BitBuffer { let lengths_iter = offsets .as_slice::

() .iter() @@ -121,8 +121,7 @@ fn compare_offsets_to_empty( #[cfg(test)] mod test { - use arrow_buffer::BooleanBuffer; - use vortex_buffer::ByteBuffer; + use vortex_buffer::{BitBuffer, ByteBuffer}; use vortex_dtype::{DType, Nullability}; use vortex_scalar::Scalar; @@ -149,12 +148,12 @@ mod test { .to_bool(); assert_eq!( - &result.validity_mask().to_boolean_buffer(), - &BooleanBuffer::from_iter([true, false, true]) + &result.validity_mask().to_bit_buffer(), + &BitBuffer::from_iter([true, false, true]) ); assert_eq!( - result.boolean_buffer(), - &BooleanBuffer::from_iter([true, false, false]) + result.bit_buffer(), + &BitBuffer::from_iter([true, false, false]) ); } @@ -173,12 +172,12 @@ mod test { .to_bool(); assert_eq!( - &result.validity_mask().to_boolean_buffer(), - &BooleanBuffer::from_iter([false, false, true]) + &result.validity_mask().to_bit_buffer(), + &BitBuffer::from_iter([false, false, true]) ); assert_eq!( - result.boolean_buffer(), - &BooleanBuffer::from_iter([false, true, true]) + result.bit_buffer(), + &BitBuffer::from_iter([false, true, true]) ); } } diff --git a/vortex-array/src/arrays/varbin/compute/filter.rs b/vortex-array/src/arrays/varbin/compute/filter.rs index 27074d33f14..ff2258874d6 100644 --- a/vortex-array/src/arrays/varbin/compute/filter.rs +++ b/vortex-array/src/arrays/varbin/compute/filter.rs @@ -67,7 +67,7 @@ where usize: AsPrimitive, { let mut builder = VarBinBuilder::::with_capacity(selection_count); - match logical_validity.boolean_buffer() { + match logical_validity.bit_buffer() { AllOr::All => { mask_slices.iter().for_each(|(start, end)| { update_non_nullable_slice(data, offsets, &mut builder, *start, *end) @@ -78,8 +78,8 @@ where } AllOr::Some(validity) => { for (start, end) in mask_slices.iter().copied() { - let null_sl = validity.slice(start, end - start); - if null_sl.count_set_bits() == null_sl.len() { + let null_sl = validity.slice(start..end); + if null_sl.true_count() == null_sl.len() { update_non_nullable_slice(data, 
offsets, &mut builder, start, end) } else { for (idx, valid) in null_sl.iter().enumerate() { diff --git a/vortex-array/src/arrays/varbin/compute/take.rs b/vortex-array/src/arrays/varbin/compute/take.rs index 7f7644b5a76..2645b78b71c 100644 --- a/vortex-array/src/arrays/varbin/compute/take.rs +++ b/vortex-array/src/arrays/varbin/compute/take.rs @@ -1,8 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::BooleanBufferBuilder; -use vortex_buffer::{BufferMut, ByteBufferMut}; +use vortex_buffer::{BitBufferMut, BufferMut, ByteBufferMut}; use vortex_dtype::{DType, IntegerPType, match_each_integer_ptype}; use vortex_error::{VortexExpect, VortexResult, vortex_panic}; use vortex_mask::Mask; @@ -117,7 +116,7 @@ fn take_nullable( new_offsets.push(O::zero()); let mut current_offset = O::zero(); - let mut validity_buffer = BooleanBufferBuilder::new(indices.len()); + let mut validity_buffer = BitBufferMut::with_capacity(indices.len()); // Convert indices once and store valid ones with their positions let mut valid_indices = Vec::with_capacity(indices.len()); @@ -162,7 +161,7 @@ fn take_nullable( new_data.extend_from_slice(&data[start..stop]); } - let array_validity = Validity::from(validity_buffer.finish()); + let array_validity = Validity::from(validity_buffer.freeze()); // Safety: // All variants of VarBinArray are satisfied here. 
diff --git a/vortex-array/src/arrays/varbinview/accessor.rs b/vortex-array/src/arrays/varbinview/accessor.rs index 3231eb7b37f..8ef9e96694a 100644 --- a/vortex-array/src/arrays/varbinview/accessor.rs +++ b/vortex-array/src/arrays/varbinview/accessor.rs @@ -41,7 +41,7 @@ impl ArrayAccessor<[u8]> for VarBinViewArray { let validity = v.to_bool(); let mut iter = views .iter() - .zip(validity.boolean_buffer()) + .zip(validity.bit_buffer()) .map(|(view, valid)| { if valid { if view.is_inlined() { diff --git a/vortex-array/src/arrow/array.rs b/vortex-array/src/arrow/array.rs index 29e35354b9a..6ab7bf8eecc 100644 --- a/vortex-array/src/arrow/array.rs +++ b/vortex-array/src/arrow/array.rs @@ -126,7 +126,7 @@ impl ValidityVTable for ArrowVTable { array .inner .logical_nulls() - .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone())) + .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone().into())) .unwrap_or_else(|| Mask::new_true(array.inner.len())) } } diff --git a/vortex-array/src/arrow/compute/to_arrow/canonical.rs b/vortex-array/src/arrow/compute/to_arrow/canonical.rs index 9014c6ebe5c..0c96f27f6a1 100644 --- a/vortex-array/src/arrow/compute/to_arrow/canonical.rs +++ b/vortex-array/src/arrow/compute/to_arrow/canonical.rs @@ -32,6 +32,7 @@ use crate::arrays::{ use crate::arrow::IntoArrowArray; use crate::arrow::array::ArrowArray; use crate::arrow::compute::ToArrowArgs; +use crate::arrow::compute::to_arrow::null_buffer::to_null_buffer; use crate::compute::{InvocationArgs, Kernel, Output, cast}; use crate::{Array as _, Canonical, IntoArray, ToCanonical}; @@ -237,13 +238,13 @@ fn to_arrow_null(array: NullArray) -> VortexResult { fn to_arrow_bool(array: BoolArray) -> VortexResult { Ok(Arc::new(ArrowBoolArray::new( - array.boolean_buffer().clone(), - array.validity_mask().to_null_buffer(), + array.bit_buffer().clone().into(), + to_null_buffer(array.validity_mask()), ))) } fn to_arrow_primitive(array: PrimitiveArray) -> VortexResult { - let null_buffer = 
array.validity_mask().to_null_buffer(); + let null_buffer = to_null_buffer(array.validity_mask()); let len = array.len(); let buffer = array.into_byte_buffer().into_arrow_buffer(); Ok(Arc::new(ArrowPrimitiveArray::::new( @@ -253,7 +254,7 @@ fn to_arrow_primitive(array: PrimitiveArray) -> VortexRes } fn to_arrow_decimal32(array: DecimalArray) -> VortexResult { - let null_buffer = array.validity_mask().to_null_buffer(); + let null_buffer = to_null_buffer(array.validity_mask()); let buffer: Buffer = match array.values_type() { DecimalValueType::I8 => { Buffer::from_trusted_len_iter(array.buffer::().into_iter().map(|x| x.as_())) @@ -298,7 +299,7 @@ fn to_arrow_decimal32(array: DecimalArray) -> VortexResult { } fn to_arrow_decimal64(array: DecimalArray) -> VortexResult { - let null_buffer = array.validity_mask().to_null_buffer(); + let null_buffer = to_null_buffer(array.validity_mask()); let buffer: Buffer = match array.values_type() { DecimalValueType::I8 => { Buffer::from_trusted_len_iter(array.buffer::().into_iter().map(|x| x.as_())) @@ -338,7 +339,7 @@ fn to_arrow_decimal64(array: DecimalArray) -> VortexResult { } fn to_arrow_decimal128(array: DecimalArray) -> VortexResult { - let null_buffer = array.validity_mask().to_null_buffer(); + let null_buffer = to_null_buffer(array.validity_mask()); let buffer: Buffer = match array.values_type() { DecimalValueType::I8 => { Buffer::from_trusted_len_iter(array.buffer::().into_iter().map(|x| x.as_())) @@ -373,7 +374,7 @@ fn to_arrow_decimal128(array: DecimalArray) -> VortexResult { } fn to_arrow_decimal256(array: DecimalArray) -> VortexResult { - let null_buffer = array.validity_mask().to_null_buffer(); + let null_buffer = to_null_buffer(array.validity_mask()); let buffer: Buffer = match array.values_type() { DecimalValueType::I8 => { Buffer::from_trusted_len_iter(array.buffer::().into_iter().map(|x| x.as_())) @@ -442,7 +443,7 @@ fn to_arrow_struct( }) .collect::>>()?; - let nulls = array.validity_mask().to_null_buffer(); + 
let nulls = to_null_buffer(array.validity_mask()); if field_arrays.is_empty() { return Ok(Arc::new(ArrowStructArray::new_empty_fields( @@ -496,7 +497,7 @@ fn to_arrow_list( )); (values, element_field) }; - let nulls = array.validity_mask().to_null_buffer(); + let nulls = to_null_buffer(array.validity_mask()); Ok(Arc::new(GenericListArray::new( element_field, @@ -536,7 +537,7 @@ fn to_arrow_fixed_size_list( )); (values, element_field) }; - let nulls = array.validity_mask().to_null_buffer(); + let nulls = to_null_buffer(array.validity_mask()); Ok(Arc::new(ArrowFixedSizeListArray::new( element_field, @@ -554,7 +555,7 @@ fn to_arrow_varbinview(array: VarBinViewArray) -> VortexResult< .iter() .map(|buffer| buffer.clone().into_arrow_buffer()) .collect(); - let nulls = array.validity_mask().to_null_buffer(); + let nulls = to_null_buffer(array.validity_mask()); // SAFETY: our own VarBinView array is considered safe. Ok(Arc::new(unsafe { diff --git a/vortex-array/src/arrow/compute/to_arrow/mod.rs b/vortex-array/src/arrow/compute/to_arrow/mod.rs index 49b62814bb2..ee4441d5565 100644 --- a/vortex-array/src/arrow/compute/to_arrow/mod.rs +++ b/vortex-array/src/arrow/compute/to_arrow/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod canonical; +mod null_buffer; mod temporal; mod varbin; diff --git a/vortex-array/src/arrow/compute/to_arrow/null_buffer.rs b/vortex-array/src/arrow/compute/to_arrow/null_buffer.rs new file mode 100644 index 00000000000..ccf8f9d55b1 --- /dev/null +++ b/vortex-array/src/arrow/compute/to_arrow/null_buffer.rs @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use arrow_buffer::{BooleanBuffer, NullBuffer}; +use vortex_mask::Mask; + +/// Converts a mask to a null buffer. 
+pub fn to_null_buffer(mask: Mask) -> Option { + match mask { + Mask::AllTrue(_) => None, + Mask::AllFalse(l) => Some(NullBuffer::new_null(l)), + Mask::Values(values) => Some(NullBuffer::from(BooleanBuffer::from( + values.bit_buffer().clone(), + ))), + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::BitBuffer; + use vortex_mask::Mask; + + use crate::arrow::compute::to_arrow::null_buffer::to_null_buffer; + + #[test] + fn test_mask_to_null_buffer() { + let all_true = Mask::new_true(5); + assert!(to_null_buffer(all_true).is_none()); + + let all_false = Mask::new_false(5); + let null_buffer = to_null_buffer(all_false).unwrap(); + assert_eq!(null_buffer.null_count(), 5); + + let values = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); + let null_buffer = to_null_buffer(values).unwrap(); + assert_eq!(null_buffer.null_count(), 2); + } +} diff --git a/vortex-array/src/arrow/compute/to_arrow/temporal.rs b/vortex-array/src/arrow/compute/to_arrow/temporal.rs index 6e603d46bd6..9e7a7370b91 100644 --- a/vortex-array/src/arrow/compute/to_arrow/temporal.rs +++ b/vortex-array/src/arrow/compute/to_arrow/temporal.rs @@ -17,6 +17,7 @@ use vortex_error::{VortexExpect, VortexResult, vortex_bail}; use crate::arrays::{ExtensionVTable, TemporalArray}; use crate::arrow::array::ArrowArray; use crate::arrow::compute::to_arrow::ToArrowArgs; +use crate::arrow::compute::to_arrow::null_buffer::to_null_buffer; use crate::compute::{InvocationArgs, Kernel, Output, cast}; use crate::{Array as _, IntoArray, ToCanonical}; @@ -110,7 +111,7 @@ where .to_primitive() .into_buffer() .into_arrow_scalar_buffer(); - let nulls = array.temporal_values().validity_mask().to_null_buffer(); + let nulls = to_null_buffer(array.temporal_values().validity_mask()); Ok(ArrowPrimitiveArray::::new(values, nulls)) } diff --git a/vortex-array/src/arrow/compute/to_arrow/varbin.rs b/vortex-array/src/arrow/compute/to_arrow/varbin.rs index eaa65116fc6..8de60df21d0 100644 --- 
a/vortex-array/src/arrow/compute/to_arrow/varbin.rs +++ b/vortex-array/src/arrow/compute/to_arrow/varbin.rs @@ -11,6 +11,7 @@ use vortex_dtype::{DType, IntegerPType, Nullability, PType}; use vortex_error::{VortexResult, vortex_bail, vortex_panic}; use crate::arrays::{VarBinArray, VarBinVTable}; +use crate::arrow::compute::to_arrow::null_buffer::to_null_buffer; use crate::arrow::compute::{ToArrowKernel, ToArrowKernelAdapter}; use crate::compute::cast; use crate::{Array, ToCanonical, register_kernel}; @@ -73,7 +74,7 @@ fn to_arrow(array: &VarBinArray) -> VortexRes )? .to_primitive(); - let nulls = array.validity_mask().to_null_buffer(); + let nulls = to_null_buffer(array.validity_mask()); let data = array.bytes().clone(); // Match on the `DType`. diff --git a/vortex-array/src/arrow/convert.rs b/vortex-array/src/arrow/convert.rs index 5b458c33088..9dd79075528 100644 --- a/vortex-array/src/arrow/convert.rs +++ b/vortex-array/src/arrow/convert.rs @@ -5,16 +5,15 @@ use std::sync::Arc; use arrow_array::cast::{AsArray, as_null_array}; use arrow_array::types::{ - ByteArrayType, ByteViewType, Date32Type, Date64Type, Decimal32Type, Decimal64Type, - Decimal128Type, Decimal256Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, - Int32Type, Int64Type, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, - Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType, - TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type, + ByteArrayType, ByteViewType, Date32Type, Date64Type, Decimal128Type, Decimal256Type, + Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, + Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, + TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type, }; use arrow_array::{ - Array as ArrowArray, ArrowPrimitiveType, BooleanArray as 
ArrowBooleanArray, - FixedSizeListArray as ArrowFixedSizeListArray, GenericByteArray, GenericByteViewArray, - GenericListArray, NullArray as ArrowNullArray, OffsetSizeTrait, + Array as ArrowArray, ArrowPrimitiveType, BooleanArray as ArrowBooleanArray, GenericByteArray, + GenericByteViewArray, GenericListArray, NullArray as ArrowNullArray, OffsetSizeTrait, PrimitiveArray as ArrowPrimitiveArray, RecordBatch, StructArray as ArrowStructArray, make_array, }; @@ -22,15 +21,15 @@ use arrow_buffer::buffer::{NullBuffer, OffsetBuffer}; use arrow_buffer::{ArrowNativeType, BooleanBuffer, Buffer as ArrowBuffer, ScalarBuffer}; use arrow_schema::{DataType, TimeUnit as ArrowTimeUnit}; use itertools::Itertools; -use vortex_buffer::{Alignment, Buffer, ByteBuffer}; +use vortex_buffer::{Alignment, BitBuffer, Buffer, ByteBuffer}; use vortex_dtype::datetime::TimeUnit; use vortex_dtype::{DType, DecimalDType, IntegerPType, NativePType, PType}; use vortex_error::{VortexExpect as _, vortex_panic}; use vortex_scalar::i256; use crate::arrays::{ - BoolArray, DecimalArray, FixedSizeListArray, ListArray, NullArray, PrimitiveArray, StructArray, - TemporalArray, VarBinArray, VarBinViewArray, + BoolArray, DecimalArray, ListArray, NullArray, PrimitiveArray, StructArray, TemporalArray, + VarBinArray, VarBinViewArray, }; use crate::arrow::FromArrowArray; use crate::validity::Validity; @@ -49,7 +48,7 @@ impl IntoArray for ArrowBuffer { impl IntoArray for BooleanBuffer { fn into_array(self) -> ArrayRef { - BoolArray::from_bool_buffer(self, Validity::NonNullable).into_array() + BoolArray::from_bit_buffer(self.into(), Validity::NonNullable).into_array() } } @@ -104,24 +103,6 @@ impl_from_arrow_primitive!(Float16Type); impl_from_arrow_primitive!(Float32Type); impl_from_arrow_primitive!(Float64Type); -impl FromArrowArray<&ArrowPrimitiveArray> for ArrayRef { - fn from_arrow(array: &ArrowPrimitiveArray, nullable: bool) -> Self { - let decimal_type = DecimalDType::new(array.precision(), array.scale()); - let 
buffer = Buffer::from_arrow_scalar_buffer(array.values().clone()); - let validity = nulls(array.nulls(), nullable); - DecimalArray::new(buffer, decimal_type, validity).into_array() - } -} - -impl FromArrowArray<&ArrowPrimitiveArray> for ArrayRef { - fn from_arrow(array: &ArrowPrimitiveArray, nullable: bool) -> Self { - let decimal_type = DecimalDType::new(array.precision(), array.scale()); - let buffer = Buffer::from_arrow_scalar_buffer(array.values().clone()); - let validity = nulls(array.nulls(), nullable); - DecimalArray::new(buffer, decimal_type, validity).into_array() - } -} - impl FromArrowArray<&ArrowPrimitiveArray> for ArrayRef { fn from_arrow(array: &ArrowPrimitiveArray, nullable: bool) -> Self { let decimal_type = DecimalDType::new(array.precision(), array.scale()); @@ -251,8 +232,11 @@ impl FromArrowArray<&GenericByteViewArray> for ArrayRef { impl FromArrowArray<&ArrowBooleanArray> for ArrayRef { fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self { - BoolArray::from_bool_buffer(value.values().clone(), nulls(value.nulls(), nullable)) - .into_array() + BoolArray::from_bit_buffer( + value.values().clone().into(), + nulls(value.nulls(), nullable), + ) + .into_array() } } @@ -345,24 +329,7 @@ impl FromArrowArray<&GenericListArray> for value.offsets().clone().into_array(), nulls(value.nulls(), nullable), ) - .vortex_expect("Failed to convert Arrow ListArray to Vortex ListArray") - .into_array() - } -} - -impl FromArrowArray<&ArrowFixedSizeListArray> for ArrayRef { - fn from_arrow(array: &ArrowFixedSizeListArray, nullable: bool) -> Self { - let DataType::FixedSizeList(field, list_size) = array.data_type() else { - vortex_panic!("Invalid data type for ListArray: {}", array.data_type()); - }; - - FixedSizeListArray::try_new( - Self::from_arrow(array.values().as_ref(), field.is_nullable()), - *list_size as u32, - nulls(array.nulls(), nullable), - array.len(), - ) - .vortex_expect("Failed to convert Arrow FixedSizeListArray to Vortex 
FixedSizeListArray") + .vortex_expect("Failed to convert Arrow StructArray to Vortex StructArray") .into_array() } } @@ -381,7 +348,7 @@ fn nulls(nulls: Option<&NullBuffer>, nullable: bool) -> Validity { if nulls.null_count() == nulls.len() { Validity::AllInvalid } else { - Validity::from(nulls.inner().clone()) + Validity::from(BitBuffer::from(nulls.inner().clone())) } }) .unwrap_or_else(|| Validity::AllValid) @@ -415,7 +382,6 @@ impl FromArrowArray<&dyn ArrowArray> for ArrayRef { DataType::Struct(_) => Self::from_arrow(array.as_struct(), nullable), DataType::List(_) => Self::from_arrow(array.as_list::(), nullable), DataType::LargeList(_) => Self::from_arrow(array.as_list::(), nullable), - DataType::FixedSizeList(..) => Self::from_arrow(array.as_fixed_size_list(), nullable), DataType::Null => Self::from_arrow(as_null_array(array), nullable), DataType::Timestamp(u, _) => match u { ArrowTimeUnit::Second => { @@ -451,12 +417,6 @@ impl FromArrowArray<&dyn ArrowArray> for ArrayRef { } ArrowTimeUnit::Second | ArrowTimeUnit::Millisecond => unreachable!(), }, - DataType::Decimal32(..) => { - Self::from_arrow(array.as_primitive::(), nullable) - } - DataType::Decimal64(..) => { - Self::from_arrow(array.as_primitive::(), nullable) - } DataType::Decimal128(..) 
=> { Self::from_arrow(array.as_primitive::(), nullable) } diff --git a/vortex-array/src/builders/bool.rs b/vortex-array/src/builders/bool.rs index cd18dbf1899..33090ee2f1c 100644 --- a/vortex-array/src/builders/bool.rs +++ b/vortex-array/src/builders/bool.rs @@ -2,22 +2,23 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::any::Any; +use std::mem; -use arrow_buffer::BooleanBufferBuilder; +use vortex_buffer::BitBufferMut; use vortex_dtype::{DType, Nullability}; use vortex_error::{VortexResult, vortex_ensure}; use vortex_mask::Mask; use vortex_scalar::{BoolScalar, Scalar}; use crate::arrays::BoolArray; -use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder}; +use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyBitBufferBuilder}; use crate::canonical::{Canonical, ToCanonical}; use crate::{Array, ArrayRef, IntoArray}; pub struct BoolBuilder { dtype: DType, - inner: BooleanBufferBuilder, - nulls: LazyNullBufferBuilder, + inner: BitBufferMut, + nulls: LazyBitBufferBuilder, } impl BoolBuilder { @@ -27,8 +28,8 @@ impl BoolBuilder { pub fn with_capacity(nullability: Nullability, capacity: usize) -> Self { Self { - inner: BooleanBufferBuilder::new(capacity), - nulls: LazyNullBufferBuilder::new(capacity), + inner: BitBufferMut::with_capacity(capacity), + nulls: LazyBitBufferBuilder::new(capacity), dtype: DType::Bool(nullability), } } @@ -42,7 +43,7 @@ impl BoolBuilder { /// /// This method appends the given boolean value `n` times. pub fn append_values(&mut self, value: bool, n: usize) { - self.inner.append_n(n, value); + self.inner.append_n(value, n); self.nulls.append_n_non_nulls(n) } @@ -54,8 +55,8 @@ impl BoolBuilder { "Null count and value count should match when calling BoolBuilder::finish." 
); - BoolArray::from_bool_buffer( - self.inner.finish(), + BoolArray::from_bit_buffer( + mem::take(&mut self.inner).freeze(), self.nulls.finish_with_nullability(self.dtype.nullability()), ) } @@ -83,7 +84,7 @@ impl ArrayBuilder for BoolBuilder { } unsafe fn append_nulls_unchecked(&mut self, n: usize) { - self.inner.append_n(n, false); + self.inner.append_n(false, n); self.nulls.append_n_nulls(n) } @@ -107,7 +108,7 @@ impl ArrayBuilder for BoolBuilder { unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) { let bool_array = array.to_bool(); - self.inner.append_buffer(bool_array.boolean_buffer()); + self.inner.append_buffer(bool_array.bit_buffer()); self.nulls.append_validity_mask(bool_array.validity_mask()); } @@ -119,7 +120,7 @@ impl ArrayBuilder for BoolBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyNullBufferBuilder::new(validity.len()); + self.nulls = LazyBitBufferBuilder::new(validity.len()); self.nulls.append_validity_mask(validity); } @@ -176,7 +177,7 @@ mod tests { let into_canon = chunk.to_bool(); assert_eq!(canon_into.validity(), into_canon.validity()); - assert_eq!(canon_into.boolean_buffer(), into_canon.boolean_buffer()); + assert_eq!(canon_into.bit_buffer(), into_canon.bit_buffer()); } #[test] @@ -199,8 +200,8 @@ mod tests { assert_eq!(array.len(), 3); // Check actual values. - assert!(array.boolean_buffer().value(0)); - assert!(!array.boolean_buffer().value(1)); + assert!(array.bit_buffer().value(0)); + assert!(!array.bit_buffer().value(1)); // The third value is null, but the buffer might have any value. // Check validity - first two should be valid, third should be null. 
diff --git a/vortex-array/src/builders/decimal.rs b/vortex-array/src/builders/decimal.rs index 726a34c243b..6f1744fb620 100644 --- a/vortex-array/src/builders/decimal.rs +++ b/vortex-array/src/builders/decimal.rs @@ -13,7 +13,7 @@ use vortex_scalar::{ }; use crate::arrays::DecimalArray; -use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder}; +use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyBitBufferBuilder}; use crate::canonical::Canonical; use crate::{Array, ArrayRef, IntoArray, ToCanonical}; @@ -25,7 +25,7 @@ use crate::{Array, ArrayRef, IntoArray, ToCanonical}; pub struct DecimalBuilder { dtype: DType, values: DecimalBuffer, - nulls: LazyNullBufferBuilder, + nulls: LazyBitBufferBuilder, } /// Wrapper around the typed builder. @@ -101,7 +101,7 @@ impl DecimalBuilder { values: match_each_decimal_value_type!(T::VALUES_TYPE, |D| { DecimalBuffer::from(BufferMut::::with_capacity(capacity)) }), - nulls: LazyNullBufferBuilder::new(capacity), + nulls: LazyBitBufferBuilder::new(capacity), } } @@ -199,7 +199,7 @@ impl ArrayBuilder for DecimalBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyNullBufferBuilder::new(validity.len()); + self.nulls = LazyBitBufferBuilder::new(validity.len()); self.nulls.append_validity_mask(validity); } diff --git a/vortex-array/src/builders/fixed_size_list.rs b/vortex-array/src/builders/fixed_size_list.rs index a9877567cff..e843a0444e5 100644 --- a/vortex-array/src/builders/fixed_size_list.rs +++ b/vortex-array/src/builders/fixed_size_list.rs @@ -11,7 +11,7 @@ use vortex_scalar::{ListScalar, Scalar}; use crate::arrays::FixedSizeListArray; use crate::builders::{ - ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder, builder_with_capacity, + ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyBitBufferBuilder, builder_with_capacity, }; use crate::canonical::{Canonical, ToCanonical}; use crate::{Array, ArrayRef, IntoArray}; @@ -29,7 +29,7 @@ pub struct 
FixedSizeListBuilder { /// The null map builder of the [`FixedSizeListArray`]. /// /// We also use this type to store the length of the final output array. - nulls: LazyNullBufferBuilder, + nulls: LazyBitBufferBuilder, } impl FixedSizeListBuilder { @@ -54,7 +54,7 @@ impl FixedSizeListBuilder { let elements_builder = builder_with_capacity(&element_dtype, elements_capacity); let fsl_dtype = DType::FixedSizeList(element_dtype, list_size, nullability); - let nulls = LazyNullBufferBuilder::new(capacity); + let nulls = LazyBitBufferBuilder::new(capacity); Self { dtype: fsl_dtype, @@ -224,7 +224,7 @@ impl ArrayBuilder for FixedSizeListBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyNullBufferBuilder::new(validity.len()); + self.nulls = LazyBitBufferBuilder::new(validity.len()); self.nulls.append_validity_mask(validity); } diff --git a/vortex-array/src/builders/lazy_null_builder.rs b/vortex-array/src/builders/lazy_null_builder.rs index c676376e02d..a9f38c5bc20 100644 --- a/vortex-array/src/builders/lazy_null_builder.rs +++ b/vortex-array/src/builders/lazy_null_builder.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; +use vortex_buffer::{BitBuffer, BitBufferMut}; use vortex_dtype::Nullability; use vortex_dtype::Nullability::{NonNullable, Nullable}; use vortex_error::{VortexExpect, vortex_panic}; @@ -11,13 +11,13 @@ use crate::validity::Validity; /// This is borrowed from arrow's null buffer builder, however we expose a `append_buffer` /// method to append a boolean buffer directly. -pub struct LazyNullBufferBuilder { - inner: Option, +pub struct LazyBitBufferBuilder { + inner: Option, len: usize, capacity: usize, } -impl LazyNullBufferBuilder { +impl LazyBitBufferBuilder { /// Creates a new empty builder. /// `capacity` is the number of bits in the null buffer. 
pub fn new(capacity: usize) -> Self { @@ -32,7 +32,7 @@ impl LazyNullBufferBuilder { #[inline] pub fn append_n_non_nulls(&mut self, n: usize) { if let Some(buf) = self.inner.as_mut() { - buf.append_n(n, true) + buf.append_n(true, n) } else { self.len += n; } @@ -55,7 +55,7 @@ impl LazyNullBufferBuilder { self.inner .as_mut() .vortex_expect("cannot append null to non-nullable builder") - .append_n(n, false); + .append_n(false, n); } /// Appends a single null value to the builder. @@ -70,7 +70,7 @@ impl LazyNullBufferBuilder { /// Appends values from a boolean buffer where `true` indicates non-null. #[inline] - pub fn append_buffer(&mut self, bool_buffer: &BooleanBuffer) { + pub fn append_buffer(&mut self, bool_buffer: &BitBuffer) { self.materialize_if_needed(); self.inner .as_mut() @@ -83,7 +83,7 @@ impl LazyNullBufferBuilder { match validity_mask { Mask::AllTrue(len) => self.append_n_non_nulls(len), Mask::AllFalse(len) => self.append_n_nulls(len), - Mask::Values(is_valid) => self.append_buffer(is_valid.boolean_buffer()), + Mask::Values(is_valid) => self.append_buffer(is_valid.bit_buffer()), } } @@ -93,7 +93,7 @@ impl LazyNullBufferBuilder { self.inner .as_mut() .vortex_expect("buffer just materialized") - .set_bit(index, v); + .set_to(index, v); } /// Returns the current length of the builder. @@ -102,9 +102,9 @@ impl LazyNullBufferBuilder { self.inner.as_ref().map(|i| i.len()).unwrap_or(self.len) } - fn finish(&mut self) -> Option { + fn finish(&mut self) -> Option { self.len = 0; - Some(NullBuffer::new(self.inner.take()?.finish())) + self.inner.take().map(|b| b.freeze()) } /// Finishes the builder and returns a `Validity` based on the given nullability. 
@@ -146,9 +146,9 @@ impl LazyNullBufferBuilder { #[inline(never)] fn materialize(&mut self) { if self.inner.is_none() { - let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity)); - b.append_n(self.len, true); - self.inner = Some(b); + let mut bit_mut = BitBufferMut::with_capacity(self.len.max(self.capacity)); + bit_mut.append_n(true, self.len); + self.inner = Some(bit_mut); } } } diff --git a/vortex-array/src/builders/list.rs b/vortex-array/src/builders/list.rs index 1db2d15e964..21578ea4efa 100644 --- a/vortex-array/src/builders/list.rs +++ b/vortex-array/src/builders/list.rs @@ -12,7 +12,7 @@ use vortex_scalar::{ListScalar, Scalar}; use crate::arrays::ListArray; use crate::builders::{ - ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder, PrimitiveBuilder, + ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyBitBufferBuilder, PrimitiveBuilder, builder_with_capacity, }; use crate::canonical::{Canonical, ToCanonical}; @@ -32,7 +32,7 @@ pub struct ListBuilder { offsets_builder: PrimitiveBuilder, /// The null map builder of the [`ListArray`]. 
- nulls: LazyNullBufferBuilder, + nulls: LazyBitBufferBuilder, } impl ListBuilder { @@ -70,7 +70,7 @@ impl ListBuilder { Self { elements_builder, offsets_builder, - nulls: LazyNullBufferBuilder::new(capacity), + nulls: LazyBitBufferBuilder::new(capacity), dtype: DType::List(value_dtype, nullability), } } @@ -239,7 +239,7 @@ impl ArrayBuilder for ListBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyNullBufferBuilder::new(validity.len()); + self.nulls = LazyBitBufferBuilder::new(validity.len()); self.nulls.append_validity_mask(validity); } diff --git a/vortex-array/src/builders/listview.rs b/vortex-array/src/builders/listview.rs index 0b7b16e6645..b73b43c8c07 100644 --- a/vortex-array/src/builders/listview.rs +++ b/vortex-array/src/builders/listview.rs @@ -15,9 +15,9 @@ use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic}; use vortex_mask::Mask; use vortex_scalar::{ListScalar, Scalar}; -use super::lazy_null_builder::LazyNullBufferBuilder; use crate::array::{Array, ArrayRef, IntoArray}; use crate::arrays::{ListViewArray, list_view_from_list}; +use crate::builders::lazy_null_builder::LazyBitBufferBuilder; use crate::builders::{ ArrayBuilder, DEFAULT_BUILDER_CAPACITY, PrimitiveBuilder, builder_with_capacity, }; @@ -46,7 +46,7 @@ pub struct ListViewBuilder { sizes_builder: PrimitiveBuilder, /// The null map builder of the [`ListViewArray`]. 
- nulls: LazyNullBufferBuilder, + nulls: LazyBitBufferBuilder, } impl ListViewBuilder { @@ -93,7 +93,7 @@ impl ListViewBuilder { let sizes_builder = PrimitiveBuilder::::with_capacity(Nullability::NonNullable, capacity); - let nulls = LazyNullBufferBuilder::new(capacity); + let nulls = LazyBitBufferBuilder::new(capacity); Self { dtype: DType::List(element_dtype, nullability), @@ -260,7 +260,7 @@ impl ArrayBuilder for ListViewBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyNullBufferBuilder::new(validity.len()); + self.nulls = LazyBitBufferBuilder::new(validity.len()); self.nulls.append_validity_mask(validity); } diff --git a/vortex-array/src/builders/mod.rs b/vortex-array/src/builders/mod.rs index 87dccf17881..8155918a943 100644 --- a/vortex-array/src/builders/mod.rs +++ b/vortex-array/src/builders/mod.rs @@ -40,7 +40,7 @@ use crate::canonical::Canonical; use crate::{Array, ArrayRef}; mod lazy_null_builder; -use lazy_null_builder::LazyNullBufferBuilder; +use lazy_null_builder::LazyBitBufferBuilder; mod bool; mod decimal; diff --git a/vortex-array/src/builders/primitive.rs b/vortex-array/src/builders/primitive.rs index 8fc8085b634..3571e8c8818 100644 --- a/vortex-array/src/builders/primitive.rs +++ b/vortex-array/src/builders/primitive.rs @@ -11,7 +11,7 @@ use vortex_mask::Mask; use vortex_scalar::{PrimitiveScalar, Scalar}; use crate::arrays::PrimitiveArray; -use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder}; +use crate::builders::{ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyBitBufferBuilder}; use crate::canonical::{Canonical, ToCanonical}; use crate::{Array, ArrayRef, IntoArray}; @@ -19,7 +19,7 @@ use crate::{Array, ArrayRef, IntoArray}; pub struct PrimitiveBuilder { dtype: DType, values: BufferMut, - nulls: LazyNullBufferBuilder, + nulls: LazyBitBufferBuilder, } impl PrimitiveBuilder { @@ -32,7 +32,7 @@ impl PrimitiveBuilder { pub fn with_capacity(nullability: Nullability, capacity: 
usize) -> Self { Self { values: BufferMut::with_capacity(capacity), - nulls: LazyNullBufferBuilder::new(capacity), + nulls: LazyBitBufferBuilder::new(capacity), dtype: DType::Primitive(T::PTYPE, nullability), } } @@ -176,7 +176,7 @@ impl ArrayBuilder for PrimitiveBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyNullBufferBuilder::new(validity.len()); + self.nulls = LazyBitBufferBuilder::new(validity.len()); self.nulls.append_validity_mask(validity); } diff --git a/vortex-array/src/builders/struct_.rs b/vortex-array/src/builders/struct_.rs index 5eacd0246df..5dd9a153f15 100644 --- a/vortex-array/src/builders/struct_.rs +++ b/vortex-array/src/builders/struct_.rs @@ -11,7 +11,7 @@ use vortex_scalar::{Scalar, StructScalar}; use crate::arrays::StructArray; use crate::builders::{ - ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder, builder_with_capacity, + ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyBitBufferBuilder, builder_with_capacity, }; use crate::canonical::{Canonical, ToCanonical}; use crate::{Array, ArrayRef, IntoArray}; @@ -20,7 +20,7 @@ use crate::{Array, ArrayRef, IntoArray}; pub struct StructBuilder { dtype: DType, builders: Vec>, - nulls: LazyNullBufferBuilder, + nulls: LazyBitBufferBuilder, } impl StructBuilder { @@ -42,7 +42,7 @@ impl StructBuilder { Self { builders, - nulls: LazyNullBufferBuilder::new(capacity), + nulls: LazyBitBufferBuilder::new(capacity), dtype: DType::Struct(struct_dtype, nullability), } } @@ -178,7 +178,7 @@ impl ArrayBuilder for StructBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyNullBufferBuilder::new(validity.len()); + self.nulls = LazyBitBufferBuilder::new(validity.len()); self.nulls.append_validity_mask(validity); } diff --git a/vortex-array/src/builders/varbinview.rs b/vortex-array/src/builders/varbinview.rs index 0ad21f4988a..f34f94ea73c 100644 --- a/vortex-array/src/builders/varbinview.rs +++ 
b/vortex-array/src/builders/varbinview.rs @@ -15,7 +15,7 @@ use vortex_utils::aliases::hash_map::{Entry, HashMap}; use crate::arrays::VarBinViewArray; use crate::arrays::binary_view::BinaryView; use crate::arrays::compact::BufferUtilization; -use crate::builders::{ArrayBuilder, LazyNullBufferBuilder}; +use crate::builders::{ArrayBuilder, LazyBitBufferBuilder}; use crate::canonical::{Canonical, ToCanonical}; use crate::{Array, ArrayRef, IntoArray}; @@ -23,7 +23,7 @@ use crate::{Array, ArrayRef, IntoArray}; pub struct VarBinViewBuilder { dtype: DType, views_builder: BufferMut, - nulls: LazyNullBufferBuilder, + nulls: LazyBitBufferBuilder, completed: CompletedBuffers, in_progress: ByteBufferMut, growth_strategy: BufferGrowthStrategy, @@ -68,7 +68,7 @@ impl VarBinViewBuilder { ); Self { views_builder: BufferMut::::with_capacity(capacity), - nulls: LazyNullBufferBuilder::new(capacity), + nulls: LazyBitBufferBuilder::new(capacity), completed, in_progress: ByteBufferMut::empty(), dtype, @@ -309,7 +309,7 @@ impl ArrayBuilder for VarBinViewBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyNullBufferBuilder::new(validity.len()); + self.nulls = LazyBitBufferBuilder::new(validity.len()); self.nulls.append_validity_mask(validity); } diff --git a/vortex-array/src/compute/compare.rs b/vortex-array/src/compute/compare.rs index ebf7281f9d0..99246349839 100644 --- a/vortex-array/src/compute/compare.rs +++ b/vortex-array/src/compute/compare.rs @@ -8,10 +8,11 @@ use std::sync::LazyLock; use arcref::ArcRef; use arrow_array::{BooleanArray, Datum as ArrowDatum}; -use arrow_buffer::{BooleanBuffer, NullBuffer}; +use arrow_buffer::NullBuffer; use arrow_ord::cmp; use arrow_ord::ord::make_comparator; use arrow_schema::SortOptions; +use vortex_buffer::BitBuffer; use vortex_dtype::{DType, IntegerPType, Nullability}; use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err}; use vortex_scalar::Scalar; @@ -271,7 +272,7 @@ 
impl<'a> TryFrom<&InvocationArgs<'a>> for CompareArgs<'a> { /// Helper function to compare empty values with arrays that have external value length information /// like `VarBin`. -pub fn compare_lengths_to_empty(lengths: I, op: Operator) -> BooleanBuffer +pub fn compare_lengths_to_empty(lengths: I, op: Operator) -> BitBuffer where P: IntegerPType, I: Iterator, @@ -284,7 +285,7 @@ where Operator::Lt => |_| false, }; - lengths.map(cmp_fn).collect::() + lengths.map(cmp_fn).collect() } /// Implementation of `CompareFn` using the Arrow crate. @@ -354,22 +355,18 @@ pub fn scalar_cmp(lhs: &Scalar, rhs: &Scalar, operator: Operator) -> Scalar { #[cfg(test)] mod tests { - use arrow_buffer::BooleanBuffer; use rstest::rstest; use super::*; use crate::ToCanonical; - use crate::arrays::{ - BoolArray, ConstantArray, ListArray, PrimitiveArray, StructArray, VarBinArray, - VarBinViewArray, - }; + use crate::arrays::{BoolArray, ConstantArray, VarBinArray, VarBinViewArray}; use crate::test_harness::to_int_indices; use crate::validity::Validity; #[test] fn test_bool_basic_comparisons() { - let arr = BoolArray::from_bool_buffer( - BooleanBuffer::from_iter([true, true, false, true, false]), + let arr = BoolArray::from_bit_buffer( + BitBuffer::from_iter([true, true, false, true, false]), Validity::from_iter([false, true, true, true, true]), ); @@ -385,8 +382,8 @@ mod tests { let empty: [u64; 0] = []; assert_eq!(to_int_indices(matches).unwrap(), empty); - let other = BoolArray::from_bool_buffer( - BooleanBuffer::from_iter([false, false, false, true, true]), + let other = BoolArray::from_bit_buffer( + BitBuffer::from_iter([false, false, false, true, true]), Validity::from_iter([false, true, true, true, true]), ); @@ -448,117 +445,6 @@ mod tests { #[case(VarBinViewArray::from_iter_bin(["a".as_bytes(), "b".as_bytes()]).into_array(), VarBinArray::from(vec!["a".as_bytes(), "b".as_bytes()]).into_array())] fn arrow_compare_different_encodings(#[case] left: ArrayRef, #[case] right: ArrayRef) { let 
res = compare(&left, &right, Operator::Eq).unwrap(); - assert_eq!(res.to_bool().boolean_buffer().count_set_bits(), left.len()); - } - - #[test] - fn test_list_array_comparison() { - // Create two simple list arrays with integers - let values1 = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6]); - let offsets1 = PrimitiveArray::from_iter([0i32, 2, 4, 6]); - let list1 = ListArray::try_new( - values1.into_array(), - offsets1.into_array(), - Validity::NonNullable, - ) - .unwrap(); - - let values2 = PrimitiveArray::from_iter([1i32, 2, 3, 4, 7, 8]); - let offsets2 = PrimitiveArray::from_iter([0i32, 2, 4, 6]); - let list2 = ListArray::try_new( - values2.into_array(), - offsets2.into_array(), - Validity::NonNullable, - ) - .unwrap(); - - // Test equality - first two lists should be equal, third should be different - let result = compare(list1.as_ref(), list2.as_ref(), Operator::Eq).unwrap(); - let bool_result = result.to_bool(); - assert!(bool_result.boolean_buffer().value(0)); // [1,2] == [1,2] - assert!(bool_result.boolean_buffer().value(1)); // [3,4] == [3,4] - assert!(!bool_result.boolean_buffer().value(2)); // [5,6] != [7,8] - - // Test inequality - let result = compare(list1.as_ref(), list2.as_ref(), Operator::NotEq).unwrap(); - let bool_result = result.to_bool(); - assert!(!bool_result.boolean_buffer().value(0)); - assert!(!bool_result.boolean_buffer().value(1)); - assert!(bool_result.boolean_buffer().value(2)); - - // Test less than - let result = compare(list1.as_ref(), list2.as_ref(), Operator::Lt).unwrap(); - let bool_result = result.to_bool(); - assert!(!bool_result.boolean_buffer().value(0)); // [1,2] < [1,2] = false - assert!(!bool_result.boolean_buffer().value(1)); // [3,4] < [3,4] = false - assert!(bool_result.boolean_buffer().value(2)); // [5,6] < [7,8] = true - } - - #[test] - fn test_list_array_constant_comparison() { - use std::sync::Arc; - - use vortex_dtype::{DType, PType}; - - // Create a list array - let values = PrimitiveArray::from_iter([1i32, 2, 
3, 4, 5, 6]); - let offsets = PrimitiveArray::from_iter([0i32, 2, 4, 6]); - let list = ListArray::try_new( - values.into_array(), - offsets.into_array(), - Validity::NonNullable, - ) - .unwrap(); - - // Create a constant list scalar [3,4] that will be broadcasted - let list_scalar = Scalar::list( - Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), - vec![3i32.into(), 4i32.into()], - Nullability::NonNullable, - ); - let constant = ConstantArray::new(list_scalar, 3); - - // Compare list with constant - all should be compared to [3,4] - let result = compare(list.as_ref(), constant.as_ref(), Operator::Eq).unwrap(); - let bool_result = result.to_bool(); - assert!(!bool_result.boolean_buffer().value(0)); // [1,2] != [3,4] - assert!(bool_result.boolean_buffer().value(1)); // [3,4] == [3,4] - assert!(!bool_result.boolean_buffer().value(2)); // [5,6] != [3,4] - } - - #[test] - fn test_struct_array_comparison() { - // Create two struct arrays with bool and int fields - let bool_field1 = BoolArray::from_iter([Some(true), Some(false), Some(true)]); - let int_field1 = PrimitiveArray::from_iter([1i32, 2, 3]); - - let bool_field2 = BoolArray::from_iter([Some(true), Some(false), Some(false)]); - let int_field2 = PrimitiveArray::from_iter([1i32, 2, 4]); - - let struct1 = StructArray::from_fields(&[ - ("bool_col", bool_field1.into_array()), - ("int_col", int_field1.into_array()), - ]) - .unwrap(); - - let struct2 = StructArray::from_fields(&[ - ("bool_col", bool_field2.into_array()), - ("int_col", int_field2.into_array()), - ]) - .unwrap(); - - // Test equality - let result = compare(struct1.as_ref(), struct2.as_ref(), Operator::Eq).unwrap(); - let bool_result = result.to_bool(); - assert!(bool_result.boolean_buffer().value(0)); // {true, 1} == {true, 1} - assert!(bool_result.boolean_buffer().value(1)); // {false, 2} == {false, 2} - assert!(!bool_result.boolean_buffer().value(2)); // {true, 3} != {false, 4} - - // Test greater than - let result = 
compare(struct1.as_ref(), struct2.as_ref(), Operator::Gt).unwrap(); - let bool_result = result.to_bool(); - assert!(!bool_result.boolean_buffer().value(0)); // {true, 1} > {true, 1} = false - assert!(!bool_result.boolean_buffer().value(1)); // {false, 2} > {false, 2} = false - assert!(bool_result.boolean_buffer().value(2)); // {true, 3} > {false, 4} = true (bool field takes precedence) + assert_eq!(res.to_bool().bit_buffer().true_count(), left.len()); } } diff --git a/vortex-array/src/compute/conformance/consistency.rs b/vortex-array/src/compute/conformance/consistency.rs index 4d29dbaee8e..bd09aed7252 100644 --- a/vortex-array/src/compute/conformance/consistency.rs +++ b/vortex-array/src/compute/conformance/consistency.rs @@ -19,13 +19,12 @@ //! interact with null values. //! - **Edge Cases**: Tests empty arrays, single elements, and boundary conditions. -use arrow_buffer::BooleanBuffer; -use vortex_buffer::buffer; +use vortex_buffer::BitBuffer; use vortex_dtype::{DType, Nullability, PType}; use vortex_error::{VortexUnwrap, vortex_panic}; use vortex_mask::Mask; -use crate::arrays::{BoolArray, ConstantArray, PrimitiveArray}; +use crate::arrays::{BoolArray, PrimitiveArray}; use crate::compute::{Operator, and, cast, compare, filter, invert, mask, or, take}; use crate::{Array, IntoArray}; @@ -47,7 +46,7 @@ fn test_filter_take_consistency(array: &dyn Array) { } // Create a test mask (keep elements where index % 3 != 1) - let mask_pattern: BooleanBuffer = (0..len).map(|i| i % 3 != 1).collect(); + let mask_pattern: BitBuffer = (0..len).map(|i| i % 3 != 1).collect(); let mask = Mask::from_buffer(mask_pattern.clone()); // Filter the array @@ -115,10 +114,10 @@ fn test_double_mask_consistency(array: &dyn Array) { let double_masked = mask(&first_masked, &mask2).vortex_unwrap(); // Create combined mask (OR operation - element is masked if EITHER mask is true) - let combined_pattern: BooleanBuffer = mask1 - .to_boolean_buffer() + let combined_pattern: BitBuffer = mask1 + 
.to_bit_buffer() .iter() - .zip(mask2.to_boolean_buffer().iter()) + .zip(mask2.to_bit_buffer().iter()) .map(|(a, b)| a || b) .collect(); let combined_mask = Mask::from_buffer(combined_pattern); @@ -275,7 +274,7 @@ fn test_slice_filter_consistency(array: &dyn Array) { filtered.len(), sliced.len(), "Filter with contiguous mask and slice should produce same length. \ - \nFiltered length: {}\nSliced length: {}", + Filtered length: {}, Sliced length: {}", filtered.len(), sliced.len() ); @@ -286,7 +285,7 @@ fn test_slice_filter_consistency(array: &dyn Array) { assert_eq!( filtered_val, sliced_val, "Filter with contiguous mask and slice produced different values at index {i}. \ - \nFiltered value: {filtered_val:?}\nSliced value: {sliced_val:?}" + Filtered value: {filtered_val:?}, Sliced value: {sliced_val:?}" ); } } @@ -322,7 +321,7 @@ fn test_take_slice_consistency(array: &dyn Array) { taken.len(), sliced.len(), "Take with sequential indices and slice should produce same length. \ - \nTaken length: {}\nSliced length: {}", + Taken length: {}, Sliced length: {}", taken.len(), sliced.len() ); @@ -333,7 +332,7 @@ fn test_take_slice_consistency(array: &dyn Array) { assert_eq!( taken_val, sliced_val, "Take with sequential indices and slice produced different values at index {i}. 
\ - \nTaken value: {taken_val:?}\nSliced value: {sliced_val:?}" + Taken value: {taken_val:?}, Sliced value: {sliced_val:?}" ); } } @@ -368,7 +367,7 @@ fn test_take_repeated_indices(array: &dyn Array) { } // Take the first element three times - let indices = buffer![0u64, 0, 0].into_array(); + let indices = PrimitiveArray::from_iter([0u64, 0, 0]).into_array(); let taken = take(array, &indices).vortex_unwrap(); assert_eq!(taken.len(), 3); @@ -573,7 +572,7 @@ fn test_comparison_inverse_consistency(array: &dyn Array) { }; // Test Eq vs NotEq - let const_array = ConstantArray::new(test_scalar, len); + let const_array = crate::arrays::ConstantArray::new(test_scalar, len); if let (Ok(eq_result), Ok(neq_result)) = ( compare(array, const_array.as_ref(), Operator::Eq), compare(array, const_array.as_ref(), Operator::NotEq), @@ -669,7 +668,7 @@ fn test_comparison_symmetry_consistency(array: &dyn Array) { }; // Create a constant array with the test scalar for reverse comparison - let const_array = ConstantArray::new(test_scalar, len); + let const_array = crate::arrays::ConstantArray::new(test_scalar, len); // Test Gt vs Lt symmetry if let (Ok(arr_gt_scalar), Ok(scalar_lt_arr)) = ( diff --git a/vortex-array/src/compute/conformance/mask.rs b/vortex-array/src/compute/conformance/mask.rs index 869bfb13d65..719b0e0b406 100644 --- a/vortex-array/src/compute/conformance/mask.rs +++ b/vortex-array/src/compute/conformance/mask.rs @@ -184,7 +184,7 @@ fn test_nullable_mask_input(array: &dyn Array) { let bool_array = BoolArray::from_iter(bool_values.clone()); let validity = crate::validity::Validity::from_iter(validity_values.clone()); - let nullable_mask = BoolArray::from_bool_buffer(bool_array.boolean_buffer().clone(), validity); + let nullable_mask = BoolArray::from_bit_buffer(bool_array.bit_buffer().clone(), validity); let mask_array = nullable_mask.to_mask_fill_null_false(); let masked = mask(array, &mask_array).vortex_unwrap(); diff --git a/vortex-array/src/compute/filter.rs 
b/vortex-array/src/compute/filter.rs index ac287f2ca35..3274426b0fe 100644 --- a/vortex-array/src/compute/filter.rs +++ b/vortex-array/src/compute/filter.rs @@ -234,7 +234,7 @@ pub fn arrow_filter_fn(array: &dyn Array, mask: &Mask) -> VortexResult }; let array_ref = array.to_array().into_arrow_preferred()?; - let mask_array = BooleanArray::new(values.boolean_buffer().clone(), None); + let mask_array = BooleanArray::new(values.bit_buffer().clone().into(), None); let filtered = arrow_select::filter::filter(array_ref.as_ref(), &mask_array)?; Ok(ArrayRef::from_arrow( diff --git a/vortex-array/src/compute/list_contains.rs b/vortex-array/src/compute/list_contains.rs index 4c13eecd372..9b3bb59a134 100644 --- a/vortex-array/src/compute/list_contains.rs +++ b/vortex-array/src/compute/list_contains.rs @@ -6,10 +6,10 @@ use std::sync::LazyLock; use arcref::ArcRef; -use arrow_buffer::BooleanBuffer; use arrow_buffer::bit_iterator::BitIndexIterator; -use vortex_buffer::Buffer; -use vortex_dtype::{DType, IntegerPType, Nullability, match_each_integer_ptype}; +use num_traits::AsPrimitive; +use vortex_buffer::{BitBuffer, Buffer}; +use vortex_dtype::{DType, NativePType, Nullability, match_each_integer_ptype}; use vortex_error::{VortexExpect, VortexResult, vortex_bail}; use vortex_scalar::{ListScalar, Scalar}; @@ -271,7 +271,7 @@ fn list_contains_scalar( match_each_integer_ptype!(ends.ptype(), |T| { Ok(reduce_with_ends( ends.as_slice::(), - matches.boolean_buffer(), + matches.bit_buffer(), list_array.validity().clone().union_nullability(nullability), )) }) @@ -302,9 +302,9 @@ fn list_false_or_null(list_array: &ListArray, nullability: Nullability) -> Vorte } Validity::Array(validity_array) => { // Create a new bool array with false, and the provided nulls - let buffer = BooleanBuffer::new_unset(list_array.len()); + let buffer = BitBuffer::new_unset(list_array.len()); Ok( - BoolArray::from_bool_buffer(buffer, Validity::Array(validity_array.clone())) + BoolArray::from_bit_buffer(buffer, 
Validity::Array(validity_array.clone())) .into_array(), ) } @@ -329,7 +329,7 @@ fn list_is_not_empty(list_array: &ListArray, nullability: Nullability) -> Vortex }); // Copy over the validity mask from the input. - Ok(BoolArray::from_bool_buffer( + Ok(BoolArray::from_bit_buffer( buffer, list_array.validity().clone().union_nullability(nullability), ) @@ -338,21 +338,22 @@ fn list_is_not_empty(list_array: &ListArray, nullability: Nullability) -> Vortex /// Reduces each boolean values into a Mask that indicates which elements in the /// ListArray contain the matching value. -fn reduce_with_ends( +fn reduce_with_ends>( ends: &[T], - matches: &BooleanBuffer, + matches: &BitBuffer, validity: Validity, ) -> ArrayRef { - let mask: BooleanBuffer = ends + let mask: BitBuffer = ends .windows(2) .map(|window| { let len = window[1].as_() - window[0].as_(); - let mut set_bits = BitIndexIterator::new(matches.values(), window[0].as_(), len); + let mut set_bits = + BitIndexIterator::new(matches.inner().as_slice(), window[0].as_(), len); set_bits.next().is_some() }) .collect(); - BoolArray::from_bool_buffer(mask, validity).into_array() + BoolArray::from_bit_buffer(mask, validity).into_array() } /// Returns a new array of `u64` representing the length of each list element. 
@@ -397,15 +398,15 @@ pub fn list_elem_len(array: &dyn Array) -> VortexResult { Ok(lens_array) } -fn element_lens(values: &[T]) -> Buffer { +fn element_lens(values: &[T]) -> Buffer { values .windows(2) .map(|window| window[1] - window[0]) .collect() } -fn element_is_not_empty(values: &[T]) -> BooleanBuffer { - BooleanBuffer::from_iter(values.windows(2).map(|window| window[1] != window[0])) +fn element_is_not_empty(values: &[T]) -> BitBuffer { + BitBuffer::from_iter(values.windows(2).map(|window| window[1] != window[0])) } #[cfg(test)] @@ -453,7 +454,7 @@ mod tests { } fn bool_array(values: Vec, validity: Validity) -> BoolArray { - BoolArray::from_bool_buffer(values.into_iter().collect(), validity) + BoolArray::from_bit_buffer(values.into_iter().collect(), validity) } #[rstest] @@ -521,10 +522,7 @@ mod tests { let elem = ConstantArray::new(scalar, list_array.len()); let result = list_contains(&list_array, elem.as_ref()).expect("list_contains failed"); let bool_result = result.to_bool(); - assert_eq!( - bool_result.opt_bool_vec().unwrap(), - expected.opt_bool_vec().unwrap() - ); + assert_eq!(bool_result.opt_bool_vec(), expected.opt_bool_vec()); assert_eq!(bool_result.validity(), expected.validity()); } @@ -547,7 +545,7 @@ mod tests { .unwrap(); assert!(contains.is::(), "Expected constant result"); assert_eq!( - contains.to_bool().boolean_buffer().iter().collect_vec(), + contains.to_bool().bit_buffer().iter().collect_vec(), vec![true, true], ); } @@ -590,7 +588,7 @@ mod tests { assert_eq!(contains.len(), 7); assert_eq!( - contains.to_bool().opt_bool_vec().unwrap(), + contains.to_bool().opt_bool_vec(), vec![ Some(false), Some(true), diff --git a/vortex-array/src/compute/mask.rs b/vortex-array/src/compute/mask.rs index 26f915829fb..669487b480d 100644 --- a/vortex-array/src/compute/mask.rs +++ b/vortex-array/src/compute/mask.rs @@ -133,7 +133,7 @@ impl ComputeFnVTable for MaskFn { log::debug!("No mask implementation found for {}", array.encoding_id()); let array_ref = 
array.to_array().into_arrow_preferred()?; - let mask = BooleanArray::new(mask.to_boolean_buffer(), None); + let mask = BooleanArray::new(mask.to_bit_buffer().into(), None); let masked = arrow_select::nullif::nullif(array_ref.as_ref(), &mask)?; diff --git a/vortex-array/src/compute/min_max.rs b/vortex-array/src/compute/min_max.rs index 070065fe6b6..f842ea6d7ff 100644 --- a/vortex-array/src/compute/min_max.rs +++ b/vortex-array/src/compute/min_max.rs @@ -215,8 +215,7 @@ impl Kernel for MinMaxKernelAdapter { #[cfg(test)] mod tests { - use arrow_buffer::BooleanBuffer; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use crate::arrays::{BoolArray, NullArray, PrimitiveArray}; use crate::compute::{MinMaxResult, min_max}; @@ -236,8 +235,8 @@ mod tests { #[test] fn test_bool_max() { - let p = BoolArray::from_bool_buffer( - BooleanBuffer::from([true, true, true].as_slice()), + let p = BoolArray::from_bit_buffer( + BitBuffer::from([true, true, true].as_slice()), Validity::NonNullable, ); assert_eq!( @@ -248,8 +247,8 @@ mod tests { }) ); - let p = BoolArray::from_bool_buffer( - BooleanBuffer::from([false, false, false].as_slice()), + let p = BoolArray::from_bit_buffer( + BitBuffer::from([false, false, false].as_slice()), Validity::NonNullable, ); assert_eq!( @@ -260,8 +259,8 @@ mod tests { }) ); - let p = BoolArray::from_bool_buffer( - BooleanBuffer::from([false, true, false].as_slice()), + let p = BoolArray::from_bit_buffer( + BitBuffer::from([false, true, false].as_slice()), Validity::NonNullable, ); assert_eq!( diff --git a/vortex-array/src/executor.rs b/vortex-array/src/executor.rs index b3032cfb966..fccec6c8610 100644 --- a/vortex-array/src/executor.rs +++ b/vortex-array/src/executor.rs @@ -159,10 +159,7 @@ mod tests { let mut executor = Executor::default(); let result = block_on(executor.execute(compare)).unwrap(); - assert_eq!( - result.into_bool().bool_vec().unwrap(), - vec![false, false, true] - ); + assert_eq!(result.into_bool().bool_vec(), 
vec![false, false, true]); } #[test] @@ -181,7 +178,7 @@ mod tests { let mut executor = Executor::default(); let result = block_on(executor.execute(compare.clone())).unwrap(); assert_eq!( - result.into_bool().bool_vec().unwrap(), + result.into_bool().bool_vec(), vec![false, false, false, false] ); diff --git a/vortex-array/src/operator/hash.rs b/vortex-array/src/operator/hash.rs index a6502265e59..cb7b95a97e4 100644 --- a/vortex-array/src/operator/hash.rs +++ b/vortex-array/src/operator/hash.rs @@ -120,7 +120,7 @@ impl OperatorHash for Mask { len.hash(state); } Mask::Values(values) => { - let buffer = values.boolean_buffer(); + let buffer = values.bit_buffer(); buffer.offset().hash(state); buffer.len().hash(state); buffer.inner().as_ptr().hash(state); @@ -134,8 +134,8 @@ impl OperatorEq for Mask { (Mask::AllTrue(len1), Mask::AllTrue(len2)) => len1 == len2, (Mask::AllFalse(len1), Mask::AllFalse(len2)) => len1 == len2, (Mask::Values(buf1), Mask::Values(buf2)) => { - let b1 = buf1.boolean_buffer(); - let b2 = buf2.boolean_buffer(); + let b1 = buf1.bit_buffer(); + let b2 = buf2.bit_buffer(); b1.offset() == b2.offset() && b1.len() == b2.len() && b1.inner().as_ptr() == b2.inner().as_ptr() diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index b5ffb2063ec..f14a6b98de3 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -6,9 +6,8 @@ use std::fmt::Debug; use std::hash::Hash; use std::ops::Range; -use arrow_buffer::BooleanBuffer; -use itertools::Itertools; -use vortex_buffer::BufferMut; +use itertools::Itertools as _; +use vortex_buffer::{BitBuffer, BufferMut}; use vortex_dtype::Nullability::NonNullable; use vortex_dtype::{ DType, IntegerPType, NativePType, PType, match_each_integer_ptype, @@ -475,14 +474,14 @@ impl Patches { ); } - let filter_mask = match mask.boolean_buffer() { + let filter_mask = match mask.bit_buffer() { AllOr::All => return Ok(None), AllOr::None => return Ok(Some(self.clone())), AllOr::Some(masked) => { 
let patch_indices = self.indices().to_primitive(); match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| { let patch_indices = patch_indices.as_slice::

(); - Mask::from_buffer(BooleanBuffer::collect_bool(patch_indices.len(), |i| { + Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| { #[allow(clippy::cast_possible_truncation)] let idx = (patch_indices[i] as usize) - self.offset; !masked.value(idx) diff --git a/vortex-array/src/pipeline/canonical.rs b/vortex-array/src/pipeline/canonical.rs new file mode 100644 index 00000000000..892502ead21 --- /dev/null +++ b/vortex-array/src/pipeline/canonical.rs @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::{BitBuffer, BufferMut}; +use vortex_dtype::{DType, NativePType, Nullability, match_each_native_ptype}; +use vortex_error::{VortexResult, vortex_bail}; +use vortex_mask::Mask; + +use crate::Canonical; +use crate::arrays::{BoolArray, PrimitiveArray}; +use crate::pipeline::bits::{BitVector, BitView, BitViewMut}; +use crate::pipeline::operators::Operator; +use crate::pipeline::query::QueryPlan; +use crate::pipeline::types::Element; +use crate::pipeline::vec::Vector; +use crate::pipeline::view::ViewMut; +use crate::pipeline::{Kernel, KernelContext, N, N_WORDS}; +use crate::validity::Validity; + +/// Export canonical data from a pipeline kernel with the given mask. 
+pub fn export_canonical_pipeline( + dtype: &DType, + len: usize, + pipeline: &mut dyn Kernel, + mask: &Mask, +) -> VortexResult { + match dtype { + DType::Bool(Nullability::NonNullable) => { + export_bool_nonnull_masked(mask, pipeline).map(Canonical::Bool) + } + DType::Primitive(ptype, Nullability::NonNullable) => { + if mask.all_true() { + match_each_native_ptype!(ptype, |T| { + export_primitive_nonnull::(len, pipeline).map(Canonical::Primitive) + }) + } else { + match_each_native_ptype!(ptype, |T| { + export_primitive_nonnull_masked::(mask, pipeline).map(Canonical::Primitive) + }) + } + } + _ => vortex_bail!("Expected a primitive array, got: {}", dtype), + } +} + +/// Export canonical data from an operator expression with a starting offset and mask. +pub fn export_canonical_pipeline_expr_offset( + dtype: &DType, + offset: usize, + len: usize, + expression: &dyn Operator, + mask: &Mask, +) -> VortexResult { + let plan = QueryPlan::new(expression)?; + let mut pipeline = plan.executable_plan()?; + pipeline.seek(offset)?; + export_canonical_pipeline(dtype, len, &mut pipeline, mask) +} + +/// Export canonical data from an operator expression with the given mask. 
+pub fn export_canonical_pipeline_expr( + dtype: &DType, + len: usize, + expression: &dyn Operator, + mask: &Mask, +) -> VortexResult { + let plan = QueryPlan::new(expression)?; + let mut pipeline = plan.executable_plan()?; + export_canonical_pipeline(dtype, len, &mut pipeline, mask) +} + +fn export_primitive_nonnull( + len: usize, + pipeline: &mut dyn Kernel, +) -> VortexResult { + let capacity = len.next_multiple_of(N) + N; + + let mut elements = BufferMut::::with_capacity(capacity); + unsafe { elements.set_len(capacity) }; + + let mut remaining = len; + while remaining >= N { + let mut elements_view = ViewMut::new(&mut elements[len - remaining..][..N], None); + let dummy_ctx = KernelContext::default(); + pipeline.step(&dummy_ctx, BitView::all_true(), &mut elements_view)?; + remaining -= N; + } + + if remaining > 0 { + let mut elements_view = ViewMut::new(&mut elements[len - remaining..][..N], None); + let mask = BitVector::true_until(remaining); + let dummy_ctx = KernelContext::default(); + pipeline.step(&dummy_ctx, mask.as_view(), &mut elements_view)?; + } + + unsafe { elements.set_len(len) }; + + Ok(PrimitiveArray::new( + elements.freeze(), + Validity::NonNullable, + )) +} + +fn export_primitive_nonnull_masked( + mask: &Mask, + pipeline: &mut dyn Kernel, +) -> VortexResult { + let len = mask.len(); + let capacity = mask.true_count().next_multiple_of(N) + N; + + let mut elements = BufferMut::::with_capacity(capacity); + unsafe { elements.set_len(capacity) }; + + let mask_buffer = mask.to_bit_buffer(); + let mut mask_iter = mask_buffer.chunks().iter(); + + let mut mask = [0usize; N_WORDS]; + let mut mask_view = BitViewMut::new(&mut mask); + + let mut offset = 0; + let mut remaining = len; + while remaining > 0 { + let mut elements_view = ViewMut::new(&mut elements[offset..][..N], None); + + mask_view.clear(); + mask_view.fill_with_words(&mut mask_iter); + + let dummy_ctx = KernelContext::default(); + pipeline.step(&dummy_ctx, mask_view.as_view(), &mut 
elements_view)?; + offset += mask_view.true_count(); + + remaining = remaining.saturating_sub(N); + } + + unsafe { elements.set_len(offset) }; + + Ok(PrimitiveArray::new( + elements.freeze(), + Validity::NonNullable, + )) +} + +fn export_bool_nonnull_masked(mask: &Mask, pipeline: &mut dyn Kernel) -> VortexResult { + let len = mask.len(); + let true_count = mask.true_count(); + + let mut elements_buffer = Vector::new::(); + let mut elements_buffer_mut = elements_buffer.as_view_mut(); + + let mask_buffer = mask.to_bit_buffer(); + let mut mask_iter = mask_buffer.chunks().iter(); + + let mut mask = [0usize; N_WORDS]; + let mut mask_view = BitViewMut::new(&mut mask); + + // Fast path: collect all bools first, then use collect_bool for optimal packing + let mut all_bools: Vec = Vec::with_capacity(true_count); + let mut remaining = len; + + while remaining > 0 { + mask_view.clear(); + mask_view.fill_with_words(&mut mask_iter); + + // Handle partial iteration on the last chunk + let current_len = remaining.min(N); + if current_len < N { + mask_view.intersect_prefix(current_len); + } + + let dummy_ctx = KernelContext::default(); + pipeline.step(&dummy_ctx, mask_view.as_view(), &mut elements_buffer_mut)?; + + // Collect bools efficiently with unsafe for better performance + let bool_slice = elements_buffer_mut.as_slice::(); + let count = mask_view.true_count(); + + // Unsafe version to avoid bounds checking in hot path + let old_len = all_bools.len(); + unsafe { + all_bools.set_len(old_len + count); + std::ptr::copy_nonoverlapping( + bool_slice.as_ptr(), + all_bools.as_mut_ptr().add(old_len), + count, + ); + } + + remaining = remaining.saturating_sub(N); + } + + // Use collect_bool for optimal bit packing - avoid closure overhead + let values = BitBuffer::collect_bool(all_bools.len(), |idx| unsafe { + *all_bools.get_unchecked(idx) + }); + + Ok(BoolArray::new(values, Validity::NonNullable)) +} diff --git a/vortex-array/src/pipeline/operator/mod.rs 
b/vortex-array/src/pipeline/operator/mod.rs index 4127f8707b0..c26ad473273 100644 --- a/vortex-array/src/pipeline/operator/mod.rs +++ b/vortex-array/src/pipeline/operator/mod.rs @@ -19,7 +19,7 @@ use async_trait::async_trait; use futures::future::try_join_all; use itertools::Itertools; use termtree::Tree; -use vortex_buffer::{Alignment, BufferMut, ByteBuffer}; +use vortex_buffer::{Alignment, BitBuffer, BufferMut, ByteBuffer}; use vortex_dtype::{DType, NativePType, Nullability, match_each_native_ptype}; use vortex_error::{VortexExpect, VortexResult, vortex_bail}; use vortex_mask::AllOr; @@ -502,7 +502,7 @@ impl BatchExecution for PipelineExecution { try_join_all(self.children.into_iter().map(|exec| exec.execute())).await?; // Extract the length and possibly row selection mask. - let mut mask: Option = None; + let mut mask: Option = None; let len = match &self.row_selection { RowSelectionSource::BatchInputs(batch_ids) => { match batch_ids @@ -528,7 +528,7 @@ impl BatchExecution for PipelineExecution { .as_ref() .try_to_mask_fill_null_false()?; - match selection_mask.boolean_buffer() { + match selection_mask.bit_buffer() { AllOr::All => selection_mask.len(), AllOr::None => { // TODO(ngates): we should short-circuit execution here. @@ -583,7 +583,7 @@ impl BatchExecution for PipelineExecution { } Some(mask) => { // Step the pipeline over each chunk of the mask. 
- let mut mask_iter = mask.bit_chunks().iter_padded(); + let mut mask_iter = mask.chunks().iter(); let mut selection_words = [0usize; N_WORDS]; let mut selection_view_mut = BitViewMut::new(&mut selection_words); diff --git a/vortex-array/src/test_harness.rs b/vortex-array/src/test_harness.rs index 94afcf300fb..5ec5fb093c1 100644 --- a/vortex-array/src/test_harness.rs +++ b/vortex-array/src/test_harness.rs @@ -30,7 +30,7 @@ where /// Outputs the indices of the true values in a BoolArray pub fn to_int_indices(indices_bits: BoolArray) -> VortexResult> { - let buffer = indices_bits.boolean_buffer(); + let buffer = indices_bits.bit_buffer(); let mask = indices_bits.validity_mask(); Ok(buffer .iter() diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index cd9e0077da4..350cf62d36b 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -6,7 +6,7 @@ use std::fmt::Debug; use std::ops::{BitAnd, Not, Range}; -use arrow_buffer::{BooleanBuffer, NullBuffer}; +use vortex_buffer::BitBuffer; use vortex_dtype::{DType, Nullability}; use vortex_error::{VortexExpect as _, VortexResult, vortex_err, vortex_panic}; use vortex_mask::{AllOr, Mask, MaskValues}; @@ -131,7 +131,7 @@ impl Validity { pub fn take(&self, indices: &dyn Array) -> VortexResult { match self { - Self::NonNullable => match indices.validity_mask().boolean_buffer() { + Self::NonNullable => match indices.validity_mask().bit_buffer() { AllOr::All => { if indices.dtype().is_nullable() { Ok(Self::AllValid) @@ -142,7 +142,7 @@ impl Validity { AllOr::None => Ok(Self::AllInvalid), AllOr::Some(buf) => Ok(Validity::from(buf.clone())), }, - Self::AllValid => match indices.validity_mask().boolean_buffer() { + Self::AllValid => match indices.validity_mask().bit_buffer() { AllOr::All => Ok(Self::AllValid), AllOr::None => Ok(Self::AllInvalid), AllOr::Some(buf) => Ok(Validity::from(buf.clone())), @@ -176,7 +176,7 @@ impl Validity { /// The result is always nullable. 
The result has the same length as self. #[inline] pub fn mask(&self, mask: &Mask) -> Self { - match mask.boolean_buffer() { + match mask.bit_buffer() { AllOr::All => Validity::AllInvalid, AllOr::None => self.clone(), AllOr::Some(make_invalid) => match self { @@ -187,7 +187,7 @@ impl Validity { Validity::Array(is_valid) => { let is_valid = is_valid.to_bool(); let keep_valid = make_invalid.not(); - Validity::from(is_valid.boolean_buffer().bitand(&keep_valid)) + Validity::from(is_valid.bit_buffer() & &keep_valid) } }, } @@ -233,8 +233,8 @@ impl Validity { let lhs = lhs.to_bool(); let rhs = rhs.to_bool(); - let lhs = lhs.boolean_buffer(); - let rhs = rhs.boolean_buffer(); + let lhs = lhs.bit_buffer(); + let rhs = rhs.bit_buffer(); Validity::from(lhs.bitand(rhs)) } @@ -268,16 +268,16 @@ impl Validity { }; let source = match self { - Validity::NonNullable => BoolArray::from(BooleanBuffer::new_set(len)), - Validity::AllValid => BoolArray::from(BooleanBuffer::new_set(len)), - Validity::AllInvalid => BoolArray::from(BooleanBuffer::new_unset(len)), + Validity::NonNullable => BoolArray::from(BitBuffer::new_set(len)), + Validity::AllValid => BoolArray::from(BitBuffer::new_set(len)), + Validity::AllInvalid => BoolArray::from(BitBuffer::new_unset(len)), Validity::Array(a) => a.to_bool(), }; let patch_values = match patches { - Validity::NonNullable => BoolArray::from(BooleanBuffer::new_set(indices.len())), - Validity::AllValid => BoolArray::from(BooleanBuffer::new_set(indices.len())), - Validity::AllInvalid => BoolArray::from(BooleanBuffer::new_unset(indices.len())), + Validity::NonNullable => BoolArray::from(BitBuffer::new_set(indices.len())), + Validity::AllValid => BoolArray::from(BitBuffer::new_set(indices.len())), + Validity::AllInvalid => BoolArray::from(BitBuffer::new_unset(indices.len())), Validity::Array(a) => a.to_bool(), }; @@ -343,7 +343,7 @@ impl Validity { /// Create Validity from boolean array with given nullability of the array. 
/// /// Note: You want to pass the nullability of parent array and not the nullability of the validity array itself - /// as that is always non-nullable + /// as that is always nonnullable #[inline] fn from_array(value: ArrayRef, nullability: Nullability) -> Self { if !matches!(value.dtype(), DType::Bool(Nullability::NonNullable)) { @@ -384,19 +384,20 @@ impl PartialEq for Validity { (Self::Array(a), Self::Array(b)) => { let a = a.to_bool(); let b = b.to_bool(); - a.boolean_buffer() == b.boolean_buffer() + a.bit_buffer() == b.bit_buffer() } _ => false, } } } -impl From for Validity { +impl From for Validity { #[inline] - fn from(value: BooleanBuffer) -> Self { - if value.count_set_bits() == value.len() { + fn from(value: BitBuffer) -> Self { + let true_count = value.true_count(); + if true_count == value.len() { Self::AllValid - } else if value.count_set_bits() == 0 { + } else if true_count == 0 { Self::AllInvalid } else { Self::Array(BoolArray::from(value).into_array()) @@ -404,13 +405,6 @@ impl From for Validity { } } -impl From for Validity { - #[inline] - fn from(value: NullBuffer) -> Self { - value.into_inner().into() - } -} - impl FromIterator for Validity { #[inline] fn from_iter>(iter: T) -> Self { @@ -421,7 +415,7 @@ impl FromIterator for Validity { impl FromIterator for Validity { #[inline] fn from_iter>(iter: T) -> Self { - Validity::from(BooleanBuffer::from_iter(iter)) + Validity::from(BitBuffer::from_iter(iter)) } } @@ -436,17 +430,13 @@ impl From for Validity { } impl Validity { - pub fn from_null_buffer(buffer: Option, nullability: Nullability) -> Self { - match buffer { - // If there are no nulls, then we infer from nullability - None => nullability.into(), - Some(nulls) => { - if nulls.null_count() == nulls.len() { - Validity::AllInvalid - } else { - Validity::Array(BoolArray::from(nulls.into_inner()).into_array()) - } - } + pub fn from_bit_buffer(buffer: BitBuffer, nullability: Nullability) -> Self { + if buffer.true_count() == buffer.len() { + 
nullability.into() + } else if buffer.true_count() == 0 { + Validity::AllInvalid + } else { + Validity::Array(BoolArray::from_bit_buffer(buffer, Validity::NonNullable).into_array()) } } @@ -480,8 +470,7 @@ impl IntoArray for Mask { impl IntoArray for &MaskValues { #[inline] fn into_array(self) -> ArrayRef { - BoolArray::from_bool_buffer(self.boolean_buffer().clone(), Validity::NonNullable) - .into_array() + BoolArray::from_bit_buffer(self.bit_buffer().clone(), Validity::NonNullable).into_array() } } @@ -544,12 +533,16 @@ mod tests { } #[rstest] - #[case(Validity::AllValid, PrimitiveArray::new(buffer![0, 1], Validity::from_iter(vec![true, false])).into_array(), Validity::from_iter(vec![true, false]))] + #[case(Validity::AllValid, PrimitiveArray::new(buffer![0, 1], Validity::from_iter(vec![true, false])).into_array(), Validity::from_iter(vec![true, false]) + )] #[case(Validity::AllValid, buffer![0, 1].into_array(), Validity::AllValid)] - #[case(Validity::AllValid, PrimitiveArray::new(buffer![0, 1], Validity::AllInvalid).into_array(), Validity::AllInvalid)] - #[case(Validity::NonNullable, PrimitiveArray::new(buffer![0, 1], Validity::from_iter(vec![true, false])).into_array(), Validity::from_iter(vec![true, false]))] + #[case(Validity::AllValid, PrimitiveArray::new(buffer![0, 1], Validity::AllInvalid).into_array(), Validity::AllInvalid + )] + #[case(Validity::NonNullable, PrimitiveArray::new(buffer![0, 1], Validity::from_iter(vec![true, false])).into_array(), Validity::from_iter(vec![true, false]) + )] #[case(Validity::NonNullable, buffer![0, 1].into_array(), Validity::NonNullable)] - #[case(Validity::NonNullable, PrimitiveArray::new(buffer![0, 1], Validity::AllInvalid).into_array(), Validity::AllInvalid)] + #[case(Validity::NonNullable, PrimitiveArray::new(buffer![0, 1], Validity::AllInvalid).into_array(), Validity::AllInvalid + )] fn validity_take( #[case] validity: Validity, #[case] indices: ArrayRef, diff --git a/vortex-btrblocks/Cargo.toml 
b/vortex-btrblocks/Cargo.toml index df33d3b0a3e..2f59ad2c273 100644 --- a/vortex-btrblocks/Cargo.toml +++ b/vortex-btrblocks/Cargo.toml @@ -14,7 +14,6 @@ rust-version = { workspace = true } version = { workspace = true } [dependencies] -arrow-buffer = { workspace = true } getrandom_v03 = { workspace = true } itertools = { workspace = true } log = { workspace = true } diff --git a/vortex-btrblocks/src/float/stats.rs b/vortex-btrblocks/src/float/stats.rs index fcd988adcf9..564499f45aa 100644 --- a/vortex-btrblocks/src/float/stats.rs +++ b/vortex-btrblocks/src/float/stats.rs @@ -154,7 +154,7 @@ where let mut prev = buff[head_idx]; let first_valid_buff = buff.slice(head_idx..array.len()); - match validity.boolean_buffer() { + match validity.bit_buffer() { AllOr::All => { for value in first_valid_buff { if count_distinct_values { @@ -171,7 +171,7 @@ where AllOr::Some(v) => { for (&value, valid) in first_valid_buff .iter() - .zip_eq(v.slice(head_idx, array.len() - head_idx).iter()) + .zip_eq(v.slice(head_idx..array.len()).iter()) { if valid { if count_distinct_values { diff --git a/vortex-btrblocks/src/integer/stats.rs b/vortex-btrblocks/src/integer/stats.rs index 0d646006146..06ce62956b7 100644 --- a/vortex-btrblocks/src/integer/stats.rs +++ b/vortex-btrblocks/src/integer/stats.rs @@ -3,12 +3,12 @@ use std::hash::Hash; -use arrow_buffer::BooleanBuffer; use num_traits::PrimInt; use rustc_hash::FxBuildHasher; use vortex_array::ToCanonical; use vortex_array::arrays::{NativeValue, PrimitiveArray, PrimitiveVTable}; use vortex_array::stats::Stat; +use vortex_buffer::BitBuffer; use vortex_dtype::{IntegerPType, match_each_integer_ptype}; use vortex_error::{VortexError, VortexExpect, VortexUnwrap}; use vortex_mask::AllOr; @@ -231,7 +231,7 @@ where let sliced = buffer.slice(head_idx..array.len()); let mut chunks = sliced.as_slice().chunks_exact(64); - match validity.boolean_buffer() { + match validity.bit_buffer() { AllOr::All => { for chunk in &mut chunks { inner_loop_nonnull( 
@@ -244,19 +244,19 @@ where inner_loop_naive( remainder, count_distinct_values, - &BooleanBuffer::new_set(remainder.len()), + &BitBuffer::new_set(remainder.len()), &mut loop_state, ); } AllOr::None => unreachable!("All invalid arrays have been handled before"), AllOr::Some(v) => { - let mask = v.slice(head_idx, array.len() - head_idx); + let mask = v.slice(head_idx..(head_idx + array.len())); let mut offset = 0; for chunk in &mut chunks { - let validity = mask.slice(offset, 64); + let validity = mask.slice(offset..(offset + 64)); offset += 64; - match validity.count_set_bits() { + match validity.true_count() { // All nulls -> no stats to update 0 => continue, // Inner loop for when validity check can be elided @@ -279,7 +279,7 @@ where inner_loop_naive( remainder, count_distinct_values, - &mask.slice(offset, remainder.len()), + &mask.slice(offset..(offset + remainder.len())), &mut loop_state, ); } @@ -368,7 +368,7 @@ fn inner_loop_nonnull( fn inner_loop_nullable( values: &[T; 64], count_distinct_values: bool, - is_valid: &BooleanBuffer, + is_valid: &BitBuffer, state: &mut LoopState, ) where NativeValue: Eq + Hash, @@ -391,7 +391,7 @@ fn inner_loop_nullable( fn inner_loop_naive( values: &[T], count_distinct_values: bool, - is_valid: &BooleanBuffer, + is_valid: &BitBuffer, state: &mut LoopState, ) where NativeValue: Eq + Hash, @@ -414,10 +414,9 @@ fn inner_loop_naive( mod tests { use std::iter; - use arrow_buffer::BooleanBuffer; use vortex_array::arrays::PrimitiveArray; use vortex_array::validity::Validity; - use vortex_buffer::{Buffer, buffer}; + use vortex_buffer::{BitBuffer, Buffer, buffer}; use crate::CompressorStats; use crate::integer::IntegerStats; @@ -434,7 +433,7 @@ mod tests { fn test_naive_count_distinct_values_nullable() { let array = PrimitiveArray::new( buffer![217u8, 0], - Validity::from(BooleanBuffer::from(vec![true, false])), + Validity::from(BitBuffer::from(vec![true, false])), ); let stats = typed_int_stats::(&array, true); 
assert_eq!(stats.distinct_values_count, 1); @@ -451,7 +450,7 @@ mod tests { fn test_count_distinct_values_nullable() { let array = PrimitiveArray::new( (0..128u8).collect::>(), - Validity::from(BooleanBuffer::from_iter( + Validity::from(BitBuffer::from_iter( iter::repeat_n(vec![true, false], 64).flatten(), )), ); diff --git a/vortex-btrblocks/src/rle.rs b/vortex-btrblocks/src/rle.rs index e948921650f..4d69c2f8c39 100644 --- a/vortex-btrblocks/src/rle.rs +++ b/vortex-btrblocks/src/rle.rs @@ -7,7 +7,7 @@ use std::hash::Hash; use vortex_array::arrays::PrimitiveArray; use vortex_array::{ArrayRef, IntoArray, ToCanonical}; use vortex_error::VortexResult; -use vortex_fastlanes::{DeltaArray, RLEArray, delta_compress}; +use vortex_fastlanes::RLEArray; use crate::integer::{IntCode, IntCompressor}; use crate::{Compressor, CompressorStats, Scheme, estimate_compression_ratio_with_sampling}; @@ -159,6 +159,8 @@ fn try_compress_delta( allowed_cascading: usize, excludes: &[IntCode], ) -> VortexResult { + use vortex_fastlanes::{DeltaArray, delta_compress}; + let (bases, deltas) = delta_compress(primitive_array)?; let compressed_bases = IntCompressor::compress(&bases, is_sample, allowed_cascading, excludes)?; let compressed_deltas = diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index ec6a60f10cc..7fd31a3d6bc 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -28,7 +28,7 @@ use crate::{BitBuffer, BufferMut, ByteBuffer, ByteBufferMut, buffer_mut}; /// let bools = bools.freeze(); /// ``` /// -/// See also: [`crate::BitBuffer`]. +/// See also: [`BitBuffer`]. 
pub struct BitBufferMut { buffer: ByteBufferMut, len: usize, diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index 4c5752d70d3..c85d2f76415 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -299,10 +299,10 @@ impl Buffer { let end_byte = end * size_of::(); if !begin_byte.is_multiple_of(*alignment) { - vortex_panic!("range start must be aligned to {:?}", alignment); + vortex_panic!("range start must be aligned to {alignment:?}"); } if !end_byte.is_multiple_of(*alignment) { - vortex_panic!("range end must be aligned to {:?}", alignment); + vortex_panic!("range end must be aligned to {alignment:?}"); } if !alignment.is_aligned_to(Alignment::of::()) { vortex_panic!("Slice alignment must at least align to type T") diff --git a/vortex-buffer/src/lib.rs b/vortex-buffer/src/lib.rs index 2bde672db93..a32dcb3628a 100644 --- a/vortex-buffer/src/lib.rs +++ b/vortex-buffer/src/lib.rs @@ -18,6 +18,7 @@ //! compile-time alignment of `A`. //! * `buffer!` and `buffer_mut!` macros with the same syntax as the builtin `vec!` macro for //! inline construction of buffers. +//! * `BitBuffer` and `BitBufferMut` provide packed bitsets that can be used to store boolean values. //! //! You can think of `BufferMut` as similar to a `Vec`, except that any operation that may //! cause a re-allocation, e.g. 
extend, will ensure the new allocation maintains the buffer's diff --git a/vortex-duckdb/src/e2e_test/vortex_scan_test.rs b/vortex-duckdb/src/e2e_test/vortex_scan_test.rs index 16cf4da7386..8c4ba2eb2a0 100644 --- a/vortex-duckdb/src/e2e_test/vortex_scan_test.rs +++ b/vortex-duckdb/src/e2e_test/vortex_scan_test.rs @@ -275,7 +275,7 @@ fn test_vortex_scan_booleans() { let runtime = tokio::runtime::Runtime::new().unwrap(); let file = runtime.block_on(async { let flags = vec![true, false, true, true, false]; - let flags_array = BoolArray::from_bool_buffer(flags.into(), Validity::NonNullable); + let flags_array = BoolArray::from_bit_buffer(flags.into(), Validity::NonNullable); write_single_column_vortex_file("flag", flags_array).await }); let true_count: i64 = scan_vortex_file_single_row::( @@ -290,7 +290,7 @@ fn test_vortex_scan_booleans() { fn test_vortex_multi_column() { let runtime = tokio::runtime::Runtime::new().unwrap(); let file = runtime.block_on(async { - let f1 = BoolArray::from_bool_buffer( + let f1 = BoolArray::from_bit_buffer( vec![true, false, true, true, false].into(), Validity::NonNullable, ) diff --git a/vortex-duckdb/src/exporter/bool.rs b/vortex-duckdb/src/exporter/bool.rs index 758230a6bb4..94a815257e4 100644 --- a/vortex-duckdb/src/exporter/bool.rs +++ b/vortex-duckdb/src/exporter/bool.rs @@ -41,8 +41,8 @@ impl ColumnExporter for BoolExporter { unsafe { vector.as_slice_mut(len) }.copy_from_slice( &self .array - .boolean_buffer() - .slice(offset, len) + .bit_buffer() + .slice(offset..(offset + len)) .iter() .collect_vec(), ); diff --git a/vortex-duckdb/src/exporter/list.rs b/vortex-duckdb/src/exporter/list.rs index 020eb6fde60..c90b81c4a99 100644 --- a/vortex-duckdb/src/exporter/list.rs +++ b/vortex-duckdb/src/exporter/list.rs @@ -91,6 +91,7 @@ mod tests { use vortex::IntoArray as _; use vortex::arrays::VarBinArray; use vortex::buffer::{Buffer, buffer}; + use vortex::dtype::{DType, Nullability}; use vortex::error::VortexUnwrap; use 
vortex::validity::Validity; @@ -155,12 +156,10 @@ mod tests { #[test] fn test_export_non_empty_list_of_strings() { let list = ListArray::try_new( - >::from_iter([ - Some("abc"), - Some("def"), - None, - Some("ghi"), - ]) + VarBinArray::from_iter( + [Some("abc"), Some("def"), None, Some("ghi")], + DType::Utf8(Nullability::Nullable), + ) .into_array(), buffer![0u8, 0, 3, 4, 4].into_array(), Validity::from_iter([true, true, false, true]), diff --git a/vortex-duckdb/src/exporter/mod.rs b/vortex-duckdb/src/exporter/mod.rs index 9000224bfe4..f4e43bbec33 100644 --- a/vortex-duckdb/src/exporter/mod.rs +++ b/vortex-duckdb/src/exporter/mod.rs @@ -213,13 +213,13 @@ impl Vector { true } Mask::Values(arr) => { - let true_count = arr.boolean_buffer().count_set_bits(); + let true_count = arr.bit_buffer().true_count(); if true_count == len { unsafe { self.set_all_true_validity(len) } } else if true_count == 0 { self.set_all_false_validity() } else { - let source = arr.boolean_buffer().inner().as_slice(); + let source = arr.bit_buffer().inner().as_slice(); copy_from_slice( unsafe { self.ensure_validity_slice(len) }, source, @@ -259,7 +259,7 @@ fn copy_from_slice(target: &mut [u64], source: &[u8], offset: usize, len: usize) #[cfg(test)] mod tests { - use arrow_buffer::buffer::BooleanBuffer; + use vortex::buffer::BitBuffer; use vortex::mask::Mask; use crate::cpp::DUCKDB_TYPE; @@ -300,9 +300,7 @@ mod tests { let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT); let mut vector = Vector::with_capacity(logical_type, 100); - let bits = vec![true; 10]; - let buffer = BooleanBuffer::from(bits.as_slice()); - let mask = Mask::from(buffer); + let mask = Mask::from(BitBuffer::from(vec![true; 10])); let all_null = unsafe { vector.set_validity(&mask, 0, 10) }; @@ -322,8 +320,7 @@ mod tests { const LEN: usize = 10; let bits = vec![false; LEN]; - let buffer = BooleanBuffer::from(bits.as_slice()); - let mask = Mask::from(buffer); + let mask = Mask::from(BitBuffer::from(bits)); let 
all_null = unsafe { vector.set_validity(&mask, 0, LEN) }; @@ -343,8 +340,7 @@ mod tests { let bits = vec![ true, false, true, true, false, false, true, true, false, true, ]; - let buffer = BooleanBuffer::from(bits.as_slice()); - let mask = Mask::from(buffer); + let mask = Mask::from(BitBuffer::from(bits.as_slice())); let all_null = unsafe { vector.set_validity(&mask, 0, 10) }; @@ -364,8 +360,7 @@ mod tests { let bits = vec![ false, false, true, true, false, true, false, true, true, false, true, true, false, ]; - let buffer = BooleanBuffer::from(bits.as_slice()); - let mask = Mask::from(buffer); + let mask = Mask::from(BitBuffer::from(bits.as_slice())); let all_null = unsafe { vector.set_validity(&mask, 2, 8) }; @@ -386,8 +381,7 @@ mod tests { true, false, true, true, false, false, true, true, false, true, true, true, false, true, false, ]; - let buffer = BooleanBuffer::from(bits.as_slice()); - let mask = Mask::from(buffer); + let mask = Mask::from(BitBuffer::from(bits.as_slice())); let all_null = unsafe { vector.set_validity(&mask, 3, 5) }; @@ -405,9 +399,7 @@ mod tests { let mut vector = Vector::with_capacity(logical_type, 100); let bits = (0..70).map(|i| i % 3 == 0).collect::>(); - - let buffer = BooleanBuffer::from(bits.as_slice()); - let mask = Mask::from(buffer); + let mask = Mask::from(BitBuffer::from(bits.as_slice())); let all_null = unsafe { vector.set_validity(&mask, 5, 60) }; diff --git a/vortex-duckdb/src/exporter/struct_.rs b/vortex-duckdb/src/exporter/struct_.rs index 9c5830781a8..4bfbcb453a0 100644 --- a/vortex-duckdb/src/exporter/struct_.rs +++ b/vortex-duckdb/src/exporter/struct_.rs @@ -53,10 +53,9 @@ impl ColumnExporter for StructExporter { mod tests { use std::ffi::CString; - use arrow_buffer::BooleanBuffer; use vortex::IntoArray; use vortex::arrays::{ConstantArray, PrimitiveArray, VarBinViewArray}; - use vortex::buffer::buffer; + use vortex::buffer::{BitBuffer, buffer}; use vortex::encodings::dict::DictArray; use vortex::error::{VortexExpect, 
VortexUnwrap}; use vortex::validity::Validity; @@ -128,7 +127,7 @@ mod tests { ["col1", "col2"].into(), vec![prim, strings], 10, - Validity::from(BooleanBuffer::from_iter([ + Validity::from(BitBuffer::from_iter([ true, true, true, false, false, false, true, true, true, true, ])), ) @@ -169,7 +168,7 @@ mod tests { ["col1", "col2"].into(), vec![prim, strings], 10, - Validity::from(BooleanBuffer::from_iter([ + Validity::from(BitBuffer::from_iter([ true, true, true, false, false, false, true, true, true, true, ])), ) diff --git a/vortex-expr/src/exprs/binary.rs b/vortex-expr/src/exprs/binary.rs index cfdb47871c6..5980374245c 100644 --- a/vortex-expr/src/exprs/binary.rs +++ b/vortex-expr/src/exprs/binary.rs @@ -311,8 +311,8 @@ impl AnalysisExpr for BinaryExpr { /// let result = eq(root(), lit(3)).evaluate(&Scope::new(xs.to_array())).unwrap(); /// /// assert_eq!( -/// result.to_bool().boolean_buffer(), -/// BoolArray::from_iter(vec![false, false, true]).boolean_buffer(), +/// result.to_bool().bit_buffer(), +/// BoolArray::from_iter(vec![false, false, true]).bit_buffer(), /// ); /// ``` pub fn eq(lhs: ExprRef, rhs: ExprRef) -> ExprRef { @@ -333,8 +333,8 @@ pub fn eq(lhs: ExprRef, rhs: ExprRef) -> ExprRef { /// let result = not_eq(root(), lit(3)).evaluate(&Scope::new(xs.to_array())).unwrap(); /// /// assert_eq!( -/// result.to_bool().boolean_buffer(), -/// BoolArray::from_iter(vec![true, true, false]).boolean_buffer(), +/// result.to_bool().bit_buffer(), +/// BoolArray::from_iter(vec![true, true, false]).bit_buffer(), /// ); /// ``` pub fn not_eq(lhs: ExprRef, rhs: ExprRef) -> ExprRef { @@ -355,8 +355,8 @@ pub fn not_eq(lhs: ExprRef, rhs: ExprRef) -> ExprRef { /// let result = gt_eq(root(), lit(3)).evaluate(&Scope::new(xs.to_array())).unwrap(); /// /// assert_eq!( -/// result.to_bool().boolean_buffer(), -/// BoolArray::from_iter(vec![false, false, true]).boolean_buffer(), +/// result.to_bool().bit_buffer(), +/// BoolArray::from_iter(vec![false, false, true]).bit_buffer(), 
/// ); /// ``` pub fn gt_eq(lhs: ExprRef, rhs: ExprRef) -> ExprRef { @@ -377,8 +377,8 @@ pub fn gt_eq(lhs: ExprRef, rhs: ExprRef) -> ExprRef { /// let result = gt(root(), lit(2)).evaluate(&Scope::new(xs.to_array())).unwrap(); /// /// assert_eq!( -/// result.to_bool().boolean_buffer(), -/// BoolArray::from_iter(vec![false, false, true]).boolean_buffer(), +/// result.to_bool().bit_buffer(), +/// BoolArray::from_iter(vec![false, false, true]).bit_buffer(), /// ); /// ``` pub fn gt(lhs: ExprRef, rhs: ExprRef) -> ExprRef { @@ -399,8 +399,8 @@ pub fn gt(lhs: ExprRef, rhs: ExprRef) -> ExprRef { /// let result = lt_eq(root(), lit(2)).evaluate(&Scope::new(xs.to_array())).unwrap(); /// /// assert_eq!( -/// result.to_bool().boolean_buffer(), -/// BoolArray::from_iter(vec![true, true, false]).boolean_buffer(), +/// result.to_bool().bit_buffer(), +/// BoolArray::from_iter(vec![true, true, false]).bit_buffer(), /// ); /// ``` pub fn lt_eq(lhs: ExprRef, rhs: ExprRef) -> ExprRef { @@ -421,8 +421,8 @@ pub fn lt_eq(lhs: ExprRef, rhs: ExprRef) -> ExprRef { /// let result = lt(root(), lit(3)).evaluate(&Scope::new(xs.to_array())).unwrap(); /// /// assert_eq!( -/// result.to_bool().boolean_buffer(), -/// BoolArray::from_iter(vec![true, true, false]).boolean_buffer(), +/// result.to_bool().bit_buffer(), +/// BoolArray::from_iter(vec![true, true, false]).bit_buffer(), /// ); /// ``` pub fn lt(lhs: ExprRef, rhs: ExprRef) -> ExprRef { @@ -441,8 +441,8 @@ pub fn lt(lhs: ExprRef, rhs: ExprRef) -> ExprRef { /// let result = or(root(), lit(false)).evaluate(&Scope::new(xs.to_array())).unwrap(); /// /// assert_eq!( -/// result.to_bool().boolean_buffer(), -/// BoolArray::from_iter(vec![true, false, true]).boolean_buffer(), +/// result.to_bool().bit_buffer(), +/// BoolArray::from_iter(vec![true, false, true]).bit_buffer(), /// ); /// ``` pub fn or(lhs: ExprRef, rhs: ExprRef) -> ExprRef { @@ -473,8 +473,8 @@ where /// let result = and(root(), lit(true)).evaluate(&Scope::new(xs.to_array())).unwrap(); 
/// /// assert_eq!( -/// result.to_bool().boolean_buffer(), -/// BoolArray::from_iter(vec![true, false, true]).boolean_buffer(), +/// result.to_bool().bit_buffer(), +/// BoolArray::from_iter(vec![true, false, true]).bit_buffer(), /// ); /// ``` pub fn and(lhs: ExprRef, rhs: ExprRef) -> ExprRef { diff --git a/vortex-expr/src/exprs/is_null.rs b/vortex-expr/src/exprs/is_null.rs index c8aae8e146d..96c130e4701 100644 --- a/vortex-expr/src/exprs/is_null.rs +++ b/vortex-expr/src/exprs/is_null.rs @@ -69,7 +69,7 @@ impl VTable for IsNullVTable { match array.validity_mask() { Mask::AllTrue(len) => Ok(ConstantArray::new(false, len).into_array()), Mask::AllFalse(len) => Ok(ConstantArray::new(true, len).into_array()), - Mask::Values(mask) => Ok(BoolArray::from(mask.boolean_buffer().not()).into_array()), + Mask::Values(mask) => Ok(BoolArray::from(mask.bit_buffer().not()).into_array()), } } diff --git a/vortex-expr/src/exprs/like.rs b/vortex-expr/src/exprs/like.rs index 9e92e406e47..3a2f0aac8c1 100644 --- a/vortex-expr/src/exprs/like.rs +++ b/vortex-expr/src/exprs/like.rs @@ -181,7 +181,7 @@ mod tests { .evaluate(&Scope::new(bools.to_array())) .unwrap() .to_bool() - .boolean_buffer() + .bit_buffer() .iter() .collect::>(), vec![true, false, true, true, false, false] diff --git a/vortex-expr/src/exprs/list_contains.rs b/vortex-expr/src/exprs/list_contains.rs index a976234810c..99dcba22012 100644 --- a/vortex-expr/src/exprs/list_contains.rs +++ b/vortex-expr/src/exprs/list_contains.rs @@ -172,11 +172,11 @@ impl AnalysisExpr for ListContainsExpr { #[cfg(test)] mod tests { - use vortex_array::arrays::{BoolArray, BooleanBuffer, ListArray}; + use vortex_array::arrays::{BoolArray, ListArray, PrimitiveArray}; use vortex_array::stats::Stat; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayRef, IntoArray}; - use vortex_buffer::buffer; + use vortex_buffer::BitBuffer; use vortex_dtype::PType::I32; use vortex_dtype::{DType, Field, FieldPath, FieldPathSet, Nullability, 
StructFields}; use vortex_scalar::Scalar; @@ -188,8 +188,8 @@ mod tests { fn test_array() -> ArrayRef { ListArray::try_new( - buffer![1, 1, 2, 2, 2, 2, 2, 3, 3, 3].into_array(), - buffer![0, 5, 10].into_array(), + PrimitiveArray::from_iter(vec![1, 1, 2, 2, 2, 2, 2, 3, 3, 3]).into_array(), + PrimitiveArray::from_iter(vec![0, 5, 10]).into_array(), Validity::AllValid, ) .unwrap() @@ -241,8 +241,8 @@ mod tests { #[test] pub fn test_empty() { let arr = ListArray::try_new( - buffer![1, 1, 2, 2, 2].into_array(), - buffer![0, 5, 5].into_array(), + PrimitiveArray::from_iter(vec![1, 1, 2, 2, 2]).into_array(), + PrimitiveArray::from_iter(vec![0, 5, 5]).into_array(), Validity::AllValid, ) .unwrap() @@ -261,9 +261,9 @@ mod tests { #[test] pub fn test_nullable() { let arr = ListArray::try_new( - buffer![1, 1, 2, 2, 2].into_array(), - buffer![0, 5, 5].into_array(), - Validity::Array(BoolArray::from(BooleanBuffer::from(vec![true, false])).into_array()), + PrimitiveArray::from_iter(vec![1, 1, 2, 2, 2]).into_array(), + PrimitiveArray::from_iter(vec![0, 5, 5]).into_array(), + Validity::Array(BoolArray::from(BitBuffer::from(vec![true, false])).into_array()), ) .unwrap() .into_array(); diff --git a/vortex-expr/src/exprs/not.rs b/vortex-expr/src/exprs/not.rs index 0fc42dbfd65..1d080ad5be3 100644 --- a/vortex-expr/src/exprs/not.rs +++ b/vortex-expr/src/exprs/not.rs @@ -138,7 +138,7 @@ mod tests { .evaluate(&Scope::new(bools.to_array())) .unwrap() .to_bool() - .boolean_buffer() + .bit_buffer() .iter() .collect::>(), vec![true, false, true, true, false, false] diff --git a/vortex-gpu/src/take.rs b/vortex-gpu/src/take.rs index 7b1886241d2..e6a842cf556 100644 --- a/vortex-gpu/src/take.rs +++ b/vortex-gpu/src/take.rs @@ -83,12 +83,12 @@ where let cu_mask = mask .map(|mask| { - let buffer = mask.to_boolean_buffer(); + let buffer = mask.to_bit_buffer(); assert_eq!(buffer.offset(), 0); assert_eq!(buffer.len() % 1024, 0); - assert!((buffer.values().as_ptr() as *const u32).is_aligned()); + 
assert!((buffer.inner().as_ptr() as *const u32).is_aligned()); // SAFETY: we've checked alignment and the layout is the same. - let slice: &[u32] = unsafe { transmute(buffer.values()) }; + let slice: &[u32] = unsafe { transmute(buffer.inner().as_slice()) }; stream .memcpy_stod(slice) .map_err(|e| vortex_err!("Failed to copy to device: {e}")) diff --git a/vortex-layout/src/layouts/dict/reader.rs b/vortex-layout/src/layouts/dict/reader.rs index 3e94da639a0..c01a27cb00d 100644 --- a/vortex-layout/src/layouts/dict/reader.rs +++ b/vortex-layout/src/layouts/dict/reader.rs @@ -319,7 +319,6 @@ mod tests { "", // Filter for empty string vec![false, false, false], // Expected: all false, no dict values match )] - #[test] fn shortpathes_filtering( #[case] data: Vec>, #[case] filter_value: &str, @@ -368,10 +367,7 @@ mod tests { .await .unwrap(); - assert_eq!( - mask.to_boolean_buffer().iter().collect::>(), - expected - ); + assert_eq!(mask.to_bit_buffer().iter().collect::>(), expected); }) } diff --git a/vortex-layout/src/layouts/flat/reader.rs b/vortex-layout/src/layouts/flat/reader.rs index f8ff38a9966..f17232a7280 100644 --- a/vortex-layout/src/layouts/flat/reader.rs +++ b/vortex-layout/src/layouts/flat/reader.rs @@ -258,15 +258,14 @@ async fn try_evaluate_using_operator( mod test { use std::sync::Arc; - use arrow_buffer::BooleanBuffer; use vortex_array::arrays::PrimitiveArray; use vortex_array::validity::Validity; use vortex_array::{ArrayContext, MaskFuture, ToCanonical}; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use vortex_expr::{gt, lit, root}; use vortex_io::runtime::single::block_on; - use crate::LayoutStrategy as _; + use crate::LayoutStrategy; use crate::layouts::flat::writer::FlatLayoutStrategy; use crate::segments::TestSegments; use crate::sequence::{SequenceId, SequentialArrayStreamExt}; @@ -342,8 +341,8 @@ mod test { .to_bool(); assert_eq!( - &BooleanBuffer::from_iter([false, false, false, true, true]), - result.boolean_buffer() + 
&BitBuffer::from_iter([false, false, false, true, true]), + result.bit_buffer() ); }) } diff --git a/vortex-layout/src/layouts/flat/writer.rs b/vortex-layout/src/layouts/flat/writer.rs index bdb5f04eca7..9c4a76a71f0 100644 --- a/vortex-layout/src/layouts/flat/writer.rs +++ b/vortex-layout/src/layouts/flat/writer.rs @@ -143,13 +143,12 @@ impl LayoutStrategy for FlatLayoutStrategy { mod tests { use std::sync::Arc; - use arrow_buffer::BooleanBufferBuilder; use vortex_array::arrays::{BoolArray, PrimitiveArray, StructArray}; use vortex_array::builders::{ArrayBuilder, VarBinViewBuilder}; use vortex_array::stats::{Precision, Stat, StatsProviderExt}; use vortex_array::validity::Validity; use vortex_array::{Array, ArrayContext, ArrayRef, IntoArray, MaskFuture, ToCanonical}; - use vortex_buffer::buffer; + use vortex_buffer::{BitBufferMut, buffer}; use vortex_dtype::{DType, FieldName, FieldNames, Nullability}; use vortex_error::VortexUnwrap; use vortex_expr::root; @@ -263,12 +262,12 @@ mod tests { #[test] fn struct_array_round_trip() { block_on(|handle| async { - let mut validity_builder = BooleanBufferBuilder::new(2); + let mut validity_builder = BitBufferMut::with_capacity(2); validity_builder.append(true); validity_builder.append(false); - let validity_boolean_buffer = validity_builder.finish(); + let validity_boolean_buffer = validity_builder.freeze(); let validity = Validity::Array( - BoolArray::from_bool_buffer(validity_boolean_buffer.clone(), Validity::NonNullable) + BoolArray::from_bit_buffer(validity_boolean_buffer.clone(), Validity::NonNullable) .into_array(), ); let array = StructArray::try_new( @@ -316,7 +315,7 @@ mod tests { .unwrap(); assert_eq!( - result.validity_mask().boolean_buffer(), + result.validity_mask().bit_buffer(), AllOr::Some(&validity_boolean_buffer) ); assert_eq!( diff --git a/vortex-layout/src/layouts/row_idx/mod.rs b/vortex-layout/src/layouts/row_idx/mod.rs index 927904cb766..118a4c2c5d4 100644 --- a/vortex-layout/src/layouts/row_idx/mod.rs +++ 
b/vortex-layout/src/layouts/row_idx/mod.rs @@ -261,10 +261,9 @@ fn row_idx_array_future( mod tests { use std::sync::Arc; - use arrow_buffer::BooleanBuffer; use itertools::Itertools; use vortex_array::{ArrayContext, IntoArray as _, MaskFuture, ToCanonical}; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use vortex_expr::{eq, gt, lit, or, root}; use vortex_io::runtime::single::block_on; @@ -306,8 +305,8 @@ mod tests { .to_bool(); assert_eq!( - &BooleanBuffer::from_iter([false, false, true, false, false]), - result.boolean_buffer() + &BitBuffer::from_iter([false, false, true, false, false]), + result.bit_buffer() ); }) } @@ -344,8 +343,8 @@ mod tests { .to_bool(); assert_eq!( - &BooleanBuffer::from_iter([false, false, false, false, true]), - result.boolean_buffer() + &BitBuffer::from_iter([false, false, false, false, true]), + result.bit_buffer() ); }) } @@ -387,7 +386,7 @@ mod tests { assert_eq!( vec![true, false, true, false, true], - result.boolean_buffer().iter().collect_vec() + result.bit_buffer().iter().collect_vec() ); }) } diff --git a/vortex-layout/src/layouts/struct_/reader.rs b/vortex-layout/src/layouts/struct_/reader.rs index 90f3a3e2e81..8dfbc7a1134 100644 --- a/vortex-layout/src/layouts/struct_/reader.rs +++ b/vortex-layout/src/layouts/struct_/reader.rs @@ -322,7 +322,7 @@ mod tests { .unwrap(); assert_eq!( vec![true, true, true], - result.to_boolean_buffer().iter().collect_vec() + result.to_bit_buffer().iter().collect_vec() ); } @@ -340,7 +340,7 @@ mod tests { .unwrap(); assert_eq!( vec![true, false, false], - result.to_bool().boolean_buffer().iter().collect::>() + result.to_bool().bit_buffer().iter().collect::>() ); } @@ -365,7 +365,7 @@ mod tests { assert_eq!( vec![true, false], - result.to_bool().boolean_buffer().iter().collect::>() + result.to_bool().bit_buffer().iter().collect::>() ); } diff --git a/vortex-layout/src/layouts/zoned/reader.rs b/vortex-layout/src/layouts/zoned/reader.rs index db7eb206102..7d9ca283e63 100644 --- 
a/vortex-layout/src/layouts/zoned/reader.rs +++ b/vortex-layout/src/layouts/zoned/reader.rs @@ -5,13 +5,13 @@ use std::collections::BTreeSet; use std::ops::{BitAnd, Range}; use std::sync::{Arc, OnceLock}; -use arrow_buffer::BooleanBufferBuilder; use futures::future::{BoxFuture, Shared}; use futures::{FutureExt, TryFutureExt}; use itertools::Itertools; use parking_lot::RwLock; use vortex_array::stats::Precision; use vortex_array::{ArrayRef, MaskFuture, ToCanonical}; +use vortex_buffer::BitBufferMut; use vortex_dtype::{DType, FieldMask, FieldPath, FieldPathSet}; use vortex_error::{SharedVortexResult, VortexError, VortexExpect, VortexResult}; use vortex_expr::dynamic::DynamicExprUpdates; @@ -238,12 +238,12 @@ impl LayoutReader for ZonedReader { let pruning_mask = pruning_mask_future.clone().await?.mask()?; - let mut builder = BooleanBufferBuilder::new(mask.len()); + let mut builder = BitBufferMut::with_capacity(mask.len()); for (zone_idx, &zone_length) in zone_range.clone().zip_eq(&zone_lengths) { - builder.append_n(zone_length, !pruning_mask.value(usize::try_from(zone_idx)?)); + builder.append_n(!pruning_mask.value(usize::try_from(zone_idx)?), zone_length); } - let stats_mask = Mask::from(builder.finish()); + let stats_mask = Mask::from(builder.freeze()); assert_eq!(stats_mask.len(), mask.len(), "Mask length mismatch"); // Intersect the masks. 
@@ -431,7 +431,7 @@ mod test { .unwrap() .await .unwrap() - .to_boolean_buffer() + .to_bit_buffer() .iter() .collect::>(); diff --git a/vortex-layout/src/layouts/zoned/zone_map.rs b/vortex-layout/src/layouts/zoned/zone_map.rs index 8946f8733b0..9305f2fad5a 100644 --- a/vortex-layout/src/layouts/zoned/zone_map.rs +++ b/vortex-layout/src/layouts/zoned/zone_map.rs @@ -243,7 +243,6 @@ impl StatsAccumulator { mod tests { use std::sync::Arc; - use arrow_buffer::BooleanBuffer; use itertools::Itertools; use rstest::rstest; use vortex_array::arrays::{BoolArray, PrimitiveArray, StructArray}; @@ -251,7 +250,7 @@ mod tests { use vortex_array::stats::Stat; use vortex_array::validity::Validity; use vortex_array::{IntoArray, ToCanonical}; - use vortex_buffer::buffer; + use vortex_buffer::{BitBuffer, buffer}; use vortex_dtype::{DType, FieldPath, FieldPathSet, Nullability, PType}; use vortex_error::{VortexExpect, VortexUnwrap}; use vortex_expr::pruning::checked_pruning_expr; @@ -285,12 +284,12 @@ mod tests { ] ); assert_eq!( - stats_table.array.fields()[1].to_bool().boolean_buffer(), - &BooleanBuffer::from(vec![false, true]) + stats_table.array.fields()[1].to_bool().bit_buffer(), + &BitBuffer::from(vec![false, true]) ); assert_eq!( - stats_table.array.fields()[3].to_bool().boolean_buffer(), - &BooleanBuffer::from(vec![true, false]) + stats_table.array.fields()[3].to_bool().bit_buffer(), + &BitBuffer::from(vec![true, false]) ); } @@ -311,12 +310,12 @@ mod tests { ] ); assert_eq!( - stats_table.array.fields()[1].to_bool().boolean_buffer(), - &BooleanBuffer::from(vec![false]) + stats_table.array.fields()[1].to_bool().bit_buffer(), + &BitBuffer::from(vec![false]) ); assert_eq!( - stats_table.array.fields()[3].to_bool().boolean_buffer(), - &BooleanBuffer::from(vec![false]) + stats_table.array.fields()[3].to_bool().bit_buffer(), + &BitBuffer::from(vec![false]) ); } @@ -370,7 +369,7 @@ mod tests { let (pruning_expr, _) = checked_pruning_expr(&expr, &stats).unwrap(); let mask = 
zone_map.prune(&pruning_expr).unwrap(); assert_eq!( - mask.to_boolean_buffer().into_iter().collect_vec(), + mask.to_bit_buffer().into_iter().collect_vec(), vec![true, false, false] ); @@ -380,7 +379,7 @@ mod tests { let (pruning_expr, _) = checked_pruning_expr(&expr, &stats).unwrap(); let mask = zone_map.prune(&pruning_expr).unwrap(); assert_eq!( - mask.to_boolean_buffer().into_iter().collect_vec(), + mask.to_bit_buffer().into_iter().collect_vec(), vec![true, false, false] ); @@ -390,7 +389,7 @@ mod tests { let (pruning_expr, _) = checked_pruning_expr(&expr, &stats).unwrap(); let mask = zone_map.prune(&pruning_expr).unwrap(); assert_eq!( - mask.to_boolean_buffer().into_iter().collect_vec(), + mask.to_bit_buffer().into_iter().collect_vec(), vec![false, true, true] ); } diff --git a/vortex-mask/Cargo.toml b/vortex-mask/Cargo.toml index 40894d71165..74e195b63cc 100644 --- a/vortex-mask/Cargo.toml +++ b/vortex-mask/Cargo.toml @@ -14,8 +14,8 @@ rust-version = { workspace = true } version = { workspace = true } [dependencies] -arrow-buffer = { workspace = true } itertools = { workspace = true } +vortex-buffer = { workspace = true } vortex-error = { workspace = true } [dev-dependencies] diff --git a/vortex-mask/src/bitops.rs b/vortex-mask/src/bitops.rs index 33db35f7d69..9e872d5bef7 100644 --- a/vortex-mask/src/bitops.rs +++ b/vortex-mask/src/bitops.rs @@ -15,7 +15,7 @@ impl BitAnd for &Mask { vortex_panic!("Masks must have the same length"); } - match (self.boolean_buffer(), rhs.boolean_buffer()) { + match (self.bit_buffer(), rhs.bit_buffer()) { (AllOr::All, _) => rhs.clone(), (_, AllOr::All) => self.clone(), (AllOr::None, _) => Mask::new_false(self.len()), @@ -33,7 +33,7 @@ impl BitOr for &Mask { vortex_panic!("Masks must have the same length"); } - match (self.boolean_buffer(), rhs.boolean_buffer()) { + match (self.bit_buffer(), rhs.bit_buffer()) { (AllOr::All, _) => Mask::new_true(self.len()), (_, AllOr::All) => Mask::new_true(self.len()), (AllOr::None, _) => 
rhs.clone(), @@ -55,7 +55,7 @@ impl Not for &Mask { type Output = Mask; fn not(self) -> Self::Output { - match self.boolean_buffer() { + match self.bit_buffer() { AllOr::All => Mask::new_false(self.len()), AllOr::None => Mask::new_true(self.len()), AllOr::Some(buffer) => Mask::from_buffer(!buffer), @@ -66,7 +66,7 @@ impl Not for &Mask { #[cfg(test)] #[allow(clippy::many_single_char_names)] mod tests { - use arrow_buffer::BooleanBuffer; + use vortex_buffer::BitBuffer; use super::*; @@ -99,8 +99,8 @@ mod tests { #[test] fn test_bitand_with_values() { - let mask1 = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); - let mask2 = Mask::from_buffer(BooleanBuffer::from_iter([true, true, false, false, true])); + let mask1 = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); + let mask2 = Mask::from_buffer(BitBuffer::from_iter([true, true, false, false, true])); let result = &mask1 & &mask2; assert_eq!(result.len(), 5); @@ -115,7 +115,7 @@ mod tests { #[test] fn test_bitand_all_true_with_values() { let all_true = Mask::new_true(5); - let values = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let values = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); // AllTrue & Values should return Values let result = &all_true & &values; @@ -131,7 +131,7 @@ mod tests { #[test] fn test_bitand_all_false_with_values() { let all_false = Mask::new_false(5); - let values = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let values = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); // AllFalse & Values should return AllFalse let result = &all_false & &values; @@ -141,7 +141,7 @@ mod tests { #[test] fn test_bitand_values_with_all_true() { - let values = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let values = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); let 
all_true = Mask::new_true(5); // Values & AllTrue should return Values @@ -157,7 +157,7 @@ mod tests { #[test] fn test_bitand_values_with_all_false() { - let values = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let values = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); let all_false = Mask::new_false(5); // Values & AllFalse should return AllFalse @@ -187,7 +187,7 @@ mod tests { #[test] fn test_not_all_true() { let all_true = Mask::new_true(5); - let result = !all_true; + let result = !&all_true; assert!(result.all_false()); assert_eq!(result.true_count(), 0); assert_eq!(result.len(), 5); @@ -196,7 +196,7 @@ mod tests { #[test] fn test_not_all_false() { let all_false = Mask::new_false(5); - let result = !all_false; + let result = !&all_false; assert!(result.all_true()); assert_eq!(result.true_count(), 5); assert_eq!(result.len(), 5); @@ -204,8 +204,8 @@ mod tests { #[test] fn test_not_values() { - let values = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); - let result = !values; + let values = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); + let result = !&values; assert_eq!(result.len(), 5); assert_eq!(result.true_count(), 2); @@ -219,21 +219,20 @@ mod tests { #[test] fn test_not_empty() { let empty_true = Mask::new_true(0); - let result = !empty_true; + let result = !&empty_true; assert_eq!(result.len(), 0); assert!(result.is_empty()); let empty_false = Mask::new_false(0); - let result = !empty_false; + let result = !&empty_false; assert_eq!(result.len(), 0); assert!(result.is_empty()); } #[test] fn test_double_not() { - let original = - Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); - let double_not = !(!original.clone()); + let original = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); + let double_not = !&(!&original); // Double negation should return the original 
assert_eq!(double_not.true_count(), original.true_count()); @@ -245,14 +244,14 @@ mod tests { #[test] fn test_demorgan_law() { // Test De Morgan's law: !(A & B) = !A | !B - let a = Mask::from_buffer(BooleanBuffer::from_iter([true, true, false, false])); - let b = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false])); + let a = Mask::from_buffer(BitBuffer::from_iter([true, true, false, false])); + let b = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false])); let and_result = &a & &b; - let not_and = !and_result; + let not_and = !&and_result; - let not_a = !a; - let not_b = !b; + let not_a = !&a; + let not_b = !&b; let or_result = ¬_a | ¬_b; assert_eq!(not_and.len(), 4); @@ -268,9 +267,9 @@ mod tests { #[test] fn test_bitand_associativity() { // Test (A & B) & C = A & (B & C) - let a = Mask::from_buffer(BooleanBuffer::from_iter([true, true, false, true])); - let b = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, true])); - let c = Mask::from_buffer(BooleanBuffer::from_iter([false, true, true, true])); + let a = Mask::from_buffer(BitBuffer::from_iter([true, true, false, true])); + let b = Mask::from_buffer(BitBuffer::from_iter([true, false, true, true])); + let c = Mask::from_buffer(BitBuffer::from_iter([false, true, true, true])); let left_assoc = &(&a & &b) & &c; let right_assoc = &a & &(&b & &c); @@ -284,22 +283,22 @@ mod tests { #[test] fn test_bitand_commutativity() { // Test A & B = B & A - let a = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false])); - let b = Mask::from_buffer(BooleanBuffer::from_iter([false, true, false, true])); + let a = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false])); + let b = Mask::from_buffer(BitBuffer::from_iter([false, true, false, true])); - let a_b = &a & &b; - let b_a = &b & &a; + let ab = &a & &b; + let ba = &b & &a; - assert_eq!(a_b.true_count(), b_a.true_count()); + assert_eq!(ab.true_count(), ba.true_count()); for i in 0..4 { - 
assert_eq!(a_b.value(i), b_a.value(i)); + assert_eq!(ab.value(i), ba.value(i)); } } #[test] fn test_bitand_identity() { // Test A & AllTrue = A - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false])); let all_true = Mask::new_true(4); let result = &mask & &all_true; @@ -312,7 +311,7 @@ mod tests { #[test] fn test_bitand_annihilator() { // Test A & AllFalse = AllFalse - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false])); let all_false = Mask::new_false(4); let result = &mask & &all_false; @@ -323,7 +322,7 @@ mod tests { #[test] fn test_bitand_idempotence() { // Test A & A = A - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); let result = &mask & &mask; assert_eq!(result.true_count(), mask.true_count()); @@ -335,9 +334,9 @@ mod tests { #[test] fn test_complex_expression() { // Test a more complex expression: (!(!A) | B) & !C - let a = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false])); - let b = Mask::from_buffer(BooleanBuffer::from_iter([true, true, false, false])); - let c = Mask::from_buffer(BooleanBuffer::from_iter([false, true, false, true])); + let a = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false])); + let b = Mask::from_buffer(BitBuffer::from_iter([true, true, false, false])); + let c = Mask::from_buffer(BitBuffer::from_iter([false, true, false, true])); let not_not_a = !(&(!&a)); let not_not_a_or_b = ¬_not_a | &b; @@ -354,8 +353,8 @@ mod tests { #[test] fn test_bitor() { // Test basic OR operations - let mask1 = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); - let mask2 = Mask::from_buffer(BooleanBuffer::from_iter([true, true, false, 
false, true])); + let mask1 = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); + let mask2 = Mask::from_buffer(BitBuffer::from_iter([true, true, false, false, true])); let result = &mask1 | &mask2; assert_eq!(result.len(), 5); diff --git a/vortex-mask/src/eq.rs b/vortex-mask/src/eq.rs index 9921753288c..5cf1ff640df 100644 --- a/vortex-mask/src/eq.rs +++ b/vortex-mask/src/eq.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use crate::{AllOr, Mask}; +use crate::Mask; impl PartialEq for Mask { #[inline] @@ -13,15 +13,8 @@ impl PartialEq for Mask { return false; } - match (self.boolean_buffer(), other.boolean_buffer()) { - (AllOr::All, AllOr::All) => true, - (AllOr::None, AllOr::None) => true, - (AllOr::Some(a), AllOr::Some(b)) => { - // Short-circuit if they are the same actual buffer with the same offset - (a.offset() == b.offset() && a.inner().as_ptr() == b.inner().as_ptr()) || a.eq(b) - } - _ => false, - } + // TODO(ngates): we could compare by indices if density is low enough + self.bit_buffer() == other.bit_buffer() } } @@ -29,19 +22,16 @@ impl Eq for Mask {} #[cfg(test)] mod test { - use arrow_buffer::BooleanBuffer; + use vortex_buffer::BitBuffer; use crate::Mask; #[test] fn filter_mask_eq() { - assert_eq!( - Mask::new_true(5), - Mask::from_buffer(BooleanBuffer::new_set(5)) - ); + assert_eq!(Mask::new_true(5), Mask::from_buffer(BitBuffer::new_set(5))); assert_eq!( Mask::new_false(5), - Mask::from_buffer(BooleanBuffer::new_unset(5)) + Mask::from_buffer(BitBuffer::new_unset(5)) ); assert_eq!( Mask::from_indices(5, vec![0, 2, 3]), @@ -49,7 +39,7 @@ mod test { ); assert_eq!( Mask::from_indices(5, vec![0, 2, 3]), - Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, true, false])) + Mask::from_buffer(BitBuffer::from_iter([true, false, true, true, false])) ); } @@ -62,15 +52,15 @@ mod test { #[test] fn test_mask_eq_different_true_counts() { - let mask1 = 
Mask::from_buffer(BooleanBuffer::from_iter([true, true, false])); - let mask2 = Mask::from_buffer(BooleanBuffer::from_iter([true, false, false])); + let mask1 = Mask::from_buffer(BitBuffer::from_iter([true, true, false])); + let mask2 = Mask::from_buffer(BitBuffer::from_iter([true, false, false])); assert_ne!(mask1, mask2); } #[test] fn test_mask_eq_same_count_different_positions() { - let mask1 = Mask::from_buffer(BooleanBuffer::from_iter([true, false, false])); - let mask2 = Mask::from_buffer(BooleanBuffer::from_iter([false, true, false])); + let mask1 = Mask::from_buffer(BitBuffer::from_iter([true, false, false])); + let mask2 = Mask::from_buffer(BitBuffer::from_iter([false, true, false])); assert_ne!(mask1, mask2); } @@ -90,23 +80,23 @@ mod test { assert_ne!(all_true1, all_false1); // Test Values == Values - let values1 = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true])); - let values2 = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true])); + let values1 = Mask::from_buffer(BitBuffer::from_iter([true, false, true])); + let values2 = Mask::from_buffer(BitBuffer::from_iter([true, false, true])); assert_eq!(values1, values2); // Test AllTrue != Values (even if all values are true) - let all_true_values = Mask::from_buffer(BooleanBuffer::new_set(5)); + let all_true_values = Mask::from_buffer(BitBuffer::new_set(5)); assert_eq!(all_true1, all_true_values); // They should be equal // Test AllFalse != Values (even if all values are false) - let all_false_values = Mask::from_buffer(BooleanBuffer::new_unset(5)); + let all_false_values = Mask::from_buffer(BitBuffer::new_unset(5)); assert_eq!(all_false1, all_false_values); // They should be equal } #[test] fn test_mask_eq_reflexive() { // Test that a mask equals itself - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); assert_eq!(mask, mask); } @@ -124,7 +114,7 @@ mod 
test { // Test that if a == b and b == c then a == c let mask1 = Mask::from_indices(5, vec![1, 3]); let mask2 = Mask::from_slices(5, vec![(1, 2), (3, 4)]); - let mask3 = Mask::from_buffer(BooleanBuffer::from_iter([false, true, false, true, false])); + let mask3 = Mask::from_buffer(BitBuffer::from_iter([false, true, false, true, false])); assert_eq!(mask1, mask2); assert_eq!(mask2, mask3); @@ -136,8 +126,8 @@ mod test { // All empty masks become AllFalse regardless of input type let empty1 = Mask::new_true(0); let empty2 = Mask::new_false(0); - let empty3 = Mask::from_buffer(BooleanBuffer::new_set(0)); - let empty4 = Mask::from_buffer(BooleanBuffer::new_unset(0)); + let empty3 = Mask::from_buffer(BitBuffer::new_set(0)); + let empty4 = Mask::from_buffer(BitBuffer::new_unset(0)); // All should be AllFalse(0) when created from buffer assert!(matches!(empty3, Mask::AllFalse(0))); @@ -153,7 +143,7 @@ mod test { // Test that masks with the same logical values but different internal representations are equal let indices = vec![0, 1, 2, 5, 6, 9]; let slices = vec![(0, 3), (5, 7), (9, 10)]; - let buffer = BooleanBuffer::from_iter([ + let buffer = BitBuffer::from_iter([ true, true, true, false, false, true, true, false, false, true, ]); diff --git a/vortex-mask/src/intersect_by_rank.rs b/vortex-mask/src/intersect_by_rank.rs index 2d6a25e969f..59235d4517a 100644 --- a/vortex-mask/src/intersect_by_rank.rs +++ b/vortex-mask/src/intersect_by_rank.rs @@ -52,17 +52,15 @@ impl Mask { #[cfg(test)] mod test { - use arrow_buffer::BooleanBuffer; use rstest::rstest; + use vortex_buffer::BitBuffer; use crate::Mask; #[test] fn mask_bitand_all_as_bit_and() { - let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![true, true, true, true, true])); - let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![ - false, true, false, true, true, - ])); + let this = Mask::from_buffer(BitBuffer::from_iter(vec![true, true, true, true, true])); + let mask = 
Mask::from_buffer(BitBuffer::from_iter(vec![false, true, false, true, true])); assert_eq!( this.intersect_by_rank(&mask), Mask::from_indices(5, vec![1, 3, 4]) @@ -71,10 +69,8 @@ mod test { #[test] fn mask_bitand_all_true() { - let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![ - false, false, true, true, true, - ])); - let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![true, true, true])); + let this = Mask::from_buffer(BitBuffer::from_iter(vec![false, false, true, true, true])); + let mask = Mask::from_buffer(BitBuffer::from_iter(vec![true, true, true])); assert_eq!( this.intersect_by_rank(&mask), Mask::from_indices(5, vec![2, 3, 4]) @@ -83,10 +79,8 @@ mod test { #[test] fn mask_bitand_true() { - let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![ - true, false, false, true, true, - ])); - let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![true, false, true])); + let this = Mask::from_buffer(BitBuffer::from_iter(vec![true, false, false, true, true])); + let mask = Mask::from_buffer(BitBuffer::from_iter(vec![true, false, true])); assert_eq!( this.intersect_by_rank(&mask), Mask::from_indices(5, vec![0, 4]) @@ -95,10 +89,8 @@ mod test { #[test] fn mask_bitand_false() { - let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![ - true, false, false, true, true, - ])); - let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![false, false, false])); + let this = Mask::from_buffer(BitBuffer::from_iter(vec![true, false, false, true, true])); + let mask = Mask::from_buffer(BitBuffer::from_iter(vec![false, false, false])); assert_eq!(this.intersect_by_rank(&mask), Mask::from_indices(5, vec![])); } diff --git a/vortex-mask/src/iter_bools.rs b/vortex-mask/src/iter_bools.rs index 903f5269dc6..f470ad9c879 100644 --- a/vortex-mask/src/iter_bools.rs +++ b/vortex-mask/src/iter_bools.rs @@ -18,7 +18,7 @@ impl Mask { where F: FnMut(&mut dyn Iterator) -> T, { - match self.boolean_buffer() { + match self.bit_buffer() { AllOr::All => f(&mut 
iter::repeat_n(true, self.len())), AllOr::None => f(&mut iter::repeat_n(false, self.len())), AllOr::Some(buffer) => f(&mut buffer.iter()), diff --git a/vortex-mask/src/lib.rs b/vortex-mask/src/lib.rs index 24a3f6168b3..ef8e04522f3 100644 --- a/vortex-mask/src/lib.rs +++ b/vortex-mask/src/lib.rs @@ -16,8 +16,8 @@ use std::fmt::{Debug, Formatter}; use std::ops::Range; use std::sync::{Arc, OnceLock}; -use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; use itertools::Itertools; +use vortex_buffer::{BitBuffer, BitBufferMut}; use vortex_error::{VortexResult, vortex_panic}; /// Represents a set of values that are all included, all excluded, or some mixture of both. @@ -100,14 +100,14 @@ pub enum Mask { AllTrue(usize), /// No values are included. AllFalse(usize), - /// Some values are included, represented as a [`BooleanBuffer`]. + /// Some values are included, represented as a [`BitBuffer`]. Values(Arc), } /// Represents the values of a [`Mask`] that contains some true and some false elements. #[derive(Debug)] pub struct MaskValues { - buffer: BooleanBuffer, + buffer: BitBuffer, // We cached the indices and slices representations, since it can be faster than iterating // the bit-mask over and over again. @@ -141,7 +141,7 @@ impl MaskValues { /// Returns the boolean buffer representation of the mask. #[inline] - pub fn boolean_buffer(&self) -> &BooleanBuffer { + pub fn bit_buffer(&self) -> &BitBuffer { &self.buffer } @@ -215,10 +215,10 @@ impl Mask { Self::AllFalse(length) } - /// Create a new [`Mask`] from a [`BooleanBuffer`]. - pub fn from_buffer(buffer: BooleanBuffer) -> Self { + /// Create a new [`Mask`] from a [`BitBuffer`]. 
+ pub fn from_buffer(buffer: BitBuffer) -> Self { let len = buffer.len(); - let true_count = buffer.count_set_bits(); + let true_count = buffer.true_count(); if true_count == 0 { return Self::AllFalse(len); @@ -253,14 +253,13 @@ impl Mask { return Self::AllTrue(len); } - let mut buf = BooleanBufferBuilder::new(len); + let mut buf = BitBufferMut::new_unset(len); // TODO(ngates): for dense indices, we can do better by collecting into u64s. - buf.append_n(len, false); - indices.iter().for_each(|idx| buf.set_bit(*idx, true)); + indices.iter().for_each(|&idx| buf.set(idx)); debug_assert_eq!(buf.len(), len); Self::Values(Arc::new(MaskValues { - buffer: buf.finish(), + buffer: buf.freeze(), indices: OnceLock::from(indices), slices: Default::default(), true_count, @@ -270,12 +269,11 @@ impl Mask { /// Create a new [`Mask`] from an [`IntoIterator`] of indices to be excluded. pub fn from_excluded_indices(len: usize, indices: impl IntoIterator) -> Self { - let mut buf = BooleanBufferBuilder::new(len); - buf.append_n(len, true); + let mut buf = BitBufferMut::new_set(len); let mut false_count: usize = 0; indices.into_iter().for_each(|idx| { - buf.set_bit(idx, false); + buf.unset(idx); false_count += 1; }); debug_assert_eq!(buf.len(), len); @@ -290,7 +288,7 @@ impl Mask { } Self::Values(Arc::new(MaskValues { - buffer: buf.finish(), + buffer: buf.freeze(), indices: Default::default(), slices: Default::default(), true_count, @@ -317,18 +315,14 @@ impl Mask { return Self::AllTrue(len); } - let mut buf = BooleanBufferBuilder::new(len); + let mut buf = BitBufferMut::new_unset(len); for (start, end) in slices.iter().copied() { - buf.append_n(start - buf.len(), false); - buf.append_n(end - start, true); - } - if let Some((_, end)) = slices.last() { - buf.append_n(len - end, false); + (start..end).for_each(|idx| buf.set(idx)); } debug_assert_eq!(buf.len(), len); Self::Values(Arc::new(MaskValues { - buffer: buf.finish(), + buffer: buf.freeze(), indices: Default::default(), slices: 
OnceLock::from(slices), true_count, @@ -483,15 +477,13 @@ impl Mask { match &self { Self::AllTrue(_) => Self::new_true(range.len()), Self::AllFalse(_) => Self::new_false(range.len()), - Self::Values(values) => { - Self::from_buffer(values.buffer.slice(range.start, range.len())) - } + Self::Values(values) => Self::from_buffer(values.buffer.slice(range)), } } /// Return the boolean buffer representation of the mask. #[inline] - pub fn boolean_buffer(&self) -> AllOr<&BooleanBuffer> { + pub fn bit_buffer(&self) -> AllOr<&BitBuffer> { match &self { Self::AllTrue(_) => AllOr::All, Self::AllFalse(_) => AllOr::None, @@ -502,21 +494,11 @@ impl Mask { /// Return a boolean buffer representation of the mask, allocating new buffers for all-true /// and all-false variants. #[inline] - pub fn to_boolean_buffer(&self) -> BooleanBuffer { - match self { - Self::AllTrue(l) => BooleanBuffer::new_set(*l), - Self::AllFalse(l) => BooleanBuffer::new_unset(*l), - Self::Values(values) => values.boolean_buffer().clone(), - } - } - - /// Returns an Arrow null buffer representation of the mask. 
- #[inline] - pub fn to_null_buffer(&self) -> Option { + pub fn to_bit_buffer(&self) -> BitBuffer { match self { - Mask::AllTrue(_) => None, - Mask::AllFalse(l) => Some(NullBuffer::new_null(*l)), - Mask::Values(values) => Some(NullBuffer::from(values.buffer.clone())), + Self::AllTrue(l) => BitBuffer::new_set(*l), + Self::AllFalse(l) => BitBuffer::new_unset(*l), + Self::Values(values) => values.bit_buffer().clone(), } } @@ -569,7 +551,7 @@ impl Mask { Self::AllTrue(_) => indices.to_vec(), Self::AllFalse(_) => vec![0; indices.len()], Self::Values(values) => { - let mut bool_iter = values.boolean_buffer().iter(); + let mut bool_iter = values.bit_buffer().iter(); let mut valid_counts = Vec::with_capacity(indices.len()); let mut valid_count = 0; let mut idx = 0; @@ -608,16 +590,17 @@ impl Mask { return self; } - let existing_buffer = mask_values.boolean_buffer(); + let existing_buffer = mask_values.bit_buffer(); - let mut new_buffer_builder = BooleanBufferBuilder::new(mask_values.len()); - new_buffer_builder.append_n(mask_values.len(), false); + let mut new_buffer_builder = BitBufferMut::new_unset(mask_values.len()); for index in existing_buffer.set_indices().take(limit) { - new_buffer_builder.set_bit(index, true); + unsafe { + new_buffer_builder.set_unchecked(index); + } } - Self::from(new_buffer_builder.finish()) + Self::from(new_buffer_builder.freeze()) } } } @@ -635,17 +618,17 @@ impl Mask { return Ok(Mask::AllFalse(len)); } - let mut builder = BooleanBufferBuilder::new(len); + let mut builder = BitBufferMut::with_capacity(len); for mask in masks { match mask { - Mask::AllTrue(n) => builder.append_n(*n, true), - Mask::AllFalse(n) => builder.append_n(*n, false), - Mask::Values(v) => builder.append_buffer(v.boolean_buffer()), + Mask::AllTrue(n) => builder.append_n(true, *n), + Mask::AllFalse(n) => builder.append_n(false, *n), + Mask::Values(v) => builder.append_buffer(v.bit_buffer()), } } - Ok(Mask::from_buffer(builder.finish())) + 
Ok(Mask::from_buffer(builder.freeze())) } } @@ -657,9 +640,8 @@ pub enum MaskIter<'a> { Slices(&'a [(usize, usize)]), } -impl From for Mask { - #[inline] - fn from(value: BooleanBuffer) -> Self { +impl From for Mask { + fn from(value: BitBuffer) -> Self { Self::from_buffer(value) } } @@ -667,7 +649,7 @@ impl From for Mask { impl FromIterator for Mask { #[inline] fn from_iter>(iter: T) -> Self { - Self::from_buffer(BooleanBuffer::from_iter(iter)) + Self::from_buffer(BitBuffer::from_iter(iter)) } } @@ -689,16 +671,16 @@ impl FromIterator for Mask { } // Else, construct the boolean buffer - let mut buffer = BooleanBufferBuilder::new(total_length); + let mut buffer = BitBufferMut::with_capacity(total_length); for mask in masks { match mask { - Mask::AllTrue(count) => buffer.append_n(count, true), - Mask::AllFalse(count) => buffer.append_n(count, false), + Mask::AllTrue(count) => buffer.append_n(true, count), + Mask::AllFalse(count) => buffer.append_n(false, count), Mask::Values(values) => { - buffer.append_buffer(values.boolean_buffer()); + buffer.append_buffer(values.bit_buffer()); } }; } - Self::from_buffer(buffer.finish()) + Self::from_buffer(buffer.freeze()) } } diff --git a/vortex-mask/src/tests.rs b/vortex-mask/src/tests.rs index e8fef7e0fcc..989f5a0087d 100644 --- a/vortex-mask/src/tests.rs +++ b/vortex-mask/src/tests.rs @@ -4,8 +4,8 @@ #![allow(clippy::panic)] #![allow(clippy::many_single_char_names)] -use arrow_buffer::BooleanBuffer; use rstest::rstest; +use vortex_buffer::BitBuffer; use crate::{AllOr, Mask, MaskIter}; @@ -18,7 +18,7 @@ fn mask_all_true() { assert_eq!(mask.density(), 1.0); assert_eq!(mask.indices(), AllOr::All); assert_eq!(mask.slices(), AllOr::All); - assert_eq!(mask.boolean_buffer(), AllOr::All,); + assert_eq!(mask.bit_buffer(), AllOr::All,); } #[test] @@ -29,7 +29,7 @@ fn mask_all_false() { assert_eq!(mask.density(), 0.0); assert_eq!(mask.indices(), AllOr::None); assert_eq!(mask.slices(), AllOr::None); - assert_eq!(mask.boolean_buffer(), 
AllOr::None,); + assert_eq!(mask.bit_buffer(), AllOr::None,); } #[test] @@ -37,7 +37,7 @@ fn mask_from() { let masks = [ Mask::from_indices(5, vec![0, 2, 3]), Mask::from_slices(5, vec![(0, 1), (2, 4)]), - Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, true, false])), + Mask::from_buffer(BitBuffer::from_iter([true, false, true, true, false])), ]; for mask in &masks { @@ -47,8 +47,8 @@ fn mask_from() { assert_eq!(mask.indices(), AllOr::Some(&[0, 2, 3][..])); assert_eq!(mask.slices(), AllOr::Some(&[(0, 1), (2, 4)][..])); assert_eq!( - mask.boolean_buffer(), - AllOr::Some(&BooleanBuffer::from_iter([true, false, true, true, false])) + mask.bit_buffer(), + AllOr::Some(&BitBuffer::from_iter([true, false, true, true, false])) ); } } @@ -57,8 +57,8 @@ fn mask_from() { fn length_zero_masks() { let all_false = Mask::new_false(0); let all_true = Mask::new_true(0); - let buffer_set = Mask::from_buffer(BooleanBuffer::new_set(0)); - let buffer_unset = Mask::from_buffer(BooleanBuffer::new_unset(0)); + let buffer_set = Mask::from_buffer(BitBuffer::new_set(0)); + let buffer_unset = Mask::from_buffer(BitBuffer::new_unset(0)); assert!(all_false.all_false()); assert!(all_false.all_true()); @@ -81,7 +81,7 @@ fn test_mask_value() { assert!(!all_false.value(0)); assert!(!all_false.value(4)); - let values = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let values = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); assert!(values.value(0)); assert!(!values.value(1)); assert!(values.value(2)); @@ -95,7 +95,7 @@ fn test_mask_first() { assert_eq!(Mask::new_false(5).first(), None); assert_eq!(Mask::new_true(0).first(), None); - let values = Mask::from_buffer(BooleanBuffer::from_iter([false, false, true, false, true])); + let values = Mask::from_buffer(BitBuffer::from_iter([false, false, true, false, true])); assert_eq!(values.first(), Some(2)); let values_indices = Mask::from_indices(5, vec![2, 4]); @@ -110,14 +110,14 
@@ fn test_mask_false_count() { assert_eq!(Mask::new_true(5).false_count(), 0); assert_eq!(Mask::new_false(5).false_count(), 5); - let values = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let values = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); assert_eq!(values.false_count(), 2); } // Slice operations #[test] fn test_mask_slice() { - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, true, false])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, true, false])); let sliced = mask.slice(1..4); assert_eq!(sliced.len(), 3); @@ -152,8 +152,8 @@ fn limit_all_true_mask() { assert_eq!(all_true.len(), limited_mask.len()); assert_eq!(limited_mask.true_count(), 2); assert_eq!( - limited_mask.boolean_buffer(), - AllOr::Some(&BooleanBuffer::from_iter([true, true, false, false])) + limited_mask.bit_buffer(), + AllOr::Some(&BitBuffer::from_iter([true, true, false, false])) ); let limited_mask = all_true.clone().limit(5); @@ -166,8 +166,8 @@ fn limit_mask_values() { let limited_mask = original_mask.clone().limit(2); assert_eq!( - limited_mask.boolean_buffer(), - AllOr::Some(&BooleanBuffer::from_iter([ + limited_mask.bit_buffer(), + AllOr::Some(&BitBuffer::from_iter([ true, true, false, false, false, false ])) ); @@ -176,8 +176,8 @@ fn limit_mask_values() { let limited_mask = original_mask.limit(3); assert_eq!( - limited_mask.boolean_buffer(), - AllOr::Some(&BooleanBuffer::from_iter([ + limited_mask.bit_buffer(), + AllOr::Some(&BitBuffer::from_iter([ true, true, false, true, false, false ])) ); @@ -199,7 +199,7 @@ fn test_limit_all_false_mask() { #[test] fn test_limit_mask_exact() { - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); let limited = mask.clone().limit(3); assert_eq!(limited.true_count(), 3); assert_eq!(limited, mask); @@ 
-207,7 +207,7 @@ fn test_limit_mask_exact() { #[test] fn test_limit_mask_zero() { - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); let limited = mask.limit(0); assert!(limited.all_false()); assert_eq!(limited.true_count(), 0); @@ -215,37 +215,23 @@ fn test_limit_mask_zero() { // Buffer conversion tests #[test] -fn test_mask_to_boolean_buffer() { +fn test_mask_to_bit_buffer() { let all_true = Mask::new_true(5); - let buffer = all_true.to_boolean_buffer(); - assert_eq!(buffer.count_set_bits(), 5); + let buffer = all_true.to_bit_buffer(); + assert_eq!(buffer.true_count(), 5); assert_eq!(buffer.len(), 5); let all_false = Mask::new_false(5); - let buffer = all_false.to_boolean_buffer(); - assert_eq!(buffer.count_set_bits(), 0); + let buffer = all_false.to_bit_buffer(); + assert_eq!(buffer.true_count(), 0); assert_eq!(buffer.len(), 5); - let values = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); - let buffer = values.to_boolean_buffer(); - assert_eq!(buffer.count_set_bits(), 3); + let values = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); + let buffer = values.to_bit_buffer(); + assert_eq!(buffer.true_count(), 3); assert_eq!(buffer.len(), 5); } -#[test] -fn test_mask_to_null_buffer() { - let all_true = Mask::new_true(5); - assert!(all_true.to_null_buffer().is_none()); - - let all_false = Mask::new_false(5); - let null_buffer = all_false.to_null_buffer().unwrap(); - assert_eq!(null_buffer.null_count(), 5); - - let values = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); - let null_buffer = values.to_null_buffer().unwrap(); - assert_eq!(null_buffer.null_count(), 2); -} - // MaskValues tests #[test] fn test_mask_values() { @@ -255,7 +241,7 @@ fn test_mask_values() { let all_false = Mask::new_false(5); assert!(all_false.values().is_none()); - let mask = 
Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])); let values = mask.values().unwrap(); assert_eq!(values.len(), 5); assert_eq!(values.true_count(), 3); @@ -265,7 +251,7 @@ fn test_mask_values() { #[test] fn test_mask_values_threshold_iter() { - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, true, false])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, true, false])); let values = mask.values().unwrap(); // With low threshold, should prefer indices @@ -287,12 +273,12 @@ fn test_mask_values_threshold_iter() { #[test] fn test_mask_values_is_empty() { - let empty_mask = Mask::from_buffer(BooleanBuffer::new_unset(0)); + let empty_mask = Mask::from_buffer(BitBuffer::new_unset(0)); if let Some(values) = empty_mask.values() { assert!(values.is_empty()); } - let non_empty_mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false])); + let non_empty_mask = Mask::from_buffer(BitBuffer::from_iter([true, false])); if let Some(values) = non_empty_mask.values() { assert!(!values.is_empty()); } @@ -369,9 +355,7 @@ fn test_mask_from_intersection_indices_same() { // Valid counts tests #[test] fn test_mask_valid_counts_for_indices() { - let mask = Mask::from_buffer(BooleanBuffer::from_iter([ - true, false, true, true, false, true, - ])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, true, false, true])); let indices = vec![0, 2, 4, 6]; let counts = mask.valid_counts_for_indices(&indices); assert_eq!(counts, vec![0, 1, 3, 4]); @@ -388,7 +372,7 @@ fn test_mask_valid_counts_for_indices() { #[test] #[should_panic] fn test_mask_valid_counts_for_indices_error() { - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true])); let indices = vec![0, 2, 5]; // 5 is out of bounds let _ = 
mask.valid_counts_for_indices(&indices); } @@ -397,9 +381,9 @@ fn test_mask_valid_counts_for_indices_error() { #[test] fn test_mask_from_iter_masks() { let masks = vec![ - Mask::from_buffer(BooleanBuffer::from_iter([true, false])), - Mask::from_buffer(BooleanBuffer::from_iter([true, true, false])), - Mask::from_buffer(BooleanBuffer::from_iter([false, true])), + Mask::from_buffer(BitBuffer::from_iter([true, false])), + Mask::from_buffer(BitBuffer::from_iter([true, true, false])), + Mask::from_buffer(BitBuffer::from_iter([false, true])), ]; let combined = Mask::from_iter(masks); @@ -499,7 +483,7 @@ fn test_mask_threshold_iter() { let all_false = Mask::new_false(5); assert!(matches!(all_false.threshold_iter(0.5), AllOr::None)); - let mask = Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, true, false])); + let mask = Mask::from_buffer(BitBuffer::from_iter([true, false, true, true, false])); if let AllOr::Some(MaskIter::Indices(indices)) = mask.threshold_iter(0.7) { assert_eq!(indices, &[0, 2, 3]); } else { @@ -606,7 +590,7 @@ fn test_allor_eq() { #[case::all_true(Mask::new_true(5), 5, 5, 0, 1.0)] #[case::all_false(Mask::new_false(5), 5, 0, 5, 0.0)] #[case::mixed( - Mask::from_buffer(BooleanBuffer::from_iter([true, false, true, false, true])), + Mask::from_buffer(BitBuffer::from_iter([true, false, true, false, true])), 5, 3, 2, 0.6 )] #[case::single_true(Mask::from_indices(10, vec![5]), 10, 1, 9, 0.1)] @@ -699,7 +683,7 @@ fn test_mask_concat_all_false() { #[test] fn test_mask_concat_mixed_types() { let masks = [ - Mask::from_buffer(BooleanBuffer::from_iter([true, false, true])), + Mask::from_buffer(BitBuffer::from_iter([true, false, true])), Mask::new_true(2), Mask::new_false(3), ];