From 4267621f3c291a5e1879b1ea9d16e7c648fe291c Mon Sep 17 00:00:00 2001 From: Daniel King Date: Mon, 16 Dec 2024 18:27:12 -0500 Subject: [PATCH 01/20] feat: avoid unnecessary buffer copies --- encodings/runend-bool/src/compute/mod.rs | 4 ++-- encodings/runend/src/compute/take.rs | 4 ++-- vortex-array/src/array/bool/patch.rs | 5 +++-- vortex-array/src/array/primitive/compute/take.rs | 7 ++++++- vortex-array/src/patches.rs | 13 +++++++------ 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/encodings/runend-bool/src/compute/mod.rs b/encodings/runend-bool/src/compute/mod.rs index 1f82f81d1ef..b21d9d73115 100644 --- a/encodings/runend-bool/src/compute/mod.rs +++ b/encodings/runend-bool/src/compute/mod.rs @@ -42,9 +42,9 @@ impl TakeFn for RunEndBoolEncoding { let primitive_indices = indices.clone().into_primitive()?; let physical_indices = match_each_integer_ptype!(primitive_indices.ptype(), |$P| { primitive_indices - .into_maybe_null_slice::<$P>() + .maybe_null_slice::<$P>() .into_iter() - .map(|idx| idx as usize) + .map(|idx| *idx as usize) .map(|idx| { if idx >= array.len() { vortex_bail!(OutOfBounds: idx, 0, array.len()) diff --git a/encodings/runend/src/compute/take.rs b/encodings/runend/src/compute/take.rs index 57d54acfcef..0a2e78fd683 100644 --- a/encodings/runend/src/compute/take.rs +++ b/encodings/runend/src/compute/take.rs @@ -13,10 +13,10 @@ impl TakeFn for RunEndEncoding { let primitive_indices = indices.clone().into_primitive()?; let usize_indices = match_each_integer_ptype!(primitive_indices.ptype(), |$P| { primitive_indices - .into_maybe_null_slice::<$P>() + .maybe_null_slice::<$P>() .into_iter() .map(|idx| { - let usize_idx = idx as usize; + let usize_idx = *idx as usize; if usize_idx >= array.len() { vortex_error::vortex_bail!(OutOfBounds: usize_idx, 0, array.len()); } diff --git a/vortex-array/src/array/bool/patch.rs b/vortex-array/src/array/bool/patch.rs index 8c2e1a4dfbc..3140b06a005 100644 --- a/vortex-array/src/array/bool/patch.rs +++ b/vortex-array/src/array/bool/patch.rs @@ -10,8 +10,9 @@ use crate::{ArrayLen, IntoArrayVariant, ToArrayData}; impl BoolArray { pub fn patch(self, patches: Patches) -> VortexResult { let length = self.len(); - let indices = patches.indices().clone().into_primitive()?; - let values = patches.values().clone().into_bool()?; + let (_, indices, values) = patches.into_parts(); + let indices = indices.into_primitive()?; + let values = values.into_bool()?; let patched_validity = self.validity() diff --git a/vortex-array/src/array/primitive/compute/take.rs b/vortex-array/src/array/primitive/compute/take.rs index 4c2df9a37f9..dbfc2ee3db3 100644 --- a/vortex-array/src/array/primitive/compute/take.rs +++ b/vortex-array/src/array/primitive/compute/take.rs @@ -45,7 +45,11 @@ fn take_primitive>( array: &[T], indices: &[I], ) -> Vec { - indices.iter().map(|idx| array[idx.as_()]).collect() + indices + .iter() + .cloned() + .map(|idx| array[idx.as_()]) + .collect() } // We pass a Vec in case we're T == u64. @@ -56,6 +60,7 @@ unsafe fn take_primitive_unchecked Vec { indices .iter() + .cloned() .map(|idx| unsafe { *array.get_unchecked(idx.as_()) }) .collect() } diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index e67e6350855..53009254720 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -202,12 +202,13 @@ impl Patches { let flat_indices = self.indices().clone().into_primitive()?; match_each_integer_ptype!(flat_indices.ptype(), |$I| { - for (value_idx, coordinate) in flat_indices.into_maybe_null_slice::<$I>().into_iter().enumerate() { - if buffer.value(coordinate as usize) { + for (value_idx, coordinate) in flat_indices.maybe_null_slice::<$I>().into_iter().enumerate() { + let coordinate = *coordinate as usize; + if buffer.value(coordinate) { // We count the number of truthy values between this coordinate and the previous truthy one - let adjusted_coordinate = buffer.slice(last_inserted_index, (coordinate as usize) - last_inserted_index).count_set_bits() as u64; + let adjusted_coordinate = buffer.slice(last_inserted_index, (coordinate) - last_inserted_index).count_set_bits() as u64; coordinate_indices.push(adjusted_coordinate + coordinate_indices.last().copied().unwrap_or_default()); - last_inserted_index = coordinate as usize; + last_inserted_index = coordinate; value_indices.push(value_idx as u64); } } @@ -267,9 +268,9 @@ impl Patches { let new_length = take_indices.len(); let take_indices = match_each_integer_ptype!(take_indices.ptype(), |$P| { take_indices - .into_maybe_null_slice::<$P>() + .maybe_null_slice::<$P>() .into_iter() - .map(usize::try_from) + .map(|x| usize::try_from(*x)) .collect::, _>>()? }); From a255b7ea775863426c485ddfd5369a8b6a5b6b09 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 17 Dec 2024 11:46:24 -0500 Subject: [PATCH 02/20] wip: try to be polymorphic on into_maybe_null_slice --- .../src/array/primitive/compute/take.rs | 10 +---- vortex-array/src/lib.rs | 1 + vortex-array/src/take.rs | 40 +++++++++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 vortex-array/src/take.rs diff --git a/vortex-array/src/array/primitive/compute/take.rs b/vortex-array/src/array/primitive/compute/take.rs index dbfc2ee3db3..84cfef6d031 100644 --- a/vortex-array/src/array/primitive/compute/take.rs +++ b/vortex-array/src/array/primitive/compute/take.rs @@ -5,6 +5,7 @@ use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::array::PrimitiveEncoding; use crate::compute::TakeFn; +use crate::take::take_ref_unchecked; use crate::variants::PrimitiveArrayTrait; use crate::{ArrayData, IntoArrayData, IntoArrayVariant}; @@ -28,14 +29,7 @@ impl TakeFn for PrimitiveEncoding { indices: &ArrayData, ) -> VortexResult { let indices = indices.clone().into_primitive()?; - let validity = unsafe { array.validity().take_unchecked(indices.as_ref())? }; - - match_each_native_ptype!(array.ptype(), |$T| { - match_each_integer_ptype!(indices.ptype(), |$I| { - let values = take_primitive_unchecked(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); - Ok(PrimitiveArray::from_vec(values, validity).into_array()) - }) - }) + unsafe { take_ref_unchecked(array, &indices) }.map(IntoArrayData::into_array) } } diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 783ef075536..27f7bb270c1 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -43,6 +43,7 @@ pub mod nbytes; pub mod patches; pub mod stats; pub mod stream; +mod take; pub mod tree; pub mod validity; pub mod variants; diff --git a/vortex-array/src/take.rs b/vortex-array/src/take.rs new file mode 100644 index 00000000000..dec6217cdbf --- /dev/null +++ b/vortex-array/src/take.rs @@ -0,0 +1,40 @@ +use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype}; +use vortex_error::VortexResult; + +use crate::array::PrimitiveArray; +use crate::variants::PrimitiveArrayTrait as _; + +pub fn take(values: &PrimitiveArray, indices: &PrimitiveArray) -> VortexResult { + let new_validity = values.validity().take(indices.as_ref())?; + match_each_native_ptype!(values.ptype(), |$V| { + let values = values.maybe_null_slice::<$V>(); + let new_values = match_each_integer_ptype!(indices.ptype(), |$I| { + indices + .maybe_null_slice::<$I>() + .iter() + .cloned() + .map(|idx| values[idx as usize]) + .collect() + }); + Ok(PrimitiveArray::from_vec(new_values, new_validity)) + }) +} + +pub unsafe fn take_unchecked( + values: &PrimitiveArray, + indices: &PrimitiveArray, +) -> VortexResult { + let new_validity = unsafe { values.validity().take_unchecked(indices.as_ref())? }; + match_each_native_ptype!(values.ptype(), |$V| { + let values = values.maybe_null_slice::<$V>(); + let new_values = match_each_integer_ptype!(indices.ptype(), |$I| { + indices + .maybe_null_slice::<$I>() + .iter() + .cloned() + .map(|idx| unsafe { *values.get_unchecked(idx as usize) }) + .collect() + }); + Ok(PrimitiveArray::from_vec(new_values, new_validity)) + }) +} From 760d2a930647c767c723d49bddd62c8d32c128d5 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 17 Dec 2024 16:19:41 -0500 Subject: [PATCH 03/20] try_into_maybe_null_vec and into_maybe_null_vec --- encodings/alp/src/alp/compress.rs | 2 +- encodings/alp/src/alp_rd/mod.rs | 6 ++- encodings/datetime-parts/src/compute/mod.rs | 2 +- encodings/fastlanes/src/for/compress.rs | 2 +- encodings/zigzag/src/compress.rs | 16 ++++---- fuzz/src/slice.rs | 2 +- vortex-array/src/array/bool/patch.rs | 6 +-- vortex-array/src/array/chunked/compute/mod.rs | 2 +- .../src/array/primitive/compute/take.rs | 10 ++++- vortex-array/src/array/primitive/mod.rs | 38 ++++++++++++++---- vortex-array/src/array/primitive/patch.rs | 10 ++--- vortex-array/src/compute/filter.rs | 2 +- vortex-array/src/lib.rs | 1 - vortex-array/src/take.rs | 40 ------------------- 14 files changed, 65 insertions(+), 74 deletions(-) delete mode 100644 vortex-array/src/take.rs diff --git a/encodings/alp/src/alp/compress.rs b/encodings/alp/src/alp/compress.rs index 521d2cf01f2..1e4e0db6cb0 100644 --- a/encodings/alp/src/alp/compress.rs +++ b/encodings/alp/src/alp/compress.rs @@ -69,7 +69,7 @@ pub fn decompress(array: ALPArray) -> VortexResult { let ptype = array.dtype().try_into()?; let decoded = match_each_alp_float_ptype!(ptype, |$T| { PrimitiveArray::from_vec( - <$T>::decode_vec(encoded.into_maybe_null_slice(), array.exponents()), + <$T>::decode_vec(encoded.into_maybe_null_vec(), array.exponents()), validity, ) }); diff --git a/encodings/alp/src/alp_rd/mod.rs b/encodings/alp/src/alp_rd/mod.rs index f7f8c59072f..7147fd89196 100644 --- a/encodings/alp/src/alp_rd/mod.rs +++ b/encodings/alp/src/alp_rd/mod.rs @@ -17,7 +17,7 @@ use vortex_array::aliases::hash_map::HashMap; use vortex_array::array::PrimitiveArray; use vortex_array::{ArrayDType, IntoArrayData}; use vortex_dtype::{DType, NativePType}; -use vortex_error::{vortex_bail, VortexExpect, VortexResult, VortexUnwrap}; +use vortex_error::{vortex_bail, vortex_err, VortexExpect, VortexResult, VortexUnwrap}; use vortex_fastlanes::bitpack_encode_unchecked; use crate::match_each_alp_float_ptype; @@ -281,7 +281,9 @@ pub fn alp_rd_decode( let patched_left_parts = match left_parts_patches { Some(patches) => PrimitiveArray::from(left_parts_decoded) .patch(patches)? - .into_maybe_null_slice::(), + .try_into_maybe_null_vec::() + .map_err(|_| vortex_err!("could not zero copy patched left_parts_decoded")) + .vortex_expect("there are no aliases to this primitive array"), None => left_parts_decoded, }; diff --git a/encodings/datetime-parts/src/compute/mod.rs b/encodings/datetime-parts/src/compute/mod.rs index d2d374d6d3d..5af7d2d814e 100644 --- a/encodings/datetime-parts/src/compute/mod.rs +++ b/encodings/datetime-parts/src/compute/mod.rs @@ -118,7 +118,7 @@ pub fn decode_to_temporal(array: &DateTimePartsArray) -> VortexResult = days_buf - .into_maybe_null_slice::() + .into_maybe_null_vec::() .into_iter() .map(|d| d * 86_400 * divisor) .collect(); diff --git a/encodings/fastlanes/src/for/compress.rs b/encodings/fastlanes/src/for/compress.rs index e717ade2c62..cd0ee76a19d 100644 --- a/encodings/fastlanes/src/for/compress.rs +++ b/encodings/fastlanes/src/for/compress.rs @@ -120,7 +120,7 @@ pub fn decompress(array: FoRArray) -> VortexResult { encoded } else { PrimitiveArray::from_vec( - decompress_primitive(encoded.into_maybe_null_slice::<$T>(), min, shift), + decompress_primitive(encoded.into_maybe_null_vec::<$T>(), min, shift), validity, ) } diff --git a/encodings/zigzag/src/compress.rs b/encodings/zigzag/src/compress.rs index 301874335a0..4d418061e6b 100644 --- a/encodings/zigzag/src/compress.rs +++ b/encodings/zigzag/src/compress.rs @@ -11,10 +11,10 @@ use crate::ZigZagArray; pub fn zigzag_encode(parray: PrimitiveArray) -> VortexResult { let validity = parray.validity(); let encoded = match parray.ptype() { - PType::I8 => zigzag_encode_primitive::(parray.into_maybe_null_slice(), validity), - PType::I16 => zigzag_encode_primitive::(parray.into_maybe_null_slice(), validity), - PType::I32 => zigzag_encode_primitive::(parray.into_maybe_null_slice(), validity), - PType::I64 => zigzag_encode_primitive::(parray.into_maybe_null_slice(), validity), + PType::I8 => zigzag_encode_primitive::(parray.into_maybe_null_vec(), validity), + PType::I16 => zigzag_encode_primitive::(parray.into_maybe_null_vec(), validity), + PType::I32 => zigzag_encode_primitive::(parray.into_maybe_null_vec(), validity), + PType::I64 => zigzag_encode_primitive::(parray.into_maybe_null_vec(), validity), _ => vortex_bail!( "ZigZag can only encode signed integers, got {}", parray.ptype() @@ -36,10 +36,10 @@ where pub fn zigzag_decode(parray: PrimitiveArray) -> VortexResult { let validity = parray.validity(); let decoded = match parray.ptype() { - PType::U8 => zigzag_decode_primitive::(parray.into_maybe_null_slice(), validity), - PType::U16 => zigzag_decode_primitive::(parray.into_maybe_null_slice(), validity), - PType::U32 => zigzag_decode_primitive::(parray.into_maybe_null_slice(), validity), - PType::U64 => zigzag_decode_primitive::(parray.into_maybe_null_slice(), validity), + PType::U8 => zigzag_decode_primitive::(parray.into_maybe_null_vec(), validity), + PType::U16 => zigzag_decode_primitive::(parray.into_maybe_null_vec(), validity), + PType::U32 => zigzag_decode_primitive::(parray.into_maybe_null_vec(), validity), + PType::U64 => zigzag_decode_primitive::(parray.into_maybe_null_vec(), validity), _ => vortex_bail!( "ZigZag can only decode unsigned integers, got {}", parray.ptype() diff --git a/fuzz/src/slice.rs b/fuzz/src/slice.rs index 00b82029f27..64684cb127c 100644 --- a/fuzz/src/slice.rs +++ b/fuzz/src/slice.rs @@ -30,7 +30,7 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar } DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| { let primitive_array = array.clone().into_primitive().unwrap(); - let vec_values = primitive_array.into_maybe_null_slice::<$P>(); + let vec_values = primitive_array.maybe_null_slice::<$P>(); PrimitiveArray::from_vec(vec_values[start..stop].into(), validity).into_array() }), DType::Utf8(_) | DType::Binary(_) => { diff --git a/vortex-array/src/array/bool/patch.rs b/vortex-array/src/array/bool/patch.rs index 3140b06a005..ebff7466ce3 100644 --- a/vortex-array/src/array/bool/patch.rs +++ b/vortex-array/src/array/bool/patch.rs @@ -21,11 +21,11 @@ impl BoolArray { let (mut own_values, bit_offset) = self.into_boolean_builder(); match_each_integer_ptype!(indices.ptype(), |$I| { for (idx, value) in indices - .into_maybe_null_slice::<$I>() - .into_iter() + .maybe_null_slice::<$I>() + .iter() .zip_eq(values.boolean_buffer().iter()) { - own_values.set_bit(idx as usize + bit_offset, value); + own_values.set_bit(*idx as usize + bit_offset, value); } }); diff --git a/vortex-array/src/array/chunked/compute/mod.rs b/vortex-array/src/array/chunked/compute/mod.rs index 242082ae032..db055873440 100644 --- a/vortex-array/src/array/chunked/compute/mod.rs +++ b/vortex-array/src/array/chunked/compute/mod.rs @@ -109,7 +109,7 @@ mod test { .unwrap() .into_primitive() .unwrap() - .into_maybe_null_slice::(), + .into_maybe_null_vec::(), vec![0u64, 1, 2, 3], ); } diff --git a/vortex-array/src/array/primitive/compute/take.rs b/vortex-array/src/array/primitive/compute/take.rs index 84cfef6d031..dbfc2ee3db3 100644 --- a/vortex-array/src/array/primitive/compute/take.rs +++ b/vortex-array/src/array/primitive/compute/take.rs @@ -5,7 +5,6 @@ use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::array::PrimitiveEncoding; use crate::compute::TakeFn; -use crate::take::take_ref_unchecked; use crate::variants::PrimitiveArrayTrait; use crate::{ArrayData, IntoArrayData, IntoArrayVariant}; @@ -29,7 +28,14 @@ impl TakeFn for PrimitiveEncoding { indices: &ArrayData, ) -> VortexResult { let indices = indices.clone().into_primitive()?; - unsafe { take_ref_unchecked(array, &indices) }.map(IntoArrayData::into_array) + let validity = unsafe { array.validity().take_unchecked(indices.as_ref())? }; + + match_each_native_ptype!(array.ptype(), |$T| { + match_each_integer_ptype!(indices.ptype(), |$I| { + let values = take_primitive_unchecked(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); + Ok(PrimitiveArray::from_vec(values, validity).into_array()) + }) + }) } } diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 0467653e911..a4007729677 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -115,8 +115,26 @@ impl PrimitiveArray { } /// Convert the array into a mutable vec of the given type. - /// If possible, this will be zero-copy. - pub fn into_maybe_null_slice(self) -> Vec { + /// + /// If there are no other references to the underlying buffer, no data is copied. + pub fn into_maybe_null_vec(self) -> Vec { + match self.try_into_maybe_null_vec::() { + Ok(_) => todo!(), + Err(array) => { + let buffer = array.into_buffer(); + let (prefix, values, suffix) = unsafe { buffer.as_ref().align_to::() }; + assert!(prefix.is_empty() && suffix.is_empty()); + Vec::from(values) + } + } + } + + /// Try to convert the array into a mutable vec of the given type without copying. + /// + /// If the underlying buffer is not uniquely owned, which would necessitate a copy, then return + /// a primitive array with the same buffer and validity. + #[allow(clippy::panic_in_result_fn)] + pub fn try_into_maybe_null_vec(self) -> Result, Self> { assert_eq!( T::PTYPE, self.ptype(), @@ -124,11 +142,11 @@ impl PrimitiveArray { T::PTYPE, self.ptype(), ); - self.into_buffer().into_vec::().unwrap_or_else(|b| { - let (prefix, values, suffix) = unsafe { b.as_ref().align_to::() }; - assert!(prefix.is_empty() && suffix.is_empty()); - Vec::from(values) - }) + let ptype = self.ptype(); + let (buffer, validity) = self.into_parts(); + buffer + .into_vec::() + .map_err(|buffer| PrimitiveArray::new(buffer, ptype, validity)) } pub fn get_as_cast(&self, idx: usize) -> T { @@ -151,6 +169,12 @@ impl PrimitiveArray { PrimitiveArray::new(self.buffer().clone(), ptype, self.validity()) } + pub fn into_parts(self) -> (Buffer, Validity) { + let validity = self.validity(); + let buffer = self.into_buffer(); + (buffer, validity) + } + pub fn into_buffer(self) -> Buffer { self.into_array() .into_buffer() diff --git a/vortex-array/src/array/primitive/patch.rs b/vortex-array/src/array/primitive/patch.rs index 4b6dc0419f3..01e54194706 100644 --- a/vortex-array/src/array/primitive/patch.rs +++ b/vortex-array/src/array/primitive/patch.rs @@ -36,12 +36,12 @@ impl PrimitiveArray { T: NativePType + ArrowNativeType, I: NativePType + ArrowNativeType, { - let mut own_values = self.into_maybe_null_slice::(); + let mut own_values = self.into_maybe_null_vec::(); - let patch_indices = patch_indices.into_maybe_null_slice::(); - let patch_values = patch_values.into_maybe_null_slice::(); + let patch_indices = patch_indices.maybe_null_slice::(); + let patch_values = patch_values.maybe_null_slice::(); for (idx, value) in itertools::zip_eq(patch_indices, patch_values) { - own_values[idx.as_usize()] = value; + own_values[idx.as_usize()] = *value; } Ok(Self::from_vec(own_values, patched_validity)) } @@ -62,7 +62,7 @@ mod tests { sliced .into_primitive() .unwrap() - .into_maybe_null_slice::(), + .into_maybe_null_vec::(), vec![2u32; 6] ); } diff --git a/vortex-array/src/compute/filter.rs b/vortex-array/src/compute/filter.rs index 030f8195b58..8f3aa78c723 100644 --- a/vortex-array/src/compute/filter.rs +++ b/vortex-array/src/compute/filter.rs @@ -375,7 +375,7 @@ mod test { .unwrap() .into_primitive() .unwrap() - .into_maybe_null_slice::(), + .into_maybe_null_vec::(), vec![0i32, 1i32, 2i32] ); } diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 27f7bb270c1..783ef075536 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -43,7 +43,6 @@ pub mod nbytes; pub mod patches; pub mod stats; pub mod stream; -mod take; pub mod tree; pub mod validity; pub mod variants; diff --git a/vortex-array/src/take.rs b/vortex-array/src/take.rs deleted file mode 100644 index dec6217cdbf..00000000000 --- a/vortex-array/src/take.rs +++ /dev/null @@ -1,40 +0,0 @@ -use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype}; -use vortex_error::VortexResult; - -use crate::array::PrimitiveArray; -use crate::variants::PrimitiveArrayTrait as _; - -pub fn take(values: &PrimitiveArray, indices: &PrimitiveArray) -> VortexResult { - let new_validity = values.validity().take(indices.as_ref())?; - match_each_native_ptype!(values.ptype(), |$V| { - let values = values.maybe_null_slice::<$V>(); - let new_values = match_each_integer_ptype!(indices.ptype(), |$I| { - indices - .maybe_null_slice::<$I>() - .iter() - .cloned() - .map(|idx| values[idx as usize]) - .collect() - }); - Ok(PrimitiveArray::from_vec(new_values, new_validity)) - }) -} - -pub unsafe fn take_unchecked( - values: &PrimitiveArray, - indices: &PrimitiveArray, -) -> VortexResult { - let new_validity = unsafe { values.validity().take_unchecked(indices.as_ref())? }; - match_each_native_ptype!(values.ptype(), |$V| { - let values = values.maybe_null_slice::<$V>(); - let new_values = match_each_integer_ptype!(indices.ptype(), |$I| { - indices - .maybe_null_slice::<$I>() - .iter() - .cloned() - .map(|idx| unsafe { *values.get_unchecked(idx as usize) }) - .collect() - }); - Ok(PrimitiveArray::from_vec(new_values, new_validity)) - }) -} From 96b795719d195e2b694c88e620b4bb6003263595 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 17 Dec 2024 16:56:17 -0500 Subject: [PATCH 04/20] change a few slices into vecs --- encodings/alp/src/alp_rd/array.rs | 4 +- encodings/alp/src/alp_rd/mod.rs | 4 +- encodings/datetime-parts/src/compress.rs | 2 +- encodings/fastlanes/src/for/compress.rs | 37 ++++++++++--------- encodings/fastlanes/src/for/compute.rs | 8 ++-- vortex-array/src/array/chunked/canonical.rs | 4 +- vortex-array/src/array/varbinview/mod.rs | 2 +- vortex-file/src/read/mask.rs | 24 ++++++++---- .../src/compressors/for.rs | 2 +- 9 files changed, 50 insertions(+), 37 deletions(-) diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index bd6adfa3d4d..0051bae5955 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -204,7 +204,7 @@ impl IntoCanonical for ALPRDArray { left_parts.maybe_null_slice::(), left_parts_dict, self.metadata().right_bit_width, - right_parts.maybe_null_slice::(), + right_parts.into_maybe_null_vec::(), self.left_parts_patches(), )?, self.logical_validity().into_validity(), @@ -215,7 +215,7 @@ impl IntoCanonical for ALPRDArray { left_parts.maybe_null_slice::(), left_parts_dict, self.metadata().right_bit_width, - right_parts.maybe_null_slice::(), + right_parts.into_maybe_null_vec::(), self.left_parts_patches(), )?, self.logical_validity().into_validity(), diff --git a/encodings/alp/src/alp_rd/mod.rs b/encodings/alp/src/alp_rd/mod.rs index 7147fd89196..49a9ec7226f 100644 --- a/encodings/alp/src/alp_rd/mod.rs +++ b/encodings/alp/src/alp_rd/mod.rs @@ -261,7 +261,7 @@ pub fn alp_rd_decode( left_parts: &[u16], left_parts_dict: &[u16], right_bit_width: u8, - right_parts: &[T::UINT], + right_parts: Vec, left_parts_patches: Option, ) -> VortexResult> { if left_parts.len() != right_parts.len() { @@ -290,7 +290,7 @@ pub fn alp_rd_decode( // recombine the left-and-right parts, adjusting by the right_bit_width. Ok(patched_left_parts .into_iter() - .zip(right_parts.iter().copied()) + .zip_eq(right_parts) .map(|(left, right)| { let left = ::from_u16(left); T::from_bits((left << (right_bit_width as usize)) | right) diff --git a/encodings/datetime-parts/src/compress.rs b/encodings/datetime-parts/src/compress.rs index b088f616a72..997b8d0f416 100644 --- a/encodings/datetime-parts/src/compress.rs +++ b/encodings/datetime-parts/src/compress.rs @@ -39,7 +39,7 @@ pub fn split_temporal(array: TemporalArray) -> VortexResult { let mut seconds = Vec::with_capacity(length); let mut subsecond = Vec::with_capacity(length); - for &t in timestamps.maybe_null_slice::().iter() { + for t in timestamps.into_maybe_null_vec::().into_iter() { days.push(t / (86_400 * divisor)); seconds.push((t % (86_400 * divisor)) / divisor); subsecond.push((t % (86_400 * divisor)) % divisor); diff --git a/encodings/fastlanes/src/for/compress.rs b/encodings/fastlanes/src/for/compress.rs index cd0ee76a19d..5cd2c87e050 100644 --- a/encodings/fastlanes/src/for/compress.rs +++ b/encodings/fastlanes/src/for/compress.rs @@ -1,3 +1,4 @@ +use arrow_buffer::ArrowNativeType; use itertools::Itertools; use num_traits::{PrimInt, WrappingAdd, WrappingSub}; use vortex_array::array::{ConstantArray, PrimitiveArray, SparseArray}; @@ -13,7 +14,7 @@ use vortex_scalar::Scalar; use crate::FoRArray; -pub fn for_compress(array: &PrimitiveArray) -> VortexResult { +pub fn for_compress(array: PrimitiveArray) -> VortexResult { let shift = trailing_zeros(array.as_ref()); let min = array .statistics() @@ -27,8 +28,9 @@ pub fn for_compress(array: &PrimitiveArray) -> VortexResult { encoded_zero::<$T>(array.validity().to_logical(array.len()), nullability) .vortex_expect("Failed to encode all zeroes") } else { - compress_primitive::<$T>(&array, shift, $T::try_from(&min)?) - .reinterpret_cast(array.ptype().to_unsigned()) + let unsigned_ptype = array.ptype().to_unsigned(); + compress_primitive::<$T>(array, shift, $T::try_from(&min)?) + .reinterpret_cast(unsigned_ptype) .into_array() } }); @@ -79,28 +81,29 @@ fn encoded_zero( } #[allow(clippy::cast_possible_truncation)] -fn compress_primitive( - parray: &PrimitiveArray, +fn compress_primitive( + parray: PrimitiveArray, shift: u8, min: T, ) -> PrimitiveArray { assert!(shift < T::PTYPE.bit_width() as u8); + let validity = parray.validity(); let values = if shift > 0 { parray - .maybe_null_slice::() - .iter() - .map(|&v| v.wrapping_sub(&min)) + .into_maybe_null_vec::() + .into_iter() + .map(|v| v.wrapping_sub(&min)) .map(|v| v >> shift as usize) .collect_vec() } else { parray - .maybe_null_slice::() - .iter() - .map(|&v| v.wrapping_sub(&min)) + .into_maybe_null_vec::() + .into_iter() + .map(|v| v.wrapping_sub(&min)) .collect_vec() }; - PrimitiveArray::from_vec(values, parray.validity()) + PrimitiveArray::from_vec(values, validity) } pub fn decompress(array: FoRArray) -> VortexResult { @@ -163,7 +166,7 @@ mod test { fn test_compress() { // Create a range offset by a million let array = PrimitiveArray::from((0u32..10_000).map(|v| v + 1_000_000).collect_vec()); - let compressed = for_compress(&array).unwrap(); + let compressed = for_compress(array).unwrap(); assert_eq!( u32::try_from(compressed.reference_scalar()).unwrap(), 1_000_000u32 @@ -175,7 +178,7 @@ mod test { let array = PrimitiveArray::from(vec![0i32; 10_000]); assert!(array.statistics().to_set().into_iter().next().is_none()); - let compressed = for_compress(&array).unwrap(); + let compressed = for_compress(array.clone()).unwrap(); assert_eq!(compressed.dtype(), array.dtype()); assert!(compressed.dtype().is_signed_int()); assert!(compressed.encoded().dtype().is_unsigned_int()); @@ -195,7 +198,7 @@ mod test { ); assert!(array.statistics().to_set().into_iter().next().is_none()); - let compressed = for_compress(&array).unwrap(); + let compressed = for_compress(array.clone()).unwrap(); assert_eq!(compressed.dtype(), array.dtype()); assert!(compressed.dtype().is_signed_int()); assert_eq!( @@ -230,7 +233,7 @@ mod test { .map(|v| v + 1_000_000) .collect_vec(), ); - let compressed = for_compress(&array).unwrap(); + let compressed = for_compress(array.clone()).unwrap(); assert!(compressed.shift() > 0); let decompressed = compressed.into_primitive().unwrap(); assert_eq!( @@ -242,7 +245,7 @@ mod test { #[test] fn test_overflow() { let array = PrimitiveArray::from((i8::MIN..=i8::MAX).collect_vec()); - let compressed = for_compress(&array).unwrap(); + let compressed = for_compress(array.clone()).unwrap(); assert_eq!( i8::MIN, compressed diff --git a/encodings/fastlanes/src/for/compute.rs b/encodings/fastlanes/src/for/compute.rs index 87e269353cd..3973d78c6e9 100644 --- a/encodings/fastlanes/src/for/compute.rs +++ b/encodings/fastlanes/src/for/compute.rs @@ -179,7 +179,7 @@ mod test { #[test] fn for_scalar_at() { - let for_arr = for_compress(&PrimitiveArray::from(vec![-100, 1100, 1500, 1900])).unwrap(); + let for_arr = for_compress(PrimitiveArray::from(vec![-100, 1100, 1500, 1900])).unwrap(); assert_eq!(scalar_at(&for_arr, 0).unwrap(), (-100).into()); assert_eq!(scalar_at(&for_arr, 1).unwrap(), 1100.into()); assert_eq!(scalar_at(&for_arr, 2).unwrap(), 1500.into()); @@ -188,7 +188,7 @@ mod test { #[test] fn for_search() { - let for_arr = for_compress(&PrimitiveArray::from(vec![1100, 1500, 1900])) + let for_arr = for_compress(PrimitiveArray::from(vec![1100, 1500, 1900])) .unwrap() .into_array(); assert_eq!( @@ -207,7 +207,7 @@ mod test { #[test] fn search_with_shift_notfound() { - let for_arr = for_compress(&PrimitiveArray::from(vec![62, 114])) + let for_arr = for_compress(PrimitiveArray::from(vec![62, 114])) .unwrap() .into_array(); assert_eq!( @@ -230,7 +230,7 @@ mod test { #[test] fn search_with_shift_repeated() { - let arr = for_compress(&PrimitiveArray::from(vec![62, 62, 114, 114])) + let arr = for_compress(PrimitiveArray::from(vec![62, 62, 114, 114])) .unwrap() .into_array(); let for_array = FoRArray::try_from(arr.clone()).unwrap(); diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs index 4d1d68918d4..fe673b94c63 100644 --- a/vortex-array/src/array/chunked/canonical.rs +++ b/vortex-array/src/array/chunked/canonical.rs @@ -153,8 +153,8 @@ fn pack_lists(chunks: &[ArrayData], validity: Validity, dtype: &DType) -> Vortex .ok_or_else(|| vortex_err!("List offsets must have at least one element"))?; offsets.extend( offsets_arr - .maybe_null_slice::() - .iter() + .into_maybe_null_vec::() + .into_iter() .skip(1) .map(|off| off + adjustment_from_previous - first_offset_value as i64), ); diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index 4aceaf70cf6..bf13d8231ba 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -477,7 +477,7 @@ impl VarBinViewArray { (view.len() + view_ref.offset()) as usize, )? .into_primitive()?; - Ok(data_buf.maybe_null_slice::().to_vec()) + Ok(data_buf.into_maybe_null_vec::()) } else { // Return access to the range of bytes around it. Ok(view.as_inlined().value().to_vec()) diff --git a/vortex-file/src/read/mask.rs b/vortex-file/src/read/mask.rs index 411a8fc1b96..7ff17ceba08 100644 --- a/vortex-file/src/read/mask.rs +++ b/vortex-file/src/read/mask.rs @@ -93,13 +93,23 @@ impl RowMask { try_cast(array, &DType::Primitive(PType::U64, NonNullable))?.into_primitive()?; // TODO(ngates): should from_indices take u64? - let mask = FilterMask::from_indices( - end - begin, - indices - .maybe_null_slice::() - .iter() - .map(|i| *i as usize), - ); + let mask = if size_of::() == size_of::() { + FilterMask::from_indices( + end - begin, + indices + .into_maybe_null_vec::() + .into_iter() + .map(|i| i as usize), + ) + } else { + FilterMask::from_indices( + end - begin, + indices + .maybe_null_slice::() + .iter() + .map(|i| *i as usize), + ) + }; RowMask::try_new(mask, begin, end) } diff --git a/vortex-sampling-compressor/src/compressors/for.rs b/vortex-sampling-compressor/src/compressors/for.rs index 8db741effaf..ed0ceac19bd 100644 --- a/vortex-sampling-compressor/src/compressors/for.rs +++ b/vortex-sampling-compressor/src/compressors/for.rs @@ -56,7 +56,7 @@ impl EncodingCompressor for FoRCompressor { like: Option>, ctx: SamplingCompressor<'a>, ) -> VortexResult> { - let compressed = for_compress(&array.clone().into_primitive()?)?; + let compressed = for_compress(array.clone().into_primitive()?)?; let compressed_child = ctx.named("for_encoded").excluding(self).compress( &compressed.encoded(), From 9bb4d01af68f3e7098071439edafc994cf5cce21 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 17 Dec 2024 17:07:10 -0500 Subject: [PATCH 05/20] permit the fast path in take --- .../src/array/primitive/compute/take.rs | 76 +++++++++++-------- 1 file changed, 44 insertions(+), 32 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/take.rs b/vortex-array/src/array/primitive/compute/take.rs index dbfc2ee3db3..bf3bf213667 100644 --- a/vortex-array/src/array/primitive/compute/take.rs +++ b/vortex-array/src/array/primitive/compute/take.rs @@ -1,4 +1,3 @@ -use num_traits::AsPrimitive; use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType}; use vortex_error::VortexResult; @@ -15,10 +14,8 @@ impl TakeFn for PrimitiveEncoding { let validity = array.validity().take(indices.as_ref())?; match_each_native_ptype!(array.ptype(), |$T| { - match_each_integer_ptype!(indices.ptype(), |$I| { - let values = take_primitive(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); - Ok(PrimitiveArray::from_vec(values, validity).into_array()) - }) + let values = take_primitive(array.maybe_null_slice::<$T>(), indices); + Ok(PrimitiveArray::from_vec(values, validity).into_array()) }) } @@ -31,48 +28,63 @@ impl TakeFn for PrimitiveEncoding { let validity = unsafe { array.validity().take_unchecked(indices.as_ref())? }; match_each_native_ptype!(array.ptype(), |$T| { - match_each_integer_ptype!(indices.ptype(), |$I| { - let values = take_primitive_unchecked(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); - Ok(PrimitiveArray::from_vec(values, validity).into_array()) - }) + let values = take_primitive_unchecked(array.maybe_null_slice::<$T>(), indices); + Ok(PrimitiveArray::from_vec(values, validity).into_array()) }) } } -// We pass a Vec in case we're T == u64. -// In which case, Rust should reuse the same Vec the result. -fn take_primitive>( - array: &[T], - indices: &[I], -) -> Vec { - indices - .iter() - .cloned() - .map(|idx| array[idx.as_()]) - .collect() +fn take_primitive(array: &[T], indices: PrimitiveArray) -> Vec { + match_each_integer_ptype!(indices.ptype(), |$I| { + if size_of::() == size_of::<$I>() { + // If the sizes match, the memory can be re-used for the result. If the array happens to + // be uniquely owned by us, no copies are done. + indices.into_maybe_null_vec::<$I>() + .into_iter() + .map(|idx| array[idx as usize]) + .collect() + } else { + indices.maybe_null_slice::<$I>() + .iter() + .cloned() + .map(|idx| array[idx as usize]) + .collect() + } + }) } -// We pass a Vec in case we're T == u64. -// In which case, Rust should reuse the same Vec the result. -unsafe fn take_primitive_unchecked>( - array: &[T], - indices: &[I], -) -> Vec { - indices - .iter() - .cloned() - .map(|idx| unsafe { *array.get_unchecked(idx.as_()) }) - .collect() +unsafe fn take_primitive_unchecked(array: &[T], indices: PrimitiveArray) -> Vec { + match_each_integer_ptype!(indices.ptype(), |$I| { + if size_of::() == size_of::<$I>() { + // If the sizes match, the memory can be re-used for the result. If the array happens to + // be uniquely owned by us, no copies are done. + indices.into_maybe_null_vec::<$I>() + .into_iter() + .map(|idx| unsafe { *array.get_unchecked(idx as usize) }) + .collect() + } else { + indices.maybe_null_slice::<$I>() + .iter() + .cloned() + .map(|idx| unsafe { *array.get_unchecked(idx as usize) }) + .collect() + } + }) } #[cfg(test)] mod test { use crate::array::primitive::compute::take::take_primitive; + use crate::array::PrimitiveArray; + use crate::validity::Validity; #[test] fn test_take() { let a = vec![1i32, 2, 3, 4, 5]; - let result = take_primitive(&a, &[0, 0, 4, 2]); + let result = take_primitive( + &a, + PrimitiveArray::from_vec(vec![0, 0, 4, 2], Validity::NonNullable), + ); assert_eq!(result, vec![1i32, 1, 5, 3]); } } From a294b2f3c36e93dae7d4accf0b85fd5faaebc14f Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 17 Dec 2024 17:07:38 -0500 Subject: [PATCH 06/20] merge cruft: slice -> vec --- encodings/fastlanes/src/bitpacking/compute/filter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encodings/fastlanes/src/bitpacking/compute/filter.rs b/encodings/fastlanes/src/bitpacking/compute/filter.rs index 88a72562cdd..7706a3e2d1c 100644 --- a/encodings/fastlanes/src/bitpacking/compute/filter.rs +++ b/encodings/fastlanes/src/bitpacking/compute/filter.rs @@ -188,7 +188,7 @@ mod test { .unwrap() .into_primitive() .unwrap() - .into_maybe_null_slice::(); + .into_maybe_null_vec::(); assert_eq!(filtered, values); } From fc7aaa0de856a187f56a5bb8e1bbe46dc60a8183 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 17 Dec 2024 17:09:54 -0500 Subject: [PATCH 07/20] dummy --- vortex-array/src/array/primitive/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index a4007729677..5caab9f7b84 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -119,7 +119,7 @@ impl PrimitiveArray { /// If there are no other references to the underlying buffer, no data is copied. pub fn into_maybe_null_vec(self) -> Vec { match self.try_into_maybe_null_vec::() { - Ok(_) => todo!(), + O(vector) => vector, Err(array) => { let buffer = array.into_buffer(); let (prefix, values, suffix) = unsafe { buffer.as_ref().align_to::() }; From 4aa4fbe12b5982db2463755b5ecb1f1d952d7735 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 17 Dec 2024 17:10:03 -0500 Subject: [PATCH 08/20] typo --- vortex-array/src/array/primitive/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 5caab9f7b84..e2eb58c14aa 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -119,7 +119,7 @@ impl PrimitiveArray { /// If there are no other references to the underlying buffer, no data is copied. pub fn into_maybe_null_vec(self) -> Vec { match self.try_into_maybe_null_vec::() { - O(vector) => vector, + Ok(vector) => vector, Err(array) => { let buffer = array.into_buffer(); let (prefix, values, suffix) = unsafe { buffer.as_ref().align_to::() }; From c34d1576c94a321677883bd06d217d74defb7d88 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 17 Dec 2024 17:34:25 -0500 Subject: [PATCH 09/20] revert datetime-parts into_maybe_null_slice --- encodings/datetime-parts/src/compress.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encodings/datetime-parts/src/compress.rs b/encodings/datetime-parts/src/compress.rs index 997b8d0f416..b088f616a72 100644 --- a/encodings/datetime-parts/src/compress.rs +++ b/encodings/datetime-parts/src/compress.rs @@ -39,7 +39,7 @@ pub fn split_temporal(array: TemporalArray) -> VortexResult { let mut seconds = Vec::with_capacity(length); let mut subsecond = Vec::with_capacity(length); - for t in timestamps.into_maybe_null_vec::().into_iter() { + for &t in timestamps.maybe_null_slice::().iter() { days.push(t / (86_400 * divisor)); seconds.push((t % (86_400 * divisor)) / divisor); subsecond.push((t % (86_400 * divisor)) % divisor); From 15faaf55329281474b6fa792cfcbb4f0215ae420 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 17 Dec 2024 17:56:34 -0500 Subject: [PATCH 10/20] revert --- encodings/fastlanes/src/for/compress.rs | 26 +++---- encodings/fastlanes/src/for/compute.rs | 8 +-- vortex-array/src/array/bool/patch.rs | 13 ++-- vortex-array/src/array/chunked/canonical.rs | 6 +- .../src/array/primitive/compute/take.rs | 71 +++++++------------ vortex-file/src/read/mask.rs | 26 +++---- .../src/compressors/for.rs | 2 +- 7 files changed, 63 insertions(+), 89 deletions(-) diff --git a/encodings/fastlanes/src/for/compress.rs b/encodings/fastlanes/src/for/compress.rs index 5cd2c87e050..5c500c97be1 100644 --- a/encodings/fastlanes/src/for/compress.rs +++ b/encodings/fastlanes/src/for/compress.rs @@ -14,7 +14,7 @@ use vortex_scalar::Scalar; use crate::FoRArray; -pub fn for_compress(array: PrimitiveArray) -> VortexResult { +pub fn for_compress(array: &PrimitiveArray) -> VortexResult { let shift = trailing_zeros(array.as_ref()); let min = array .statistics() @@ -82,7 +82,7 @@ fn encoded_zero( #[allow(clippy::cast_possible_truncation)] fn compress_primitive( - parray: PrimitiveArray, + parray: &PrimitiveArray, shift: u8, min: T, ) -> PrimitiveArray { @@ -90,16 +90,16 @@ fn compress_primitive( let validity = parray.validity(); let values = if shift > 0 { parray - .into_maybe_null_vec::() - .into_iter() - .map(|v| v.wrapping_sub(&min)) + .maybe_null_slice::() + .iter() + .map(|&v| v.wrapping_sub(&min)) .map(|v| v >> shift as usize) .collect_vec() } else { parray - .into_maybe_null_vec::() - .into_iter() - .map(|v| v.wrapping_sub(&min)) + .maybe_null_slice::() + .iter() + .map(|&v| v.wrapping_sub(&min)) .collect_vec() }; @@ -166,7 +166,7 @@ mod test { fn test_compress() { // Create a range offset by a million let array = PrimitiveArray::from((0u32..10_000).map(|v| v + 1_000_000).collect_vec()); - let compressed = for_compress(array).unwrap(); + let compressed = for_compress(&array).unwrap(); assert_eq!( u32::try_from(compressed.reference_scalar()).unwrap(), 1_000_000u32 @@ -178,7 +178,7 @@ mod test { let array = PrimitiveArray::from(vec![0i32; 10_000]); assert!(array.statistics().to_set().into_iter().next().is_none()); - let compressed = for_compress(array.clone()).unwrap(); + let compressed = for_compress(&array.clone()).unwrap(); assert_eq!(compressed.dtype(), array.dtype()); assert!(compressed.dtype().is_signed_int()); assert!(compressed.encoded().dtype().is_unsigned_int()); @@ -198,7 +198,7 @@ mod test { ); assert!(array.statistics().to_set().into_iter().next().is_none()); - let compressed = for_compress(array.clone()).unwrap(); + let compressed = for_compress(&array.clone()).unwrap(); assert_eq!(compressed.dtype(), array.dtype()); assert!(compressed.dtype().is_signed_int()); assert_eq!( @@ -233,7 +233,7 @@ mod test { .map(|v| v + 1_000_000) .collect_vec(), ); - let compressed = for_compress(array.clone()).unwrap(); + let compressed = for_compress(&array.clone()).unwrap(); assert!(compressed.shift() > 0); let decompressed = compressed.into_primitive().unwrap(); assert_eq!( @@ -245,7 +245,7 @@ mod test { #[test] fn test_overflow() { let array = PrimitiveArray::from((i8::MIN..=i8::MAX).collect_vec()); - let compressed = for_compress(array.clone()).unwrap(); + let compressed = for_compress(&array.clone()).unwrap(); assert_eq!( i8::MIN, compressed diff --git a/encodings/fastlanes/src/for/compute.rs b/encodings/fastlanes/src/for/compute.rs index 3973d78c6e9..87e269353cd 100644 --- a/encodings/fastlanes/src/for/compute.rs +++ b/encodings/fastlanes/src/for/compute.rs @@ -179,7 +179,7 @@ mod test { #[test] fn for_scalar_at() { - let for_arr = for_compress(PrimitiveArray::from(vec![-100, 1100, 1500, 1900])).unwrap(); + let for_arr = for_compress(&PrimitiveArray::from(vec![-100, 1100, 1500, 1900])).unwrap(); assert_eq!(scalar_at(&for_arr, 0).unwrap(), (-100).into()); assert_eq!(scalar_at(&for_arr, 1).unwrap(), 1100.into()); assert_eq!(scalar_at(&for_arr, 2).unwrap(), 1500.into()); @@ -188,7 +188,7 @@ mod test { #[test] fn for_search() { - let for_arr = for_compress(PrimitiveArray::from(vec![1100, 1500, 1900])) + let for_arr = for_compress(&PrimitiveArray::from(vec![1100, 1500, 1900])) .unwrap() .into_array(); assert_eq!( @@ -207,7 +207,7 @@ mod test { #[test] fn search_with_shift_notfound() { - let for_arr = for_compress(PrimitiveArray::from(vec![62, 114])) + let for_arr = for_compress(&PrimitiveArray::from(vec![62, 114])) .unwrap() .into_array(); assert_eq!( @@ -230,7 +230,7 @@ mod test { #[test] fn search_with_shift_repeated() { - let arr = for_compress(PrimitiveArray::from(vec![62, 62, 114, 114])) + let arr = for_compress(&PrimitiveArray::from(vec![62, 62, 114, 114])) .unwrap() .into_array(); let for_array = FoRArray::try_from(arr.clone()).unwrap(); diff --git a/vortex-array/src/array/bool/patch.rs b/vortex-array/src/array/bool/patch.rs index ebff7466ce3..7af747b3a5c 100644 --- a/vortex-array/src/array/bool/patch.rs +++ b/vortex-array/src/array/bool/patch.rs @@ -1,6 +1,6 @@ use itertools::Itertools; use vortex_dtype::match_each_integer_ptype; -use vortex_error::VortexResult; +use vortex_error::{vortex_err, VortexExpect as _, VortexResult}; use crate::array::BoolArray; use crate::patches::Patches; @@ -20,12 +20,13 @@ impl BoolArray { let (mut own_values, bit_offset) = self.into_boolean_builder(); match_each_integer_ptype!(indices.ptype(), |$I| { - for (idx, value) in indices - .maybe_null_slice::<$I>() - .iter() - .zip_eq(values.boolean_buffer().iter()) + let indices_vec = indices + .try_into_maybe_null_vec::<$I>() + .map_err(|_| vortex_err!("could not zero copy patched left_parts_decoded")) + .vortex_expect("there are no aliases to this primitive array"); + for (idx, value) in indices_vec.into_iter().zip_eq(values.boolean_buffer().iter()) { - own_values.set_bit(*idx as usize + bit_offset, value); + own_values.set_bit(idx as usize + bit_offset, value); } }); diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs index fe673b94c63..7815afa0ac9 100644 --- a/vortex-array/src/array/chunked/canonical.rs +++ b/vortex-array/src/array/chunked/canonical.rs @@ -153,10 +153,10 @@ fn pack_lists(chunks: &[ArrayData], validity: Validity, dtype: &DType) -> Vortex .ok_or_else(|| vortex_err!("List offsets must have at least one element"))?; offsets.extend( offsets_arr - .into_maybe_null_vec::() - .into_iter() + .maybe_null_slice::() + .iter() .skip(1) - .map(|off| off + adjustment_from_previous - first_offset_value as i64), + .map(|off| *off + adjustment_from_previous - first_offset_value as i64), ); } let chunked_elements = ChunkedArray::try_new(elements, elem_dtype.clone())?.into_array(); diff --git a/vortex-array/src/array/primitive/compute/take.rs b/vortex-array/src/array/primitive/compute/take.rs index bf3bf213667..4c2df9a37f9 100644 --- a/vortex-array/src/array/primitive/compute/take.rs +++ b/vortex-array/src/array/primitive/compute/take.rs @@ -1,3 +1,4 @@ +use num_traits::AsPrimitive; use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType}; use vortex_error::VortexResult; @@ -14,8 +15,10 @@ impl TakeFn for PrimitiveEncoding { let validity = array.validity().take(indices.as_ref())?; match_each_native_ptype!(array.ptype(), |$T| { - let values = take_primitive(array.maybe_null_slice::<$T>(), indices); - Ok(PrimitiveArray::from_vec(values, validity).into_array()) + match_each_integer_ptype!(indices.ptype(), |$I| { + let values = take_primitive(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); + Ok(PrimitiveArray::from_vec(values, validity).into_array()) + }) }) } @@ -28,63 +31,43 @@ impl TakeFn for PrimitiveEncoding { let validity = unsafe { array.validity().take_unchecked(indices.as_ref())? }; match_each_native_ptype!(array.ptype(), |$T| { - let values = take_primitive_unchecked(array.maybe_null_slice::<$T>(), indices); - Ok(PrimitiveArray::from_vec(values, validity).into_array()) + match_each_integer_ptype!(indices.ptype(), |$I| { + let values = take_primitive_unchecked(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); + Ok(PrimitiveArray::from_vec(values, validity).into_array()) + }) }) } } -fn take_primitive(array: &[T], indices: PrimitiveArray) -> Vec { - match_each_integer_ptype!(indices.ptype(), |$I| { - if size_of::() == size_of::<$I>() { - // If the sizes match, the memory can be re-used for the result. If the array happens to - // be uniquely owned by us, no copies are done. - indices.into_maybe_null_vec::<$I>() - .into_iter() - .map(|idx| array[idx as usize]) - .collect() - } else { - indices.maybe_null_slice::<$I>() - .iter() - .cloned() - .map(|idx| array[idx as usize]) - .collect() - } - }) +// We pass a Vec in case we're T == u64. +// In which case, Rust should reuse the same Vec the result. +fn take_primitive>( + array: &[T], + indices: &[I], +) -> Vec { + indices.iter().map(|idx| array[idx.as_()]).collect() } -unsafe fn take_primitive_unchecked(array: &[T], indices: PrimitiveArray) -> Vec { - match_each_integer_ptype!(indices.ptype(), |$I| { - if size_of::() == size_of::<$I>() { - // If the sizes match, the memory can be re-used for the result. If the array happens to - // be uniquely owned by us, no copies are done. - indices.into_maybe_null_vec::<$I>() - .into_iter() - .map(|idx| unsafe { *array.get_unchecked(idx as usize) }) - .collect() - } else { - indices.maybe_null_slice::<$I>() - .iter() - .cloned() - .map(|idx| unsafe { *array.get_unchecked(idx as usize) }) - .collect() - } - }) +// We pass a Vec in case we're T == u64. +// In which case, Rust should reuse the same Vec the result. +unsafe fn take_primitive_unchecked>( + array: &[T], + indices: &[I], +) -> Vec { + indices + .iter() + .map(|idx| unsafe { *array.get_unchecked(idx.as_()) }) + .collect() } #[cfg(test)] mod test { use crate::array::primitive::compute::take::take_primitive; - use crate::array::PrimitiveArray; - use crate::validity::Validity; #[test] fn test_take() { let a = vec![1i32, 2, 3, 4, 5]; - let result = take_primitive( - &a, - PrimitiveArray::from_vec(vec![0, 0, 4, 2], Validity::NonNullable), - ); + let result = take_primitive(&a, &[0, 0, 4, 2]); assert_eq!(result, vec![1i32, 1, 5, 3]); } } diff --git a/vortex-file/src/read/mask.rs b/vortex-file/src/read/mask.rs index 7ff17ceba08..e5b2b9b8f4c 100644 --- a/vortex-file/src/read/mask.rs +++ b/vortex-file/src/read/mask.rs @@ -92,24 +92,14 @@ impl RowMask { let indices = try_cast(array, &DType::Primitive(PType::U64, NonNullable))?.into_primitive()?; - // TODO(ngates): should from_indices take u64? - let mask = if size_of::() == size_of::() { - FilterMask::from_indices( - end - begin, - indices - .into_maybe_null_vec::() - .into_iter() - .map(|i| i as usize), - ) - } else { - FilterMask::from_indices( - end - begin, - indices - .maybe_null_slice::() - .iter() - .map(|i| *i as usize), - ) - }; + let mask = FilterMask::from_indices( + end - begin, + // TODO(ngates): should from_indices take u64? + indices + .maybe_null_slice::() + .iter() + .map(|i| *i as usize), + ); RowMask::try_new(mask, begin, end) } diff --git a/vortex-sampling-compressor/src/compressors/for.rs b/vortex-sampling-compressor/src/compressors/for.rs index ed0ceac19bd..8db741effaf 100644 --- a/vortex-sampling-compressor/src/compressors/for.rs +++ b/vortex-sampling-compressor/src/compressors/for.rs @@ -56,7 +56,7 @@ impl EncodingCompressor for FoRCompressor { like: Option>, ctx: SamplingCompressor<'a>, ) -> VortexResult> { - let compressed = for_compress(array.clone().into_primitive()?)?; + let compressed = for_compress(&array.clone().into_primitive()?)?; let compressed_child = ctx.named("for_encoded").excluding(self).compress( &compressed.encoded(), From 4376785b8a027ab66dd1fc2df92c7d055664ce1a Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 18 Dec 2024 10:45:24 -0500 Subject: [PATCH 11/20] these are not zero copy sites --- vortex-array/src/array/bool/patch.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/vortex-array/src/array/bool/patch.rs b/vortex-array/src/array/bool/patch.rs index 7af747b3a5c..fccd1c68533 100644 --- a/vortex-array/src/array/bool/patch.rs +++ b/vortex-array/src/array/bool/patch.rs @@ -1,6 +1,6 @@ use itertools::Itertools; use vortex_dtype::match_each_integer_ptype; -use vortex_error::{vortex_err, VortexExpect as _, VortexResult}; +use vortex_error::VortexResult; use crate::array::BoolArray; use crate::patches::Patches; @@ -21,12 +21,10 @@ impl BoolArray { let (mut own_values, bit_offset) = self.into_boolean_builder(); match_each_integer_ptype!(indices.ptype(), |$I| { let indices_vec = indices - .try_into_maybe_null_vec::<$I>() - .map_err(|_| vortex_err!("could not zero copy patched left_parts_decoded")) - .vortex_expect("there are no aliases to this primitive array"); - for (idx, value) in indices_vec.into_iter().zip_eq(values.boolean_buffer().iter()) + .maybe_null_slice::<$I>(); + for (idx, value) in indices_vec.iter().zip_eq(values.boolean_buffer().iter()) { - own_values.set_bit(idx as usize + bit_offset, value); + own_values.set_bit(*idx as usize + bit_offset, value); } }); From d087cbe360b938a37a2505e435f6c48b1eb64ded Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 18 Dec 2024 10:51:02 -0500 Subject: [PATCH 12/20] redundant clones --- encodings/fastlanes/src/for/compress.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/encodings/fastlanes/src/for/compress.rs b/encodings/fastlanes/src/for/compress.rs index 5c500c97be1..53ce7c7f5da 100644 --- a/encodings/fastlanes/src/for/compress.rs +++ b/encodings/fastlanes/src/for/compress.rs @@ -28,9 +28,8 @@ pub fn for_compress(array: &PrimitiveArray) -> VortexResult { encoded_zero::<$T>(array.validity().to_logical(array.len()), nullability) .vortex_expect("Failed to encode all zeroes") } else { - let unsigned_ptype = array.ptype().to_unsigned(); compress_primitive::<$T>(array, shift, $T::try_from(&min)?) - .reinterpret_cast(unsigned_ptype) + .reinterpret_cast(array.ptype().to_unsigned()) .into_array() } }); @@ -178,7 +177,7 @@ mod test { let array = PrimitiveArray::from(vec![0i32; 10_000]); assert!(array.statistics().to_set().into_iter().next().is_none()); - let compressed = for_compress(&array.clone()).unwrap(); + let compressed = for_compress(&array).unwrap(); assert_eq!(compressed.dtype(), array.dtype()); assert!(compressed.dtype().is_signed_int()); assert!(compressed.encoded().dtype().is_unsigned_int()); @@ -198,7 +197,7 @@ mod test { ); assert!(array.statistics().to_set().into_iter().next().is_none()); - let compressed = for_compress(&array.clone()).unwrap(); + let compressed = for_compress(&array).unwrap(); assert_eq!(compressed.dtype(), array.dtype()); assert!(compressed.dtype().is_signed_int()); assert_eq!( @@ -233,7 +232,7 @@ mod test { .map(|v| v + 1_000_000) .collect_vec(), ); - let compressed = for_compress(&array.clone()).unwrap(); + let compressed = for_compress(&array).unwrap(); assert!(compressed.shift() > 0); let decompressed = compressed.into_primitive().unwrap(); assert_eq!( @@ -245,7 +244,7 @@ mod test { #[test] fn test_overflow() { let array = PrimitiveArray::from((i8::MIN..=i8::MAX).collect_vec()); - let compressed = for_compress(&array.clone()).unwrap(); + let compressed = for_compress(&array).unwrap(); assert_eq!( i8::MIN, compressed From 07ea63ea1e58a2f5fc7b9f837a0489b67c151152 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 18 Dec 2024 11:30:51 -0500 Subject: [PATCH 13/20] try to get a win in take_search --- encodings/fastlanes/src/for/compress.rs | 6 ++---- vortex-array/src/array/bool/patch.rs | 7 ++++--- vortex-array/src/patches.rs | 18 ++++++++++++------ 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/encodings/fastlanes/src/for/compress.rs b/encodings/fastlanes/src/for/compress.rs index 53ce7c7f5da..d3a6acdf229 100644 --- a/encodings/fastlanes/src/for/compress.rs +++ b/encodings/fastlanes/src/for/compress.rs @@ -1,4 +1,3 @@ -use arrow_buffer::ArrowNativeType; use itertools::Itertools; use num_traits::{PrimInt, WrappingAdd, WrappingSub}; use vortex_array::array::{ConstantArray, PrimitiveArray, SparseArray}; @@ -80,13 +79,12 @@ fn encoded_zero( } #[allow(clippy::cast_possible_truncation)] -fn compress_primitive( +fn compress_primitive( parray: &PrimitiveArray, shift: u8, min: T, ) -> PrimitiveArray { assert!(shift < T::PTYPE.bit_width() as u8); - let validity = parray.validity(); let values = if shift > 0 { parray .maybe_null_slice::() @@ -102,7 +100,7 @@ fn compress_primitive( .collect_vec() }; - PrimitiveArray::from_vec(values, validity) + PrimitiveArray::from_vec(values, parray.validity()) } pub fn decompress(array: FoRArray) -> VortexResult { diff --git a/vortex-array/src/array/bool/patch.rs b/vortex-array/src/array/bool/patch.rs index fccd1c68533..ebff7466ce3 100644 --- a/vortex-array/src/array/bool/patch.rs +++ b/vortex-array/src/array/bool/patch.rs @@ -20,9 +20,10 @@ impl BoolArray { let (mut own_values, bit_offset) = self.into_boolean_builder(); match_each_integer_ptype!(indices.ptype(), |$I| { - let indices_vec = indices - .maybe_null_slice::<$I>(); - for (idx, value) in indices_vec.iter().zip_eq(values.boolean_buffer().iter()) + for (idx, value) in indices + .maybe_null_slice::<$I>() + .iter() + .zip_eq(values.boolean_buffer().iter()) { own_values.set_bit(*idx as usize + bit_offset, value); } diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index 53009254720..676fe8c35fc 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -202,7 +202,7 @@ impl Patches { let flat_indices = self.indices().clone().into_primitive()?; match_each_integer_ptype!(flat_indices.ptype(), |$I| { - for (value_idx, coordinate) in flat_indices.maybe_null_slice::<$I>().into_iter().enumerate() { + for (value_idx, coordinate) in flat_indices.maybe_null_slice::<$I>().iter().enumerate() { let coordinate = *coordinate as usize; if buffer.value(coordinate) { // We count the number of truthy values between this coordinate and the previous truthy one @@ -267,11 +267,17 @@ impl Patches { pub fn take_search(&self, take_indices: PrimitiveArray) -> VortexResult> { let new_length = take_indices.len(); let take_indices = match_each_integer_ptype!(take_indices.ptype(), |$P| { - take_indices - .maybe_null_slice::<$P>() - .into_iter() - .map(|x| usize::try_from(*x)) - .collect::, _>>()? + match take_indices.try_into_maybe_null_vec::<$P>() { + Ok(vec) => vec + .into_iter() + .map(usize::try_from) + .collect::, _>>()?, + Err(take_indices) => take_indices + .maybe_null_slice::<$P>() + .iter() + .map(|x| usize::try_from(*x)) + .collect::, _>>()?, + } }); let (values_indices, new_indices): (Vec, Vec) = From a2c56e8a82842aba386e0c63214d2672e1cccf7f Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 18 Dec 2024 18:06:32 -0500 Subject: [PATCH 14/20] update comments --- vortex-array/src/array/primitive/compute/take.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/take.rs b/vortex-array/src/array/primitive/compute/take.rs index 4c2df9a37f9..3cb678e3da6 100644 --- a/vortex-array/src/array/primitive/compute/take.rs +++ b/vortex-array/src/array/primitive/compute/take.rs @@ -14,6 +14,12 @@ impl TakeFn for PrimitiveEncoding { let indices = indices.clone().into_primitive()?; let validity = array.validity().take(indices.as_ref())?; + // FIXME(DK): we could save an allocation and re-use memory if one of these were true: + // + // 1. We take the array as owned and there are no other references to the underyling buffer. + // + // 2. We take the indices as owned, there are no other references to the underlying indices + // buffer, and the indices bit-width matches the array's bit-width. match_each_native_ptype!(array.ptype(), |$T| { match_each_integer_ptype!(indices.ptype(), |$I| { let values = take_primitive(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); @@ -30,6 +36,12 @@ impl TakeFn for PrimitiveEncoding { let indices = indices.clone().into_primitive()?; let validity = unsafe { array.validity().take_unchecked(indices.as_ref())? }; + // FIXME(DK): we could save an allocation and re-use memory if one of these were true: + // + // 1. We take the array as owned and there are no other references to the underyling buffer. + // + // 2. We take the indices as owned, there are no other references to the underlying indices + // buffer, and the indices bit-width matches the array's bit-width. match_each_native_ptype!(array.ptype(), |$T| { match_each_integer_ptype!(indices.ptype(), |$I| { let values = take_primitive_unchecked(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); @@ -39,8 +51,6 @@ impl TakeFn for PrimitiveEncoding { } } -// We pass a Vec in case we're T == u64. -// In which case, Rust should reuse the same Vec the result. fn take_primitive>( array: &[T], indices: &[I], @@ -48,8 +58,6 @@ fn take_primitive>( indices.iter().map(|idx| array[idx.as_()]).collect() } -// We pass a Vec in case we're T == u64. -// In which case, Rust should reuse the same Vec the result. unsafe fn take_primitive_unchecked>( array: &[T], indices: &[I], From cde868c23ffdd50324f8ba1fd64eb354f1fd3c43 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 18 Dec 2024 20:06:04 -0500 Subject: [PATCH 15/20] fix comment --- .../src/array/primitive/compute/take.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/take.rs b/vortex-array/src/array/primitive/compute/take.rs index 3cb678e3da6..081f23cc41a 100644 --- a/vortex-array/src/array/primitive/compute/take.rs +++ b/vortex-array/src/array/primitive/compute/take.rs @@ -14,12 +14,9 @@ impl TakeFn for PrimitiveEncoding { let indices = indices.clone().into_primitive()?; let validity = array.validity().take(indices.as_ref())?; - // FIXME(DK): we could save an allocation and re-use memory if one of these were true: - // - // 1. We take the array as owned and there are no other references to the underyling buffer. - // - // 2. We take the indices as owned, there are no other references to the underlying indices - // buffer, and the indices bit-width matches the array's bit-width. + // FIXME(DK): we could save an allocation and re-use memory if: we take the indices as + // owned, there are no other references to the underlying indices buffer, and the indices + // bit-width matches the array's bit-width. match_each_native_ptype!(array.ptype(), |$T| { match_each_integer_ptype!(indices.ptype(), |$I| { let values = take_primitive(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); @@ -36,12 +33,9 @@ impl TakeFn for PrimitiveEncoding { let indices = indices.clone().into_primitive()?; let validity = unsafe { array.validity().take_unchecked(indices.as_ref())? }; - // FIXME(DK): we could save an allocation and re-use memory if one of these were true: - // - // 1. We take the array as owned and there are no other references to the underyling buffer. - // - // 2. We take the indices as owned, there are no other references to the underlying indices - // buffer, and the indices bit-width matches the array's bit-width. + // FIXME(DK): we could save an allocation and re-use memory if: We take the indices as + // owned, there are no other references to the underlying indices buffer, and the indices + // bit-width matches the array's bit-width. match_each_native_ptype!(array.ptype(), |$T| { match_each_integer_ptype!(indices.ptype(), |$I| { let values = take_primitive_unchecked(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>()); From 89fcb14bc273fa29c35cc3460501e07f4c076bc2 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 18 Dec 2024 20:12:10 -0500 Subject: [PATCH 16/20] better expect message --- encodings/alp/src/alp_rd/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encodings/alp/src/alp_rd/mod.rs b/encodings/alp/src/alp_rd/mod.rs index 49a9ec7226f..ec0b3f8f974 100644 --- a/encodings/alp/src/alp_rd/mod.rs +++ b/encodings/alp/src/alp_rd/mod.rs @@ -283,7 +283,7 @@ pub fn alp_rd_decode( .patch(patches)? .try_into_maybe_null_vec::() .map_err(|_| vortex_err!("could not zero copy patched left_parts_decoded")) - .vortex_expect("there are no aliases to this primitive array"), + .vortex_expect("the buffer backing this PrimitiveArray is uniquely derived from a Vec uniquely owned by this method"), None => left_parts_decoded, }; From 105070289631fcbfc461188348d58e75131a5683 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 18 Dec 2024 20:14:22 -0500 Subject: [PATCH 17/20] slim down diff --- vortex-array/src/array/chunked/canonical.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs index 7815afa0ac9..4d1d68918d4 100644 --- a/vortex-array/src/array/chunked/canonical.rs +++ b/vortex-array/src/array/chunked/canonical.rs @@ -156,7 +156,7 @@ fn pack_lists(chunks: &[ArrayData], validity: Validity, dtype: &DType) -> Vortex .maybe_null_slice::() .iter() .skip(1) - .map(|off| *off + adjustment_from_previous - first_offset_value as i64), + .map(|off| off + adjustment_from_previous - first_offset_value as i64), ); } let chunked_elements = ChunkedArray::try_new(elements, elem_dtype.clone())?.into_array(); From d83f473e1b8cafc4dae1134c72b5023be01f8885 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 18 Dec 2024 20:19:08 -0500 Subject: [PATCH 18/20] slim diff --- vortex-file/src/read/mask.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-file/src/read/mask.rs b/vortex-file/src/read/mask.rs index e5b2b9b8f4c..411a8fc1b96 100644 --- a/vortex-file/src/read/mask.rs +++ b/vortex-file/src/read/mask.rs @@ -92,9 +92,9 @@ impl RowMask { let indices = try_cast(array, &DType::Primitive(PType::U64, NonNullable))?.into_primitive()?; + // TODO(ngates): should from_indices take u64? let mask = FilterMask::from_indices( end - begin, - // TODO(ngates): should from_indices take u64? indices .maybe_null_slice::() .iter() From 1ae0831269270a507f4eb881f76d56399585e5b4 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 18 Dec 2024 20:50:44 -0500 Subject: [PATCH 19/20] more options --- encodings/alp/src/alp/compress.rs | 2 + encodings/alp/src/alp_rd/mod.rs | 4 ++ encodings/fastlanes/src/delta/compress.rs | 3 ++ encodings/fastlanes/src/for/compress.rs | 3 ++ encodings/runend-bool/src/compute/mod.rs | 49 ++++++++++++++++------- 5 files changed, 47 insertions(+), 14 deletions(-) diff --git a/encodings/alp/src/alp/compress.rs b/encodings/alp/src/alp/compress.rs index 1e4e0db6cb0..53459860af7 100644 --- a/encodings/alp/src/alp/compress.rs +++ b/encodings/alp/src/alp/compress.rs @@ -69,6 +69,8 @@ pub fn decompress(array: ALPArray) -> VortexResult { let ptype = array.dtype().try_into()?; let decoded = match_each_alp_float_ptype!(ptype, |$T| { PrimitiveArray::from_vec( + // If encoded is uniquely owned (e.g. it was just decompressed by into_primitive), Rust + // will re-use the allocation <$T>::decode_vec(encoded.into_maybe_null_vec(), array.exponents()), validity, ) diff --git a/encodings/alp/src/alp_rd/mod.rs b/encodings/alp/src/alp_rd/mod.rs index ec0b3f8f974..51ee8595a32 100644 --- a/encodings/alp/src/alp_rd/mod.rs +++ b/encodings/alp/src/alp_rd/mod.rs @@ -254,6 +254,10 @@ impl RDEncoder { /// Decode a vector of ALP-RD encoded values back into their original floating point format. /// +/// `left_parts_patches` is taken as an owned vector because we always need an allocation of that +/// size for the result and Rust will re-use the allocation (which has compatible length and +/// bit-width).sr +/// /// # Panics /// /// The function panics if the provided `left_parts` and `right_parts` differ in length. diff --git a/encodings/fastlanes/src/delta/compress.rs b/encodings/fastlanes/src/delta/compress.rs index 3941d2edcac..e98ba8e1195 100644 --- a/encodings/fastlanes/src/delta/compress.rs +++ b/encodings/fastlanes/src/delta/compress.rs @@ -110,6 +110,9 @@ pub fn delta_decompress(array: DeltaArray) -> VortexResult { slice(decoded, array.offset(), array.offset() + array.len())?.into_primitive() } +// FIXME(DK): This method may benefit from taking deltas as an owned vector. Rust could re-use that +// allocation for `output` but proving that is a bit complex compared to +// `vector.into_iter().map().collect()`. fn decompress_primitive( bases: &[T], deltas: &[T], diff --git a/encodings/fastlanes/src/for/compress.rs b/encodings/fastlanes/src/for/compress.rs index d3a6acdf229..0093840d635 100644 --- a/encodings/fastlanes/src/for/compress.rs +++ b/encodings/fastlanes/src/for/compress.rs @@ -128,6 +128,9 @@ pub fn decompress(array: FoRArray) -> VortexResult { })) } +/// Decompresses a frame-of-reference encoded vector given the shift and the bias (`min`). +/// +/// `values` is taken as an owned vector because we always need an allocation of that size. fn decompress_primitive( values: Vec, min: T, diff --git a/encodings/runend-bool/src/compute/mod.rs b/encodings/runend-bool/src/compute/mod.rs index b21d9d73115..d86b041a2c0 100644 --- a/encodings/runend-bool/src/compute/mod.rs +++ b/encodings/runend-bool/src/compute/mod.rs @@ -1,11 +1,12 @@ mod invert; -use arrow_buffer::BooleanBuffer; -use vortex_array::array::BoolArray; +use arrow_buffer::{ArrowNativeType, BooleanBuffer}; +use num_traits::AsPrimitive; +use vortex_array::array::{BoolArray, PrimitiveArray}; use vortex_array::compute::{slice, ComputeVTable, InvertFn, ScalarAtFn, SliceFn, TakeFn}; use vortex_array::variants::PrimitiveArrayTrait; use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; -use vortex_dtype::match_each_integer_ptype; +use vortex_dtype::{match_each_integer_ptype, NativePType}; use vortex_error::{vortex_bail, VortexResult}; use vortex_scalar::Scalar; @@ -41,17 +42,7 @@ impl TakeFn for RunEndBoolEncoding { fn take(&self, array: &RunEndBoolArray, indices: &ArrayData) -> VortexResult { let primitive_indices = indices.clone().into_primitive()?; let physical_indices = match_each_integer_ptype!(primitive_indices.ptype(), |$P| { - primitive_indices - .maybe_null_slice::<$P>() - .into_iter() - .map(|idx| *idx as usize) - .map(|idx| { - if idx >= array.len() { - vortex_bail!(OutOfBounds: idx, 0, array.len()) - } - array.find_physical_index(idx) - }) - .collect::>>()? + valid_physical_indices::<$P>(array, primitive_indices)? }); let start = array.start(); BoolArray::try_new( @@ -66,6 +57,36 @@ impl TakeFn for RunEndBoolEncoding { } } +fn valid_physical_indices>( + array: &RunEndBoolArray, + primitive_indices: PrimitiveArray, +) -> VortexResult> { + match primitive_indices.try_into_maybe_null_vec::

() { + // This allocation can be re-used when P's width matches usize's. + Ok(vector) => vector + .into_iter() + .map(|idx| idx.as_()) + .map(|idx| { + if idx >= array.len() { + vortex_bail!(OutOfBounds: idx, 0, array.len()) + } + array.find_physical_index(idx) + }) + .collect::>>(), + Err(primitive_indices) => primitive_indices + .maybe_null_slice::

() + .iter() + .map(|idx| (*idx).as_()) + .map(|idx| { + if idx >= array.len() { + vortex_bail!(OutOfBounds: idx, 0, array.len()) + } + array.find_physical_index(idx) + }) + .collect::>>(), + } +} + impl SliceFn for RunEndBoolEncoding { fn slice(&self, array: &RunEndBoolArray, start: usize, stop: usize) -> VortexResult { let new_length = stop - start; From bb85268e157114560b1c49570bcf6a5c1de46168 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Fri, 27 Dec 2024 12:00:27 -0500 Subject: [PATCH 20/20] reuse allocations in datetime-parts --- encodings/datetime-parts/src/compress.rs | 25 ++++++++++++++++----- encodings/datetime-parts/src/compute/mod.rs | 2 +- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/encodings/datetime-parts/src/compress.rs b/encodings/datetime-parts/src/compress.rs index b088f616a72..fdb3c108fef 100644 --- a/encodings/datetime-parts/src/compress.rs +++ b/encodings/datetime-parts/src/compress.rs @@ -35,15 +35,28 @@ pub fn split_temporal(array: TemporalArray) -> VortexResult { }; let length = timestamps.len(); - let mut days = Vec::with_capacity(length); let mut seconds = Vec::with_capacity(length); let mut subsecond = Vec::with_capacity(length); - for &t in timestamps.maybe_null_slice::().iter() { - days.push(t / (86_400 * divisor)); - seconds.push((t % (86_400 * divisor)) / divisor); - subsecond.push((t % (86_400 * divisor)) % divisor); - } + let days = match timestamps.try_into_maybe_null_vec::() { + Ok(vector) => vector + .into_iter() + .map(|t| { + seconds.push((t % (86_400 * divisor)) / divisor); + subsecond.push((t % (86_400 * divisor)) % divisor); + t / (86_400 * divisor) + }) + .collect::>(), + Err(timestamps) => timestamps + .maybe_null_slice::() + .iter() + .map(|t| { + seconds.push((t % (86_400 * divisor)) / divisor); + subsecond.push((t % (86_400 * divisor)) % divisor); + t / (86_400 * divisor) + }) + .collect::>(), + }; Ok(TemporalParts { days: PrimitiveArray::from_vec(days, validity).into_array(), diff --git a/encodings/datetime-parts/src/compute/mod.rs b/encodings/datetime-parts/src/compute/mod.rs index 5af7d2d814e..dbf08840683 100644 --- a/encodings/datetime-parts/src/compute/mod.rs +++ b/encodings/datetime-parts/src/compute/mod.rs @@ -118,7 +118,7 @@ pub fn decode_to_temporal(array: &DateTimePartsArray) -> VortexResult = days_buf - .into_maybe_null_vec::() + .into_maybe_null_vec::() // attempt to reuse the i64 allocation for values .into_iter() .map(|d| d * 86_400 * divisor) .collect();