Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion encodings/alp/src/alp/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ pub fn decompress(array: ALPArray) -> VortexResult<PrimitiveArray> {
let ptype = array.dtype().try_into()?;
let decoded = match_each_alp_float_ptype!(ptype, |$T| {
PrimitiveArray::from_vec(
<$T>::decode_vec(encoded.into_maybe_null_slice(), array.exponents()),
// If encoded is uniquely owned (e.g. it was just decompressed by into_primitive), Rust
// will re-use the allocation
<$T>::decode_vec(encoded.into_maybe_null_vec(), array.exponents()),
validity,
)
});
Expand Down
4 changes: 2 additions & 2 deletions encodings/alp/src/alp_rd/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ impl IntoCanonical for ALPRDArray {
left_parts.maybe_null_slice::<u16>(),
left_parts_dict,
self.metadata().right_bit_width,
right_parts.maybe_null_slice::<u32>(),
right_parts.into_maybe_null_vec::<u32>(),
self.left_parts_patches(),
)?,
self.logical_validity().into_validity(),
Expand All @@ -215,7 +215,7 @@ impl IntoCanonical for ALPRDArray {
left_parts.maybe_null_slice::<u16>(),
left_parts_dict,
self.metadata().right_bit_width,
right_parts.maybe_null_slice::<u64>(),
right_parts.into_maybe_null_vec::<u64>(),
self.left_parts_patches(),
)?,
self.logical_validity().into_validity(),
Expand Down
14 changes: 10 additions & 4 deletions encodings/alp/src/alp_rd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use vortex_array::aliases::hash_map::HashMap;
use vortex_array::array::PrimitiveArray;
use vortex_array::{ArrayDType, IntoArrayData};
use vortex_dtype::{DType, NativePType};
use vortex_error::{vortex_bail, VortexExpect, VortexResult, VortexUnwrap};
use vortex_error::{vortex_bail, vortex_err, VortexExpect, VortexResult, VortexUnwrap};
use vortex_fastlanes::bitpack_encode_unchecked;

use crate::match_each_alp_float_ptype;
Expand Down Expand Up @@ -254,14 +254,18 @@ impl RDEncoder {

/// Decode a vector of ALP-RD encoded values back into their original floating point format.
///
/// `left_parts_patches` is taken as an owned vector because we always need an allocation of that
/// size for the result and Rust will re-use the allocation (which has compatible length and
/// bit-width).sr
///
/// # Panics
///
/// The function panics if the provided `left_parts` and `right_parts` differ in length.
pub fn alp_rd_decode<T: ALPRDFloat>(
left_parts: &[u16],
left_parts_dict: &[u16],
right_bit_width: u8,
right_parts: &[T::UINT],
right_parts: Vec<T::UINT>,
left_parts_patches: Option<Patches>,
) -> VortexResult<Vec<T>> {
if left_parts.len() != right_parts.len() {
Expand All @@ -281,14 +285,16 @@ pub fn alp_rd_decode<T: ALPRDFloat>(
let patched_left_parts = match left_parts_patches {
Some(patches) => PrimitiveArray::from(left_parts_decoded)
.patch(patches)?
.into_maybe_null_slice::<u16>(),
.try_into_maybe_null_vec::<u16>()
.map_err(|_| vortex_err!("could not zero copy patched left_parts_decoded"))
.vortex_expect("the buffer backing this PrimitiveArray is uniquely derived from a Vec<u16> uniquely owned by this method"),
None => left_parts_decoded,
};

// recombine the left-and-right parts, adjusting by the right_bit_width.
Ok(patched_left_parts
.into_iter()
.zip(right_parts.iter().copied())
.zip_eq(right_parts)
.map(|(left, right)| {
let left = <T as ALPRDFloat>::from_u16(left);
T::from_bits((left << (right_bit_width as usize)) | right)
Expand Down
25 changes: 19 additions & 6 deletions encodings/datetime-parts/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,28 @@ pub fn split_temporal(array: TemporalArray) -> VortexResult<TemporalParts> {
};

let length = timestamps.len();
let mut days = Vec::with_capacity(length);
let mut seconds = Vec::with_capacity(length);
let mut subsecond = Vec::with_capacity(length);

for &t in timestamps.maybe_null_slice::<i64>().iter() {
days.push(t / (86_400 * divisor));
seconds.push((t % (86_400 * divisor)) / divisor);
subsecond.push((t % (86_400 * divisor)) % divisor);
}
let days = match timestamps.try_into_maybe_null_vec::<i64>() {
Ok(vector) => vector
.into_iter()
.map(|t| {
seconds.push((t % (86_400 * divisor)) / divisor);
subsecond.push((t % (86_400 * divisor)) % divisor);
t / (86_400 * divisor)
})
.collect::<Vec<_>>(),
Err(timestamps) => timestamps
.maybe_null_slice::<i64>()
.iter()
.map(|t| {
seconds.push((t % (86_400 * divisor)) / divisor);
subsecond.push((t % (86_400 * divisor)) % divisor);
t / (86_400 * divisor)
})
.collect::<Vec<_>>(),
};

Ok(TemporalParts {
days: PrimitiveArray::from_vec(days, validity).into_array(),
Expand Down
2 changes: 1 addition & 1 deletion encodings/datetime-parts/src/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ pub fn decode_to_temporal(array: &DateTimePartsArray) -> VortexResult<TemporalAr
)?
.into_primitive()?;
let mut values: Vec<i64> = days_buf
.into_maybe_null_slice::<i64>()
.into_maybe_null_vec::<i64>() // attempt to reuse the i64 allocation for values
.into_iter()
.map(|d| d * 86_400 * divisor)
.collect();
Expand Down
2 changes: 1 addition & 1 deletion encodings/fastlanes/src/bitpacking/compute/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ mod test {
.unwrap()
.into_primitive()
.unwrap()
.into_maybe_null_slice::<i64>();
.into_maybe_null_vec::<i64>();

assert_eq!(filtered, values);
}
Expand Down
3 changes: 3 additions & 0 deletions encodings/fastlanes/src/delta/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ pub fn delta_decompress(array: DeltaArray) -> VortexResult<PrimitiveArray> {
slice(decoded, array.offset(), array.offset() + array.len())?.into_primitive()
}

// FIXME(DK): This method may benefit from taking deltas as an owned vector. Rust could re-use that
// allocation for `output` but proving that is a bit complex compared to
// `vector.into_iter().map().collect()`.
fn decompress_primitive<T: NativePType + Delta + Transpose + WrappingAdd>(
bases: &[T],
deltas: &[T],
Expand Down
7 changes: 5 additions & 2 deletions encodings/fastlanes/src/for/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub fn for_compress(array: &PrimitiveArray) -> VortexResult<FoRArray> {
encoded_zero::<$T>(array.validity().to_logical(array.len()), nullability)
.vortex_expect("Failed to encode all zeroes")
} else {
compress_primitive::<$T>(&array, shift, $T::try_from(&min)?)
compress_primitive::<$T>(array, shift, $T::try_from(&min)?)
.reinterpret_cast(array.ptype().to_unsigned())
.into_array()
}
Expand Down Expand Up @@ -120,14 +120,17 @@ pub fn decompress(array: FoRArray) -> VortexResult<PrimitiveArray> {
encoded
} else {
PrimitiveArray::from_vec(
decompress_primitive(encoded.into_maybe_null_slice::<$T>(), min, shift),
decompress_primitive(encoded.into_maybe_null_vec::<$T>(), min, shift),
validity,
)
}
}
}))
}

/// Decompresses a frame-of-reference encoded vector given the shift and the bias (`min`).
///
/// `values` is taken as an owned vector because we always need an allocation of that size.
fn decompress_primitive<T: NativePType + WrappingAdd + PrimInt>(
values: Vec<T>,
min: T,
Expand Down
49 changes: 35 additions & 14 deletions encodings/runend-bool/src/compute/mod.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
mod invert;

use arrow_buffer::BooleanBuffer;
use vortex_array::array::BoolArray;
use arrow_buffer::{ArrowNativeType, BooleanBuffer};
use num_traits::AsPrimitive;
use vortex_array::array::{BoolArray, PrimitiveArray};
use vortex_array::compute::{slice, ComputeVTable, InvertFn, ScalarAtFn, SliceFn, TakeFn};
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant};
use vortex_dtype::match_each_integer_ptype;
use vortex_dtype::{match_each_integer_ptype, NativePType};
use vortex_error::{vortex_bail, VortexResult};
use vortex_scalar::Scalar;

Expand Down Expand Up @@ -41,17 +42,7 @@ impl TakeFn<RunEndBoolArray> for RunEndBoolEncoding {
fn take(&self, array: &RunEndBoolArray, indices: &ArrayData) -> VortexResult<ArrayData> {
let primitive_indices = indices.clone().into_primitive()?;
let physical_indices = match_each_integer_ptype!(primitive_indices.ptype(), |$P| {
primitive_indices
.into_maybe_null_slice::<$P>()
.into_iter()
.map(|idx| idx as usize)
.map(|idx| {
if idx >= array.len() {
vortex_bail!(OutOfBounds: idx, 0, array.len())
}
array.find_physical_index(idx)
})
.collect::<VortexResult<Vec<_>>>()?
valid_physical_indices::<$P>(array, primitive_indices)?
});
let start = array.start();
BoolArray::try_new(
Expand All @@ -66,6 +57,36 @@ impl TakeFn<RunEndBoolArray> for RunEndBoolEncoding {
}
}

fn valid_physical_indices<P: NativePType + ArrowNativeType + AsPrimitive<usize>>(
array: &RunEndBoolArray,
primitive_indices: PrimitiveArray,
) -> VortexResult<Vec<usize>> {
match primitive_indices.try_into_maybe_null_vec::<P>() {
// This allocation can be re-used when P's width matches usize's.
Ok(vector) => vector
.into_iter()
.map(|idx| idx.as_())
.map(|idx| {
if idx >= array.len() {
vortex_bail!(OutOfBounds: idx, 0, array.len())
}
array.find_physical_index(idx)
})
.collect::<VortexResult<Vec<_>>>(),
Err(primitive_indices) => primitive_indices
.maybe_null_slice::<P>()
.iter()
.map(|idx| (*idx).as_())
.map(|idx| {
if idx >= array.len() {
vortex_bail!(OutOfBounds: idx, 0, array.len())
}
array.find_physical_index(idx)
})
.collect::<VortexResult<Vec<_>>>(),
}
}

impl SliceFn<RunEndBoolArray> for RunEndBoolEncoding {
fn slice(&self, array: &RunEndBoolArray, start: usize, stop: usize) -> VortexResult<ArrayData> {
let new_length = stop - start;
Expand Down
4 changes: 2 additions & 2 deletions encodings/runend/src/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ impl TakeFn<RunEndArray> for RunEndEncoding {
let primitive_indices = indices.clone().into_primitive()?;
let usize_indices = match_each_integer_ptype!(primitive_indices.ptype(), |$P| {
primitive_indices
.into_maybe_null_slice::<$P>()
.maybe_null_slice::<$P>()
.into_iter()
.map(|idx| {
let usize_idx = idx as usize;
let usize_idx = *idx as usize;
if usize_idx >= array.len() {
vortex_error::vortex_bail!(OutOfBounds: usize_idx, 0, array.len());
}
Expand Down
16 changes: 8 additions & 8 deletions encodings/zigzag/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ use crate::ZigZagArray;
pub fn zigzag_encode(parray: PrimitiveArray) -> VortexResult<ZigZagArray> {
let validity = parray.validity();
let encoded = match parray.ptype() {
PType::I8 => zigzag_encode_primitive::<i8>(parray.into_maybe_null_slice(), validity),
PType::I16 => zigzag_encode_primitive::<i16>(parray.into_maybe_null_slice(), validity),
PType::I32 => zigzag_encode_primitive::<i32>(parray.into_maybe_null_slice(), validity),
PType::I64 => zigzag_encode_primitive::<i64>(parray.into_maybe_null_slice(), validity),
PType::I8 => zigzag_encode_primitive::<i8>(parray.into_maybe_null_vec(), validity),
PType::I16 => zigzag_encode_primitive::<i16>(parray.into_maybe_null_vec(), validity),
PType::I32 => zigzag_encode_primitive::<i32>(parray.into_maybe_null_vec(), validity),
PType::I64 => zigzag_encode_primitive::<i64>(parray.into_maybe_null_vec(), validity),
_ => vortex_bail!(
"ZigZag can only encode signed integers, got {}",
parray.ptype()
Expand All @@ -36,10 +36,10 @@ where
pub fn zigzag_decode(parray: PrimitiveArray) -> VortexResult<PrimitiveArray> {
let validity = parray.validity();
let decoded = match parray.ptype() {
PType::U8 => zigzag_decode_primitive::<i8>(parray.into_maybe_null_slice(), validity),
PType::U16 => zigzag_decode_primitive::<i16>(parray.into_maybe_null_slice(), validity),
PType::U32 => zigzag_decode_primitive::<i32>(parray.into_maybe_null_slice(), validity),
PType::U64 => zigzag_decode_primitive::<i64>(parray.into_maybe_null_slice(), validity),
PType::U8 => zigzag_decode_primitive::<i8>(parray.into_maybe_null_vec(), validity),
PType::U16 => zigzag_decode_primitive::<i16>(parray.into_maybe_null_vec(), validity),
PType::U32 => zigzag_decode_primitive::<i32>(parray.into_maybe_null_vec(), validity),
PType::U64 => zigzag_decode_primitive::<i64>(parray.into_maybe_null_vec(), validity),
_ => vortex_bail!(
"ZigZag can only decode unsigned integers, got {}",
parray.ptype()
Expand Down
2 changes: 1 addition & 1 deletion fuzz/src/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar
}
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
let primitive_array = array.clone().into_primitive().unwrap();
let vec_values = primitive_array.into_maybe_null_slice::<$P>();
let vec_values = primitive_array.maybe_null_slice::<$P>();
PrimitiveArray::from_vec(vec_values[start..stop].into(), validity).into_array()
}),
DType::Utf8(_) | DType::Binary(_) => {
Expand Down
11 changes: 6 additions & 5 deletions vortex-array/src/array/bool/patch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ use crate::{ArrayLen, IntoArrayVariant, ToArrayData};
impl BoolArray {
pub fn patch(self, patches: Patches) -> VortexResult<Self> {
let length = self.len();
let indices = patches.indices().clone().into_primitive()?;
let values = patches.values().clone().into_bool()?;
let (_, indices, values) = patches.into_parts();
let indices = indices.into_primitive()?;
let values = values.into_bool()?;

let patched_validity =
self.validity()
Expand All @@ -20,11 +21,11 @@ impl BoolArray {
let (mut own_values, bit_offset) = self.into_boolean_builder();
match_each_integer_ptype!(indices.ptype(), |$I| {
for (idx, value) in indices
.into_maybe_null_slice::<$I>()
.into_iter()
.maybe_null_slice::<$I>()
.iter()
.zip_eq(values.boolean_buffer().iter())
{
own_values.set_bit(idx as usize + bit_offset, value);
own_values.set_bit(*idx as usize + bit_offset, value);
}
});

Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/chunked/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ mod test {
.unwrap()
.into_primitive()
.unwrap()
.into_maybe_null_slice::<u64>(),
.into_maybe_null_vec::<u64>(),
vec![0u64, 1, 2, 3],
);
}
Expand Down
10 changes: 6 additions & 4 deletions vortex-array/src/array/primitive/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ impl TakeFn<PrimitiveArray> for PrimitiveEncoding {
let indices = indices.clone().into_primitive()?;
let validity = array.validity().take(indices.as_ref())?;

// FIXME(DK): we could save an allocation and re-use memory if: we take the indices as
// owned, there are no other references to the underlying indices buffer, and the indices
// bit-width matches the array's bit-width.
match_each_native_ptype!(array.ptype(), |$T| {
match_each_integer_ptype!(indices.ptype(), |$I| {
let values = take_primitive(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>());
Expand All @@ -30,6 +33,9 @@ impl TakeFn<PrimitiveArray> for PrimitiveEncoding {
let indices = indices.clone().into_primitive()?;
let validity = unsafe { array.validity().take_unchecked(indices.as_ref())? };

// FIXME(DK): we could save an allocation and re-use memory if: We take the indices as
// owned, there are no other references to the underlying indices buffer, and the indices
// bit-width matches the array's bit-width.
match_each_native_ptype!(array.ptype(), |$T| {
match_each_integer_ptype!(indices.ptype(), |$I| {
let values = take_primitive_unchecked(array.maybe_null_slice::<$T>(), indices.maybe_null_slice::<$I>());
Expand All @@ -39,17 +45,13 @@ impl TakeFn<PrimitiveArray> for PrimitiveEncoding {
}
}

// We pass a Vec<I> in case we're T == u64.
// In which case, Rust should reuse the same Vec<u64> the result.
fn take_primitive<T: NativePType, I: NativePType + AsPrimitive<usize>>(
array: &[T],
indices: &[I],
) -> Vec<T> {
indices.iter().map(|idx| array[idx.as_()]).collect()
}

// We pass a Vec<I> in case we're T == u64.
// In which case, Rust should reuse the same Vec<u64> the result.
unsafe fn take_primitive_unchecked<T: NativePType, I: NativePType + AsPrimitive<usize>>(
array: &[T],
indices: &[I],
Expand Down
Loading