From 9c2d3e1d00291b320200505f8473bdf633b6a863 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Fri, 26 Jun 2026 12:37:32 -0400 Subject: [PATCH] Expose array buffer replacement Signed-off-by: "Nicholas Gates" Signed-off-by: Nicholas Gates --- encodings/alp/src/alp/array.rs | 8 +++ encodings/alp/src/alp_rd/array.rs | 8 +++ encodings/bytebool/src/array.rs | 17 +++++ encodings/datetime-parts/src/array.rs | 8 +++ .../src/decimal_byte_parts/mod.rs | 8 +++ encodings/experimental/onpair/src/array.rs | 18 ++++++ .../fastlanes/src/bitpacking/vtable/mod.rs | 19 ++++++ encodings/fastlanes/src/delta/vtable/mod.rs | 8 +++ encodings/fastlanes/src/for/vtable/mod.rs | 8 +++ encodings/fastlanes/src/rle/vtable/mod.rs | 8 +++ encodings/fsst/src/array.rs | 19 ++++++ encodings/parquet-variant/src/vtable.rs | 8 +++ encodings/pco/src/array.rs | 22 +++++++ encodings/runend/src/array.rs | 8 +++ encodings/sequence/src/array.rs | 8 +++ encodings/sparse/src/lib.rs | 8 +++ encodings/zigzag/src/array.rs | 8 +++ encodings/zstd/src/array.rs | 27 ++++++++ encodings/zstd/src/zstd_buffers.rs | 13 ++++ vortex-array/src/array/erased.rs | 64 ++++++++++++++++++- vortex-array/src/array/foreign.rs | 14 ++++ vortex-array/src/array/mod.rs | 13 ++++ vortex-array/src/array/typed.rs | 6 ++ vortex-array/src/array/vtable/mod.rs | 53 ++++++++++++++- vortex-array/src/arrays/bool/vtable/mod.rs | 18 ++++++ vortex-array/src/arrays/chunked/tests.rs | 22 ++++--- vortex-array/src/arrays/chunked/vtable/mod.rs | 9 +++ .../src/arrays/constant/vtable/mod.rs | 31 ++++++++- vortex-array/src/arrays/decimal/vtable/mod.rs | 23 ++++++- vortex-array/src/arrays/dict/vtable/mod.rs | 9 +++ .../src/arrays/extension/vtable/mod.rs | 9 +++ vortex-array/src/arrays/filter/vtable.rs | 12 +++- .../src/arrays/fixed_size_list/vtable/mod.rs | 14 +++- vortex-array/src/arrays/interleave/mod.rs | 9 +++ vortex-array/src/arrays/list/vtable/mod.rs | 9 +++ .../src/arrays/listview/vtable/mod.rs | 14 +++- vortex-array/src/arrays/masked/vtable/mod.rs | 18 ++++-- vortex-array/src/arrays/null/mod.rs | 9 +++ vortex-array/src/arrays/patched/vtable/mod.rs | 30 +++++++-- .../src/arrays/primitive/vtable/mod.rs | 51 ++++++++++++++- .../src/arrays/scalar_fn/vtable/mod.rs | 9 +++ vortex-array/src/arrays/shared/vtable.rs | 12 +++- vortex-array/src/arrays/slice/vtable.rs | 12 +++- vortex-array/src/arrays/struct_/vtable/mod.rs | 9 +++ vortex-array/src/arrays/varbin/vtable/mod.rs | 23 ++++++- .../src/arrays/varbinview/vtable/mod.rs | 30 +++++++-- vortex-array/src/arrays/variant/vtable/mod.rs | 9 +++ vortex-array/src/canonical.rs | 4 +- vortex-array/src/executor.rs | 4 +- vortex-array/src/normalize.rs | 4 +- vortex-array/src/optimizer/mod.rs | 4 +- vortex-compressor/src/compressor.rs | 4 +- vortex-python/src/arrays/py/vtable.rs | 9 +++ 53 files changed, 753 insertions(+), 48 deletions(-) diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index 8e581fe51fc..5b0984dce7a 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -102,6 +102,14 @@ impl VTable for ALP { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index 160723e83b2..9dac5c292bd 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -129,6 +129,14 @@ impl VTable for ALPRD { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index dd5db041c15..fe00bd5fe4d 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -94,6 +94,23 @@ impl VTable for ByteBool { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_ensure!( + buffers.len() == 1, + "Expected 1 buffer, got {}", + buffers.len() + ); + let data = ByteBoolData::new(buffers[0].clone()); + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( _array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index 64c6419a012..42569da9728 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -122,6 +122,14 @@ impl VTable for DateTimeParts { vortex_panic!("DateTimePartsArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs index 8f4e9fed409..864cd604885 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs @@ -108,6 +108,14 @@ impl VTable for DecimalByteParts { vortex_panic!("DecimalBytePartsArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/experimental/onpair/src/array.rs b/encodings/experimental/onpair/src/array.rs index f4c8c1aed85..58fd0332ac3 100644 --- a/encodings/experimental/onpair/src/array.rs +++ b/encodings/experimental/onpair/src/array.rs @@ -367,6 +367,24 @@ impl VTable for OnPair { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_ensure!( + buffers.len() == 1, + "Expected 1 buffer, got {}", + buffers.len() + ); + let mut data = array.data().clone(); + data.dict_bytes = buffers[0].clone(); + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index 9f8d41de014..68fbf1b41d3 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -35,6 +35,7 @@ use vortex_array::vtable::validity_to_child; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; use vortex_session::VortexSession; @@ -141,6 +142,24 @@ impl VTable for BitPacked { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_ensure!( + buffers.len() == 1, + "Expected 1 buffer, got {}", + buffers.len() + ); + let mut data = array.data().clone(); + data.packed = buffers[0].clone(); + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/fastlanes/src/delta/vtable/mod.rs b/encodings/fastlanes/src/delta/vtable/mod.rs index dfb440620ab..6dcfedc0bc9 100644 --- a/encodings/fastlanes/src/delta/vtable/mod.rs +++ b/encodings/fastlanes/src/delta/vtable/mod.rs @@ -108,6 +108,14 @@ impl VTable for Delta { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn reduce_parent( array: ArrayView<'_, Self>, parent: &ArrayRef, diff --git a/encodings/fastlanes/src/for/vtable/mod.rs b/encodings/fastlanes/src/for/vtable/mod.rs index d3a0bb84811..d73a548340b 100644 --- a/encodings/fastlanes/src/for/vtable/mod.rs +++ b/encodings/fastlanes/src/for/vtable/mod.rs @@ -98,6 +98,14 @@ impl VTable for FoR { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { SLOT_NAMES[idx].to_string() } diff --git a/encodings/fastlanes/src/rle/vtable/mod.rs b/encodings/fastlanes/src/rle/vtable/mod.rs index 5cb50c82953..cea655e53b4 100644 --- a/encodings/fastlanes/src/rle/vtable/mod.rs +++ b/encodings/fastlanes/src/rle/vtable/mod.rs @@ -121,6 +121,14 @@ impl VTable for RLE { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn reduce_parent( array: ArrayView<'_, Self>, parent: &ArrayRef, diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 2526f0a697f..6b8d8e506e9 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -148,6 +148,25 @@ impl VTable for FSST { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_ensure!( + buffers.len() == 3, + "Expected 3 buffers, got {}", + buffers.len() + ); + let symbols = Buffer::::from_byte_buffer(buffers[0].clone().try_to_host_sync()?); + let symbol_lengths = Buffer::::from_byte_buffer(buffers[1].clone().try_to_host_sync()?); + let data = FSSTData::try_new(symbols, symbol_lengths, buffers[2].clone(), array.len())?; + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/parquet-variant/src/vtable.rs b/encodings/parquet-variant/src/vtable.rs index 5aae70c7d24..4fa3ea10690 100644 --- a/encodings/parquet-variant/src/vtable.rs +++ b/encodings/parquet-variant/src/vtable.rs @@ -171,6 +171,14 @@ impl VTable for ParquetVariant { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { SLOT_NAMES[idx].to_string() } diff --git a/encodings/pco/src/array.rs b/encodings/pco/src/array.rs index d0804cd80c1..20578278459 100644 --- a/encodings/pco/src/array.rs +++ b/encodings/pco/src/array.rs @@ -164,6 +164,28 @@ impl VTable for Pco { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + let mut data = array.data().clone(); + let chunk_metas_len = data.metadata.chunks.len(); + vortex_ensure!(buffers.len() >= chunk_metas_len); + data.chunk_metas = buffers[..chunk_metas_len] + .iter() + .map(|buffer| buffer.clone().try_to_host_sync()) + .collect::>>()?; + data.pages = buffers[chunk_metas_len..] + .iter() + .map(|buffer| buffer.clone().try_to_host_sync()) + .collect::>>()?; + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index fde553a2fd4..64d0798ebee 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -123,6 +123,14 @@ impl VTable for RunEnd { vortex_panic!("RunEndArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index 56e62764ce4..3b8e0065f06 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -258,6 +258,14 @@ impl VTable for Sequence { vortex_panic!("SequenceArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 8ae38b56052..f94555434aa 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -223,6 +223,14 @@ impl VTable for Sparse { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::unsupported_buffer_replacement(array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index 8c4be0d5544..05fb8a261e0 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -90,6 +90,14 @@ impl VTable for ZigZag { vortex_panic!("ZigZagArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_array::vtable::with_empty_buffers(self, array, buffers) + } + fn serialize( _array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/zstd/src/array.rs b/encodings/zstd/src/array.rs index 87dc9af286e..948505a42f0 100644 --- a/encodings/zstd/src/array.rs +++ b/encodings/zstd/src/array.rs @@ -175,6 +175,33 @@ impl VTable for Zstd { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + let mut data = array.data().clone(); + if data.dictionary.is_some() { + let Some((dictionary, frames)) = buffers.split_first() else { + vortex_bail!("Expected dictionary buffer"); + }; + data.dictionary = Some(dictionary.clone().try_to_host_sync()?); + data.frames = frames + .iter() + .map(|buffer| buffer.clone().try_to_host_sync()) + .collect::>>()?; + } else { + data.frames = buffers + .iter() + .map(|buffer| buffer.clone().try_to_host_sync()) + .collect::>>()?; + } + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/encodings/zstd/src/zstd_buffers.rs b/encodings/zstd/src/zstd_buffers.rs index f6cb9af586c..8e22f57d163 100644 --- a/encodings/zstd/src/zstd_buffers.rs +++ b/encodings/zstd/src/zstd_buffers.rs @@ -411,6 +411,19 @@ impl VTable for ZstdBuffers { Some(format!("compressed_{idx}")) } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + let mut data = array.data().clone(); + data.compressed_buffers = buffers.to_vec(); + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { format!("child_{idx}") } diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index 9b6d92014c9..7fb3d7baf01 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -460,8 +460,18 @@ impl ArrayRef { /// This is only valid for physical rewrites: the replacement must have the same logical /// `DType` and `len` as the existing slot. /// + /// # Safety + /// + /// If this returns `Ok`, the caller must guarantee that the replacement slot represents the + /// same logical values as the original slot. Only the physical representation may change. + /// Existing parent statistics are preserved and must remain valid. + /// /// Takes ownership to allow in-place mutation when the refcount is 1. - pub fn with_slot(self, slot_idx: usize, replacement: ArrayRef) -> VortexResult { + pub unsafe fn with_slot( + self, + slot_idx: usize, + replacement: ArrayRef, + ) -> VortexResult { let mut slots: ArraySlots = self.slots().iter().cloned().collect(); let nslots = slots.len(); vortex_ensure!( @@ -488,7 +498,8 @@ impl ArrayRef { replacement.len() ); slots[slot_idx] = Some(replacement); - self.with_slots(slots) + // SAFETY: upheld by the caller of this unsafe API. + unsafe { self.with_slots(slots) } } /// Take a slot for executor-owned physical rewrites. @@ -555,7 +566,13 @@ impl ArrayRef { /// /// This is only valid for physical rewrites: slot count, presence, logical `DType`, and /// logical `len` must remain unchanged. - pub fn with_slots(self, slots: ArraySlots) -> VortexResult { + /// + /// # Safety + /// + /// If this returns `Ok`, the caller must guarantee that each replacement slot represents the + /// same logical values as the original slot. Only physical representation may change. Existing + /// parent statistics are preserved and must remain valid. + pub unsafe fn with_slots(self, slots: ArraySlots) -> VortexResult { let old_slots = self.slots(); vortex_ensure!( old_slots.len() == slots.len(), @@ -589,6 +606,47 @@ impl ArrayRef { self.0.data.with_slots(&self, slots) } + /// Returns a new array with the provided top-level buffer handles. + /// + /// This is only valid for physical rewrites: buffer count, logical `DType`, logical `len`, and + /// child slots must remain unchanged. Encoding-specific validation checks buffer shape, + /// alignment, and metadata consistency. + /// + /// # Safety + /// + /// If this returns `Ok`, the caller must guarantee that the replacement buffers represent the + /// same logical values as the original buffers. Only the buffer handle implementation, + /// placement, or backing storage may change. Existing statistics are preserved and must remain + /// valid. + pub unsafe fn with_buffers( + self, + buffers: impl IntoIterator, + ) -> VortexResult { + let buffers = buffers.into_iter().collect::>(); + let nbuffers = self.nbuffers(); + vortex_ensure!( + nbuffers == buffers.len(), + "buffer count changed from {} to {} during physical rewrite", + nbuffers, + buffers.len() + ); + for (idx, (old_buffer, new_buffer)) in self + .buffer_handles() + .into_iter() + .zip(buffers.iter()) + .enumerate() + { + vortex_ensure!( + old_buffer.len() == new_buffer.len(), + "buffer {} length changed from {} to {} during physical rewrite", + idx, + old_buffer.len(), + new_buffer.len() + ); + } + self.0.data.with_buffers(&self, buffers) + } + pub fn reduce(&self) -> VortexResult> { self.0.data.reduce(self) } diff --git a/vortex-array/src/array/foreign.rs b/vortex-array/src/array/foreign.rs index e7ea7238346..4a665044f3f 100644 --- a/vortex-array/src/array/foreign.rs +++ b/vortex-array/src/array/foreign.rs @@ -120,6 +120,20 @@ impl VTable for ForeignArray { Some(format!("buffer[{idx}]")) } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + Ok(ArrayParts::new( + self.clone(), + array.dtype().clone(), + array.len(), + ForeignArrayData::new(array.metadata.clone(), buffers.to_vec()), + ) + .with_slots(array.slots().iter().cloned().collect())) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 2c76945c726..0d3bbe9cae3 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -127,6 +127,9 @@ pub(crate) trait DynArrayData: 'static + private::Sealed + Send + Sync + Debug { /// Returns a new array with the given slots. fn with_slots(&self, this: &ArrayRef, slots: ArraySlots) -> VortexResult; + /// Returns a new array with the given buffers. + fn with_buffers(&self, this: &ArrayRef, buffers: Vec) -> VortexResult; + /// Returns a new array with the given slots, bypassing encoding-level validation. /// /// Used by the executor to temporarily carry an array that has had one of its child slots @@ -363,6 +366,16 @@ impl DynArrayData for ArrayData { .into_array()) } + fn with_buffers(&self, this: &ArrayRef, buffers: Vec) -> VortexResult { + let view = unsafe { ArrayView::new_unchecked(this, &self.data) }; + let stats = this.statistics().to_owned(); + Ok( + Array::::try_from_parts(V::with_buffers(&self.vtable, view, &buffers)?)? + .with_stats_set(stats) + .into_array(), + ) + } + unsafe fn with_slots_unchecked(&self, this: &ArrayRef, slots: ArraySlots) -> ArrayRef { // SAFETY: we intentionally skip `V::validate` here. Caller guarantees that the resulting // array is either repaired or not externally observed. diff --git a/vortex-array/src/array/typed.rs b/vortex-array/src/array/typed.rs index 91225ac75d6..b88f3792a84 100644 --- a/vortex-array/src/array/typed.rs +++ b/vortex-array/src/array/typed.rs @@ -196,6 +196,12 @@ impl Debug for ArrayData { /// `Array` holds an [`ArrayRef`] (shared, heap-allocated) and provides typed access /// to the encoding-specific data via [`Deref`] to `V::TypedArrayData`. /// +/// Buffers are intentionally not stored in the common `ArrayInner` or [`ArrayParts`] state. +/// Encodings may expose buffers only when writing or serializing, and those buffers need not be the +/// same representation they keep in memory. For example, an encoding may hold a deserialized +/// in-memory data structure and synthesize serialized buffers at write time; hoisting buffers here +/// would force it to retain both forms or make the serialized layout dictate the runtime layout. +/// /// This is the primary type for working with typed arrays. Convert to [`ArrayRef`] /// via [`into_array()`](IntoArray::into_array) or [`AsRef`]. pub struct Array { diff --git a/vortex-array/src/array/vtable/mod.rs b/vortex-array/src/array/vtable/mod.rs index ffa8e9dd854..617b211a4c2 100644 --- a/vortex-array/src/array/vtable/mod.rs +++ b/vortex-array/src/array/vtable/mod.rs @@ -23,6 +23,8 @@ pub use operations::*; pub use validity::*; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; use vortex_error::vortex_panic; use vortex_session::VortexSession; @@ -101,6 +103,18 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { /// Returns the name of the buffer at the given index, or `None` if unnamed. fn buffer_name(array: ArrayView<'_, Self>, idx: usize) -> Option; + /// Rebuild this array with replacement top-level buffers. + /// + /// This is for physical rewrites that preserve `dtype`, `len`, child slots, buffer count, and + /// buffer lengths. The caller checks the generic invariants before dispatching here; + /// implementations should interpret the replacement buffers for their encoding-specific + /// in-memory representation. + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult>; + /// Returns the number of children in the array. /// /// The default counts non-None slots. @@ -152,7 +166,7 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { /// Deserialize an array from serialized metadata, buffers, and children. /// - /// The returned [`ArrayParts`](crate::ArrayParts) are still validated by the generic adapter. + /// The returned [`ArrayParts`] are still validated by the generic adapter. /// Deserializers should use the provided `session` to resolve plugin-owned metadata instead of /// relying on global state. fn deserialize( @@ -163,7 +177,7 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { buffers: &[BufferHandle], children: &dyn ArrayChildren, session: &VortexSession, - ) -> VortexResult>; + ) -> VortexResult>; /// Writes the array's logical values into a canonical builder. /// @@ -239,6 +253,7 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { pub use VTable as ArrayVTable; use crate::array::ArrayId; +use crate::array::ArrayParts; /// Empty array metadata struct for encodings with no per-array metadata. #[derive(Clone, Debug, Default)] @@ -259,6 +274,40 @@ impl Display for EmptyArrayData { } } +/// Rebuild an array that has no top-level buffers. +#[inline] +pub fn with_empty_buffers( + vtable: &V, + array: ArrayView<'_, V>, + buffers: &[BufferHandle], +) -> VortexResult> { + vortex_ensure!( + buffers.is_empty(), + "Array {} expects 0 buffers, got {}", + array.encoding_id(), + buffers.len() + ); + Ok(ArrayParts::new( + vtable.clone(), + array.dtype().clone(), + array.len(), + array.data().clone(), + ) + .with_slots(array.slots().iter().cloned().collect())) +} + +/// Reject buffer replacement for encodings whose exposed buffers are not runtime backing buffers. +#[inline] +pub fn unsupported_buffer_replacement( + array: ArrayView<'_, V>, + _buffers: &[BufferHandle], +) -> VortexResult> { + vortex_bail!( + "Array {} does not support in-memory buffer replacement", + array.encoding_id() + ) +} + /// Placeholder type used to indicate when a particular vtable is not supported by the encoding. pub struct NotSupported; diff --git a/vortex-array/src/arrays/bool/vtable/mod.rs b/vortex-array/src/arrays/bool/vtable/mod.rs index 348a1f5f69a..2420642d972 100644 --- a/vortex-array/src/arrays/bool/vtable/mod.rs +++ b/vortex-array/src/arrays/bool/vtable/mod.rs @@ -97,6 +97,24 @@ impl VTable for Bool { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_ensure!( + buffers.len() == 1, + "Expected 1 buffer, got {}", + buffers.len() + ); + let mut data = array.data().clone(); + data.bits = buffers[0].clone(); + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/vortex-array/src/arrays/chunked/tests.rs b/vortex-array/src/arrays/chunked/tests.rs index d3cae8be8f0..cbcd7125114 100644 --- a/vortex-array/src/arrays/chunked/tests.rs +++ b/vortex-array/src/arrays/chunked/tests.rs @@ -206,30 +206,36 @@ fn with_slot_rewrites_chunk_and_offsets() { let mut ctx = SESSION.create_execution_ctx(); let array = chunked_array().into_array(); - let replacement = buffer![10u64, 11, 12].into_array(); - let array = array.with_slot(1, replacement).unwrap(); + let replacement = buffer![1u64, 2, 3].into_array(); + // SAFETY: the replacement chunk has the same logical values as the original chunk; only the + // physical child handle changes. + let array = unsafe { array.with_slot(1, replacement) }.unwrap(); let array = array.as_::(); assert_eq!(array.nchunks(), 3); assert_eq!(array.chunk_offsets(), [0, 3, 6, 9]); assert_arrays_eq!( array.chunk(0).clone(), - PrimitiveArray::from_iter([10u64, 11, 12]), + PrimitiveArray::from_iter([1u64, 2, 3]), &mut ctx ); assert_arrays_eq!( array.array().clone(), - PrimitiveArray::from_iter([10u64, 11, 12, 4, 5, 6, 7, 8, 9]), + PrimitiveArray::from_iter([1u64, 2, 3, 4, 5, 6, 7, 8, 9]), &mut ctx ); } #[test] fn with_slot_rejects_len_mismatch() { - let err = chunked_array() - .into_array() - .with_slot(1, buffer![10u64, 11].into_array()) - .unwrap_err(); + // SAFETY: this call is expected to fail the checked slot length invariant before any rewritten + // array is returned or observed. + let err = unsafe { + chunked_array() + .into_array() + .with_slot(1, buffer![10u64, 11].into_array()) + } + .unwrap_err(); assert!(err.to_string().contains("physical rewrite")); } diff --git a/vortex-array/src/arrays/chunked/vtable/mod.rs b/vortex-array/src/arrays/chunked/vtable/mod.rs index 1054dd21d2d..673855cfc0a 100644 --- a/vortex-array/src/arrays/chunked/vtable/mod.rs +++ b/vortex-array/src/arrays/chunked/vtable/mod.rs @@ -29,6 +29,7 @@ use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::VTable; +use crate::array::with_empty_buffers; use crate::arrays::chunked::ChunkedArrayExt; use crate::arrays::chunked::ChunkedData; use crate::arrays::chunked::array::CHUNK_OFFSETS_SLOT; @@ -156,6 +157,14 @@ impl VTable for Chunked { vortex_panic!("ChunkedArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn serialize( _array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/vortex-array/src/arrays/constant/vtable/mod.rs b/vortex-array/src/arrays/constant/vtable/mod.rs index b2996e4985c..d3df3a14b36 100644 --- a/vortex-array/src/arrays/constant/vtable/mod.rs +++ b/vortex-array/src/arrays/constant/vtable/mod.rs @@ -15,6 +15,7 @@ use vortex_session::registry::CachedId; use crate::ArrayEq; use crate::ArrayHash; +use crate::ArrayParts; use crate::ArrayRef; use crate::EqMode; use crate::ExecutionCtx; @@ -24,6 +25,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; use crate::array::VTable; +use crate::array::unsupported_buffer_replacement; use crate::arrays::constant::ConstantData; use crate::arrays::constant::compute::rules::PARENT_RULES; use crate::arrays::constant::vtable::canonical::constant_canonicalize; @@ -109,6 +111,14 @@ impl VTable for Constant { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + unsupported_buffer_replacement(array, buffers) + } + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { vortex_panic!("ConstantArray slot_name index {idx} out of bounds") } @@ -131,7 +141,7 @@ impl VTable for Constant { buffers: &[BufferHandle], _children: &dyn ArrayChildren, session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { vortex_ensure!( buffers.len() == 1, "Expected 1 buffer, got {}", @@ -144,7 +154,7 @@ impl VTable for Constant { let scalar_value = ScalarValue::from_proto_bytes(bytes, dtype, session)?; let scalar = Scalar::try_new(dtype.clone(), scalar_value)?; - Ok(crate::array::ArrayParts::new( + Ok(ArrayParts::new( self.clone(), dtype.clone(), len, @@ -302,6 +312,23 @@ mod tests { assert_append_matches_canonical(ConstantArray::new(Scalar::null(DType::Null), 5)) } + #[test] + fn test_with_buffers_rejects_serialized_scalar_buffer() { + let array = + ConstantArray::new(Scalar::primitive(42i32, Nullability::NonNullable), 3).into_array(); + let buffers = array.buffer_handles(); + + // SAFETY: the replacement buffers are the array's existing buffers, so the logical values + // would be unchanged if the encoding supported buffer replacement. + let Err(err) = (unsafe { array.with_buffers(buffers) }) else { + panic!("ConstantArray should reject replacing its serialized scalar buffer"); + }; + assert!( + err.to_string() + .contains("does not support in-memory buffer replacement") + ); + } + #[rstest] #[case::bool_true(true, 5)] #[case::bool_false(false, 3)] diff --git a/vortex-array/src/arrays/decimal/vtable/mod.rs b/vortex-array/src/arrays/decimal/vtable/mod.rs index a52f2882238..674f8598256 100644 --- a/vortex-array/src/arrays/decimal/vtable/mod.rs +++ b/vortex-array/src/arrays/decimal/vtable/mod.rs @@ -11,6 +11,7 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_panic; use vortex_session::VortexSession; +use crate::ArrayParts; use crate::ArrayRef; use crate::ExecutionCtx; use crate::ExecutionResult; @@ -95,6 +96,24 @@ impl VTable for Decimal { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_ensure!( + buffers.len() == 1, + "Expected 1 buffer, got {}", + buffers.len() + ); + let mut data = array.data().clone(); + data.values = buffers[0].clone(); + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, @@ -147,7 +166,7 @@ impl VTable for Decimal { buffers: &[BufferHandle], children: &dyn ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { let metadata = DecimalMetadata::decode(metadata)?; if buffers.len() != 1 { vortex_bail!("Expected 1 buffer, got {}", buffers.len()); @@ -177,7 +196,7 @@ impl VTable for Decimal { ); DecimalData::try_new_handle(values, metadata.values_type(), *decimal_dtype) })?; - Ok(crate::array::ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) + Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) } fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { diff --git a/vortex-array/src/arrays/dict/vtable/mod.rs b/vortex-array/src/arrays/dict/vtable/mod.rs index cd2ef72a7ee..d9ced9eeffa 100644 --- a/vortex-array/src/arrays/dict/vtable/mod.rs +++ b/vortex-array/src/arrays/dict/vtable/mod.rs @@ -31,6 +31,7 @@ use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::VTable; +use crate::array::with_empty_buffers; use crate::arrays::ConstantArray; use crate::arrays::Primitive; use crate::arrays::dict::DictArrayExt; @@ -116,6 +117,14 @@ impl VTable for Dict { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/vortex-array/src/arrays/extension/vtable/mod.rs b/vortex-array/src/arrays/extension/vtable/mod.rs index a1fab1cd47a..c27e70476dc 100644 --- a/vortex-array/src/arrays/extension/vtable/mod.rs +++ b/vortex-array/src/arrays/extension/vtable/mod.rs @@ -21,6 +21,7 @@ use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::VTable; use crate::array::ValidityVTableFromChild; +use crate::array::with_empty_buffers; use crate::arrays::extension::array::SLOT_NAMES; use crate::arrays::extension::array::STORAGE_SLOT; use crate::arrays::extension::compute::rules::PARENT_RULES; @@ -135,6 +136,14 @@ impl VTable for Extension { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn serialize( _array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/vortex-array/src/arrays/filter/vtable.rs b/vortex-array/src/arrays/filter/vtable.rs index af11393c5db..0f54a97c574 100644 --- a/vortex-array/src/arrays/filter/vtable.rs +++ b/vortex-array/src/arrays/filter/vtable.rs @@ -16,6 +16,7 @@ use vortex_session::registry::CachedId; use crate::AnyCanonical; use crate::ArrayEq; use crate::ArrayHash; +use crate::ArrayParts; use crate::ArrayRef; use crate::Canonical; use crate::EqMode; @@ -26,6 +27,7 @@ use crate::array::ArrayView; use crate::array::OperationsVTable; use crate::array::VTable; use crate::array::ValidityVTable; +use crate::array::with_empty_buffers; use crate::arrays::filter::FilterArrayExt; use crate::arrays::filter::array::CHILD_SLOT; use crate::arrays::filter::array::FilterData; @@ -117,6 +119,14 @@ impl VTable for Filter { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { SLOT_NAMES[idx].to_string() } @@ -138,7 +148,7 @@ impl VTable for Filter { _buffers: &[BufferHandle], _children: &dyn ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { vortex_bail!("Filter array is not serializable") } diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs index bfc172a5049..8f6fef6a909 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs @@ -15,6 +15,7 @@ use vortex_session::registry::CachedId; use crate::ArrayEq; use crate::ArrayHash; +use crate::ArrayParts; use crate::ArrayRef; use crate::EqMode; use crate::ExecutionCtx; @@ -23,6 +24,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; use crate::array::VTable; +use crate::array::with_empty_buffers; use crate::arrays::fixed_size_list::FixedSizeListData; use crate::arrays::fixed_size_list::array::ELEMENTS_SLOT; use crate::arrays::fixed_size_list::array::NUM_SLOTS; @@ -80,6 +82,14 @@ impl VTable for FixedSizeList { vortex_panic!("FixedSizeListArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn reduce_parent( array: ArrayView<'_, Self>, parent: &ArrayRef, @@ -145,7 +155,7 @@ impl VTable for FixedSizeList { buffers: &[BufferHandle], children: &dyn ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { if !metadata.is_empty() { vortex_bail!( "FixedSizeListArray expects empty metadata, got {} bytes", @@ -181,7 +191,7 @@ impl VTable for FixedSizeList { let data = FixedSizeListData::try_build(elements.clone(), *list_size, validity.clone(), len)?; let slots = FixedSizeListData::make_slots(&elements, &validity, len); - Ok(crate::array::ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) + Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) } fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { diff --git a/vortex-array/src/arrays/interleave/mod.rs b/vortex-array/src/arrays/interleave/mod.rs index b480ac142fb..bff03ab055f 100644 --- a/vortex-array/src/arrays/interleave/mod.rs +++ b/vortex-array/src/arrays/interleave/mod.rs @@ -68,6 +68,7 @@ use crate::array::OperationsVTable; use crate::array::TypedArrayRef; use crate::array::VTable; use crate::array::ValidityVTable; +use crate::array::with_empty_buffers; use crate::arrays::ConstantArray; use crate::buffer::BufferHandle; use crate::dtype::DType; @@ -347,6 +348,14 @@ impl VTable for Interleave { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { match idx { 0 => "array_indices".to_string(), diff --git a/vortex-array/src/arrays/list/vtable/mod.rs b/vortex-array/src/arrays/list/vtable/mod.rs index c71adadd8a0..79316e76be7 100644 --- a/vortex-array/src/arrays/list/vtable/mod.rs +++ b/vortex-array/src/arrays/list/vtable/mod.rs @@ -25,6 +25,7 @@ use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::VTable; +use crate::array::with_empty_buffers; use crate::arrays::list::ListArrayExt; use crate::arrays::list::ListData; use crate::arrays::list::array::ELEMENTS_SLOT; @@ -84,6 +85,14 @@ impl VTable for List { vortex_panic!("ListArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn reduce_parent( array: ArrayView<'_, Self>, parent: &ArrayRef, diff --git a/vortex-array/src/arrays/listview/vtable/mod.rs b/vortex-array/src/arrays/listview/vtable/mod.rs index ba5ae42b3ad..f7a7a5486b6 100644 --- a/vortex-array/src/arrays/listview/vtable/mod.rs +++ b/vortex-array/src/arrays/listview/vtable/mod.rs @@ -16,6 +16,7 @@ use vortex_session::registry::CachedId; use crate::ArrayEq; use crate::ArrayHash; +use crate::ArrayParts; use crate::ArrayRef; use crate::EqMode; use crate::ExecutionCtx; @@ -24,6 +25,7 @@ use crate::array::Array; use crate::array::ArrayId; use crate::array::ArrayView; use crate::array::VTable; +use crate::array::with_empty_buffers; use crate::arrays::listview::ListViewArrayExt; use crate::arrays::listview::ListViewData; use crate::arrays::listview::array::ELEMENTS_SLOT; @@ -95,6 +97,14 @@ impl VTable for ListView { vortex_panic!("ListViewArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, @@ -157,7 +167,7 @@ impl VTable for ListView { buffers: &[BufferHandle], children: &dyn ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { let metadata = ListViewMetadata::decode(metadata)?; vortex_ensure!( buffers.is_empty(), @@ -204,7 +214,7 @@ impl VTable for ListView { ListViewData::validate(&elements, &offsets, &sizes, &validity)?; let data = ListViewData::try_new()?; let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len); - Ok(crate::array::ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) + Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) } fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { diff --git a/vortex-array/src/arrays/masked/vtable/mod.rs b/vortex-array/src/arrays/masked/vtable/mod.rs index 70f7f566e41..64d39f63531 100644 --- a/vortex-array/src/arrays/masked/vtable/mod.rs +++ b/vortex-array/src/arrays/masked/vtable/mod.rs @@ -18,6 +18,7 @@ use vortex_session::registry::CachedId; use crate::AnyCanonical; use crate::ArrayEq; use crate::ArrayHash; +use crate::ArrayParts; use crate::ArrayRef; use crate::Canonical; use crate::EqMode; @@ -29,6 +30,7 @@ use crate::array::ArrayId; use crate::array::ArrayView; use crate::array::VTable; use crate::array::validity_to_child; +use crate::array::with_empty_buffers; use crate::arrays::ConstantArray; use crate::arrays::masked::MaskedArrayExt; use crate::arrays::masked::MaskedArraySlotsExt; @@ -105,6 +107,14 @@ impl VTable for Masked { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn serialize( _array: ArrayView<'_, Self>, _session: &VortexSession, @@ -121,7 +131,7 @@ impl VTable for Masked { buffers: &[BufferHandle], children: &dyn ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { if !metadata.is_empty() { vortex_bail!( "MaskedArray expects empty metadata, got {} bytes", @@ -153,10 +163,8 @@ impl VTable for Masked { child.all_valid(&mut LEGACY_SESSION.create_execution_ctx())?, validity, )?; - Ok( - crate::array::ArrayParts::new(self.clone(), dtype.clone(), len, data) - .with_slots(smallvec![Some(child), validity_slot]), - ) + Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data) + .with_slots(smallvec![Some(child), validity_slot])) } fn execute(array: Array, ctx: &mut ExecutionCtx) -> VortexResult { diff --git a/vortex-array/src/arrays/null/mod.rs b/vortex-array/src/arrays/null/mod.rs index 78fe191b6a0..9c8c8e41892 100644 --- a/vortex-array/src/arrays/null/mod.rs +++ b/vortex-array/src/arrays/null/mod.rs @@ -18,6 +18,7 @@ use crate::array::EmptyArrayData; use crate::array::OperationsVTable; use crate::array::VTable; use crate::array::ValidityVTable; +use crate::array::with_empty_buffers; use crate::arrays::null::compute::rules::PARENT_RULES; use crate::buffer::BufferHandle; use crate::dtype::DType; @@ -64,6 +65,14 @@ impl VTable for Null { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { vortex_panic!("NullArray slot_name index {idx} out of bounds") } diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index dbcb4c220dc..bbe592785ec 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -32,6 +32,7 @@ use crate::array::ArrayView; use crate::array::VTable; use crate::array::ValidityChild; use crate::array::ValidityVTableFromChild; +use crate::array::with_empty_buffers; use crate::arrays::Primitive; use crate::arrays::PrimitiveArray; use crate::arrays::patched::PatchedArrayExt; @@ -129,6 +130,14 @@ impl VTable for Patched { vortex_panic!("invalid buffer index for PatchedArray: {idx}"); } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn child(array: ArrayView<'_, Self>, idx: usize) -> ArrayRef { match idx { PatchedSlots::INNER => array.inner().clone(), @@ -350,7 +359,9 @@ mod tests { use vortex_error::VortexResult; use vortex_session::registry::ReadContext; + use crate::Array; use crate::ArrayContext; + use crate::ArrayParts; use crate::ArraySlots; use crate::Canonical; use crate::IntoArray; @@ -359,7 +370,9 @@ mod tests { use crate::arrays::Patched; use crate::arrays::PatchedArray; use crate::arrays::PrimitiveArray; + use crate::arrays::patched::PatchedArrayExt; use crate::arrays::patched::PatchedArraySlotsExt; + use crate::arrays::patched::PatchedData; use crate::arrays::patched::PatchedSlots; use crate::arrays::patched::PatchedSlotsView; use crate::assert_arrays_eq; @@ -628,7 +641,9 @@ mod tests { // Create new PatchedArray with same children using with_slots let array_ref = array.into_array(); - let new_array = array_ref.clone().with_slots(slots.into_slots())?; + // SAFETY: the replacement slots are the original children, preserving logical values and + // parent statistics. + let new_array = unsafe { array_ref.clone().with_slots(slots.into_slots()) }?; assert!(new_array.is::()); assert_eq!(array_ref.len(), new_array.len()); @@ -645,7 +660,7 @@ mod tests { } #[test] - fn test_with_slots_modified_inner() -> VortexResult<()> { + fn test_rebuild_modified_inner_from_parts() -> VortexResult<()> { let array = make_patched_array(vec![0u16; 10], &[1, 2, 3], &[10, 20, 30])?; // Create a different inner array (all 5s instead of 0s) @@ -657,8 +672,15 @@ mod tests { patch_values: array.patch_values().clone(), }; - let array_ref = array.into_array(); - let new_array = array_ref.with_slots(slots.into_slots())?; + let data = PatchedData { + n_lanes: array.n_lanes(), + offset: array.offset(), + }; + let new_array = Array::try_from_parts( + ArrayParts::new(Patched, array.dtype().clone(), array.len(), data) + .with_slots(slots.into_slots()), + )? + .into_array(); // Execute and verify the inner values changed (except at patch positions) let mut ctx = array_session().create_execution_ctx(); diff --git a/vortex-array/src/arrays/primitive/vtable/mod.rs b/vortex-array/src/arrays/primitive/vtable/mod.rs index b5eba2cb1a0..a184253527e 100644 --- a/vortex-array/src/arrays/primitive/vtable/mod.rs +++ b/vortex-array/src/arrays/primitive/vtable/mod.rs @@ -6,6 +6,7 @@ use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_panic; +use crate::ArrayParts; use crate::ArrayRef; use crate::ExecutionCtx; use crate::ExecutionResult; @@ -86,6 +87,24 @@ impl VTable for Primitive { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_ensure!( + buffers.len() == 1, + "Expected 1 buffer, got {}", + buffers.len() + ); + let mut data = array.data().clone(); + data.buffer = buffers[0].clone(); + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( _array: ArrayView<'_, Self>, _session: &VortexSession, @@ -131,7 +150,7 @@ impl VTable for Primitive { buffers: &[BufferHandle], children: &dyn ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { if !metadata.is_empty() { vortex_bail!( "PrimitiveArray expects empty metadata, got {} bytes", @@ -179,7 +198,7 @@ impl VTable for Primitive { // SAFETY: checked ahead of time let slots = PrimitiveData::make_slots(&validity, len); let data = unsafe { PrimitiveData::new_unchecked_from_handle(buffer, ptype, validity) }; - Ok(crate::array::ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) + Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) } fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { @@ -221,6 +240,7 @@ pub struct Primitive; mod tests { use vortex_buffer::ByteBufferMut; use vortex_buffer::buffer; + use vortex_error::VortexResult; use vortex_session::registry::ReadContext; use crate::ArrayContext; @@ -229,6 +249,7 @@ mod tests { use crate::array_session; use crate::arrays::PrimitiveArray; use crate::assert_arrays_eq; + use crate::buffer::BufferHandle; use crate::serde::SerializeOptions; use crate::serde::SerializedArray; use crate::validity::Validity; @@ -262,4 +283,30 @@ mod tests { assert_arrays_eq!(decoded, array, &mut ctx); } + + #[test] + fn test_with_buffers_replaces_primitive_buffer_with_equivalent_contents() -> VortexResult<()> { + let session = array_session(); + let mut ctx = session.create_execution_ctx(); + + let array = PrimitiveArray::from_iter([1i32, 2, 3, 4]).into_array(); + let replacement = BufferHandle::new_host(buffer![1i32, 2, 3, 4].into_byte_buffer()); + // SAFETY: the replacement buffer contains the same logical values as the original array; + // only the buffer handle changes. + let rewritten = unsafe { array.with_buffers([replacement]) }?; + let expected = PrimitiveArray::from_iter([1i32, 2, 3, 4]); + + assert_arrays_eq!(rewritten, expected, &mut ctx); + Ok(()) + } + + #[test] + fn test_with_buffers_rejects_length_change() { + let array = PrimitiveArray::from_iter([1i32, 2, 3, 4]).into_array(); + let replacement = BufferHandle::new_host(buffer![10i32, 20, 30].into_byte_buffer()); + + // SAFETY: this call is expected to fail the checked buffer length invariant before any + // rewritten array is returned or observed. + assert!(unsafe { array.with_buffers([replacement]) }.is_err()); + } } diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 6833848deae..d98e00975ea 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -28,6 +28,7 @@ use crate::array::ArrayId; use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::VTable; +use crate::array::with_empty_buffers; use crate::arrays::scalar_fn::array::ScalarFnArrayExt; use crate::arrays::scalar_fn::array::ScalarFnData; use crate::arrays::scalar_fn::rules::PARENT_RULES; @@ -116,6 +117,14 @@ impl VTable for ScalarFn { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn serialize( _array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/vortex-array/src/arrays/shared/vtable.rs b/vortex-array/src/arrays/shared/vtable.rs index 3c3a09216d2..fb2c67d1d65 100644 --- a/vortex-array/src/arrays/shared/vtable.rs +++ b/vortex-array/src/arrays/shared/vtable.rs @@ -11,6 +11,7 @@ use vortex_session::registry::CachedId; use crate::ArrayEq; use crate::ArrayHash; +use crate::ArrayParts; use crate::ArrayRef; use crate::Canonical; use crate::EqMode; @@ -22,6 +23,7 @@ use crate::array::ArrayView; use crate::array::OperationsVTable; use crate::array::VTable; use crate::array::ValidityVTable; +use crate::array::with_empty_buffers; use crate::arrays::shared::SharedArrayExt; use crate::arrays::shared::SharedData; use crate::arrays::shared::array::SLOT_NAMES; @@ -84,6 +86,14 @@ impl VTable for Shared { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { SLOT_NAMES[idx].to_string() } @@ -104,7 +114,7 @@ impl VTable for Shared { _buffers: &[BufferHandle], _children: &dyn crate::serde::ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { vortex_error::vortex_bail!("Shared array is not serializable") } diff --git a/vortex-array/src/arrays/slice/vtable.rs b/vortex-array/src/arrays/slice/vtable.rs index ba716bee024..1ff2e5bc84d 100644 --- a/vortex-array/src/arrays/slice/vtable.rs +++ b/vortex-array/src/arrays/slice/vtable.rs @@ -18,6 +18,7 @@ use vortex_session::registry::CachedId; use crate::AnyCanonical; use crate::ArrayEq; use crate::ArrayHash; +use crate::ArrayParts; use crate::ArrayRef; use crate::EqMode; use crate::array::Array; @@ -26,6 +27,7 @@ use crate::array::ArrayView; use crate::array::OperationsVTable; use crate::array::VTable; use crate::array::ValidityVTable; +use crate::array::with_empty_buffers; use crate::arrays::slice::SliceArrayExt; use crate::arrays::slice::array::CHILD_SLOT; use crate::arrays::slice::array::SLOT_NAMES; @@ -115,6 +117,14 @@ impl VTable for Slice { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { SLOT_NAMES[idx].to_string() } @@ -136,7 +146,7 @@ impl VTable for Slice { _buffers: &[BufferHandle], _children: &dyn ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { vortex_bail!("Slice array is not serializable") } diff --git a/vortex-array/src/arrays/struct_/vtable/mod.rs b/vortex-array/src/arrays/struct_/vtable/mod.rs index df9b1170b02..1b23ecf804c 100644 --- a/vortex-array/src/arrays/struct_/vtable/mod.rs +++ b/vortex-array/src/arrays/struct_/vtable/mod.rs @@ -17,6 +17,7 @@ use crate::array::ArrayView; use crate::array::EmptyArrayData; use crate::array::VTable; use crate::array::child_to_validity; +use crate::array::with_empty_buffers; use crate::arrays::struct_::array::FIELDS_OFFSET; use crate::arrays::struct_::array::VALIDITY_SLOT; use crate::arrays::struct_::array::make_struct_slots; @@ -122,6 +123,14 @@ impl VTable for Struct { vortex_panic!("StructArray buffer_name index {idx} out of bounds") } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn serialize( _array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/vortex-array/src/arrays/varbin/vtable/mod.rs b/vortex-array/src/arrays/varbin/vtable/mod.rs index 90dcf320427..301fdebc084 100644 --- a/vortex-array/src/arrays/varbin/vtable/mod.rs +++ b/vortex-array/src/arrays/varbin/vtable/mod.rs @@ -11,6 +11,7 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_panic; use vortex_session::registry::CachedId; +use crate::ArrayParts; use crate::ArrayRef; use crate::ExecutionCtx; use crate::ExecutionResult; @@ -124,6 +125,24 @@ impl VTable for VarBin { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + vortex_ensure!( + buffers.len() == 1, + "Expected 1 buffer, got {}", + buffers.len() + ); + let mut data = array.data().clone(); + data.bytes = buffers[0].clone(); + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, @@ -145,7 +164,7 @@ impl VTable for VarBin { buffers: &[BufferHandle], children: &dyn ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { let metadata = VarBinMetadata::decode(metadata)?; let validity = if children.len() == 1 { Validity::from(dtype.nullability()) @@ -169,7 +188,7 @@ impl VTable for VarBin { let data = VarBinData::try_build(offsets.clone(), bytes, dtype.clone(), validity.clone())?; let slots = VarBinData::make_slots(offsets, &validity, len); - Ok(crate::array::ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) + Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) } fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { diff --git a/vortex-array/src/arrays/varbinview/vtable/mod.rs b/vortex-array/src/arrays/varbinview/vtable/mod.rs index 386625adcf1..9335ed092ca 100644 --- a/vortex-array/src/arrays/varbinview/vtable/mod.rs +++ b/vortex-array/src/arrays/varbinview/vtable/mod.rs @@ -14,6 +14,7 @@ use vortex_error::vortex_panic; use vortex_session::VortexSession; use vortex_session::registry::CachedId; +use crate::ArrayParts; use crate::ArrayRef; use crate::EqMode; use crate::ExecutionCtx; @@ -131,6 +132,26 @@ impl VTable for VarBinView { } } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + let Some((views, data_buffers)) = buffers.split_last() else { + vortex_bail!("Expected at least 1 buffer, got 0"); + }; + let data = VarBinViewData::try_new_handle( + views.clone(), + Arc::from(data_buffers.to_vec()), + array.dtype().clone(), + array.validity()?, + )?; + Ok( + ArrayParts::new(self.clone(), array.dtype().clone(), array.len(), data) + .with_slots(array.slots().iter().cloned().collect()), + ) + } + fn serialize( _array: ArrayView<'_, Self>, _session: &VortexSession, @@ -147,7 +168,7 @@ impl VTable for VarBinView { buffers: &[BufferHandle], children: &dyn ArrayChildren, _session: &VortexSession, - ) -> VortexResult> { + ) -> VortexResult> { if !metadata.is_empty() { vortex_bail!( "VarBinViewArray expects empty metadata, got {} bytes", @@ -188,10 +209,7 @@ impl VTable for VarBinView { validity.clone(), )?; let slots = VarBinViewData::make_slots(&validity, len); - return Ok( - crate::array::ArrayParts::new(self.clone(), dtype.clone(), len, data) - .with_slots(slots), - ); + return Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)); } let data_buffers = data_handles @@ -207,7 +225,7 @@ impl VTable for VarBinView { validity.clone(), )?; let slots = VarBinViewData::make_slots(&validity, len); - Ok(crate::array::ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) + Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) } fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { diff --git a/vortex-array/src/arrays/variant/vtable/mod.rs b/vortex-array/src/arrays/variant/vtable/mod.rs index 19ade687875..4d84511ae6a 100644 --- a/vortex-array/src/arrays/variant/vtable/mod.rs +++ b/vortex-array/src/arrays/variant/vtable/mod.rs @@ -24,6 +24,7 @@ use crate::array::ArrayParts; use crate::array::ArrayView; use crate::array::EmptyArrayData; use crate::array::VTable; +use crate::array::with_empty_buffers; use crate::arrays::variant::CORE_STORAGE_SLOT; use crate::arrays::variant::NUM_SLOTS; use crate::arrays::variant::SHREDDED_SLOT; @@ -125,6 +126,14 @@ impl VTable for Variant { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn serialize( array: ArrayView<'_, Self>, _session: &VortexSession, diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 28953a93e4e..02029fc8e3e 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -711,7 +711,9 @@ fn recursively_canonicalize_slots( .transpose() }) .collect::>()?; - array.clone().with_slots(slots) + // SAFETY: recursive canonicalization rewrites child slots to equivalent canonical + // representations, preserving the parent array's logical values and statistics. + unsafe { array.clone().with_slots(slots) } } impl Executable for RecursiveCanonical { fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { diff --git a/vortex-array/src/executor.rs b/vortex-array/src/executor.rs index 0c083f18ee5..cf5607152a0 100644 --- a/vortex-array/src/executor.rs +++ b/vortex-array/src/executor.rs @@ -481,7 +481,9 @@ impl Executable for ArrayRef { ExecutionStep::ExecuteSlot(i, _) => { let child = array.slots()[i].clone().vortex_expect("valid slot index"); let executed_child = child.execute::(ctx)?; - array.with_slot(i, executed_child) + // SAFETY: execution of a child slot produces a logically equivalent array in a + // different physical representation, preserving parent values and statistics. + unsafe { array.with_slot(i, executed_child) } } ExecutionStep::AppendChild(_) => { // Single-step: build the entire parent via the builder path. diff --git a/vortex-array/src/normalize.rs b/vortex-array/src/normalize.rs index 7957adc5297..99a08c1ec1d 100644 --- a/vortex-array/src/normalize.rs +++ b/vortex-array/src/normalize.rs @@ -84,7 +84,9 @@ impl ArrayRef { } if any_slot_changed { - normalized = normalized.with_slots(normalized_slots)?; + // SAFETY: normalization only rewrites child slots to logically equivalent allowed + // encodings, preserving parent logical values and statistics. + normalized = unsafe { normalized.with_slots(normalized_slots) }?; } Ok(normalized) diff --git a/vortex-array/src/optimizer/mod.rs b/vortex-array/src/optimizer/mod.rs index 77a337098be..6f7ae931458 100644 --- a/vortex-array/src/optimizer/mod.rs +++ b/vortex-array/src/optimizer/mod.rs @@ -156,7 +156,9 @@ fn try_optimize_recursive( } if any_slot_optimized { - current_array = current_array.with_slots(new_slots)?; + // SAFETY: optimizer rules only replace child slots with logically equivalent arrays, so + // parent logical values and statistics remain valid. + current_array = unsafe { current_array.with_slots(new_slots) }?; any_optimizations = true; } diff --git a/vortex-compressor/src/compressor.rs b/vortex-compressor/src/compressor.rs index e66ffb053e7..965c719bf4b 100644 --- a/vortex-compressor/src/compressor.rs +++ b/vortex-compressor/src/compressor.rs @@ -583,7 +583,9 @@ impl CascadingCompressor { }) .collect::>()?; - array.clone().with_slots(slots) + // SAFETY: compression rewrites each child slot to an equivalent physical representation, + // preserving the parent array's logical values and statistics. + unsafe { array.clone().with_slots(slots) } } } diff --git a/vortex-python/src/arrays/py/vtable.rs b/vortex-python/src/arrays/py/vtable.rs index f8c8f44d7f8..91eb97d1dff 100644 --- a/vortex-python/src/arrays/py/vtable.rs +++ b/vortex-python/src/arrays/py/vtable.rs @@ -20,6 +20,7 @@ use vortex::array::ValidityVTable; use vortex::array::buffer::BufferHandle; use vortex::array::serde::ArrayChildren; use vortex::array::validity::Validity; +use vortex::array::with_empty_buffers; use vortex::dtype::DType; use vortex::error::VortexResult; use vortex::error::vortex_bail; @@ -83,6 +84,14 @@ impl VTable for PythonVTable { None } + fn with_buffers( + &self, + array: ArrayView<'_, Self>, + buffers: &[BufferHandle], + ) -> VortexResult> { + with_empty_buffers(self, array, buffers) + } + fn nchildren(_array: ArrayView<'_, Self>) -> usize { 0 }