From 024aad57082f62741d38013dbabd8d973d430653 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 4 Nov 2025 11:37:08 -0500 Subject: [PATCH 1/4] add scaffolding for ListVector Signed-off-by: Connor Tsui --- vortex-compute/src/arrow/list.rs | 14 ++ vortex-compute/src/arrow/mod.rs | 1 + vortex-compute/src/mask/mod.rs | 34 ++-- vortex-vector/src/fixed_size_list/vector.rs | 11 +- vortex-vector/src/lib.rs | 1 + vortex-vector/src/listview/mod.rs | 31 +++ vortex-vector/src/listview/vector.rs | 212 ++++++++++++++++++++ vortex-vector/src/listview/vector_mut.rs | 192 ++++++++++++++++++ vortex-vector/src/macros.rs | 4 + vortex-vector/src/private.rs | 4 + vortex-vector/src/vector.rs | 21 +- vortex-vector/src/vector_mut.rs | 21 +- 12 files changed, 527 insertions(+), 19 deletions(-) create mode 100644 vortex-compute/src/arrow/list.rs create mode 100644 vortex-vector/src/listview/mod.rs create mode 100644 vortex-vector/src/listview/vector.rs create mode 100644 vortex-vector/src/listview/vector_mut.rs diff --git a/vortex-compute/src/arrow/list.rs b/vortex-compute/src/arrow/list.rs new file mode 100644 index 00000000000..a28400b759b --- /dev/null +++ b/vortex-compute/src/arrow/list.rs @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use arrow_array::ArrayRef; +use vortex_error::VortexResult; +use vortex_vector::listview::ListViewVector; + +use crate::arrow::IntoArrow; + +impl IntoArrow for ListViewVector { + fn into_arrow(self) -> VortexResult { + todo!("Figure out how to do this") + } +} diff --git a/vortex-compute/src/arrow/mod.rs b/vortex-compute/src/arrow/mod.rs index f3b67b96b1a..cca88e183ec 100644 --- a/vortex-compute/src/arrow/mod.rs +++ b/vortex-compute/src/arrow/mod.rs @@ -9,6 +9,7 @@ mod binaryview; mod bool; mod decimal; mod fixed_size_list; +mod list; mod mask; mod null; mod primitive; diff --git a/vortex-compute/src/mask/mod.rs b/vortex-compute/src/mask/mod.rs index 3fbf41fc874..a5c12f8e9e5 100644 --- a/vortex-compute/src/mask/mod.rs +++ b/vortex-compute/src/mask/mod.rs @@ -11,6 +11,7 @@ use vortex_vector::binaryview::{BinaryViewType, BinaryViewVector}; use vortex_vector::bool::BoolVector; use vortex_vector::decimal::{DVector, DecimalVector}; use vortex_vector::fixed_size_list::FixedSizeListVector; +use vortex_vector::listview::ListViewVector; use vortex_vector::null::NullVector; use vortex_vector::primitive::{PVector, PrimitiveVector}; use vortex_vector::struct_::StructVector; @@ -46,31 +47,31 @@ impl MaskValidity for BoolVector { } } -impl MaskValidity for DecimalVector { +impl MaskValidity for PrimitiveVector { fn mask_validity(self, mask: &Mask) -> Self { - match_each_dvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() }) + match_each_pvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() }) } } -impl MaskValidity for DVector { +impl MaskValidity for PVector { fn mask_validity(self, mask: &Mask) -> Self { - let (ps, elements, validity) = self.into_parts(); - // SAFETY: we are preserving the original elements buffer and only modifying the validity. - unsafe { Self::new_unchecked(ps, elements, validity.bitand(mask)) } + let (data, validity) = self.into_parts(); + // SAFETY: we are preserving the original data buffer and only modifying the validity. + unsafe { Self::new_unchecked(data, validity.bitand(mask)) } } } -impl MaskValidity for PrimitiveVector { +impl MaskValidity for DecimalVector { fn mask_validity(self, mask: &Mask) -> Self { - match_each_pvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() }) + match_each_dvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() }) } } -impl MaskValidity for PVector { +impl MaskValidity for DVector { fn mask_validity(self, mask: &Mask) -> Self { - let (data, validity) = self.into_parts(); - // SAFETY: we are preserving the original data buffer and only modifying the validity. - unsafe { Self::new_unchecked(data, validity.bitand(mask)) } + let (ps, elements, validity) = self.into_parts(); + // SAFETY: we are preserving the original elements buffer and only modifying the validity. + unsafe { Self::new_unchecked(ps, elements, validity.bitand(mask)) } } } @@ -82,6 +83,15 @@ impl MaskValidity for BinaryViewVector { } } +impl MaskValidity for ListViewVector { + fn mask_validity(self, mask: &Mask) -> Self { + let (elements, offsets, sizes, validity) = self.into_parts(); + // SAFETY: we are preserving the original elements and `list_size`, only modifying the + // validity. + unsafe { Self::new_unchecked(elements, offsets, sizes, validity.bitand(mask)) } + } +} + impl MaskValidity for FixedSizeListVector { fn mask_validity(self, mask: &Mask) -> Self { let (elements, list_size, validity) = self.into_parts(); diff --git a/vortex-vector/src/fixed_size_list/vector.rs b/vortex-vector/src/fixed_size_list/vector.rs index 46c3fae3e11..2d4844e62bf 100644 --- a/vortex-vector/src/fixed_size_list/vector.rs +++ b/vortex-vector/src/fixed_size_list/vector.rs @@ -149,22 +149,24 @@ impl VectorOps for FixedSizeListVector { &self.validity } - fn try_into_mut(self) -> Result + fn try_into_mut(self) -> Result where Self: Sized, { let len = self.len; let list_size = self.list_size; + // Try to unwrap the `Arc`. let elements = match Arc::try_unwrap(self.elements) { Ok(elements) => elements, - Err(elements) => return Err(FixedSizeListVector { elements, ..self }), + Err(elements) => return Err(Self { elements, ..self }), }; + // Try to make validity mutable. let validity = match self.validity.try_into_mut() { Ok(validity) => validity, Err(validity) => { - return Err(FixedSizeListVector { + return Err(Self { elements: Arc::new(elements), list_size, validity, @@ -173,6 +175,7 @@ impl VectorOps for FixedSizeListVector { } }; + // Try to make the elements mutable. match elements.try_into_mut() { Ok(mutable_elements) => Ok(FixedSizeListVectorMut { elements: Box::new(mutable_elements), @@ -180,7 +183,7 @@ impl VectorOps for FixedSizeListVector { validity, len, }), - Err(elements) => Err(FixedSizeListVector { + Err(elements) => Err(Self { elements: Arc::new(elements), list_size, validity: validity.freeze(), diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index e5f27986961..0eb15a7f5b9 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -14,6 +14,7 @@ pub mod binaryview; pub mod bool; pub mod decimal; pub mod fixed_size_list; +pub mod listview; pub mod null; pub mod primitive; pub mod struct_; diff --git a/vortex-vector/src/listview/mod.rs b/vortex-vector/src/listview/mod.rs new file mode 100644 index 00000000000..e1653ac2044 --- /dev/null +++ b/vortex-vector/src/listview/mod.rs @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`ListViewVector`] and [`ListViewVectorMut`]. +//! +//! A [`ListViewVector`] represents a collection of variable-width lists, where each list can +//! contain a different number of elements. +//! +//! The structure uses separate offset and size vectors to track the boundaries of each list +//! within the flat elements array. This allows for efficient access to individual lists without +//! copying data. This is similar to Apache Arrow's `ListView` type. + +mod vector; +pub use vector::ListViewVector; + +mod vector_mut; +pub use vector_mut::ListViewVectorMut; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: ListViewVector) -> Self { + Self::List(v) + } +} + +impl From for VectorMut { + fn from(v: ListViewVectorMut) -> Self { + Self::List(v) + } +} diff --git a/vortex-vector/src/listview/vector.rs b/vortex-vector/src/listview/vector.rs new file mode 100644 index 00000000000..84ba6866a0a --- /dev/null +++ b/vortex-vector/src/listview/vector.rs @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`ListViewVector`]. + +use std::sync::Arc; + +use vortex_error::{VortexExpect, VortexResult}; +use vortex_mask::Mask; + +use super::ListViewVectorMut; +use crate::Vector; +use crate::ops::{VectorMutOps, VectorOps}; +use crate::primitive::PrimitiveVector; + +/// A vector of variable-width lists. +/// +/// Each list is defined by 2 integers: an offset and a size (a "list view"), which point into a +/// child `elements` vector. +/// +/// Note that the list views **do not** need to be sorted, nor do they have to be contiguous or +/// fully cover the `elements` vector. This means that multiple views can be pointing to the same +/// elements. +/// +/// # Structure +/// +/// - `elements`: The child vector of all list elements, stored as an [`Arc`]. +/// - `offsets`: A [`PrimitiveVector`] containing the starting offset of each list in the `elements` +/// vector. +/// - `sizes`: A [`PrimitiveVector`] containing the size (number of elements) of each list. +/// - `validity`: A [`Mask`] indicating which lists are null. +#[derive(Debug, Clone)] +pub struct ListViewVector { + /// The child vector of elements. + pub(super) elements: Arc, + + /// Offsets for each list into the elements vector. + pub(super) offsets: PrimitiveVector, + + /// Sizes (lengths) of each list. + pub(super) sizes: PrimitiveVector, + + /// The validity mask (where `true` represents a list is **not** null). + /// + /// Note that the `elements` vector will have its own internal validity, denoting if individual + /// list elements are null. + pub(super) validity: Mask, + + /// The length of the vector (which is the same as the length of the validity mask). + /// + /// This is stored here as a convenience, as the validity also tracks this information. + pub(super) len: usize, +} + +impl ListViewVector { + /// Creates a new [`ListViewVector`] from its components. + /// + /// # Panics + /// + /// TODO + pub fn new( + elements: Arc, + offsets: PrimitiveVector, + sizes: PrimitiveVector, + validity: Mask, + ) -> Self { + Self::try_new(elements, offsets, sizes, validity) + .vortex_expect("Invalid ListViewVector construction") + } + + /// Attempts to create a new [`ListViewVector`] from its components. + /// + /// # Errors + /// + /// TODO + pub fn try_new( + _elements: Arc, + _offsets: PrimitiveVector, + _sizes: PrimitiveVector, + _validity: Mask, + ) -> VortexResult { + todo!() + } + + /// Creates a new [`ListViewVector`] without validation. + /// + /// # Safety + /// + /// TODO + pub unsafe fn new_unchecked( + elements: Arc, + offsets: PrimitiveVector, + sizes: PrimitiveVector, + validity: Mask, + ) -> Self { + let len = validity.len(); + + if cfg!(debug_assertions) { + Self::new(elements, offsets, sizes, validity) + } else { + Self { + elements, + offsets, + sizes, + validity, + len, + } + } + } + + /// Decomposes the [`ListViewVector`] into its constituent parts (child elements, offsets, + /// sizes, and validity). + pub fn into_parts(self) -> (Arc, PrimitiveVector, PrimitiveVector, Mask) { + (self.elements, self.offsets, self.sizes, self.validity) + } + + /// Returns a reference to the `elements` vector. + #[inline] + pub fn elements(&self) -> &Vector { + &self.elements + } + + /// Returns a reference to the integer `offsets` vector. + #[inline] + pub fn offsets(&self) -> &PrimitiveVector { + &self.offsets + } + + /// Returns a reference to the integer `sizes` vector. + #[inline] + pub fn sizes(&self) -> &PrimitiveVector { + &self.sizes + } +} + +impl VectorOps for ListViewVector { + type Mutable = ListViewVectorMut; + + fn len(&self) -> usize { + self.len + } + + fn validity(&self) -> &Mask { + &self.validity + } + + fn try_into_mut(self) -> Result { + // Try to unwrap the `Arc`. + let elements = match Arc::try_unwrap(self.elements) { + Ok(elements) => elements, + Err(elements) => return Err(Self { elements, ..self }), + }; + + // Try to make the validity mutable. + let validity = match self.validity.try_into_mut() { + Ok(v) => v, + Err(validity) => { + return Err(Self { + elements: Arc::new(elements), + validity, + ..self + }); + } + }; + + // Try to make the offsets mutable. + let offsets = match self.offsets.try_into_mut() { + Ok(mutable_offsets) => mutable_offsets, + Err(offsets) => { + return Err(Self { + offsets, + sizes: self.sizes, + elements: Arc::new(elements), + validity: validity.freeze(), + len: self.len, + }); + } + }; + + // Try to make the sizes mutable. + let sizes = match self.sizes.try_into_mut() { + Ok(mutable_sizes) => mutable_sizes, + Err(sizes) => { + return Err(Self { + offsets: offsets.freeze(), + sizes, + elements: Arc::new(elements), + validity: validity.freeze(), + len: self.len, + }); + } + }; + + // Try to make the elements mutable. + match elements.try_into_mut() { + Ok(mut_elements) => Ok(ListViewVectorMut { + offsets, + sizes, + elements: Box::new(mut_elements), + validity, + len: self.len, + }), + Err(elements) => Err(Self { + offsets: offsets.freeze(), + sizes: sizes.freeze(), + elements: Arc::new(elements), + validity: validity.freeze(), + len: self.len, + }), + } + } +} diff --git a/vortex-vector/src/listview/vector_mut.rs b/vortex-vector/src/listview/vector_mut.rs new file mode 100644 index 00000000000..b9dd60b3321 --- /dev/null +++ b/vortex-vector/src/listview/vector_mut.rs @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`ListViewVectorMut`]. + +use std::sync::Arc; + +use vortex_dtype::DType; +use vortex_error::{VortexExpect, VortexResult}; +use vortex_mask::MaskMut; + +use super::ListViewVector; +use crate::VectorMut; +use crate::ops::VectorMutOps; +use crate::primitive::PrimitiveVectorMut; + +/// A mutable vector of variable-width lists. +/// +/// Each list is defined by 2 integers: an offset and a size (a "list view"), which point into a +/// child `elements` vector. +/// +/// Note that the list views **do not** need to be sorted, nor do they have to be contiguous or +/// fully cover the `elements` vector. This means that multiple views can be pointing to the same +/// elements. +/// +/// # Structure +/// +/// - `elements`: The child vector of all list elements, stored as a [`Box`]. +/// - `offsets`: A [`PrimitiveVectorMut`] containing the starting offset of each list in the +/// `elements` vector. +/// - `sizes`: A [`PrimitiveVectorMut`] containing the size (number of elements) of each list. +/// - `validity`: A [`MaskMut`] indicating which lists are null. +#[derive(Debug, Clone)] +pub struct ListViewVectorMut { + /// The mutable child vector of elements. + pub(super) elements: Box, + + /// Mutable offsets for each list into the elements array. + pub(super) offsets: PrimitiveVectorMut, + + /// Mutable sizes (lengths) of each list. + pub(super) sizes: PrimitiveVectorMut, + + /// The validity mask (where `true` represents a list is **not** null). + /// + /// Note that the `elements` vector will have its own internal validity, denoting if individual + /// list elements are null. + pub(super) validity: MaskMut, + + /// The length of the vector (which is the same as the length of the validity mask). + /// + /// This is stored here as a convenience, as the validity also tracks this information. + pub(super) len: usize, +} + +impl ListViewVectorMut { + /// Creates a new [`ListViewVectorMut`] from its components. + /// + /// # Panics + /// + /// TODO + pub fn new( + elements: Box, + offsets: PrimitiveVectorMut, + sizes: PrimitiveVectorMut, + validity: MaskMut, + ) -> Self { + Self::try_new(elements, offsets, sizes, validity) + .vortex_expect("Failed to create `ListViewVectorMut`") + } + + /// Attempts to create a new [`ListViewVectorMut`] from its components. + /// + /// # Errors + /// + /// TODO + pub fn try_new( + _elements: Box, + _offsets: PrimitiveVectorMut, + _sizes: PrimitiveVectorMut, + _validity: MaskMut, + ) -> VortexResult { + todo!() + } + + /// Creates a new [`ListViewVectorMut`] without validation. + /// + /// # Safety + /// + /// TODO + pub unsafe fn new_unchecked( + elements: Box, + offsets: PrimitiveVectorMut, + sizes: PrimitiveVectorMut, + validity: MaskMut, + ) -> Self { + let len = validity.len(); + + if cfg!(debug_assertions) { + Self::new(elements, offsets, sizes, validity) + } else { + Self { + elements, + offsets, + sizes, + validity, + len, + } + } + } + + /// Creates a new [`ListViewVectorMut`] with the specified capacity. + /// + /// TODO figure out how to set offsets and sizes type? + pub fn with_capacity(_element_dtype: &DType, _capacity: usize) -> Self { + todo!("ListViewVectorMut::with_capacity") + } + + /// Decomposes the [`ListViewVectorMut`] into its constituent parts (child elements, offsets, + /// sizes, and validity). + pub fn into_parts( + self, + ) -> ( + Box, + PrimitiveVectorMut, + PrimitiveVectorMut, + MaskMut, + ) { + (self.elements, self.offsets, self.sizes, self.validity) + } + + /// Returns a reference to the elements vector. + pub fn elements(&self) -> &VectorMut { + &self.elements + } + + /// Returns a reference to the offsets vector. + pub fn offsets(&self) -> &PrimitiveVectorMut { + &self.offsets + } + + /// Returns a reference to the sizes vector. + pub fn sizes(&self) -> &PrimitiveVectorMut { + &self.sizes + } +} + +impl VectorMutOps for ListViewVectorMut { + type Immutable = ListViewVector; + + fn len(&self) -> usize { + self.len + } + + fn validity(&self) -> &MaskMut { + &self.validity + } + + fn capacity(&self) -> usize { + todo!() + } + + fn reserve(&mut self, _additional: usize) { + todo!() + } + + fn extend_from_vector(&mut self, _other: &ListViewVector) { + todo!() + } + + fn append_nulls(&mut self, _n: usize) { + todo!() + } + + fn freeze(self) -> ListViewVector { + ListViewVector { + offsets: self.offsets.freeze(), + sizes: self.sizes.freeze(), + elements: Arc::new(self.elements.freeze()), + validity: self.validity.freeze(), + len: self.len, + } + } + + fn split_off(&mut self, _at: usize) -> Self { + todo!() + } + + fn unsplit(&mut self, _other: Self) { + todo!() + } +} diff --git a/vortex-vector/src/macros.rs b/vortex-vector/src/macros.rs index 177ffd294c0..9c704935752 100644 --- a/vortex-vector/src/macros.rs +++ b/vortex-vector/src/macros.rs @@ -46,6 +46,7 @@ macro_rules! match_each_vector { $crate::Vector::Primitive($vec) => $body, $crate::Vector::String($vec) => $body, $crate::Vector::Binary($vec) => $body, + $crate::Vector::List($vec) => $body, $crate::Vector::FixedSizeList($vec) => $body, $crate::Vector::Struct($vec) => $body, } @@ -94,6 +95,7 @@ macro_rules! match_each_vector_mut { $crate::VectorMut::Primitive($vec) => $body, $crate::VectorMut::String($vec) => $body, $crate::VectorMut::Binary($vec) => $body, + $crate::VectorMut::List($vec) => $body, $crate::VectorMut::FixedSizeList($vec) => $body, $crate::VectorMut::Struct($vec) => $body, } @@ -116,9 +118,11 @@ macro_rules! __match_vector_pair_arms { match ($left, $right) { ($crate::$enum_left::Null($a), $crate::$enum_right::Null($b)) => $body, ($crate::$enum_left::Bool($a), $crate::$enum_right::Bool($b)) => $body, + ($crate::$enum_left::Decimal($a), $crate::$enum_right::Decimal($b)) => $body, ($crate::$enum_left::Primitive($a), $crate::$enum_right::Primitive($b)) => $body, ($crate::$enum_left::String($a), $crate::$enum_right::String($b)) => $body, ($crate::$enum_left::Binary($a), $crate::$enum_right::Binary($b)) => $body, + ($crate::$enum_left::List($a), $crate::$enum_right::List($b)) => $body, ($crate::$enum_left::FixedSizeList($a), $crate::$enum_right::FixedSizeList($b)) => { $body } diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs index 5dec8c8a52b..7a3a0d5b779 100644 --- a/vortex-vector/src/private.rs +++ b/vortex-vector/src/private.rs @@ -14,6 +14,7 @@ use crate::binaryview::{BinaryViewType, BinaryViewVector, BinaryViewVectorMut}; use crate::bool::{BoolVector, BoolVectorMut}; use crate::decimal::{DVector, DVectorMut, DecimalVector, DecimalVectorMut}; use crate::fixed_size_list::{FixedSizeListVector, FixedSizeListVectorMut}; +use crate::listview::{ListViewVector, ListViewVectorMut}; use crate::null::{NullVector, NullVectorMut}; use crate::primitive::{PVector, PVectorMut, PrimitiveVector, PrimitiveVectorMut}; use crate::struct_::{StructVector, StructVectorMut}; @@ -47,5 +48,8 @@ impl Sealed for BinaryViewVectorMut {} impl Sealed for FixedSizeListVector {} impl Sealed for FixedSizeListVectorMut {} +impl Sealed for ListViewVector {} +impl Sealed for ListViewVectorMut {} + impl Sealed for StructVector {} impl Sealed for StructVectorMut {} diff --git a/vortex-vector/src/vector.rs b/vortex-vector/src/vector.rs index 58b1a161964..01166ba4599 100644 --- a/vortex-vector/src/vector.rs +++ b/vortex-vector/src/vector.rs @@ -12,6 +12,7 @@ use crate::binaryview::{BinaryVector, StringVector}; use crate::bool::BoolVector; use crate::decimal::DecimalVector; use crate::fixed_size_list::FixedSizeListVector; +use crate::listview::ListViewVector; use crate::null::NullVector; use crate::primitive::PrimitiveVector; use crate::struct_::StructVector; @@ -50,8 +51,8 @@ pub enum Vector { String(StringVector), /// Binary vectors Binary(BinaryVector), - // List - // List(ListVector), + /// Vectors of Lists with variable sizes. + List(ListViewVector), /// Vectors of Lists with fixed sizes. FixedSizeList(FixedSizeListVector), /// Vectors of Struct elements. @@ -120,6 +121,14 @@ impl Vector { vortex_panic!("Expected BinaryVector, got {self:?}"); } + /// Returns a reference to the inner [`ListViewVector`] if `self` is of that variant. + pub fn as_list(&self) -> &ListViewVector { + if let Vector::List(v) = self { + return v; + } + vortex_panic!("Expected ListViewVector, got {self:?}"); + } + /// Returns a reference to the inner [`FixedSizeListVector`] if `self` is of that variant. pub fn as_fixed_size_list(&self) -> &FixedSizeListVector { if let Vector::FixedSizeList(v) = self { @@ -178,6 +187,14 @@ impl Vector { vortex_panic!("Expected BinaryVector, got {self:?}"); } + /// Consumes `self` and returns the inner [`ListViewVector`] if `self` is of that variant. + pub fn into_list(self) -> ListViewVector { + if let Vector::List(v) = self { + return v; + } + vortex_panic!("Expected ListViewVector, got {self:?}"); + } + /// Consumes `self` and returns the inner [`FixedSizeListVector`] if `self` is of that /// variant. pub fn into_fixed_size_list(self) -> FixedSizeListVector { diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index 8e9ac42c6fc..b45a0868f63 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -14,6 +14,7 @@ use crate::binaryview::{BinaryVectorMut, StringVectorMut}; use crate::bool::BoolVectorMut; use crate::decimal::DecimalVectorMut; use crate::fixed_size_list::FixedSizeListVectorMut; +use crate::listview::ListViewVectorMut; use crate::null::NullVectorMut; use crate::primitive::PrimitiveVectorMut; use crate::struct_::StructVectorMut; @@ -52,6 +53,8 @@ pub enum VectorMut { String(StringVectorMut), /// Mutable Binary vectors. Binary(BinaryVectorMut), + /// Mutable vectors of Lists with variable sizes. + List(ListViewVectorMut), /// Mutable vectors of Lists with fixed sizes. FixedSizeList(FixedSizeListVectorMut), /// Mutable vectors of Struct elements. @@ -79,7 +82,7 @@ impl VectorMut { DType::Utf8(..) => StringVectorMut::with_capacity(capacity).into(), DType::Binary(..) => BinaryVectorMut::with_capacity(capacity).into(), DType::Extension(ext) => VectorMut::with_capacity(ext.storage_dtype(), capacity), - DType::List(..) => todo!("vector mut with capacity"), + DType::List(..) => ListViewVectorMut::with_capacity(dtype, capacity).into(), } } } @@ -167,6 +170,14 @@ impl VectorMut { vortex_panic!("Expected BinaryVectorMut, got {self:?}"); } + /// Returns a reference to the inner [`ListViewVectorMut`] if `self` is of that variant. + pub fn as_list(&self) -> &ListViewVectorMut { + if let VectorMut::List(v) = self { + return v; + } + vortex_panic!("Expected ListViewVectorMut, got {self:?}"); + } + /// Returns a reference to the inner [`FixedSizeListVectorMut`] if `self` is of that variant. pub fn as_fixed_size_list(&self) -> &FixedSizeListVectorMut { if let VectorMut::FixedSizeList(v) = self { @@ -225,6 +236,14 @@ impl VectorMut { vortex_panic!("Expected BinaryVectorMut, got {self:?}"); } + /// Consumes `self` and returns the inner [`ListViewVectorMut`] if `self` is of that variant. + pub fn into_list(self) -> ListViewVectorMut { + if let VectorMut::List(v) = self { + return v; + } + vortex_panic!("Expected ListViewVectorMut, got {self:?}"); + } + /// Consumes `self` and returns the inner [`FixedSizeListVectorMut`] if `self` is of that /// variant. pub fn into_fixed_size_list(self) -> FixedSizeListVectorMut { From 585c66ac5e5e4591956cfeb06952bfe856b2743f Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 4 Nov 2025 14:21:00 -0500 Subject: [PATCH 2/4] add constructors and validation to list vector Signed-off-by: Connor Tsui --- vortex-buffer/src/bit/buf_mut.rs | 17 ++- vortex-mask/src/lib.rs | 174 +++++++++++------------ vortex-mask/src/mask_mut.rs | 18 +++ vortex-vector/src/listview/vector.rs | 112 +++++++++++++-- vortex-vector/src/listview/vector_mut.rs | 128 +++++++++++++++-- vortex-vector/src/primitive/macros.rs | 70 +++++++++ vortex-vector/src/primitive/vector.rs | 3 +- 7 files changed, 409 insertions(+), 113 deletions(-) diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index c1a348665e0..7f5192244b3 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -3,7 +3,7 @@ use std::ops::Not; -use arrow_buffer::bit_chunk_iterator::BitChunks; +use arrow_buffer::bit_chunk_iterator::{BitChunks, UnalignedBitChunk}; use bitvec::view::BitView; use crate::bit::{get_bit_unchecked, ops, set_bit_unchecked, unset_bit_unchecked}; @@ -495,6 +495,21 @@ impl BitBufferMut { pub fn as_mut_ptr(&mut self) -> *mut u8 { self.buffer.as_mut_ptr() } + + /// Access chunks of the buffer aligned to 8 byte boundary as [prefix, \, suffix] + pub fn unaligned_chunks(&self) -> UnalignedBitChunk<'_> { + UnalignedBitChunk::new(self.buffer.as_slice(), self.offset, self.len) + } + + /// Get the number of set bits in the buffer. + pub fn true_count(&self) -> usize { + self.unaligned_chunks().count_ones() + } + + /// Get the number of unset bits in the buffer. + pub fn false_count(&self) -> usize { + self.len - self.true_count() + } } impl Default for BitBufferMut { diff --git a/vortex-mask/src/lib.rs b/vortex-mask/src/lib.rs index 88a12be6ef4..748fcd7d2e5 100644 --- a/vortex-mask/src/lib.rs +++ b/vortex-mask/src/lib.rs @@ -123,93 +123,6 @@ pub struct MaskValues { density: f64, } -impl MaskValues { - /// Returns the length of the mask. - #[inline] - pub fn len(&self) -> usize { - self.buffer.len() - } - - /// Returns true if the mask is empty i.e., it's length is 0. - #[inline] - pub fn is_empty(&self) -> bool { - self.buffer.is_empty() - } - - /// Returns the true count of the mask. - #[inline] - pub fn true_count(&self) -> usize { - self.true_count - } - - /// Returns the boolean buffer representation of the mask. - #[inline] - pub fn bit_buffer(&self) -> &BitBuffer { - &self.buffer - } - - /// Returns the boolean value at a given index. - #[inline] - pub fn value(&self, index: usize) -> bool { - self.buffer.value(index) - } - - /// Constructs an indices vector from one of the other representations. - pub fn indices(&self) -> &[usize] { - self.indices.get_or_init(|| { - if self.true_count == 0 { - return vec![]; - } - - if self.true_count == self.len() { - return (0..self.len()).collect(); - } - - if let Some(slices) = self.slices.get() { - let mut indices = Vec::with_capacity(self.true_count); - indices.extend(slices.iter().flat_map(|(start, end)| *start..*end)); - debug_assert!(indices.is_sorted()); - assert_eq!(indices.len(), self.true_count); - return indices; - } - - let mut indices = Vec::with_capacity(self.true_count); - indices.extend(self.buffer.set_indices()); - debug_assert!(indices.is_sorted()); - assert_eq!(indices.len(), self.true_count); - indices - }) - } - - /// Constructs a slices vector from one of the other representations. - #[allow(clippy::cast_possible_truncation)] - #[inline] - pub fn slices(&self) -> &[(usize, usize)] { - self.slices.get_or_init(|| { - if self.true_count == self.len() { - return vec![(0, self.len())]; - } - - self.buffer.set_slices().collect() - }) - } - - /// Return an iterator over either indices or slices of the mask based on a density threshold. - #[inline] - pub fn threshold_iter(&self, threshold: f64) -> MaskIter<'_> { - if self.density >= threshold { - MaskIter::Slices(self.slices()) - } else { - MaskIter::Indices(self.indices()) - } - } - - /// Extracts the internal [`BitBuffer`]. - pub(crate) fn into_buffer(self) -> BitBuffer { - self.buffer - } -} - impl Mask { /// Create a new Mask where all values are set. #[inline] @@ -642,6 +555,93 @@ impl Mask { } } +impl MaskValues { + /// Returns the length of the mask. + #[inline] + pub fn len(&self) -> usize { + self.buffer.len() + } + + /// Returns true if the mask is empty i.e., it's length is 0. + #[inline] + pub fn is_empty(&self) -> bool { + self.buffer.is_empty() + } + + /// Returns the true count of the mask. + #[inline] + pub fn true_count(&self) -> usize { + self.true_count + } + + /// Returns the boolean buffer representation of the mask. + #[inline] + pub fn bit_buffer(&self) -> &BitBuffer { + &self.buffer + } + + /// Returns the boolean value at a given index. + #[inline] + pub fn value(&self, index: usize) -> bool { + self.buffer.value(index) + } + + /// Constructs an indices vector from one of the other representations. + pub fn indices(&self) -> &[usize] { + self.indices.get_or_init(|| { + if self.true_count == 0 { + return vec![]; + } + + if self.true_count == self.len() { + return (0..self.len()).collect(); + } + + if let Some(slices) = self.slices.get() { + let mut indices = Vec::with_capacity(self.true_count); + indices.extend(slices.iter().flat_map(|(start, end)| *start..*end)); + debug_assert!(indices.is_sorted()); + assert_eq!(indices.len(), self.true_count); + return indices; + } + + let mut indices = Vec::with_capacity(self.true_count); + indices.extend(self.buffer.set_indices()); + debug_assert!(indices.is_sorted()); + assert_eq!(indices.len(), self.true_count); + indices + }) + } + + /// Constructs a slices vector from one of the other representations. + #[allow(clippy::cast_possible_truncation)] + #[inline] + pub fn slices(&self) -> &[(usize, usize)] { + self.slices.get_or_init(|| { + if self.true_count == self.len() { + return vec![(0, self.len())]; + } + + self.buffer.set_slices().collect() + }) + } + + /// Return an iterator over either indices or slices of the mask based on a density threshold. + #[inline] + pub fn threshold_iter(&self, threshold: f64) -> MaskIter<'_> { + if self.density >= threshold { + MaskIter::Slices(self.slices()) + } else { + MaskIter::Indices(self.indices()) + } + } + + /// Extracts the internal [`BitBuffer`]. + pub(crate) fn into_buffer(self) -> BitBuffer { + self.buffer + } +} + /// Iterator over the indices or slices of a mask. pub enum MaskIter<'a> { /// Slice of pre-cached indices of a mask. diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs index c1b9f6a6326..1e75d722355 100644 --- a/vortex-mask/src/mask_mut.rs +++ b/vortex-mask/src/mask_mut.rs @@ -279,6 +279,24 @@ impl MaskMut { pub fn is_empty(&self) -> bool { self.len() == 0 } + + /// Returns true if all values in the mask are true. + pub fn all_true(&self) -> bool { + match &self.0 { + Inner::Empty { .. } => true, + Inner::Constant { value, .. } => *value, + Inner::Builder(bits) => bits.true_count() == bits.len(), + } + } + + /// Returns true if all values in the mask are false. + pub fn all_false(&self) -> bool { + match &self.0 { + Inner::Empty { .. } => true, + Inner::Constant { value, .. } => !*value, + Inner::Builder(bits) => !bits.is_empty() && bits.true_count() == 0, + } + } } impl Mask { diff --git a/vortex-vector/src/listview/vector.rs b/vortex-vector/src/listview/vector.rs index 84ba6866a0a..134ec416dc8 100644 --- a/vortex-vector/src/listview/vector.rs +++ b/vortex-vector/src/listview/vector.rs @@ -5,13 +5,13 @@ use std::sync::Arc; -use vortex_error::{VortexExpect, VortexResult}; +use vortex_error::{VortexExpect, VortexResult, vortex_ensure}; use vortex_mask::Mask; use super::ListViewVectorMut; -use crate::Vector; use crate::ops::{VectorMutOps, VectorOps}; use crate::primitive::PrimitiveVector; +use crate::{Vector, match_each_integer_pvector}; /// A vector of variable-width lists. /// @@ -35,9 +35,13 @@ pub struct ListViewVector { pub(super) elements: Arc, /// Offsets for each list into the elements vector. + /// + /// Offsets are always integers, and always non-negative (even if the type is signed). pub(super) offsets: PrimitiveVector, /// Sizes (lengths) of each list. + /// + /// Sizes are always integers, and always non-negative (even if the type is signed). pub(super) sizes: PrimitiveVector, /// The validity mask (where `true` represents a list is **not** null). @@ -57,7 +61,15 @@ impl ListViewVector { /// /// # Panics /// - /// TODO + /// Panics if: + /// + /// - `offsets` or `sizes` contain nulls values. + /// - `offsets`, `sizes`, and `validity` do not all have the same length + /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this + /// would cause overflow) + /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than + /// `elements.len()` (even if the corresponding view is defined as null by the validity + /// array). pub fn new( elements: Arc, offsets: PrimitiveVector, @@ -72,21 +84,74 @@ impl ListViewVector { /// /// # Errors /// - /// TODO + /// Returns an error if: + /// + /// - `offsets` or `sizes` contain nulls values. + /// - `offsets`, `sizes`, and `validity` do not all have the same length + /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this + /// would cause overflow) + /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than + /// `elements.len()` (even if the corresponding view is defined as null by the validity + /// array). pub fn try_new( - _elements: Arc, - _offsets: PrimitiveVector, - _sizes: PrimitiveVector, - _validity: Mask, + elements: Arc, + offsets: PrimitiveVector, + sizes: PrimitiveVector, + validity: Mask, ) -> VortexResult { - todo!() + let len = validity.len(); + + vortex_ensure!( + offsets.len() == len, + "Offsets length {} does not match validity length {len}", + offsets.len(), + ); + vortex_ensure!( + sizes.len() == len, + "Sizes length {} does not match validity length {len}", + sizes.len(), + ); + + vortex_ensure!( + offsets.validity().all_true(), + "Offsets vector must not contain null values" + ); + vortex_ensure!( + sizes.validity().all_true(), + "Sizes vector must not contain null values" + ); + + let offsets_width = offsets.ptype().byte_width(); + let sizes_width = sizes.ptype().byte_width(); + vortex_ensure!( + sizes_width <= offsets_width, + "Sizes integer width {sizes_width} must be \ + <= offsets integer width {offsets_width} to prevent overflow", + ); + + // Check that each `offsets[i] + sizes[i] <= elements.len()`. + validate_views_bound(elements.len(), &offsets, &sizes)?; + + Ok(Self { + elements, + offsets, + sizes, + validity, + len, + }) } /// Creates a new [`ListViewVector`] without validation. /// /// # Safety /// - /// TODO + /// The caller must ensure all of the following invariants are satisfied: + /// + /// - `offsets` and `sizes` must be non-nullable integer vectors. + /// - `offsets`, `sizes`, and `validity` must have the same length. + /// - Size integer width must be smaller than or equal to offset type (to prevent overflow). + /// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()` + /// (even if the corresponding view is defined as null by the validity array). pub unsafe fn new_unchecked( elements: Arc, offsets: PrimitiveVector, @@ -210,3 +275,30 @@ impl VectorOps for ListViewVector { } } } + +// TODO(connor): It would be better to separate everything inside the macros into its own function, +// but that would require adding another macro that sets a type `$type` to be used by the caller. +/// Checks that all views are `<= elements_len`. +#[allow(clippy::cognitive_complexity, clippy::cast_possible_truncation)] +fn validate_views_bound( + elements_len: usize, + offsets: &PrimitiveVector, + sizes: &PrimitiveVector, +) -> VortexResult<()> { + let len = offsets.len(); + + match_each_integer_pvector!(&offsets, |offsets_vector| { + match_each_integer_pvector!(&sizes, |sizes_vector| { + let offsets_slice = offsets_vector.as_ref(); + let sizes_slice = sizes_vector.as_ref(); + + for i in 0..len { + let offset = offsets_slice[i] as usize; + let size = sizes_slice[i] as usize; + vortex_ensure!(offset + size <= elements_len); + } + }); + }); + + Ok(()) +} diff --git a/vortex-vector/src/listview/vector_mut.rs b/vortex-vector/src/listview/vector_mut.rs index b9dd60b3321..2c19e72ceaa 100644 --- a/vortex-vector/src/listview/vector_mut.rs +++ b/vortex-vector/src/listview/vector_mut.rs @@ -6,13 +6,13 @@ use std::sync::Arc; use vortex_dtype::DType; -use vortex_error::{VortexExpect, VortexResult}; +use vortex_error::{VortexExpect, VortexResult, vortex_ensure}; use vortex_mask::MaskMut; use super::ListViewVector; -use crate::VectorMut; use crate::ops::VectorMutOps; use crate::primitive::PrimitiveVectorMut; +use crate::{VectorMut, match_each_integer_pvector_mut}; /// A mutable vector of variable-width lists. /// @@ -36,9 +36,13 @@ pub struct ListViewVectorMut { pub(super) elements: Box, /// Mutable offsets for each list into the elements array. + /// + /// Offsets are always integers, and always non-negative (even if the type is signed). pub(super) offsets: PrimitiveVectorMut, /// Mutable sizes (lengths) of each list. + /// + /// Sizes are always integers, and always non-negative (even if the type is signed). pub(super) sizes: PrimitiveVectorMut, /// The validity mask (where `true` represents a list is **not** null). @@ -58,7 +62,15 @@ impl ListViewVectorMut { /// /// # Panics /// - /// TODO + /// Panics if: + /// + /// - `offsets` or `sizes` contain nulls values. + /// - `offsets`, `sizes`, and `validity` do not all have the same length + /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this + /// would cause overflow) + /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than + /// `elements.len()` (even if the corresponding view is defined as null by the validity + /// array). pub fn new( elements: Box, offsets: PrimitiveVectorMut, @@ -73,21 +85,74 @@ impl ListViewVectorMut { /// /// # Errors /// - /// TODO + /// Returns an error if: + /// + /// - `offsets` or `sizes` contain nulls values. + /// - `offsets`, `sizes`, and `validity` do not all have the same length + /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this + /// would cause overflow) + /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than + /// `elements.len()` (even if the corresponding view is defined as null by the validity + /// array). pub fn try_new( - _elements: Box, - _offsets: PrimitiveVectorMut, - _sizes: PrimitiveVectorMut, - _validity: MaskMut, + elements: Box, + offsets: PrimitiveVectorMut, + sizes: PrimitiveVectorMut, + validity: MaskMut, ) -> VortexResult { - todo!() + let len = validity.len(); + + vortex_ensure!( + offsets.len() == len, + "Offsets length {} does not match validity length {len}", + offsets.len(), + ); + vortex_ensure!( + sizes.len() == len, + "Sizes length {} does not match validity length {len}", + sizes.len(), + ); + + vortex_ensure!( + offsets.validity().all_true(), + "Offsets vector must not contain null values" + ); + vortex_ensure!( + sizes.validity().all_true(), + "Sizes vector must not contain null values" + ); + + let offsets_width = offsets.ptype().byte_width(); + let sizes_width = sizes.ptype().byte_width(); + vortex_ensure!( + sizes_width <= offsets_width, + "Sizes integer width {sizes_width} must be \ + <= offsets integer width {offsets_width} to prevent overflow", + ); + + // Check that each `offsets[i] + sizes[i] <= elements.len()`. + validate_views_bound(elements.len(), &offsets, &sizes)?; + + Ok(Self { + elements, + offsets, + sizes, + validity, + len, + }) } /// Creates a new [`ListViewVectorMut`] without validation. /// /// # Safety /// - /// TODO + /// The caller must ensure all of the following invariants are satisfied: + /// + /// - `offsets` and `sizes` must be non-nullable integer vectors. + /// - `offsets`, `sizes`, and `validity` must have the same length. + /// - Size integer width must be smaller than or equal to offset type (to prevent overflow). + /// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()` + /// (even if the corresponding view is defined as null by the validity array). pub unsafe fn new_unchecked( elements: Box, offsets: PrimitiveVectorMut, @@ -157,11 +222,19 @@ impl VectorMutOps for ListViewVectorMut { } fn capacity(&self) -> usize { - todo!() + debug_assert!( + self.offsets.capacity() <= self.sizes.capacity(), + "the capacity of the sizes was somehow less than the offsets" + ); + + self.offsets.capacity() } - fn reserve(&mut self, _additional: usize) { - todo!() + fn reserve(&mut self, additional: usize) { + self.offsets.reserve(additional); + self.sizes.reserve(additional); + self.elements.reserve(additional * 2); // Sane default TODO + self.validity.reserve(additional); } fn extend_from_vector(&mut self, _other: &ListViewVector) { @@ -169,7 +242,7 @@ impl VectorMutOps for ListViewVectorMut { } fn append_nulls(&mut self, _n: usize) { - todo!() + todo!("Need to figure out what the 'value' of nulls are for list view vectors") } fn freeze(self) -> ListViewVector { @@ -190,3 +263,30 @@ impl VectorMutOps for ListViewVectorMut { todo!() } } + +// TODO(connor): It would be better to separate everything inside the macros into its own function, +// but that would require adding another macro that sets a type `$type` to be used by the caller. +/// Checks that all views are `<= elements_len`. +#[allow(clippy::cognitive_complexity, clippy::cast_possible_truncation)] +fn validate_views_bound( + elements_len: usize, + offsets: &PrimitiveVectorMut, + sizes: &PrimitiveVectorMut, +) -> VortexResult<()> { + let len = offsets.len(); + + match_each_integer_pvector_mut!(&offsets, |offsets_vector| { + match_each_integer_pvector_mut!(&sizes, |sizes_vector| { + let offsets_slice = offsets_vector.as_ref(); + let sizes_slice = sizes_vector.as_ref(); + + for i in 0..len { + let offset = offsets_slice[i] as usize; + let size = sizes_slice[i] as usize; + vortex_ensure!(offset + size <= elements_len); + } + }); + }); + + Ok(()) +} diff --git a/vortex-vector/src/primitive/macros.rs b/vortex-vector/src/primitive/macros.rs index 4e670b7c4ed..7df6f445749 100644 --- a/vortex-vector/src/primitive/macros.rs +++ b/vortex-vector/src/primitive/macros.rs @@ -60,6 +60,41 @@ macro_rules! match_each_pvector { }}; } +/// Matches on all integer type variants of [`PrimitiveVector`] and executes the same code for each +/// of the integer variant branches. +/// +/// This macro eliminates repetitive match statements when implementing operations that need to work +/// uniformly across all integer type variants (`U8`, `U16`, `U32`, `U64`, `I8`, `I16`, `I32`, +/// `I64`). +/// +/// See [`match_each_pvector`] for similar usage. +/// +/// # Panics +/// +/// Panics if the vector passed in to the macro is a float vector variant. +#[macro_export] +macro_rules! match_each_integer_pvector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + $crate::primitive::PrimitiveVector::U8($vec) => $body, + $crate::primitive::PrimitiveVector::U16($vec) => $body, + $crate::primitive::PrimitiveVector::U32($vec) => $body, + $crate::primitive::PrimitiveVector::U64($vec) => $body, + $crate::primitive::PrimitiveVector::I8($vec) => $body, + $crate::primitive::PrimitiveVector::I16($vec) => $body, + $crate::primitive::PrimitiveVector::I32($vec) => $body, + $crate::primitive::PrimitiveVector::I64($vec) => $body, + $crate::primitive::PrimitiveVector::F16(_) + | $crate::primitive::PrimitiveVector::F32(_) + | $crate::primitive::PrimitiveVector::F64(_) => { + ::vortex_error::vortex_panic!( + "Tried to match a float vector in an integer match statement" + ) + } + } + }}; +} + /// Matches on all primitive type variants of [`PrimitiveVectorMut`] and executes the same code /// for each variant branch. /// @@ -110,3 +145,38 @@ macro_rules! match_each_pvector_mut { } }}; } + +/// Matches on all integer type variants of [`PrimitiveVectorMut`] and executes the same code for +/// each of the integer variant branches. +/// +/// This macro eliminates repetitive match statements when implementing operations that need to work +/// uniformly across all integer type variants (`U8`, `U16`, `U32`, `U64`, `I8`, `I16`, `I32`, +/// `I64`). +/// +/// See [`match_each_pvector_mut`] for similar usage. +/// +/// # Panics +/// +/// Panics if the vector passed in to the macro is a float vector variant. +#[macro_export] +macro_rules! match_each_integer_pvector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + $crate::primitive::PrimitiveVectorMut::U8($vec) => $body, + $crate::primitive::PrimitiveVectorMut::U16($vec) => $body, + $crate::primitive::PrimitiveVectorMut::U32($vec) => $body, + $crate::primitive::PrimitiveVectorMut::U64($vec) => $body, + $crate::primitive::PrimitiveVectorMut::I8($vec) => $body, + $crate::primitive::PrimitiveVectorMut::I16($vec) => $body, + $crate::primitive::PrimitiveVectorMut::I32($vec) => $body, + $crate::primitive::PrimitiveVectorMut::I64($vec) => $body, + $crate::primitive::PrimitiveVectorMut::F16(_) + | $crate::primitive::PrimitiveVectorMut::F32(_) + | $crate::primitive::PrimitiveVectorMut::F64(_) => { + ::vortex_error::vortex_panic!( + "Tried to match a mutable float vector in an integer match statement" + ) + } + } + }}; +} diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs index d87daa95a3d..33988ea1cd3 100644 --- a/vortex-vector/src/primitive/vector.rs +++ b/vortex-vector/src/primitive/vector.rs @@ -6,6 +6,7 @@ use vortex_dtype::half::f16; use vortex_dtype::{NativePType, PType, PTypeDowncast, PTypeUpcast}; use vortex_error::vortex_panic; +use vortex_mask::Mask; use crate::primitive::{PVector, PrimitiveVectorMut}; use crate::{VectorOps, match_each_pvector}; @@ -70,7 +71,7 @@ impl VectorOps for PrimitiveVector { match_each_pvector!(self, |v| { v.len() }) } - fn validity(&self) -> &vortex_mask::Mask { + fn validity(&self) -> &Mask { match_each_pvector!(self, |v| { v.validity() }) } From cf0ff0ba7212c2c1b6495afff4afb25cd8ffb32a Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 4 Nov 2025 15:21:55 -0500 Subject: [PATCH 3/4] implement batch operations for list vectors Signed-off-by: Connor Tsui --- vortex-vector/src/listview/mod.rs | 2 + vortex-vector/src/listview/vector_mut.rs | 119 ++++++++++++++++++++--- 2 files changed, 110 insertions(+), 11 deletions(-) diff --git a/vortex-vector/src/listview/mod.rs b/vortex-vector/src/listview/mod.rs index e1653ac2044..b7ea3c57b8e 100644 --- a/vortex-vector/src/listview/mod.rs +++ b/vortex-vector/src/listview/mod.rs @@ -10,6 +10,8 @@ //! within the flat elements array. This allows for efficient access to individual lists without //! copying data. This is similar to Apache Arrow's `ListView` type. +// TODO(connor): More docs and examples. + mod vector; pub use vector::ListViewVector; diff --git a/vortex-vector/src/listview/vector_mut.rs b/vortex-vector/src/listview/vector_mut.rs index 2c19e72ceaa..b80aa09bbde 100644 --- a/vortex-vector/src/listview/vector_mut.rs +++ b/vortex-vector/src/listview/vector_mut.rs @@ -11,8 +11,8 @@ use vortex_mask::MaskMut; use super::ListViewVector; use crate::ops::VectorMutOps; -use crate::primitive::PrimitiveVectorMut; -use crate::{VectorMut, match_each_integer_pvector_mut}; +use crate::primitive::{PrimitiveVector, PrimitiveVectorMut}; +use crate::{VectorMut, VectorOps, match_each_integer_pvector, match_each_integer_pvector_mut}; /// A mutable vector of variable-width lists. /// @@ -131,7 +131,7 @@ impl ListViewVectorMut { ); // Check that each `offsets[i] + sizes[i] <= elements.len()`. - validate_views_bound(elements.len(), &offsets, &sizes)?; + validate_views_bound(elements.len() as u64, &offsets, &sizes)?; Ok(Self { elements, @@ -237,12 +237,72 @@ impl VectorMutOps for ListViewVectorMut { self.validity.reserve(additional); } - fn extend_from_vector(&mut self, _other: &ListViewVector) { - todo!() + /// This will also panic if we try to extend the `ListViewVector` beyond the maximum offset + /// representable by the type of the `offsets` primitive vector. + fn extend_from_vector(&mut self, other: &ListViewVector) { + // Extend the elements with the other's elements. + let old_elements_len = self.elements.len() as u64; + self.elements.extend_from_vector(&other.elements); + let new_elements_len = self.elements.len() as u64; + + // Then extend the sizes with the other's sizes (these do not need any adjustment). + self.sizes.extend_from_vector(&other.sizes); + + // We need this assertion to ensure that the casts below are infallible. + assert!( + new_elements_len < self.offsets.ptype().max_value_as_u64(), + "the elements length {new_elements_len} is not representable by the offsets type {}", + self.offsets.ptype() + ); + + // Finally, extend the offsets after adding the old `elements` length to each. + adjust_and_extend_offsets(&mut self.offsets, &other.offsets, old_elements_len); + + self.validity.append_mask(&other.validity); + self.len += other.len; + debug_assert_eq!(self.len, self.validity.len()); } - fn append_nulls(&mut self, _n: usize) { - todo!("Need to figure out what the 'value' of nulls are for list view vectors") + fn append_nulls(&mut self, n: usize) { + // To support easier copying to Arrow `List`s, we point the null views towards the ends of + // the `elements` vector (with size 0) to hopefully keep offsets sorted if they were already + // sorted. + let elements_len = self.elements.len(); + + debug_assert!( + (elements_len as u64) < self.offsets.ptype().max_value_as_u64(), + "the elements length {elements_len} is somehow not representable by the offsets type {}", + self.offsets.ptype() + ); + + self.offsets.reserve(n); + self.sizes.reserve(n); + + match_each_integer_pvector_mut!(&mut self.offsets, |offsets_vec| { + for _ in 0..n { + // SAFETY: We just reserved capacity for `n` elements above, and the cast must + // succeed because the elements length must be representable by the offset type. + #[allow(clippy::cast_possible_truncation)] + unsafe { + offsets_vec.push_unchecked(elements_len as _) + }; + } + }); + + match_each_integer_pvector_mut!(&mut self.sizes, |sizes_vec| { + for _ in 0..n { + // SAFETY: We just reserved capacity for `n` elements above, and `0` is + // representable by all integer types. + #[allow(clippy::cast_possible_truncation)] + unsafe { + sizes_vec.push_unchecked(0 as _) + }; + } + }); + + self.validity.append_n(false, n); + self.len += n; + debug_assert_eq!(self.len, self.validity.len()); } fn freeze(self) -> ListViewVector { @@ -267,9 +327,9 @@ impl VectorMutOps for ListViewVectorMut { // TODO(connor): It would be better to separate everything inside the macros into its own function, // but that would require adding another macro that sets a type `$type` to be used by the caller. /// Checks that all views are `<= elements_len`. -#[allow(clippy::cognitive_complexity, clippy::cast_possible_truncation)] +#[allow(clippy::cognitive_complexity)] fn validate_views_bound( - elements_len: usize, + elements_len: u64, offsets: &PrimitiveVectorMut, sizes: &PrimitiveVectorMut, ) -> VortexResult<()> { @@ -280,9 +340,10 @@ fn validate_views_bound( let offsets_slice = offsets_vector.as_ref(); let sizes_slice = sizes_vector.as_ref(); + #[allow(clippy::unnecessary_cast)] for i in 0..len { - let offset = offsets_slice[i] as usize; - let size = sizes_slice[i] as usize; + let offset = offsets_slice[i] as u64; + let size = sizes_slice[i] as u64; vortex_ensure!(offset + size <= elements_len); } }); @@ -290,3 +351,39 @@ fn validate_views_bound( Ok(()) } + +// TODO(connor): It would be better to separate everything inside the macros into its own function, +// but that would require adding another macro that sets a type `$type` to be used by the caller. +/// Checks that all views are `<= elements_len`. +#[allow(clippy::cognitive_complexity)] +fn adjust_and_extend_offsets( + our_offsets: &mut PrimitiveVectorMut, + other: &PrimitiveVector, + old_elements_len: u64, +) { + our_offsets.reserve(other.len()); + + // Adjust each offset from `other` by adding the current elements length to each of the + // incoming offsets. + match_each_integer_pvector_mut!(our_offsets, |self_offsets| { + match_each_integer_pvector!(other, |other_offsets| { + let other_offsets_slice = other_offsets.as_ref(); + + // Append each offset from `other`, adjusted by the elements_offset. + for i in 0..other.len() { + // All offset types are representable via a `u64` since we also ensure offsets + // are always non-negative. + #[allow(clippy::unnecessary_cast)] + let adjusted_offset = other_offsets_slice[i] as u64 + old_elements_len; + + // SAFETY: We just reserved capacity for `other.len()` elements above, and we + // also know the cast is fine because we verified above that the maximum + // possible offset is representable by the offset type. + #[allow(clippy::cast_possible_truncation)] + unsafe { + self_offsets.push_unchecked(adjusted_offset as _); + } + } + }); + }); +} From 84b0ad62488206d0c9ab5d6ff764d444115a99e8 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 4 Nov 2025 19:38:11 -0500 Subject: [PATCH 4/4] add tests Signed-off-by: Connor Tsui --- vortex-vector/src/listview/mod.rs | 3 + vortex-vector/src/listview/tests.rs | 409 ++++++++++++++++++++++++++ vortex-vector/src/primitive/macros.rs | 4 + 3 files changed, 416 insertions(+) create mode 100644 vortex-vector/src/listview/tests.rs diff --git a/vortex-vector/src/listview/mod.rs b/vortex-vector/src/listview/mod.rs index b7ea3c57b8e..fe70860b00f 100644 --- a/vortex-vector/src/listview/mod.rs +++ b/vortex-vector/src/listview/mod.rs @@ -31,3 +31,6 @@ impl From for VectorMut { Self::List(v) } } + +#[cfg(test)] +mod tests; diff --git a/vortex-vector/src/listview/tests.rs b/vortex-vector/src/listview/tests.rs new file mode 100644 index 00000000000..7ccc6c7dde2 --- /dev/null +++ b/vortex-vector/src/listview/tests.rs @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::sync::Arc; + +use vortex_mask::{Mask, MaskMut}; + +use crate::listview::{ListViewVector, ListViewVectorMut}; +use crate::primitive::{PVectorMut, PrimitiveVector}; +use crate::{Vector, VectorMutOps, VectorOps}; + +// TODO(connor): This should probably be a method directly on the vector. +// Helper function to get list values at index +fn get_list_values(list: &ListViewVector, list_idx: usize) -> Vec { + let offsets = list.offsets(); + let sizes = list.sizes(); + + // Get offset and size for this list entry + let offset = match offsets { + PrimitiveVector::U32(pvec) => *pvec.get(list_idx).unwrap_or(&0) as usize, + PrimitiveVector::I32(pvec) => *pvec.get(list_idx).unwrap_or(&0) as usize, + _ => panic!("Unsupported offset type in test"), + }; + + let size = match sizes { + PrimitiveVector::U32(pvec) => *pvec.get(list_idx).unwrap_or(&0) as usize, + PrimitiveVector::I32(pvec) => *pvec.get(list_idx).unwrap_or(&0) as usize, + _ => panic!("Unsupported size type in test"), + }; + + // Extract values from elements vector + let elements = list.elements(); + if let Vector::Primitive(PrimitiveVector::I32(pvec)) = elements { + let mut values = Vec::new(); + for i in offset..(offset + size) { + if let Some(val) = pvec.get(i) { + values.push(*val); + } + } + return values; + } + panic!("Elements not i32 in test"); +} + +#[test] +fn test_basic_list_operations_with_values() { + // Create a list vector with: + // List 0: [1, 2, 3] + // List 1: [4, 5] + // List 2: [6, 7, 8, 9] + + let elements: PrimitiveVector = PVectorMut::from_iter([1i32, 2, 3, 4, 5, 6, 7, 8, 9]) + .freeze() + .into(); + let offsets: PrimitiveVector = PVectorMut::from_iter([0u32, 3, 5]).freeze().into(); + let sizes: PrimitiveVector = PVectorMut::from_iter([3u32, 2, 4]).freeze().into(); + let validity = Mask::new_true(3); + + let list = ListViewVector::new(Arc::new(elements.into()), offsets, sizes, validity); + + // Verify length + assert_eq!(list.len(), 3); + + // Verify actual values in each list + assert_eq!(get_list_values(&list, 0), vec![1, 2, 3]); + assert_eq!(get_list_values(&list, 1), vec![4, 5]); + assert_eq!(get_list_values(&list, 2), vec![6, 7, 8, 9]); +} + +#[test] +fn test_overlapping_views() { + // Create overlapping views into the same elements + // Elements: [10, 20, 30, 40, 50] + // List 0: [10, 20, 30] (offset=0, size=3) + // List 1: [20, 30, 40] (offset=1, size=3) + // List 2: [30, 40, 50] (offset=2, size=3) + + let elements: PrimitiveVector = PVectorMut::from_iter([10i32, 20, 30, 40, 50]) + .freeze() + .into(); + let offsets: PrimitiveVector = PVectorMut::from_iter([0u32, 1, 2]).freeze().into(); + let sizes: PrimitiveVector = PVectorMut::from_iter([3u32, 3, 3]).freeze().into(); + let validity = Mask::new_true(3); + + let list = ListViewVector::new(Arc::new(elements.into()), offsets, sizes, validity); + + // Verify the overlapping views show correct values + assert_eq!(get_list_values(&list, 0), vec![10, 20, 30]); + assert_eq!(get_list_values(&list, 1), vec![20, 30, 40]); + assert_eq!(get_list_values(&list, 2), vec![30, 40, 50]); +} + +#[test] +fn test_non_contiguous_views() { + // Create non-contiguous views (with gaps) + // Elements: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + // List 0: [1, 2] (offset=0, size=2) + // List 1: [5, 6] (offset=4, size=2) -- skips 3, 4 + // List 2: [9, 10] (offset=8, size=2) -- skips 7, 8 + + let elements: PrimitiveVector = PVectorMut::from_iter([1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + .freeze() + .into(); + let offsets: PrimitiveVector = PVectorMut::from_iter([0u32, 4, 8]).freeze().into(); + let sizes: PrimitiveVector = PVectorMut::from_iter([2u32, 2, 2]).freeze().into(); + let validity = Mask::new_true(3); + + let list = ListViewVector::new(Arc::new(elements.into()), offsets, sizes, validity); + + // Verify the non-contiguous views + assert_eq!(get_list_values(&list, 0), vec![1, 2]); + assert_eq!(get_list_values(&list, 1), vec![5, 6]); + assert_eq!(get_list_values(&list, 2), vec![9, 10]); +} + +#[test] +fn test_unsorted_offsets() { + // Test that unsorted offsets work correctly + // Elements: [100, 200, 300, 400, 500] + // List 0: [300, 400] (offset=2, size=2) + // List 1: [100, 200] (offset=0, size=2) -- out of order! + // List 2: [500] (offset=4, size=1) + + let elements: PrimitiveVector = PVectorMut::from_iter([100i32, 200, 300, 400, 500]) + .freeze() + .into(); + let offsets: PrimitiveVector = PVectorMut::from_iter([2u32, 0, 4]).freeze().into(); + let sizes: PrimitiveVector = PVectorMut::from_iter([2u32, 2, 1]).freeze().into(); + let validity = Mask::new_true(3); + + let list = ListViewVector::new(Arc::new(elements.into()), offsets, sizes, validity); + + // Verify values respect the unsorted offsets + assert_eq!(get_list_values(&list, 0), vec![300, 400]); + assert_eq!(get_list_values(&list, 1), vec![100, 200]); + assert_eq!(get_list_values(&list, 2), vec![500]); +} + +#[test] +fn test_empty_lists() { + // Test lists with size=0 + // Elements: [1, 2, 3] + // List 0: [] (offset=0, size=0) + // List 1: [1, 2] (offset=0, size=2) + // List 2: [] (offset=2, size=0) + // List 3: [3] (offset=2, size=1) + + let elements: PrimitiveVector = PVectorMut::from_iter([1i32, 2, 3]).freeze().into(); + let offsets: PrimitiveVector = PVectorMut::from_iter([0u32, 0, 2, 2]).freeze().into(); + let sizes: PrimitiveVector = PVectorMut::from_iter([0u32, 2, 0, 1]).freeze().into(); + let validity = Mask::new_true(4); + + let list = ListViewVector::new(Arc::new(elements.into()), offsets, sizes, validity); + + assert_eq!(list.len(), 4); + assert_eq!(get_list_values(&list, 0), Vec::::new()); + assert_eq!(get_list_values(&list, 1), vec![1, 2]); + assert_eq!(get_list_values(&list, 2), Vec::::new()); + assert_eq!(get_list_values(&list, 3), vec![3]); +} + +#[test] +fn test_extend_with_offset_adjustment() { + // Test that extend_from_vector properly adjusts offsets + let elements1 = PVectorMut::from_iter([10i32, 20, 30, 40]); + let offsets1 = PVectorMut::from_iter([0u32, 2]).into(); + let sizes1 = PVectorMut::from_iter([2u32, 2]).into(); + let validity1 = MaskMut::new_true(2); + + let mut list = ListViewVectorMut::new(Box::new(elements1.into()), offsets1, sizes1, validity1); + + // Second vector with its own offsets + let elements2: PrimitiveVector = PVectorMut::from_iter([50i32, 60, 70]).freeze().into(); + let offsets2: PrimitiveVector = PVectorMut::from_iter([0u32, 1]).freeze().into(); + let sizes2: PrimitiveVector = PVectorMut::from_iter([1u32, 2]).freeze().into(); + let validity2 = Mask::new_true(2); + + let list2 = ListViewVector::new(Arc::new(elements2.into()), offsets2, sizes2, validity2); + + list.extend_from_vector(&list2); + + // Freeze and check values + let frozen = list.freeze(); + assert_eq!(frozen.len(), 4); + + // Original lists + assert_eq!(get_list_values(&frozen, 0), vec![10, 20]); + assert_eq!(get_list_values(&frozen, 1), vec![30, 40]); + + // Extended lists - offsets should be adjusted by 4 (original elements length) + // List 2: offset was 0, now 4, size 1 -> [50] + // List 3: offset was 1, now 5, size 2 -> [60, 70] + assert_eq!(get_list_values(&frozen, 2), vec![50]); + assert_eq!(get_list_values(&frozen, 3), vec![60, 70]); +} + +#[test] +fn test_nulls_with_valid_metadata() { + // Test that null entries still require valid offset/size bounds + let elements: PrimitiveVector = PVectorMut::from_iter([1i32, 2, 3, 4, 5]).freeze().into(); + let offsets: PrimitiveVector = PVectorMut::from_iter([0u32, 2, 3]).freeze().into(); + let sizes: PrimitiveVector = PVectorMut::from_iter([2u32, 1, 2]).freeze().into(); + // Mark the middle list as null + let validity = Mask::from_indices(3, vec![0, 2]); + + let list = ListViewVector::new(Arc::new(elements.into()), offsets, sizes, validity); + + assert_eq!(list.len(), 3); + + // Check validity + assert!(list.validity().value(0)); // First list is valid + assert!(!list.validity().value(1)); // Second list is null + assert!(list.validity().value(2)); // Third list is valid + + // Check values of valid lists + assert_eq!(get_list_values(&list, 0), vec![1, 2]); + assert_eq!(get_list_values(&list, 2), vec![4, 5]); +} + +#[test] +fn test_validation_errors() { + let elements: PrimitiveVector = PVectorMut::from_iter([1i32, 2, 3, 4, 5]).freeze().into(); + + // Test length mismatch + let offsets: PrimitiveVector = PVectorMut::from_iter([0u32, 2, 3]).freeze().into(); + let sizes: PrimitiveVector = PVectorMut::from_iter([2u32, 1]).freeze().into(); // Wrong length! + let validity = Mask::new_true(3); + + let result = ListViewVector::try_new( + Arc::new(elements.clone().into()), + offsets, + sizes, + validity.clone(), + ); + assert!(result.is_err()); + + // Test bounds violation + let bad_offsets = PVectorMut::from_iter([0u32, 2, 4]).freeze().into(); + let bad_sizes = PVectorMut::from_iter([2u32, 1, 2]).freeze().into(); // 4 + 2 = 6 > 5 elements + let result = ListViewVector::try_new( + Arc::new(elements.clone().into()), + bad_offsets, + bad_sizes, + validity.clone(), + ); + assert!(result.is_err()); + + // Test nulls in metadata + let null_offsets = PVectorMut::from_iter([Some(0u32), None, Some(3)]) + .freeze() + .into(); + let sizes: PrimitiveVector = PVectorMut::from_iter([2u32, 1, 1]).freeze().into(); + let result = ListViewVector::try_new(Arc::new(elements.into()), null_offsets, sizes, validity); + assert!(result.is_err()); +} + +#[test] +fn test_different_integer_types() { + // Test various combinations of integer types + let elements: PrimitiveVector = PVectorMut::from_iter([10i32, 20, 30, 40, 50, 60]) + .freeze() + .into(); + + // u8/u8 combination + let offsets_u8: PrimitiveVector = PVectorMut::from_iter([0u8, 2, 4]).freeze().into(); + let sizes_u8: PrimitiveVector = PVectorMut::from_iter([2u8, 2, 2]).freeze().into(); + let validity = Mask::new_true(3); + + let list = ListViewVector::new( + Arc::new(elements.clone().into()), + offsets_u8, + sizes_u8, + validity.clone(), + ); + assert_eq!(list.len(), 3); + + // i64/i32 combination (signed types) + let offsets_i64: PrimitiveVector = PVectorMut::from_iter([0i64, 2, 4]).freeze().into(); + let sizes_i32: PrimitiveVector = PVectorMut::from_iter([2i32, 2, 2]).freeze().into(); + + let list2 = ListViewVector::new(Arc::new(elements.into()), offsets_i64, sizes_i32, validity); + assert_eq!(list2.len(), 3); +} + +#[test] +fn test_list_of_lists() { + // Create a 2-level nested structure + // Inner level: multiple lists of i32 + let inner_elements = PVectorMut::from_iter([1i32, 2, 3, 4, 5, 6, 7, 8, 9]); + let inner_offsets = PVectorMut::from_iter([0u32, 2, 3, 5, 7]).into(); + let inner_sizes = PVectorMut::from_iter([2u32, 1, 2, 2, 2]).into(); + let inner_validity = MaskMut::new_true(5); + + let inner_list = ListViewVectorMut::new( + Box::new(inner_elements.into()), + inner_offsets, + inner_sizes, + inner_validity, + ); + + // Outer level: lists that reference the inner lists + // Outer list 0: inner lists [0,1,2] -> [[1,2], [3], [4,5]] + // Outer list 1: inner lists [3,4] -> [[6,7], [8,9]] + let outer_offsets = PVectorMut::from_iter([0u32, 3]).into(); + let outer_sizes = PVectorMut::from_iter([3u32, 2]).into(); + let outer_validity = MaskMut::new_true(2); + + let outer_list = ListViewVectorMut::new( + Box::new(inner_list.into()), + outer_offsets, + outer_sizes, + outer_validity, + ); + + let frozen = outer_list.freeze(); + assert_eq!(frozen.len(), 2); + + // We have a 2-level structure + // The outer list contains 2 lists + // Each outer list contains multiple inner lists +} + +#[test] +fn test_append_nulls() { + let elements = PVectorMut::from_iter([100i32, 200]); + let offsets = PVectorMut::from_iter([0u32]).into(); + let sizes = PVectorMut::from_iter([2u32]).into(); + let validity = MaskMut::new_true(1); + + let mut list = ListViewVectorMut::new(Box::new(elements.into()), offsets, sizes, validity); + + assert_eq!(list.len(), 1); + list.append_nulls(3); + assert_eq!(list.len(), 4); + + let frozen = list.freeze(); + + // Check validity + assert!(frozen.validity().value(0)); // Original is valid + assert!(!frozen.validity().value(1)); // Appended nulls + assert!(!frozen.validity().value(2)); + assert!(!frozen.validity().value(3)); + + // Check original values are preserved + assert_eq!(get_list_values(&frozen, 0), vec![100, 200]); +} + +#[test] +fn test_try_into_mut() { + // Test: try_into_mut behavior + // Note: try_into_mut may fail even when seemingly solely owned because + // ALL components (elements, offsets, sizes, validity) must be convertible to mutable + + let elements: PrimitiveVector = PVectorMut::from_iter([1i32, 2, 3, 4]).freeze().into(); + let offsets: PrimitiveVector = PVectorMut::from_iter([0u32, 2]).freeze().into(); + let sizes: PrimitiveVector = PVectorMut::from_iter([2u32, 2]).freeze().into(); + let validity = Mask::new_true(2); + + let list = ListViewVector::new(Arc::new(elements.into()), offsets, sizes, validity); + + // Try to convert to mutable + let mut_result = list.try_into_mut(); + + match mut_result { + Ok(mut list_mut) => { + // If conversion succeeds, verify mutation works + assert_eq!(list_mut.len(), 2); + + list_mut.append_nulls(1); + assert_eq!(list_mut.len(), 3); + + let frozen = list_mut.freeze(); + assert_eq!(get_list_values(&frozen, 0), vec![1, 2]); + assert_eq!(get_list_values(&frozen, 1), vec![3, 4]); + } + Err(list_back) => { + // If conversion fails (which is OK because internal components might be shared), + // verify we get back a valid immutable vector + assert_eq!(list_back.len(), 2); + assert_eq!(get_list_values(&list_back, 0), vec![1, 2]); + assert_eq!(get_list_values(&list_back, 1), vec![3, 4]); + } + } + + // Test explicit sharing - should definitely fail + let elements2: PrimitiveVector = PVectorMut::from_iter([10i32, 20, 30]).freeze().into(); + let offsets2: PrimitiveVector = PVectorMut::from_iter([0u32, 2]).freeze().into(); + let sizes2: PrimitiveVector = PVectorMut::from_iter([2u32, 1]).freeze().into(); + let validity2 = Mask::new_true(2); + + let shared_elements: Arc = Arc::new(elements2.into()); + let list2 = ListViewVector::new( + shared_elements.clone(), // Clone the Arc to create another reference + offsets2, + sizes2, + validity2, + ); + + // Keep an extra reference to force sharing + let _keep_ref = shared_elements; + + // Should fail because elements are shared + let mut_result2 = list2.try_into_mut(); + assert!(mut_result2.is_err()); + + // Verify we get back the original immutable vector + let original = mut_result2.unwrap_err(); + assert_eq!(original.len(), 2); +} diff --git a/vortex-vector/src/primitive/macros.rs b/vortex-vector/src/primitive/macros.rs index 7df6f445749..2b42682cc56 100644 --- a/vortex-vector/src/primitive/macros.rs +++ b/vortex-vector/src/primitive/macros.rs @@ -69,6 +69,8 @@ macro_rules! match_each_pvector { /// /// See [`match_each_pvector`] for similar usage. /// +/// [`PrimitiveVector`]: crate::primitive::PrimitiveVector +/// /// # Panics /// /// Panics if the vector passed in to the macro is a float vector variant. @@ -155,6 +157,8 @@ macro_rules! match_each_pvector_mut { /// /// See [`match_each_pvector_mut`] for similar usage. /// +/// [`PrimitiveVectorMut`]: crate::primitive::PrimitiveVectorMut +/// /// # Panics /// /// Panics if the vector passed in to the macro is a float vector variant.