diff --git a/Cargo.lock b/Cargo.lock index 6d29bb6d70f..204ec3946e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9373,6 +9373,7 @@ dependencies = [ name = "vortex-vector" version = "0.1.0" dependencies = [ + "static_assertions", "vortex-buffer", "vortex-dtype", "vortex-error", diff --git a/vortex-compute/src/mask/mod.rs b/vortex-compute/src/mask/mod.rs index 1c206ef2e60..a88d81a52ce 100644 --- a/vortex-compute/src/mask/mod.rs +++ b/vortex-compute/src/mask/mod.rs @@ -8,8 +8,8 @@ use std::ops::BitAnd; use vortex_dtype::NativePType; use vortex_mask::Mask; use vortex_vector::{ - BoolVector, NullVector, PrimitiveVector, StructVector, Vector, match_each_pvector, - match_each_vector, + BoolVector, NullVector, PVector, PrimitiveVector, StructVector, VarBinType, VarBinVector, + Vector, match_each_pvector, match_each_vector, }; /// Trait for masking the validity of an array or vector. @@ -37,7 +37,8 @@ impl MaskValidity for NullVector { impl MaskValidity for BoolVector { fn mask_validity(self, mask: &Mask) -> Self { let (bits, validity) = self.into_parts(); - Self::new(bits, validity.bitand(mask)) + // SAFETY: we are preserving the original bits buffer and only modifying the validity. + unsafe { Self::new_unchecked(bits, validity.bitand(mask)) } } } @@ -47,16 +48,26 @@ impl MaskValidity for PrimitiveVector { } } -impl MaskValidity for vortex_vector::PVector { +impl MaskValidity for PVector { fn mask_validity(self, mask: &Mask) -> Self { let (data, validity) = self.into_parts(); - Self::new(data, validity.bitand(mask)) + // SAFETY: we are preserving the original data buffer and only modifying the validity. + unsafe { Self::new_unchecked(data, validity.bitand(mask)) } + } +} + +impl MaskValidity for VarBinVector { + fn mask_validity(self, mask: &Mask) -> Self { + let (views, buffers, validity) = self.into_parts(); + // SAFETY: we are preserving the original views and buffers, only modifying the validity. 
+ unsafe { Self::new_unchecked(views, buffers, validity.bitand(mask)) } } } impl MaskValidity for StructVector { fn mask_validity(self, mask: &Mask) -> Self { let (fields, validity) = self.into_parts(); - StructVector::new(fields, validity.bitand(mask)) + // SAFETY: we are preserving the original fields and only modifying the validity. + unsafe { StructVector::new_unchecked(fields, validity.bitand(mask)) } } } diff --git a/vortex-vector/Cargo.toml b/vortex-vector/Cargo.toml index 885c8a5c8d7..7b3ff6832e2 100644 --- a/vortex-vector/Cargo.toml +++ b/vortex-vector/Cargo.toml @@ -24,3 +24,5 @@ vortex-buffer = { workspace = true } vortex-dtype = { workspace = true } vortex-error = { workspace = true } vortex-mask = { workspace = true } + +static_assertions = { workspace = true } diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index b65e4e6fb9a..aa61f2dc919 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -6,7 +6,6 @@ //! Immutable and mutable decompressed (canonical) vectors for Vortex. #![deny(missing_docs)] -#![deny(clippy::missing_docs_in_private_items)] #![deny(clippy::missing_errors_doc)] #![deny(clippy::missing_panics_doc)] #![deny(clippy::missing_safety_doc)] @@ -15,11 +14,13 @@ mod bool; mod null; mod primitive; mod struct_; +mod varbin; -pub use bool::{BoolVector, BoolVectorMut}; -pub use null::{NullVector, NullVectorMut}; -pub use primitive::{PVector, PVectorMut, PrimitiveVector, PrimitiveVectorMut}; -pub use struct_::{StructVector, StructVectorMut}; +pub use bool::*; +pub use null::*; +pub use primitive::*; +pub use struct_::*; +pub use varbin::*; mod ops; mod vector; diff --git a/vortex-vector/src/macros.rs b/vortex-vector/src/macros.rs index 25f9de2b615..697087ac769 100644 --- a/vortex-vector/src/macros.rs +++ b/vortex-vector/src/macros.rs @@ -49,6 +49,14 @@ macro_rules! 
match_each_vector { let $vec = v; $body } + $crate::Vector::String(v) => { + let $vec = v; + $body + } + $crate::Vector::Binary(v) => { + let $vec = v; + $body + } $crate::Vector::Struct(v) => { let $vec = v; $body @@ -104,6 +112,14 @@ macro_rules! match_each_vector_mut { let $vec = v; $body } + $crate::VectorMut::String(v) => { + let $vec = v; + $body + } + $crate::VectorMut::Binary(v) => { + let $vec = v; + $body + } $crate::VectorMut::Struct(v) => { let $vec = v; $body diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs index b241e4fd5d8..22c668e98d2 100644 --- a/vortex-vector/src/private.rs +++ b/vortex-vector/src/private.rs @@ -29,5 +29,8 @@ impl Sealed for PrimitiveVectorMut {} impl Sealed for PVector {} impl Sealed for PVectorMut {} +impl Sealed for VarBinVector {} +impl Sealed for VarBinVectorMut {} + impl Sealed for StructVector {} impl Sealed for StructVectorMut {} diff --git a/vortex-vector/src/varbin/mod.rs b/vortex-vector/src/varbin/mod.rs new file mode 100644 index 00000000000..a594f5a1735 --- /dev/null +++ b/vortex-vector/src/varbin/mod.rs @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +pub use types::*; +pub use vector::*; +pub use vector_mut::*; +use vortex_error::vortex_panic; + +use crate::{Vector, VectorMut}; + +mod types; +mod vector; +mod vector_mut; +mod view; + +/// Type alias for non-utf8 variable-length binary vectors. +pub type BinaryVector = VarBinVector; +/// Type alias for mutable non-utf8 variable-length binary vectors. +pub type BinaryVectorMut = VarBinVectorMut; +/// Type alias for UTF-8 variable-length string vectors. +pub type StringVector = VarBinVector; +/// Type alias for mutable UTF-8 variable-length string vectors. 
+pub type StringVectorMut = VarBinVectorMut; + +impl VarBinTypeDowncast for Vector { + type Output = VarBinVector; + + fn into_binary(self) -> Self::Output { + if let Vector::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVector, got {self:?}"); + } + + fn into_string(self) -> Self::Output { + if let Vector::String(v) = self { + return v; + } + vortex_panic!("Expected StringVector, got {self:?}"); + } +} + +impl VarBinTypeUpcast for Vector { + type Input = VarBinVector; + + fn from_binary(input: Self::Input) -> Self { + Vector::Binary(input) + } + + fn from_string(input: Self::Input) -> Self { + Vector::String(input) + } +} + +impl VarBinTypeDowncast for VectorMut { + type Output = VarBinVectorMut; + + fn into_binary(self) -> Self::Output { + if let VectorMut::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVectorMut, got {self:?}"); + } + + fn into_string(self) -> Self::Output { + if let VectorMut::String(v) = self { + return v; + } + vortex_panic!("Expected StringVectorMut, got {self:?}"); + } +} + +impl VarBinTypeUpcast for VectorMut { + type Input = VarBinVectorMut; + + fn from_binary(input: Self::Input) -> Self { + VectorMut::Binary(input) + } + + fn from_string(input: Self::Input) -> Self { + VectorMut::String(input) + } +} diff --git a/vortex-vector/src/varbin/types.rs b/vortex-vector/src/varbin/types.rs new file mode 100644 index 00000000000..c4cce77c42f --- /dev/null +++ b/vortex-vector/src/varbin/types.rs @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Variable-length binary types and related traits. + +use std::fmt::Debug; + +use crate::{VarBinVector, VarBinVectorMut, Vector, VectorMut}; + +impl From> for Vector { + fn from(value: VarBinVector) -> Self { + T::upcast(value) + } +} + +impl From> for VectorMut { + fn from(value: VarBinVectorMut) -> Self { + T::upcast(value) + } +} + +/// Trait to mark supported binary view types. 
+pub trait VarBinType: Debug + Sized + private::Sealed { + /// The slice type for this variable binary type. + type Slice: ?Sized + AsRef<[u8]>; + + /// Downcast the provided object to a type-specific instance. + fn downcast(visitor: V) -> V::Output; + + /// Upcast a type-specific instance to a generic instance. + fn upcast(input: V::Input) -> V; +} + +/// [`VarBinType`] for UTF-8 strings. +#[derive(Clone, Debug)] +pub struct StringType; +impl VarBinType for StringType { + type Slice = str; + + fn downcast(visitor: V) -> V::Output { + visitor.into_string() + } + + fn upcast(input: V::Input) -> V { + V::from_string(input) + } +} + +/// [`VarBinType`] for raw binary data. +#[derive(Clone, Debug)] +pub struct BinaryType; +impl VarBinType for BinaryType { + type Slice = [u8]; + + fn downcast(visitor: V) -> V::Output { + visitor.into_binary() + } + + fn upcast(input: V::Input) -> V { + V::from_binary(input) + } +} + +/// Trait for downcasting generic variable binary types to specific types. +pub trait VarBinTypeDowncast { + /// The output type after downcasting. + type Output; + + /// Downcast to a binary type. + fn into_binary(self) -> Self::Output; + /// Downcast to a string type. + fn into_string(self) -> Self::Output; +} + +/// Trait for upcasting specific variable binary types to generic types. +pub trait VarBinTypeUpcast { + /// The input type for upcasting. + type Input; + + /// Upcast from a binary type. + fn from_binary(input: Self::Input) -> Self; + /// Upcast from a string type. + fn from_string(input: Self::Input) -> Self; +} + +/// Private module to seal the [`VarBinType`] trait. +mod private { + /// Sealed trait to prevent external implementations of [`VarBinType`]. 
+ pub trait Sealed {} + + impl Sealed for super::StringType {} + impl Sealed for super::BinaryType {} +} diff --git a/vortex-vector/src/varbin/vector.rs b/vortex-vector/src/varbin/vector.rs new file mode 100644 index 00000000000..e468151e94c --- /dev/null +++ b/vortex-vector/src/varbin/vector.rs @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Variable-length binary vector implementation. + +use std::sync::Arc; + +use vortex_buffer::{Buffer, ByteBuffer}; +use vortex_mask::Mask; + +use crate::VectorOps; +use crate::varbin::VarBinType; +use crate::varbin::vector_mut::VarBinVectorMut; +use crate::varbin::view::BinaryView; + +/// A variable-length binary vector. +#[derive(Debug, Clone)] +pub struct VarBinVector { + /// Views into the binary data. + views: Buffer, + /// Buffers holding the referenced binary data. + buffers: Arc>, + /// Validity mask for the vector. + validity: Mask, + /// Marker trait for the [`VarBinType`]. + _marker: std::marker::PhantomData, +} + +impl VarBinVector { + /// Creates a new [`VarBinVector`] from the provided components. + /// + /// # Safety + /// + /// This function is unsafe because it does not validate the consistency of the provided + /// components. + /// + /// The caller must ensure that: + /// - The length of the `validity` mask matches the length of the `views` buffer. + /// - The `views` buffer correctly references the data in the `buffers`. + pub unsafe fn new_unchecked( + views: Buffer, + buffers: Arc>, + validity: Mask, + ) -> Self { + Self { + views, + buffers, + validity, + _marker: std::marker::PhantomData, + } + } + + /// Decomposes the vector into its constituent parts. 
+ pub fn into_parts(self) -> (Buffer, Arc>, Mask) { + (self.views, self.buffers, self.validity) + } +} + +impl VectorOps for VarBinVector { + type Mutable = VarBinVectorMut; + + fn len(&self) -> usize { + self.views.len() + } + + fn validity(&self) -> &Mask { + &self.validity + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + todo!() + } +} diff --git a/vortex-vector/src/varbin/vector_mut.rs b/vortex-vector/src/varbin/vector_mut.rs new file mode 100644 index 00000000000..e6ffcc7bd52 --- /dev/null +++ b/vortex-vector/src/varbin/vector_mut.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Mutable variable-length binary vector. + +use vortex_buffer::{BufferMut, ByteBuffer, ByteBufferMut}; +use vortex_mask::MaskMut; + +use crate::VectorMutOps; +use crate::varbin::VarBinType; +use crate::varbin::vector::VarBinVector; +use crate::varbin::view::BinaryView; + +/// Mutable variable-length binary vector. +#[allow(dead_code)] // FIXME(ngates): remove after implementing the methods +#[derive(Clone, Debug)] +pub struct VarBinVectorMut { + /// Views into the binary data. + views: BufferMut, + /// Validity mask for the vector. + validity: MaskMut, + + /// The completed buffers holding referenced binary data. + buffers: Vec, + /// The current buffer being appended to, if any. + open_buffer: Option, + + /// Marker trait for the [`VarBinType`]. 
+ _marker: std::marker::PhantomData, +} + +impl VectorMutOps for VarBinVectorMut { + type Immutable = VarBinVector; + + fn len(&self) -> usize { + self.views.len() + } + + fn capacity(&self) -> usize { + self.views.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.views.reserve(additional); + } + + fn extend_from_vector(&mut self, _other: &Self::Immutable) { + todo!() + } + + fn append_nulls(&mut self, n: usize) { + self.views.push_n(BinaryView::empty_view(), n); + self.validity.append_n(false, n); + } + + fn freeze(self) -> Self::Immutable { + todo!() + } + + fn split_off(&mut self, _at: usize) -> Self { + todo!() + } + + fn unsplit(&mut self, _other: Self) { + todo!() + } +} diff --git a/vortex-vector/src/varbin/view.rs b/vortex-vector/src/varbin/view.rs new file mode 100644 index 00000000000..cbcd20919c3 --- /dev/null +++ b/vortex-vector/src/varbin/view.rs @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! The 16-byte view struct stored in variable-length binary vectors. + +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::ops::Range; + +use static_assertions::{assert_eq_align, assert_eq_size}; +use vortex_error::VortexUnwrap; + +/// A view over a variable-length binary value. +/// +/// Either an inlined representation (for values <= 12 bytes) or a reference +/// to an external buffer (for values > 12 bytes). +#[derive(Clone, Copy)] +#[repr(C, align(16))] +pub union BinaryView { + /// Numeric representation. This is logically `u128`, but we split it into the high and low + /// bits to preserve the alignment. + le_bytes: [u8; 16], + + /// Inlined representation: strings <= 12 bytes + inlined: Inlined, + + /// Reference type: strings > 12 bytes. 
+ _ref: Ref, +} + +assert_eq_align!(BinaryView, u128); +assert_eq_size!(BinaryView, [u8; 16]); +assert_eq_size!(Inlined, [u8; 16]); +assert_eq_size!(Ref, [u8; 16]); + +/// Variant of a [`BinaryView`] that holds an inlined value. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(C, align(8))] +pub struct Inlined { + /// The size of the full value. + pub size: u32, + /// The full inlined value. + pub data: [u8; BinaryView::MAX_INLINED_SIZE], +} + +impl Inlined { + /// Creates a new inlined representation from the provided value of constant size. + fn new(value: &[u8]) -> Self { + debug_assert_eq!(value.len(), N); + let mut inlined = Self { + size: N.try_into().vortex_unwrap(), + data: [0u8; BinaryView::MAX_INLINED_SIZE], + }; + inlined.data[..N].copy_from_slice(&value[..N]); + inlined + } + + /// Returns the full inlined value. + #[inline] + pub fn value(&self) -> &[u8] { + &self.data[0..(self.size as usize)] + } +} + +/// Variant of a [`BinaryView`] that holds a reference to an external buffer. +#[derive(Clone, Copy, Debug)] +#[repr(C, align(8))] +pub struct Ref { + /// The size of the full value. + pub size: u32, + /// The prefix bytes of the value (first 4 bytes). + pub prefix: [u8; 4], + /// The index of the buffer where the full value is stored. + pub buffer_index: u32, + /// The offset within the buffer where the full value starts. + pub offset: u32, +} + +impl Ref { + /// Returns the range within the buffer where the full value is stored; `offset` and `size` are each widened to `usize` before adding so the sum is not evaluated in `u32`. + #[inline] + pub fn as_range(&self) -> Range { + self.offset as usize..self.offset as usize + self.size as usize + } + + /// Replaces the buffer index and offset of the reference, returning a new `Ref`. 
+ #[inline] + pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref { + Self { + size: self.size, + prefix: self.prefix, + buffer_index, + offset, + } + } +} + +impl PartialEq for BinaryView { + fn eq(&self, other: &Self) -> bool { + let a = unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }; + let b = unsafe { std::mem::transmute::<&BinaryView, &u128>(other) }; + a == b + } +} +impl Eq for BinaryView {} + +impl Hash for BinaryView { + fn hash(&self, state: &mut H) { + unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }.hash(state); + } +} + +impl Default for BinaryView { + fn default() -> Self { + Self::make_view(&[], 0, 0) + } +} + +impl BinaryView { + /// Maximum size of an inlined binary value. + const MAX_INLINED_SIZE: usize = 12; + + /// Create a view from a value, block and offset + /// + /// Depending on the length of the provided value either a new inlined + /// or a reference view will be constructed. + /// + /// Adapted from arrow-rs + /// Explicitly enumerating inlined view produces code that avoids calling generic `ptr::copy_nonoverlapping` that's slower than explicit stores + #[inline(never)] + pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self { + match value.len() { + 0 => Self { + inlined: Inlined::new::<0>(value), + }, + 1 => Self { + inlined: Inlined::new::<1>(value), + }, + 2 => Self { + inlined: Inlined::new::<2>(value), + }, + 3 => Self { + inlined: Inlined::new::<3>(value), + }, + 4 => Self { + inlined: Inlined::new::<4>(value), + }, + 5 => Self { + inlined: Inlined::new::<5>(value), + }, + 6 => Self { + inlined: Inlined::new::<6>(value), + }, + 7 => Self { + inlined: Inlined::new::<7>(value), + }, + 8 => Self { + inlined: Inlined::new::<8>(value), + }, + 9 => Self { + inlined: Inlined::new::<9>(value), + }, + 10 => Self { + inlined: Inlined::new::<10>(value), + }, + 11 => Self { + inlined: Inlined::new::<11>(value), + }, + 12 => Self { + inlined: Inlined::new::<12>(value), + }, + _ => Self { 
+ _ref: Ref { + size: u32::try_from(value.len()).vortex_unwrap(), + prefix: value[0..4].try_into().vortex_unwrap(), + buffer_index: block, + offset, + }, + }, + } + } + + /// Create a new empty view + #[inline] + pub fn empty_view() -> Self { + Self::new_inlined(&[]) + } + + /// Create a new inlined binary view + #[inline] + pub fn new_inlined(value: &[u8]) -> Self { + assert!( + value.len() <= Self::MAX_INLINED_SIZE, + "expected inlined value to be <= 12 bytes, was {}", + value.len() + ); + + Self::make_view(value, 0, 0) + } + + /// Returns the length of the binary value. + #[inline] + pub fn len(&self) -> u32 { + unsafe { self.inlined.size } + } + + /// Returns true if the binary value is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns true if the binary value is inlined. + #[inline] + #[allow(clippy::cast_possible_truncation)] + pub fn is_inlined(&self) -> bool { + self.len() <= (Self::MAX_INLINED_SIZE as u32) + } + + /// Returns the inlined representation of the binary value. + pub fn as_inlined(&self) -> &Inlined { + unsafe { &self.inlined } + } + + /// Returns the reference representation of the binary value. + pub fn as_view(&self) -> &Ref { + unsafe { &self._ref } + } + + /// Returns the binary view as u128 representation. 
+ pub fn as_u128(&self) -> u128 { + // SAFETY: binary view always safe to read as u128 LE bytes + unsafe { u128::from_le_bytes(self.le_bytes) } + } +} + +impl From for BinaryView { + fn from(value: u128) -> Self { + BinaryView { + le_bytes: value.to_le_bytes(), + } + } +} + +impl From for BinaryView { + fn from(value: Ref) -> Self { + BinaryView { _ref: value } + } +} + +impl fmt::Debug for BinaryView { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut s = f.debug_struct("BinaryView"); + if self.is_inlined() { + s.field("inline", &self.as_inlined()); + } else { + s.field("ref", &self.as_view()); + } + s.finish() + } +} diff --git a/vortex-vector/src/vector.rs b/vortex-vector/src/vector.rs index ef3dc8c52ad..391e4f986ce 100644 --- a/vortex-vector/src/vector.rs +++ b/vortex-vector/src/vector.rs @@ -9,6 +9,7 @@ use vortex_error::vortex_panic; use crate::macros::match_each_vector; +use crate::varbin::{BinaryVector, StringVector}; use crate::{BoolVector, NullVector, PrimitiveVector, StructVector, VectorMut, VectorOps}; /// An enum over all kinds of immutable vectors, which represent fully decompressed (canonical) @@ -33,10 +34,10 @@ pub enum Vector { Primitive(PrimitiveVector), // Decimal // Decimal(DecimalVector), - // String - // String(StringVector), - // Binary - // Binary(BinaryVector), + /// String vectors + String(StringVector), + /// Binary vectors + Binary(BinaryVector), // List // List(ListVector), // FixedList @@ -93,6 +94,22 @@ impl Vector { vortex_panic!("Expected PrimitiveVector, got {self:?}"); } + /// Returns a reference to the inner [`StringVector`] if `self` is of that variant. + pub fn as_string(&self) -> &StringVector { + if let Vector::String(v) = self { + return v; + } + vortex_panic!("Expected StringVector, got {self:?}"); + } + + /// Returns a reference to the inner [`BinaryVector`] if `self` is of that variant. 
+ pub fn as_binary(&self) -> &BinaryVector { + if let Vector::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVector, got {self:?}"); + } + /// Returns a reference to the inner [`StructVector`] if `self` is of that variant. pub fn as_struct(&self) -> &StructVector { if let Vector::Struct(v) = self { @@ -125,6 +142,24 @@ impl Vector { vortex_panic!("Expected PrimitiveVector, got {self:?}"); } + /// Consumes `self` and returns the inner [`StringVector`] if `self` is of that variant. + #[allow(clippy::same_name_method)] // Same as VarBinTypeDowncast + pub fn into_string(self) -> StringVector { + if let Vector::String(v) = self { + return v; + } + vortex_panic!("Expected StringVector, got {self:?}"); + } + + /// Consumes `self` and returns the inner [`BinaryVector`] if `self` is of that variant. + #[allow(clippy::same_name_method)] // Same as VarBinTypeDowncast + pub fn into_binary(self) -> BinaryVector { + if let Vector::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVector, got {self:?}"); + } + /// Consumes `self` and returns the inner [`StructVector`] if `self` is of that variant. pub fn into_struct(self) -> StructVector { if let Vector::Struct(v) = self { diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index eab12dd852e..9774c07adf2 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -11,6 +11,7 @@ use vortex_error::vortex_panic; use vortex_mask::MaskMut; use super::macros::match_each_vector_mut; +use crate::varbin::{BinaryVectorMut, StringVectorMut}; use crate::{ BoolVectorMut, NullVectorMut, PrimitiveVectorMut, StructVectorMut, Vector, VectorMutOps, }; @@ -35,6 +36,10 @@ pub enum VectorMut { /// Note that [`PrimitiveVectorMut`] is an enum over the different possible (generic) /// [`PVectorMut`](crate::PVectorMut)s. See the documentation for more information. Primitive(PrimitiveVectorMut), + /// Mutable String vectors. 
+ String(StringVectorMut), + /// Mutable Binary vectors. + Binary(BinaryVectorMut), /// Mutable vectors of Struct elements. Struct(StructVectorMut), } @@ -115,6 +120,8 @@ impl VectorMutOps for VectorMut { (VectorMut::Null(a), VectorMut::Null(b)) => a.unsplit(b), (VectorMut::Bool(a), VectorMut::Bool(b)) => a.unsplit(b), (VectorMut::Primitive(a), VectorMut::Primitive(b)) => a.unsplit(b), + (VectorMut::String(a), VectorMut::String(b)) => a.unsplit(b), + (VectorMut::Binary(a), VectorMut::Binary(b)) => a.unsplit(b), (VectorMut::Struct(a), VectorMut::Struct(b)) => a.unsplit(b), _ => vortex_panic!("Mismatched vector types"), } @@ -146,6 +153,22 @@ impl VectorMut { vortex_panic!("Expected PrimitiveVectorMut, got {self:?}"); } + /// Returns a reference to the inner [`StringVectorMut`] if `self` is of that variant. + pub fn as_string(&self) -> &StringVectorMut { + if let VectorMut::String(v) = self { + return v; + } + vortex_panic!("Expected StringVectorMut, got {self:?}"); + } + + /// Returns a reference to the inner [`BinaryVectorMut`] if `self` is of that variant. + pub fn as_binary(&self) -> &BinaryVectorMut { + if let VectorMut::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVectorMut, got {self:?}"); + } + /// Returns a reference to the inner [`StructVectorMut`] if `self` is of that variant. pub fn as_struct(&self) -> &StructVectorMut { if let VectorMut::Struct(v) = self { @@ -178,6 +201,24 @@ impl VectorMut { vortex_panic!("Expected PrimitiveVectorMut, got {self:?}"); } + /// Consumes `self` and returns the inner [`StringVectorMut`] if `self` is of that variant. + #[allow(clippy::same_name_method)] // Same as VarBinTypeDowncast + pub fn into_string(self) -> StringVectorMut { + if let VectorMut::String(v) = self { + return v; + } + vortex_panic!("Expected StringVectorMut, got {self:?}"); + } + + /// Consumes `self` and returns the inner [`BinaryVectorMut`] if `self` is of that variant. 
+ #[allow(clippy::same_name_method)] // Same as VarBinTypeDowncast + pub fn into_binary(self) -> BinaryVectorMut { + if let VectorMut::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVectorMut, got {self:?}"); + } + /// Consumes `self` and returns the inner [`StructVectorMut`] if `self` is of that variant. pub fn into_struct(self) -> StructVectorMut { if let VectorMut::Struct(v) = self {