From feb0905e8bf42ce61da6e9bdaad114f8ec592209 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 29 Oct 2025 12:00:49 -0400 Subject: [PATCH 1/3] VarBinVector Signed-off-by: Nicholas Gates --- Cargo.lock | 1 + vortex-vector/Cargo.toml | 2 + vortex-vector/src/lib.rs | 5 + vortex-vector/src/macros.rs | 16 ++ vortex-vector/src/private.rs | 3 + vortex-vector/src/varbin/mod.rs | 157 +++++++++++++++ vortex-vector/src/varbin/vector.rs | 63 ++++++ vortex-vector/src/varbin/vector_mut.rs | 74 +++++++ vortex-vector/src/varbin/view.rs | 259 +++++++++++++++++++++++++ vortex-vector/src/vector.rs | 41 +++- vortex-vector/src/vector_mut.rs | 39 ++++ 11 files changed, 656 insertions(+), 4 deletions(-) create mode 100644 vortex-vector/src/varbin/mod.rs create mode 100644 vortex-vector/src/varbin/vector.rs create mode 100644 vortex-vector/src/varbin/vector_mut.rs create mode 100644 vortex-vector/src/varbin/view.rs diff --git a/Cargo.lock b/Cargo.lock index 6d29bb6d70f..204ec3946e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9373,6 +9373,7 @@ dependencies = [ name = "vortex-vector" version = "0.1.0" dependencies = [ + "static_assertions", "vortex-buffer", "vortex-dtype", "vortex-error", diff --git a/vortex-vector/Cargo.toml b/vortex-vector/Cargo.toml index 885c8a5c8d7..7b3ff6832e2 100644 --- a/vortex-vector/Cargo.toml +++ b/vortex-vector/Cargo.toml @@ -24,3 +24,5 @@ vortex-buffer = { workspace = true } vortex-dtype = { workspace = true } vortex-error = { workspace = true } vortex-mask = { workspace = true } + +static_assertions = { workspace = true } diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index b65e4e6fb9a..bc3ac9e42dd 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -15,11 +15,16 @@ mod bool; mod null; mod primitive; mod struct_; +mod varbin; pub use bool::{BoolVector, BoolVectorMut}; pub use null::{NullVector, NullVectorMut}; pub use primitive::{PVector, PVectorMut, PrimitiveVector, PrimitiveVectorMut}; pub use struct_::{StructVector, StructVectorMut}; +pub use varbin::{ + BinaryVector, BinaryVectorMut, StringVector, StringVectorMut, VarBinType, VarBinVector, + VarBinVectorMut, +}; mod ops; mod vector; diff --git a/vortex-vector/src/macros.rs b/vortex-vector/src/macros.rs index 25f9de2b615..697087ac769 100644 --- a/vortex-vector/src/macros.rs +++ b/vortex-vector/src/macros.rs @@ -49,6 +49,14 @@ macro_rules! match_each_vector { let $vec = v; $body } + $crate::Vector::String(v) => { + let $vec = v; + $body + } + $crate::Vector::Binary(v) => { + let $vec = v; + $body + } $crate::Vector::Struct(v) => { let $vec = v; $body @@ -104,6 +112,14 @@ macro_rules! match_each_vector_mut { let $vec = v; $body } + $crate::VectorMut::String(v) => { + let $vec = v; + $body + } + $crate::VectorMut::Binary(v) => { + let $vec = v; + $body + } $crate::VectorMut::Struct(v) => { let $vec = v; $body diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs index b241e4fd5d8..22c668e98d2 100644 --- a/vortex-vector/src/private.rs +++ b/vortex-vector/src/private.rs @@ -29,5 +29,8 @@ impl Sealed for PrimitiveVectorMut {} impl Sealed for PVector {} impl Sealed for PVectorMut {} +impl Sealed for VarBinVector {} +impl Sealed for VarBinVectorMut {} + impl Sealed for StructVector {} impl Sealed for StructVectorMut {} diff --git a/vortex-vector/src/varbin/mod.rs b/vortex-vector/src/varbin/mod.rs new file mode 100644 index 00000000000..caff563fe24 --- /dev/null +++ b/vortex-vector/src/varbin/mod.rs @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::{Vector, VectorMut}; +use std::fmt::Debug; + +mod vector; +pub use vector::VarBinVector; + +mod vector_mut; +pub use vector_mut::VarBinVectorMut; +use vortex_error::vortex_panic; + +mod view; + +/// Type alias for non-utf8 variable-length binary vectors. +pub type BinaryVector = VarBinVector; +/// Type alias for mutable non-utf8 variable-length binary vectors. +pub type BinaryVectorMut = VarBinVectorMut; +/// Type alias for UTF-8 variable-length string vectors. +pub type StringVector = VarBinVector; +/// Type alias for mutable UTF-8 variable-length string vectors. +pub type StringVectorMut = VarBinVectorMut; + +impl From> for Vector { + fn from(value: VarBinVector) -> Self { + T::upcast(value) + } +} + +impl From> for VectorMut { + fn from(value: VarBinVectorMut) -> Self { + T::upcast(value) + } +} + +/// Trait to mark supported binary view types. +pub trait VarBinType: Debug + Sized + private::Sealed { + /// The slice type for this variable binary type. + type Slice: ?Sized + AsRef<[u8]>; + + /// Downcast the provided object to a type-specific instance. + fn downcast(visitor: V) -> V::Output; + + /// Upcast a type-specific instance to a generic instance. + fn upcast(input: V::Input) -> V; +} + +/// [`BinaryType`] for UTF-8 strings. +#[derive(Clone, Debug)] +pub struct StringType; +impl VarBinType for StringType { + type Slice = str; + + fn downcast(visitor: V) -> V::Output { + visitor.into_string() + } + + fn upcast(input: V::Input) -> V { + V::from_string(input) + } +} + +/// [`BinaryType`] for raw binary data. +#[derive(Clone, Debug)] +pub struct BinaryType; +impl VarBinType for BinaryType { + type Slice = [u8]; + + fn downcast(visitor: V) -> V::Output { + visitor.into_binary() + } + + fn upcast(input: V::Input) -> V { + V::from_binary(input) + } +} + +pub trait VarBinTypeDowncast { + type Output; + + fn into_binary(self) -> Self::Output; + fn into_string(self) -> Self::Output; +} + +pub trait VarBinTypeUpcast { + type Input; + + fn from_binary(input: Self::Input) -> Self; + fn from_string(input: Self::Input) -> Self; +} + +impl VarBinTypeDowncast for Vector { + type Output = VarBinVector; + + fn into_binary(self) -> Self::Output { + if let Vector::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVector, got {self:?}"); + } + + fn into_string(self) -> Self::Output { + if let Vector::String(v) = self { + return v; + } + vortex_panic!("Expected StringVector, got {self:?}"); + } +} + +impl VarBinTypeUpcast for Vector { + type Input = VarBinVector; + + fn from_binary(input: Self::Input) -> Self { + Vector::Binary(input) + } + + fn from_string(input: Self::Input) -> Self { + Vector::String(input) + } +} + +impl VarBinTypeDowncast for VectorMut { + type Output = VarBinVectorMut; + + fn into_binary(self) -> Self::Output { + if let VectorMut::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVector, got {self:?}"); + } + + fn into_string(self) -> Self::Output { + if let VectorMut::String(v) = self { + return v; + } + vortex_panic!("Expected StringVector, got {self:?}"); + } +} + +impl VarBinTypeUpcast for VectorMut { + type Input = VarBinVectorMut; + + fn from_binary(input: Self::Input) -> Self { + VectorMut::Binary(input) + } + + fn from_string(input: Self::Input) -> Self { + VectorMut::String(input) + } +} + +mod private { + pub trait Sealed {} + impl Sealed for super::StringType {} + impl Sealed for super::BinaryType {} +} diff --git a/vortex-vector/src/varbin/vector.rs b/vortex-vector/src/varbin/vector.rs new file mode 100644 index 00000000000..0274195debd --- /dev/null +++ b/vortex-vector/src/varbin/vector.rs @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::varbin::vector_mut::VarBinVectorMut; +use crate::varbin::view::BinaryView; +use crate::varbin::VarBinType; +use crate::VectorOps; +use std::sync::Arc; +use vortex_buffer::{Buffer, ByteBuffer}; +use vortex_mask::Mask; + +/// A variable-length binary vector. +#[derive(Debug, Clone)] +pub struct VarBinVector { + views: Buffer, + validity: Mask, + buffers: Arc>, + _marker: std::marker::PhantomData, +} + +impl VarBinVector { + /// Creates a new [`VarBinVector`] from the provided components. + /// + /// # Safety + /// + /// This function is unsafe because it does not validate the consistency of the provided + /// components. + /// + /// The caller must ensure that: + /// - The length of the `validity` mask matches the length of the `views` buffer. + /// - The `views` buffer correctly references the data in the `buffers`. + pub unsafe fn new_unchecked( + views: Buffer, + validity: Mask, + buffers: Arc>, + ) -> Self { + Self { + views, + validity, + buffers, + _marker: std::marker::PhantomData, + } + } +} + +impl VectorOps for VarBinVector { + type Mutable = VarBinVectorMut; + + fn len(&self) -> usize { + self.views.len() + } + + fn validity(&self) -> &Mask { + &self.validity + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + todo!() + } +} diff --git a/vortex-vector/src/varbin/vector_mut.rs b/vortex-vector/src/varbin/vector_mut.rs new file mode 100644 index 00000000000..62e2050c45c --- /dev/null +++ b/vortex-vector/src/varbin/vector_mut.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::varbin::vector::VarBinVector; +use crate::varbin::view::BinaryView; +use crate::varbin::VarBinType; +use crate::VectorMutOps; +use vortex_buffer::{BufferMut, ByteBuffer}; +use vortex_mask::MaskMut; + +/// Mutable variable-length binary vector. +#[derive(Clone, Debug)] +pub struct VarBinVectorMut { + views: BufferMut, + validity: MaskMut, + + buffers: Vec, + open_buffer: Option, + + _marker: std::marker::PhantomData, +} + +impl VarBinVectorMut { + pub(super) fn new( + views: BufferMut, + validity: MaskMut, + buffers: Vec, + ) -> Self { + Self { + views, + validity, + buffers, + open_buffer: None, + _marker: std::marker::PhantomData, + } + } +} + +impl VectorMutOps for VarBinVectorMut { + type Immutable = VarBinVector; + + fn len(&self) -> usize { + self.views.len() + } + + fn capacity(&self) -> usize { + self.views.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.views.reserve(additional); + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + todo!() + } + + fn append_nulls(&mut self, n: usize) { + self.views.push_n(BinaryView::empty_view(), n); + self.validity.append_n(false, n); + } + + fn freeze(self) -> Self::Immutable { + todo!() + } + + fn split_off(&mut self, at: usize) -> Self { + todo!() + } + + fn unsplit(&mut self, other: Self) { + todo!() + } +} diff --git a/vortex-vector/src/varbin/view.rs b/vortex-vector/src/varbin/view.rs new file mode 100644 index 00000000000..b4e936b6397 --- /dev/null +++ b/vortex-vector/src/varbin/view.rs @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::ops::Range; + +use static_assertions::{assert_eq_align, assert_eq_size}; +use vortex_error::VortexUnwrap; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(C, align(8))] +pub struct Inlined { + pub(super) size: u32, + pub(super) data: [u8; BinaryView::MAX_INLINED_SIZE], +} + +impl Inlined { + fn new(value: &[u8]) -> Self { + let mut inlined = Self { + size: N.try_into().vortex_unwrap(), + data: [0u8; BinaryView::MAX_INLINED_SIZE], + }; + inlined.data[..N].copy_from_slice(&value[..N]); + inlined + } + + #[inline] + pub fn value(&self) -> &[u8] { + &self.data[0..(self.size as usize)] + } +} + +#[derive(Clone, Copy, Debug)] +#[repr(C, align(8))] +pub struct Ref { + pub(super) size: u32, + pub(super) prefix: [u8; 4], + pub(super) buffer_index: u32, + pub(super) offset: u32, +} + +impl Ref { + pub fn new(size: u32, prefix: [u8; 4], buffer_index: u32, offset: u32) -> Self { + Self { + size, + prefix, + buffer_index, + offset, + } + } + + #[inline] + pub fn size(&self) -> u32 { + self.size + } + + #[inline] + pub fn buffer_index(&self) -> u32 { + self.buffer_index + } + + #[inline] + pub fn offset(&self) -> u32 { + self.offset + } + + #[inline] + pub fn prefix(&self) -> &[u8; 4] { + &self.prefix + } + + #[inline] + pub fn as_range(&self) -> Range { + self.offset as usize..(self.offset + self.size) as usize + } + + #[inline] + pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref { + Self::new(self.size, self.prefix, buffer_index, offset) + } +} + +#[derive(Clone, Copy)] +#[repr(C, align(16))] +pub union BinaryView { + // Numeric representation. This is logically `u128`, but we split it into the high and low + // bits to preserve the alignment. + pub(super) le_bytes: [u8; 16], + + // Inlined representation: strings <= 12 bytes + pub(super) inlined: Inlined, + + // Reference type: strings > 12 bytes. + pub(super) _ref: Ref, +} + +assert_eq_size!(BinaryView, [u8; 16]); +assert_eq_size!(Inlined, [u8; 16]); +assert_eq_size!(Ref, [u8; 16]); +assert_eq_align!(BinaryView, u128); + +impl PartialEq for BinaryView { + fn eq(&self, other: &Self) -> bool { + let a = unsafe { std::mem::transmute::<&BinaryView, &[u8; 16]>(self) }; + let b = unsafe { std::mem::transmute::<&BinaryView, &[u8; 16]>(other) }; + a == b + } +} +impl Eq for BinaryView {} + +impl Hash for BinaryView { + fn hash(&self, state: &mut H) { + unsafe { std::mem::transmute::<&BinaryView, &[u8; 16]>(self) }.hash(state); + } +} + +impl Default for BinaryView { + fn default() -> Self { + Self::make_view(&[], 0, 0) + } +} + +impl BinaryView { + pub const MAX_INLINED_SIZE: usize = 12; + + /// Create a view from a value, block and offset + /// + /// Depending on the length of the provided value either a new inlined + /// or a reference view will be constructed. + /// + /// Adapted from arrow-rs + /// Explicitly enumerating inlined view produces code that avoids calling generic `ptr::copy_non_interleave` that's slower than explicit stores + #[inline(never)] + pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self { + match value.len() { + 0 => Self { + inlined: Inlined::new::<0>(value), + }, + 1 => Self { + inlined: Inlined::new::<1>(value), + }, + 2 => Self { + inlined: Inlined::new::<2>(value), + }, + 3 => Self { + inlined: Inlined::new::<3>(value), + }, + 4 => Self { + inlined: Inlined::new::<4>(value), + }, + 5 => Self { + inlined: Inlined::new::<5>(value), + }, + 6 => Self { + inlined: Inlined::new::<6>(value), + }, + 7 => Self { + inlined: Inlined::new::<7>(value), + }, + 8 => Self { + inlined: Inlined::new::<8>(value), + }, + 9 => Self { + inlined: Inlined::new::<9>(value), + }, + 10 => Self { + inlined: Inlined::new::<10>(value), + }, + 11 => Self { + inlined: Inlined::new::<11>(value), + }, + 12 => Self { + inlined: Inlined::new::<12>(value), + }, + _ => Self { + _ref: Ref::new( + u32::try_from(value.len()).vortex_unwrap(), + value[0..4].try_into().vortex_unwrap(), + block, + offset, + ), + }, + } + } + + /// Create a new empty view + #[inline] + pub fn empty_view() -> Self { + Self::new_inlined(&[]) + } + + /// Create a new inlined binary view + #[inline] + pub fn new_inlined(value: &[u8]) -> Self { + assert!( + value.len() <= Self::MAX_INLINED_SIZE, + "expected inlined value to be <= 12 bytes, was {}", + value.len() + ); + + Self::make_view(value, 0, 0) + } + + #[inline] + pub fn len(&self) -> u32 { + unsafe { self.inlined.size } + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + #[inline] + #[allow(clippy::cast_possible_truncation)] + pub fn is_inlined(&self) -> bool { + self.len() <= (Self::MAX_INLINED_SIZE as u32) + } + + pub fn as_inlined(&self) -> &Inlined { + unsafe { &self.inlined } + } + + pub fn as_view(&self) -> &Ref { + unsafe { &self._ref } + } + + pub fn as_u128(&self) -> u128 { + // SAFETY: binary view always safe to read as u128 LE bytes + unsafe { u128::from_le_bytes(self.le_bytes) } + } +} + +impl From for BinaryView { + fn from(value: u128) -> Self { + BinaryView { + le_bytes: value.to_le_bytes(), + } + } +} + +impl From for BinaryView { + fn from(value: Ref) -> Self { + BinaryView { _ref: value } + } +} + +impl fmt::Debug for BinaryView { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut s = f.debug_struct("BinaryView"); + if self.is_inlined() { + s.field("inline", &self.as_inlined()); + } else { + s.field("ref", &self.as_view()); + } + s.finish() + } +} diff --git a/vortex-vector/src/vector.rs b/vortex-vector/src/vector.rs index ef3dc8c52ad..e5c48db28d6 100644 --- a/vortex-vector/src/vector.rs +++ b/vortex-vector/src/vector.rs @@ -9,6 +9,7 @@ use vortex_error::vortex_panic; use crate::macros::match_each_vector; +use crate::varbin::{BinaryVector, StringVector}; use crate::{BoolVector, NullVector, PrimitiveVector, StructVector, VectorMut, VectorOps}; /// An enum over all kinds of immutable vectors, which represent fully decompressed (canonical) @@ -33,10 +34,10 @@ pub enum Vector { Primitive(PrimitiveVector), // Decimal // Decimal(DecimalVector), - // String - // String(StringVector), - // Binary - // Binary(BinaryVector), + /// String vectors + String(StringVector), + /// Binary vectors + Binary(BinaryVector), // List // List(ListVector), // FixedList @@ -93,6 +94,22 @@ impl Vector { vortex_panic!("Expected PrimitiveVector, got {self:?}"); } + /// Returns a reference to the inner [`StringVector`] if `self` is of that variant. + pub fn as_string(&self) -> &StringVector { + if let Vector::String(v) = self { + return v; + } + vortex_panic!("Expected StringVector, got {self:?}"); + } + + /// Returns a reference to the inner [`BinaryVector`] if `self` is of that variant. + pub fn as_binary(&self) -> &BinaryVector { + if let Vector::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVector, got {self:?}"); + } + /// Returns a reference to the inner [`StructVector`] if `self` is of that variant. pub fn as_struct(&self) -> &StructVector { if let Vector::Struct(v) = self { @@ -125,6 +142,22 @@ impl Vector { vortex_panic!("Expected PrimitiveVector, got {self:?}"); } + /// Consumes `self` and returns the inner [`StringVector`] if `self` is of that variant. + pub fn into_string(self) -> StringVector { + if let Vector::String(v) = self { + return v; + } + vortex_panic!("Expected StringVector, got {self:?}"); + } + + /// Consumes `self` and returns the inner [`BinaryVector`] if `self` is of that variant. + pub fn into_binary(self) -> BinaryVector { + if let Vector::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVector, got {self:?}"); + } + /// Consumes `self` and returns the inner [`StructVector`] if `self` is of that variant. pub fn into_struct(self) -> StructVector { if let Vector::Struct(v) = self { diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index eab12dd852e..e8520dae423 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -11,6 +11,7 @@ use vortex_error::vortex_panic; use vortex_mask::MaskMut; use super::macros::match_each_vector_mut; +use crate::varbin::{BinaryVectorMut, StringVectorMut}; use crate::{ BoolVectorMut, NullVectorMut, PrimitiveVectorMut, StructVectorMut, Vector, VectorMutOps, }; @@ -35,6 +36,10 @@ pub enum VectorMut { /// Note that [`PrimitiveVectorMut`] is an enum over the different possible (generic) /// [`PVectorMut`](crate::PVectorMut)s. See the documentation for more information. Primitive(PrimitiveVectorMut), + /// Mutable String vectors. + String(StringVectorMut), + /// Mutable Binary vectors. + Binary(BinaryVectorMut), /// Mutable vectors of Struct elements. Struct(StructVectorMut), } @@ -115,6 +120,8 @@ impl VectorMutOps for VectorMut { (VectorMut::Null(a), VectorMut::Null(b)) => a.unsplit(b), (VectorMut::Bool(a), VectorMut::Bool(b)) => a.unsplit(b), (VectorMut::Primitive(a), VectorMut::Primitive(b)) => a.unsplit(b), + (VectorMut::String(a), VectorMut::String(b)) => a.unsplit(b), + (VectorMut::Binary(a), VectorMut::Binary(b)) => a.unsplit(b), (VectorMut::Struct(a), VectorMut::Struct(b)) => a.unsplit(b), _ => vortex_panic!("Mismatched vector types"), } @@ -146,6 +153,22 @@ impl VectorMut { vortex_panic!("Expected PrimitiveVectorMut, got {self:?}"); } + /// Returns a reference to the inner [`StringVectorMut`] if `self` is of that variant. + pub fn as_string(&self) -> &StringVectorMut { + if let VectorMut::String(v) = self { + return v; + } + vortex_panic!("Expected StringVectorMut, got {self:?}"); + } + + /// Returns a reference to the inner [`BinaryVectorMut`] if `self` is of that variant. + pub fn as_binary(&self) -> &BinaryVectorMut { + if let VectorMut::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVectorMut, got {self:?}"); + } + /// Returns a reference to the inner [`StructVectorMut`] if `self` is of that variant. pub fn as_struct(&self) -> &StructVectorMut { if let VectorMut::Struct(v) = self { @@ -178,6 +201,22 @@ impl VectorMut { vortex_panic!("Expected PrimitiveVectorMut, got {self:?}"); } + /// Consumes `self` and returns the inner [`StringVectorMut`] if `self` is of that variant. + pub fn into_string(self) -> StringVectorMut { + if let VectorMut::String(v) = self { + return v; + } + vortex_panic!("Expected StringVectorMut, got {self:?}"); + } + + /// Consumes `self` and returns the inner [`BinaryVectorMut`] if `self` is of that variant. + pub fn into_binary(self) -> BinaryVectorMut { + if let VectorMut::Binary(v) = self { + return v; + } + vortex_panic!("Expected BinaryVectorMut, got {self:?}"); + } + /// Consumes `self` and returns the inner [`StructVectorMut`] if `self` is of that variant. pub fn into_struct(self) -> StructVectorMut { if let VectorMut::Struct(v) = self { From 871ab1b7d6bccc649e479cda9919c5801896c2c5 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 29 Oct 2025 12:20:23 -0400 Subject: [PATCH 2/3] VarBinVector Signed-off-by: Nicholas Gates --- vortex-vector/src/lib.rs | 13 ++-- vortex-vector/src/varbin/mod.rs | 87 ++----------------------- vortex-vector/src/varbin/types.rs | 88 ++++++++++++++++++++++++++ vortex-vector/src/varbin/vector.rs | 10 +-- vortex-vector/src/varbin/vector_mut.rs | 9 +-- 5 files changed, 111 insertions(+), 96 deletions(-) create mode 100644 vortex-vector/src/varbin/types.rs diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index bc3ac9e42dd..8e596fb10c1 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -17,14 +17,11 @@ mod primitive; mod struct_; mod varbin; -pub use bool::{BoolVector, BoolVectorMut}; -pub use null::{NullVector, NullVectorMut}; -pub use primitive::{PVector, PVectorMut, PrimitiveVector, PrimitiveVectorMut}; -pub use struct_::{StructVector, StructVectorMut}; -pub use varbin::{ - BinaryVector, BinaryVectorMut, StringVector, StringVectorMut, VarBinType, VarBinVector, - VarBinVectorMut, -}; +pub use bool::*; +pub use null::*; +pub use primitive::*; +pub use struct_::*; +pub use varbin::*; mod ops; mod vector; diff --git a/vortex-vector/src/varbin/mod.rs b/vortex-vector/src/varbin/mod.rs index caff563fe24..5508cbb2d77 100644 --- a/vortex-vector/src/varbin/mod.rs +++ b/vortex-vector/src/varbin/mod.rs @@ -1,16 +1,16 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +pub use types::*; +pub use vector::*; +pub use vector_mut::*; +use vortex_error::vortex_panic; + use crate::{Vector, VectorMut}; -use std::fmt::Debug; +mod types; mod vector; -pub use vector::VarBinVector; - mod vector_mut; -pub use vector_mut::VarBinVectorMut; -use vortex_error::vortex_panic; - mod view; /// Type alias for non-utf8 variable-length binary vectors. @@ -22,74 +22,6 @@ pub type StringVector = VarBinVector; /// Type alias for mutable UTF-8 variable-length string vectors. pub type StringVectorMut = VarBinVectorMut; -impl From> for Vector { - fn from(value: VarBinVector) -> Self { - T::upcast(value) - } -} - -impl From> for VectorMut { - fn from(value: VarBinVectorMut) -> Self { - T::upcast(value) - } -} - -/// Trait to mark supported binary view types. -pub trait VarBinType: Debug + Sized + private::Sealed { - /// The slice type for this variable binary type. - type Slice: ?Sized + AsRef<[u8]>; - - /// Downcast the provided object to a type-specific instance. - fn downcast(visitor: V) -> V::Output; - - /// Upcast a type-specific instance to a generic instance. - fn upcast(input: V::Input) -> V; -} - -/// [`BinaryType`] for UTF-8 strings. -#[derive(Clone, Debug)] -pub struct StringType; -impl VarBinType for StringType { - type Slice = str; - - fn downcast(visitor: V) -> V::Output { - visitor.into_string() - } - - fn upcast(input: V::Input) -> V { - V::from_string(input) - } -} - -/// [`BinaryType`] for raw binary data. -#[derive(Clone, Debug)] -pub struct BinaryType; -impl VarBinType for BinaryType { - type Slice = [u8]; - - fn downcast(visitor: V) -> V::Output { - visitor.into_binary() - } - - fn upcast(input: V::Input) -> V { - V::from_binary(input) - } -} - -pub trait VarBinTypeDowncast { - type Output; - - fn into_binary(self) -> Self::Output; - fn into_string(self) -> Self::Output; -} - -pub trait VarBinTypeUpcast { - type Input; - - fn from_binary(input: Self::Input) -> Self; - fn from_string(input: Self::Input) -> Self; -} - impl VarBinTypeDowncast for Vector { type Output = VarBinVector; @@ -97,6 +29,7 @@ impl VarBinTypeDowncast for Vector { if let Vector::Binary(v) = self { return v; } + vortex_panic!("Expected BinaryVector, got {self:?}"); } @@ -149,9 +82,3 @@ impl VarBinTypeUpcast for VectorMut { VectorMut::String(input) } } - -mod private { - pub trait Sealed {} - impl Sealed for super::StringType {} - impl Sealed for super::BinaryType {} -} diff --git a/vortex-vector/src/varbin/types.rs b/vortex-vector/src/varbin/types.rs new file mode 100644 index 00000000000..5887ff16498 --- /dev/null +++ b/vortex-vector/src/varbin/types.rs @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt::Debug; + +use crate::{VarBinVector, VarBinVectorMut, Vector, VectorMut}; + +impl From> for Vector { + fn from(value: VarBinVector) -> Self { + T::upcast(value) + } +} + +impl From> for VectorMut { + fn from(value: VarBinVectorMut) -> Self { + T::upcast(value) + } +} + +/// Trait to mark supported binary view types. +pub trait VarBinType: Debug + Sized + private::Sealed { + /// The slice type for this variable binary type. + type Slice: ?Sized + AsRef<[u8]>; + + /// Downcast the provided object to a type-specific instance. + fn downcast(visitor: V) -> V::Output; + + /// Upcast a type-specific instance to a generic instance. + fn upcast(input: V::Input) -> V; +} + +/// [`BinaryType`] for UTF-8 strings. +#[derive(Clone, Debug)] +pub struct StringType; +impl VarBinType for StringType { + type Slice = str; + + fn downcast(visitor: V) -> V::Output { + visitor.into_string() + } + + fn upcast(input: V::Input) -> V { + V::from_string(input) + } +} + +/// [`BinaryType`] for raw binary data. +#[derive(Clone, Debug)] +pub struct BinaryType; +impl VarBinType for BinaryType { + type Slice = [u8]; + + fn downcast(visitor: V) -> V::Output { + visitor.into_binary() + } + + fn upcast(input: V::Input) -> V { + V::from_binary(input) + } +} + +/// Trait for downcasting generic variable binary types to specific types. +pub trait VarBinTypeDowncast { + /// The output type after downcasting. + type Output; + + /// Downcast to a binary type. + fn into_binary(self) -> Self::Output; + /// Downcast to a string type. + fn into_string(self) -> Self::Output; +} + +/// Trait for upcasting specific variable binary types to generic types. +pub trait VarBinTypeUpcast { + /// The input type for upcasting. + type Input; + + /// Upcast from a binary type. + fn from_binary(input: Self::Input) -> Self; + /// Upcast from a string type. + fn from_string(input: Self::Input) -> Self; +} + +mod private { + pub trait Sealed {} + impl Sealed for super::StringType {} + impl Sealed for super::BinaryType {} +} diff --git a/vortex-vector/src/varbin/vector.rs b/vortex-vector/src/varbin/vector.rs index 0274195debd..1bf6d4e810e 100644 --- a/vortex-vector/src/varbin/vector.rs +++ b/vortex-vector/src/varbin/vector.rs @@ -1,14 +1,16 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use crate::varbin::vector_mut::VarBinVectorMut; -use crate::varbin::view::BinaryView; -use crate::varbin::VarBinType; -use crate::VectorOps; use std::sync::Arc; + use vortex_buffer::{Buffer, ByteBuffer}; use vortex_mask::Mask; +use crate::VectorOps; +use crate::varbin::VarBinType; +use crate::varbin::vector_mut::VarBinVectorMut; +use crate::varbin::view::BinaryView; + /// A variable-length binary vector. #[derive(Debug, Clone)] pub struct VarBinVector { diff --git a/vortex-vector/src/varbin/vector_mut.rs b/vortex-vector/src/varbin/vector_mut.rs index 62e2050c45c..9271c22f356 100644 --- a/vortex-vector/src/varbin/vector_mut.rs +++ b/vortex-vector/src/varbin/vector_mut.rs @@ -1,13 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use crate::varbin::vector::VarBinVector; -use crate::varbin::view::BinaryView; -use crate::varbin::VarBinType; -use crate::VectorMutOps; use vortex_buffer::{BufferMut, ByteBuffer}; use vortex_mask::MaskMut; +use crate::VectorMutOps; +use crate::varbin::VarBinType; +use crate::varbin::vector::VarBinVector; +use crate::varbin::view::BinaryView; + /// Mutable variable-length binary vector. #[derive(Clone, Debug)] pub struct VarBinVectorMut { From 4fc1c05b47969595c31f5fc69a4fe84f55b99d9c Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 29 Oct 2025 13:59:41 -0400 Subject: [PATCH 3/3] VarBinVector Signed-off-by: Nicholas Gates --- vortex-compute/src/mask/mod.rs | 23 +++-- vortex-vector/src/lib.rs | 1 - vortex-vector/src/varbin/mod.rs | 1 - vortex-vector/src/varbin/types.rs | 5 + vortex-vector/src/varbin/vector.rs | 17 +++- vortex-vector/src/varbin/vector_mut.rs | 34 +++---- vortex-vector/src/varbin/view.rs | 130 +++++++++++++------------ vortex-vector/src/vector.rs | 2 + vortex-vector/src/vector_mut.rs | 2 + 9 files changed, 119 insertions(+), 96 deletions(-) diff --git a/vortex-compute/src/mask/mod.rs b/vortex-compute/src/mask/mod.rs index 1c206ef2e60..a88d81a52ce 100644 --- a/vortex-compute/src/mask/mod.rs +++ b/vortex-compute/src/mask/mod.rs @@ -8,8 +8,8 @@ use std::ops::BitAnd; use vortex_dtype::NativePType; use vortex_mask::Mask; use vortex_vector::{ - BoolVector, NullVector, PrimitiveVector, StructVector, Vector, match_each_pvector, - match_each_vector, + BoolVector, NullVector, PVector, PrimitiveVector, StructVector, VarBinType, VarBinVector, + Vector, match_each_pvector, match_each_vector, }; /// Trait for masking the validity of an array or vector. @@ -37,7 +37,8 @@ impl MaskValidity for NullVector { impl MaskValidity for BoolVector { fn mask_validity(self, mask: &Mask) -> Self { let (bits, validity) = self.into_parts(); - Self::new(bits, validity.bitand(mask)) + // SAFETY: we are preserving the original bits buffer and only modifying the validity. + unsafe { Self::new_unchecked(bits, validity.bitand(mask)) } } } @@ -47,16 +48,26 @@ impl MaskValidity for PrimitiveVector { } } -impl MaskValidity for vortex_vector::PVector { +impl MaskValidity for PVector { fn mask_validity(self, mask: &Mask) -> Self { let (data, validity) = self.into_parts(); - Self::new(data, validity.bitand(mask)) + // SAFETY: we are preserving the original data buffer and only modifying the validity. + unsafe { Self::new_unchecked(data, validity.bitand(mask)) } + } +} + +impl MaskValidity for VarBinVector { + fn mask_validity(self, mask: &Mask) -> Self { + let (views, buffers, validity) = self.into_parts(); + // SAFETY: we are preserving the original views and buffers, only modifying the validity. + unsafe { Self::new_unchecked(views, buffers, validity.bitand(mask)) } } } impl MaskValidity for StructVector { fn mask_validity(self, mask: &Mask) -> Self { let (fields, validity) = self.into_parts(); - StructVector::new(fields, validity.bitand(mask)) + // SAFETY: we are preserving the original fields and only modifying the validity. + unsafe { StructVector::new_unchecked(fields, validity.bitand(mask)) } } } diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index 8e596fb10c1..aa61f2dc919 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -6,7 +6,6 @@ //! Immutable and mutable decompressed (canonical) vectors for Vortex. #![deny(missing_docs)] -#![deny(clippy::missing_docs_in_private_items)] #![deny(clippy::missing_errors_doc)] #![deny(clippy::missing_panics_doc)] #![deny(clippy::missing_safety_doc)] diff --git a/vortex-vector/src/varbin/mod.rs b/vortex-vector/src/varbin/mod.rs index 5508cbb2d77..a594f5a1735 100644 --- a/vortex-vector/src/varbin/mod.rs +++ b/vortex-vector/src/varbin/mod.rs @@ -29,7 +29,6 @@ impl VarBinTypeDowncast for Vector { if let Vector::Binary(v) = self { return v; } - vortex_panic!("Expected BinaryVector, got {self:?}"); } diff --git a/vortex-vector/src/varbin/types.rs b/vortex-vector/src/varbin/types.rs index 5887ff16498..c4cce77c42f 100644 --- a/vortex-vector/src/varbin/types.rs +++ b/vortex-vector/src/varbin/types.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Variable-length binary types and related traits. + use std::fmt::Debug; use crate::{VarBinVector, VarBinVectorMut, Vector, VectorMut}; @@ -81,8 +83,11 @@ pub trait VarBinTypeUpcast { fn from_string(input: Self::Input) -> Self; } +/// Private module to seal the [`VarBinType`] trait. mod private { + /// Sealed trait to prevent external implementations of [`VarBinType`]. pub trait Sealed {} + impl Sealed for super::StringType {} impl Sealed for super::BinaryType {} } diff --git a/vortex-vector/src/varbin/vector.rs b/vortex-vector/src/varbin/vector.rs index 1bf6d4e810e..e468151e94c 100644 --- a/vortex-vector/src/varbin/vector.rs +++ b/vortex-vector/src/varbin/vector.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Variable-length binary vector implementation. + use std::sync::Arc; use vortex_buffer::{Buffer, ByteBuffer}; @@ -14,9 +16,13 @@ use crate::varbin::view::BinaryView; /// A variable-length binary vector. #[derive(Debug, Clone)] pub struct VarBinVector { + /// Views into the binary data. views: Buffer, - validity: Mask, + /// Buffers holding the referenced binary data. buffers: Arc>, + /// Validity mask for the vector. + validity: Mask, + /// Marker trait for the [`VarBinType`]. _marker: std::marker::PhantomData, } @@ -33,16 +39,21 @@ impl VarBinVector { /// - The `views` buffer correctly references the data in the `buffers`. pub unsafe fn new_unchecked( views: Buffer, - validity: Mask, buffers: Arc>, + validity: Mask, ) -> Self { Self { views, - validity, buffers, + validity, _marker: std::marker::PhantomData, } } + + /// Decomposes the vector into its constituent parts. + pub fn into_parts(self) -> (Buffer, Arc>, Mask) { + (self.views, self.buffers, self.validity) + } } impl VectorOps for VarBinVector { diff --git a/vortex-vector/src/varbin/vector_mut.rs b/vortex-vector/src/varbin/vector_mut.rs index 9271c22f356..e6ffcc7bd52 100644 --- a/vortex-vector/src/varbin/vector_mut.rs +++ b/vortex-vector/src/varbin/vector_mut.rs @@ -1,7 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_buffer::{BufferMut, ByteBuffer}; +//! Mutable variable-length binary vector. + +use vortex_buffer::{BufferMut, ByteBuffer, ByteBufferMut}; use vortex_mask::MaskMut; use crate::VectorMutOps; @@ -10,33 +12,23 @@ use crate::varbin::vector::VarBinVector; use crate::varbin::view::BinaryView; /// Mutable variable-length binary vector. +#[allow(dead_code)] // FIXME(ngates): remove after implementing the methods #[derive(Clone, Debug)] pub struct VarBinVectorMut { + /// Views into the binary data. views: BufferMut, + /// Validity mask for the vector. validity: MaskMut, + /// The completed buffers holding referenced binary data. buffers: Vec, - open_buffer: Option, + /// The current buffer being appended to, if any. + open_buffer: Option, + /// Marker trait for the [`VarBinType`]. _marker: std::marker::PhantomData, } -impl VarBinVectorMut { - pub(super) fn new( - views: BufferMut, - validity: MaskMut, - buffers: Vec, - ) -> Self { - Self { - views, - validity, - buffers, - open_buffer: None, - _marker: std::marker::PhantomData, - } - } -} - impl VectorMutOps for VarBinVectorMut { type Immutable = VarBinVector; @@ -52,7 +44,7 @@ impl VectorMutOps for VarBinVectorMut { self.views.reserve(additional); } - fn extend_from_vector(&mut self, other: &Self::Immutable) { + fn extend_from_vector(&mut self, _other: &Self::Immutable) { todo!() } @@ -65,11 +57,11 @@ impl VectorMutOps for VarBinVectorMut { todo!() } - fn split_off(&mut self, at: usize) -> Self { + fn split_off(&mut self, _at: usize) -> Self { todo!() } - fn unsplit(&mut self, other: Self) { + fn unsplit(&mut self, _other: Self) { todo!() } } diff --git a/vortex-vector/src/varbin/view.rs b/vortex-vector/src/varbin/view.rs index b4e936b6397..cbcd20919c3 100644 --- a/vortex-vector/src/varbin/view.rs +++ b/vortex-vector/src/varbin/view.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! The 16-byte view struct stored in variable-length binary vectors. + use std::fmt; use std::hash::{Hash, Hasher}; use std::ops::Range; @@ -8,15 +10,43 @@ use std::ops::Range; use static_assertions::{assert_eq_align, assert_eq_size}; use vortex_error::VortexUnwrap; +/// A view over a variable-length binary value. +/// +/// Either an inlined representation (for values <= 12 bytes) or a reference +/// to an external buffer (for values > 12 bytes). +#[derive(Clone, Copy)] +#[repr(C, align(16))] +pub union BinaryView { + /// Numeric representation. This is logically `u128`, but we split it into the high and low + /// bits to preserve the alignment. + le_bytes: [u8; 16], + + /// Inlined representation: strings <= 12 bytes + inlined: Inlined, + + /// Reference type: strings > 12 bytes. + _ref: Ref, +} + +assert_eq_align!(BinaryView, u128); +assert_eq_size!(BinaryView, [u8; 16]); +assert_eq_size!(Inlined, [u8; 16]); +assert_eq_size!(Ref, [u8; 16]); + +/// Variant of a [`BinaryView`] that holds an inlined value. #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(C, align(8))] pub struct Inlined { - pub(super) size: u32, - pub(super) data: [u8; BinaryView::MAX_INLINED_SIZE], + /// The size of the full value. + pub size: u32, + /// The full inlined value. + pub data: [u8; BinaryView::MAX_INLINED_SIZE], } impl Inlined { + /// Creates a new inlined representation from the provided value of constant size. fn new(value: &[u8]) -> Self { + debug_assert_eq!(value.len(), N); let mut inlined = Self { size: N.try_into().vortex_unwrap(), data: [0u8; BinaryView::MAX_INLINED_SIZE], @@ -25,85 +55,50 @@ impl Inlined { inlined } + /// Returns the full inlined value. #[inline] pub fn value(&self) -> &[u8] { &self.data[0..(self.size as usize)] } } +/// Variant of a [`BinaryView`] that holds a reference to an external buffer. #[derive(Clone, Copy, Debug)] #[repr(C, align(8))] pub struct Ref { - pub(super) size: u32, - pub(super) prefix: [u8; 4], - pub(super) buffer_index: u32, - pub(super) offset: u32, + /// The size of the full value. + pub size: u32, + /// The prefix bytes of the value (first 4 bytes). + pub prefix: [u8; 4], + /// The index of the buffer where the full value is stored. + pub buffer_index: u32, + /// The offset within the buffer where the full value starts. + pub offset: u32, } impl Ref { - pub fn new(size: u32, prefix: [u8; 4], buffer_index: u32, offset: u32) -> Self { - Self { - size, - prefix, - buffer_index, - offset, - } - } - - #[inline] - pub fn size(&self) -> u32 { - self.size - } - - #[inline] - pub fn buffer_index(&self) -> u32 { - self.buffer_index - } - - #[inline] - pub fn offset(&self) -> u32 { - self.offset - } - - #[inline] - pub fn prefix(&self) -> &[u8; 4] { - &self.prefix - } - + /// Returns the range within the buffer where the full value is stored. #[inline] pub fn as_range(&self) -> Range { self.offset as usize..(self.offset + self.size) as usize } + /// Replaces the buffer index and offset of the reference, returning a new `Ref`. #[inline] pub fn with_buffer_and_offset(&self, buffer_index: u32, offset: u32) -> Ref { - Self::new(self.size, self.prefix, buffer_index, offset) + Self { + size: self.size, + prefix: self.prefix, + buffer_index, + offset, + } } } -#[derive(Clone, Copy)] -#[repr(C, align(16))] -pub union BinaryView { - // Numeric representation. This is logically `u128`, but we split it into the high and low - // bits to preserve the alignment. - pub(super) le_bytes: [u8; 16], - - // Inlined representation: strings <= 12 bytes - pub(super) inlined: Inlined, - - // Reference type: strings > 12 bytes. - pub(super) _ref: Ref, -} - -assert_eq_size!(BinaryView, [u8; 16]); -assert_eq_size!(Inlined, [u8; 16]); -assert_eq_size!(Ref, [u8; 16]); -assert_eq_align!(BinaryView, u128); - impl PartialEq for BinaryView { fn eq(&self, other: &Self) -> bool { - let a = unsafe { std::mem::transmute::<&BinaryView, &[u8; 16]>(self) }; - let b = unsafe { std::mem::transmute::<&BinaryView, &[u8; 16]>(other) }; + let a = unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }; + let b = unsafe { std::mem::transmute::<&BinaryView, &u128>(other) }; a == b } } @@ -111,7 +106,7 @@ impl Eq for BinaryView {} impl Hash for BinaryView { fn hash(&self, state: &mut H) { - unsafe { std::mem::transmute::<&BinaryView, &[u8; 16]>(self) }.hash(state); + unsafe { std::mem::transmute::<&BinaryView, &u128>(self) }.hash(state); } } @@ -122,7 +117,8 @@ impl Default for BinaryView { } impl BinaryView { - pub const MAX_INLINED_SIZE: usize = 12; + /// Maximum size of an inlined binary value. + const MAX_INLINED_SIZE: usize = 12; /// Create a view from a value, block and offset /// @@ -174,12 +170,12 @@ impl BinaryView { inlined: Inlined::new::<12>(value), }, _ => Self { - _ref: Ref::new( - u32::try_from(value.len()).vortex_unwrap(), - value[0..4].try_into().vortex_unwrap(), - block, + _ref: Ref { + size: u32::try_from(value.len()).vortex_unwrap(), + prefix: value[0..4].try_into().vortex_unwrap(), + buffer_index: block, offset, - ), + }, }, } } @@ -202,30 +198,36 @@ impl BinaryView { Self::make_view(value, 0, 0) } + /// Returns the length of the binary value. #[inline] pub fn len(&self) -> u32 { unsafe { self.inlined.size } } + /// Returns true if the binary value is empty. #[inline] pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Returns true if the binary value is inlined. #[inline] #[allow(clippy::cast_possible_truncation)] pub fn is_inlined(&self) -> bool { self.len() <= (Self::MAX_INLINED_SIZE as u32) } + /// Returns the inlined representation of the binary value. pub fn as_inlined(&self) -> &Inlined { unsafe { &self.inlined } } + /// Returns the reference representation of the binary value. pub fn as_view(&self) -> &Ref { unsafe { &self._ref } } + /// Returns the binary view as u128 representation. pub fn as_u128(&self) -> u128 { // SAFETY: binary view always safe to read as u128 LE bytes unsafe { u128::from_le_bytes(self.le_bytes) } diff --git a/vortex-vector/src/vector.rs b/vortex-vector/src/vector.rs index e5c48db28d6..391e4f986ce 100644 --- a/vortex-vector/src/vector.rs +++ b/vortex-vector/src/vector.rs @@ -143,6 +143,7 @@ impl Vector { } /// Consumes `self` and returns the inner [`StringVector`] if `self` is of that variant. + #[allow(clippy::same_name_method)] // Same as VarBinTypeDowncast pub fn into_string(self) -> StringVector { if let Vector::String(v) = self { return v; @@ -151,6 +152,7 @@ impl Vector { } /// Consumes `self` and returns the inner [`BinaryVector`] if `self` is of that variant. + #[allow(clippy::same_name_method)] // Same as VarBinTypeDowncast pub fn into_binary(self) -> BinaryVector { if let Vector::Binary(v) = self { return v; diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index e8520dae423..9774c07adf2 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -202,6 +202,7 @@ impl VectorMut { } /// Consumes `self` and returns the inner [`StringVectorMut`] if `self` is of that variant. + #[allow(clippy::same_name_method)] // Same as VarBinTypeDowncast pub fn into_string(self) -> StringVectorMut { if let VectorMut::String(v) = self { return v; @@ -210,6 +211,7 @@ impl VectorMut { } /// Consumes `self` and returns the inner [`BinaryVectorMut`] if `self` is of that variant. + #[allow(clippy::same_name_method)] // Same as VarBinTypeDowncast pub fn into_binary(self) -> BinaryVectorMut { if let VectorMut::Binary(v) = self { return v;