From e43f4c5fc1ced4c141c3edb462b211bf3ac735eb Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 15 Oct 2025 14:04:46 -0400 Subject: [PATCH 01/13] add vortex-vector Signed-off-by: Nicholas Gates Signed-off-by: Connor Tsui --- Cargo.lock | 12 + Cargo.toml | 1 + vortex-buffer/src/bit/buf.rs | 8 + vortex-buffer/src/bit/buf_mut.rs | 57 ++++ vortex-buffer/src/buffer.rs | 60 ---- vortex-buffer/src/buffer_mut.rs | 54 +++ vortex-dtype/src/ptype.rs | 50 ++- vortex-mask/Cargo.toml | 1 + vortex-mask/src/lib.rs | 3 + vortex-mask/src/mask_mut.rs | 532 +++++++++++++++++++++++++++++ vortex-vector/Cargo.toml | 27 ++ vortex-vector/src/bool.rs | 67 ++++ vortex-vector/src/bool_mut.rs | 80 +++++ vortex-vector/src/lib.rs | 222 ++++++++++++ vortex-vector/src/null.rs | 44 +++ vortex-vector/src/null_mut.rs | 61 ++++ vortex-vector/src/ops.rs | 67 ++++ vortex-vector/src/primitive.rs | 67 ++++ vortex-vector/src/primitive_mut.rs | 82 +++++ vortex-vector/src/pvector.rs | 493 ++++++++++++++++++++++++++ 20 files changed, 1916 insertions(+), 72 deletions(-) create mode 100644 vortex-mask/src/mask_mut.rs create mode 100644 vortex-vector/Cargo.toml create mode 100644 vortex-vector/src/bool.rs create mode 100644 vortex-vector/src/bool_mut.rs create mode 100644 vortex-vector/src/lib.rs create mode 100644 vortex-vector/src/null.rs create mode 100644 vortex-vector/src/null_mut.rs create mode 100644 vortex-vector/src/ops.rs create mode 100644 vortex-vector/src/primitive.rs create mode 100644 vortex-vector/src/primitive_mut.rs create mode 100644 vortex-vector/src/pvector.rs diff --git a/Cargo.lock b/Cargo.lock index 72dd645b439..88af8dc9ef4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9124,6 +9124,7 @@ dependencies = [ "arrow-buffer", "itertools 0.14.0", "rstest", + "vortex-buffer", "vortex-error", ] @@ -9314,6 +9315,17 @@ dependencies = [ "hashbrown 0.16.0", ] +[[package]] +name = "vortex-vector" +version = "0.1.0" +dependencies = [ + "vortex-buffer", + "vortex-dtype", + "vortex-error", + "vortex-mask", + "vortex-scalar", +] + [[package]] name = "vortex-zigzag" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 2d3ff3c48eb..ca09e41bb0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ members = [ "vortex-scalar", "vortex-tui", "vortex-utils", + "vortex-vector", "xtask", "vortex-gpu", ] diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index 4d18ccb2b3e..e481f10c718 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -277,6 +277,14 @@ impl BitBuffer { self.buffer.slice(word_start..word_end) } + /// Attempt to convert this `BitBuffer` into a mutable version. + pub fn try_into_mut(self) -> Result { + match self.buffer.try_into_mut() { + Ok(buffer) => Ok(BitBufferMut::from_buffer(buffer, self.offset, self.len)), + Err(buffer) => Err(BitBuffer::new_with_offset(buffer, self.len, self.offset)), + } + } + /// Get a mutable version of this `BitBuffer` along with bit offset in the first byte. /// /// If the caller doesn't hold only reference to the underlying buffer, a copy is created. diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index ae0d54d6222..9f83f65aa9b 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -362,6 +362,63 @@ impl BitBufferMut { self.len += bit_len; } + /// Splits the bit buffer into two at the given index. + /// + /// Afterward, self contains elements `[0, at)`, and the returned buffer contains elements + /// `[at, capacity)`. + /// + /// Unlike bytes, if the split position is not on a byte-boundary this operation will copy + /// data into the result type, and mutate self. + pub fn split_off(&mut self, at: usize) -> Self { + assert!(at <= self.len, "index {at} exceeds len {}", self.len); + + let new_offset = self.offset; + let new_len = self.len - at; + + // If we are splitting on a byte boundary, we can just slice the buffer + if (self.offset + at) % 8 == 0 { + let byte_pos = (self.offset + at) / 8; + let new_buffer = self.buffer.split_off(byte_pos); + self.len = at; + return Self { + buffer: new_buffer, + offset: new_offset, + len: new_len, + }; + } + + // Otherwise, we need to copy bits into a new buffer + let mut new_buffer = BitBufferMut::with_capacity(new_len); + for i in 0..new_len { + let value = self.value(at + i); + new_buffer.append(value); + } + + // Truncate self to the split position + self.truncate(at); + + new_buffer + } + + /// Absorbs a mutable buffer that was previously split off. + /// + /// If the two buffers were previously contiguous and not mutated in a way that causes + /// re-allocation i.e., if other was created by calling split_off on this buffer, then this is + /// an O(1) operation that just decreases a reference count and sets a few indices. + /// + /// Otherwise, this method degenerates to self.append_buffer(&other). + pub fn unsplit(&mut self, other: Self) { + if (self.offset + self.len) % 8 == 0 && other.offset == 0 { + // We are aligned and can just append the buffers + self.buffer.unsplit(other.buffer); + self.len += other.len; + return; + } + + // Otherwise, we need to append the bits one by one + self.append_buffer(&other.freeze()) + } + /// Freeze the buffer in its current state into an immutable `BoolBuffer`. pub fn freeze(self) -> BitBuffer { BitBuffer::new_with_offset(self.buffer.freeze(), self.len, self.offset) diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index 4c5752d70d3..faa48ed9cad 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -447,66 +447,6 @@ impl Buffer { vortex_panic!("Buffer is not aligned to requested alignment {}", alignment) } } - - /// Align the buffer to alignment of U - pub fn align_to(mut self) -> (Buffer, Buffer, Buffer) { - let offset = self.as_ptr().align_offset(align_of::()); - if offset > self.len() { - ( - self, - Buffer::empty_aligned(Alignment::of::()), - Buffer::empty_aligned(Alignment::of::()), - ) - } else { - let left = self.bytes.split_to(offset); - self.length -= offset; - let (us_len, _) = self.align_to_offsets::(); - let trailer = self.bytes.split_off(us_len * size_of::()); - ( - Buffer::from_bytes_aligned(left, Alignment::of::()), - Buffer::from_bytes_aligned(self.bytes, Alignment::of::()), - Buffer::from_bytes_aligned(trailer, Alignment::of::()), - ) - } - } - - /// Adapted from standard library slice::align_to_offsets - /// Function to calculate lengths of the middle and trailing slice for `align_to`. - fn align_to_offsets(&self) -> (usize, usize) { - // What we're going to do about `rest` is figure out what multiple of `U`s we can put in the - // lowest number of `T`s. And how many `T`s we need for each such "multiple". - // - // Consider for example T=u8 U=u16. Then we can put 1 U in 2 Ts. Simple. Now, consider - // for example a case where size_of:: = 16, size_of:: = 24. We can put 2 Us in - // place of every 3 Ts in the `rest` slice. A bit more complicated. - // - // Formula to calculate this is: - // - // Us = lcm(size_of::, size_of::) / size_of:: - // Ts = lcm(size_of::, size_of::) / size_of:: - // - // Expanded and simplified: - // - // Us = size_of:: / gcd(size_of::, size_of::) - // Ts = size_of:: / gcd(size_of::, size_of::) - // - // Luckily since all this is constant-evaluated... performance here matters not! - const fn gcd(a: usize, b: usize) -> usize { - if b == 0 { a } else { gcd(b, a % b) } - } - - // Explicitly wrap the function call in a const block so it gets - // constant-evaluated even in debug mode. - let gcd: usize = const { gcd(size_of::(), size_of::()) }; - let ts: usize = size_of::() / gcd; - let us: usize = size_of::() / gcd; - - // Armed with this knowledge, we can find how many `U`s we can fit! - let us_len = self.len() / ts * us; - // And how many `T`s will be in the trailing slice! - let ts_len = self.len() % ts; - (us_len, ts_len) - } } /// An iterator over Buffer elements. diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs index 6b782817666..c28f7b7150f 100644 --- a/vortex-buffer/src/buffer_mut.rs +++ b/vortex-buffer/src/buffer_mut.rs @@ -328,6 +328,60 @@ impl BufferMut { self.length += slice.len(); } + /// Splits the buffer into two at the given index. + /// + /// Afterward, self contains elements `[0, at)`, and the returned buffer contains elements + /// `[at, capacity)`. It’s guaranteed that the memory does not move, that is, the address of + /// self does not change, and the address of the returned slice is at bytes after that. + /// + /// This is an O(1) operation that just increases the reference count and sets a few indices. + /// + /// Panics if either half would have a length that is not a multiple of the alignment. + pub fn split_off(&mut self, at: usize) -> Self { + if at > self.len() { + vortex_panic!("Cannot split buffer of length {} at {}", self.len(), at); + } + + let bytes_at = at * size_of::(); + if !bytes_at.is_multiple_of(*self.alignment) { + vortex_panic!( + "Cannot split buffer at {}, resulting alignment is not {}", + at, + self.alignment + ); + } + + let new_bytes = self.bytes.split_off(bytes_at); + let new_length = self.length - at; + self.length = at; + + BufferMut { + bytes: new_bytes, + length: new_length, + alignment: self.alignment, + _marker: Default::default(), + } + } + + /// Absorbs a mutable buffer that was previously split off. + /// + /// If the two buffers were previously contiguous and not mutated in a way that causes + /// re-allocation i.e., if other was created by calling split_off on this buffer, then this is + /// an O(1) operation that just decreases a reference count and sets a few indices. + /// + /// Otherwise, this method degenerates to self.extend_from_slice(other.as_ref()). + pub fn unsplit(&mut self, other: Self) { + if self.alignment != other.alignment { + vortex_panic!( + "Cannot unsplit buffers with different alignments: {} and {}", + self.alignment, + other.alignment + ); + } + self.bytes.unsplit(other.bytes); + self.length += other.length; + } + /// Freeze the `BufferMut` into a `Buffer`. pub fn freeze(self) -> Buffer { Buffer { diff --git a/vortex-dtype/src/ptype.rs b/vortex-dtype/src/ptype.rs index b5ea5ad32d7..5af9bfee515 100644 --- a/vortex-dtype/src/ptype.rs +++ b/vortex-dtype/src/ptype.rs @@ -143,15 +143,18 @@ pub trait NativePType: fn is_eq(self, other: Self) -> bool; /// Downcast the provided object to a type-specific instance. - fn downcast(visitor: &V) -> V::Output; + fn downcast(visitor: &V) -> V::Output; /// Downcast the provided object to a type-specific instance. - fn downcast_mut(visitor: &mut V) -> V::Output; + fn downcast_mut(visitor: &mut V) -> V::Output; + + /// Upcast a type-specific instance to a generic instance. + fn upcast(input: V::Input) -> V; } /// A visitor trait for converting a `NativePType` to another parameterized type. #[allow(missing_docs)] // Kind of obvious. -pub trait PTypeVisitor { +pub trait PTypeDowncast { type Output; fn as_u8(&self) -> Self::Output; @@ -167,19 +170,19 @@ pub trait PTypeVisitor { fn as_f64(&self) -> Self::Output; } -/// Extension trait to provide generic downcasting for [`PTypeVisitor`]. -pub trait PTypeVisitorExt: PTypeVisitor { +/// Extension trait to provide generic downcasting for [`PTypeDowncast`]. +pub trait PTypeDowncastExt: PTypeDowncast { /// Downcast the object to a specific primitive type. fn as_primitive(&self) -> Self::Output { T::downcast(self) } } -impl PTypeVisitorExt for T {} +impl PTypeDowncastExt for T {} /// A visitor trait for converting a `NativePType` to another mutable parameterized type. #[allow(missing_docs)] // Kind of obvious.. -pub trait PTypeVisitorMut { +pub trait PTypeDowncastMut { type Output; fn as_u8(&mut self) -> Self::Output; @@ -195,8 +198,8 @@ pub trait PTypeVisitorMut { fn as_f64(&mut self) -> Self::Output; } -/// Extension trait to provide generic downcasting for [`PTypeVisitorMut`]. -pub trait PTypeVisitorMutExt: PTypeVisitorMut { +/// Extension trait to provide generic downcasting for [`PTypeDowncastMut`]. +pub trait PTypeDowncastMutExt: PTypeDowncastMut { /// Downcast the object to a specific primitive type. fn as_primitive_mut(&mut self) -> Self::Output { T::downcast_mut(self) @@ -206,17 +209,40 @@ pub trait PTypeVisitorMutExt: PTypeVisitorMut { macro_rules! impl_ptype_downcast { ($T:ty) => { #[inline] - fn downcast(visitor: &V) -> V::Output { + fn downcast(visitor: &V) -> V::Output { paste::paste! { visitor.[]() } } #[inline] - fn downcast_mut(visitor: &mut V) -> V::Output { + fn downcast_mut(visitor: &mut V) -> V::Output { paste::paste! { visitor.[]() } } + + #[inline] + fn upcast(input: V::Input) -> V { + paste::paste! { V::[](input) } + } }; } +/// A visitor trait for converting a generic `NativePType` into a non-parameterized type. +#[allow(missing_docs)] // Kind of obvious. +pub trait PTypeUpcast { + type Input; + + fn from_u8(input: Self::Input) -> Self; + fn from_u16(input: Self::Input) -> Self; + fn from_u32(input: Self::Input) -> Self; + fn from_u64(input: Self::Input) -> Self; + fn from_i8(input: Self::Input) -> Self; + fn from_i16(input: Self::Input) -> Self; + fn from_i32(input: Self::Input) -> Self; + fn from_i64(input: Self::Input) -> Self; + fn from_f16(input: Self::Input) -> Self; + fn from_f32(input: Self::Input) -> Self; + fn from_f64(input: Self::Input) -> Self; +} + macro_rules! native_ptype { ($T:ty, $ptype:tt) => { impl crate::NativeDType for $T { @@ -253,7 +279,7 @@ macro_rules! native_ptype { }; } -impl PTypeVisitorMutExt for T {} +impl PTypeDowncastMutExt for T {} macro_rules! native_float_ptype { ($T:ty, $ptype:tt) => { diff --git a/vortex-mask/Cargo.toml b/vortex-mask/Cargo.toml index 40894d71165..ccf43593c91 100644 --- a/vortex-mask/Cargo.toml +++ b/vortex-mask/Cargo.toml @@ -16,6 +16,7 @@ version = { workspace = true } [dependencies] arrow-buffer = { workspace = true } itertools = { workspace = true } +vortex-buffer = { workspace = true, features = ["arrow"] } vortex-error = { workspace = true } [dev-dependencies] diff --git a/vortex-mask/src/lib.rs b/vortex-mask/src/lib.rs index 24a3f6168b3..d68f8d2f8bc 100644 --- a/vortex-mask/src/lib.rs +++ b/vortex-mask/src/lib.rs @@ -3,10 +3,12 @@ //! A mask is a set of sorted unique positive integers. #![deny(missing_docs)] + mod bitops; mod eq; mod intersect_by_rank; mod iter_bools; +mod mask_mut; #[cfg(test)] mod tests; @@ -18,6 +20,7 @@ use std::sync::{Arc, OnceLock}; use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; use itertools::Itertools; +pub use mask_mut::*; use vortex_error::{VortexResult, vortex_panic}; /// Represents a set of values that are all included, all excluded, or some mixture of both. diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs new file mode 100644 index 00000000000..a6b28b0de37 --- /dev/null +++ b/vortex-mask/src/mask_mut.rs @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Sub; + +use vortex_buffer::{BitBuffer, BitBufferMut}; + +use crate::Mask; + +/// A mutable mask, used for lazily allocating the bit buffer as required. +pub struct MaskMut(Inner); + +enum Inner { + /// Initially, the mask is empty but may have some capacity. + Empty { capacity: usize }, + /// When the first value is pushed, the mask becomes constant. + Constant { + value: bool, + len: usize, + capacity: usize, + }, + /// When the first non-constant value is written, we allocate the bit buffer and switch + /// into the builder state. + Builder(BitBufferMut), +} + +impl MaskMut { + /// Creates a new empty mask with the default capacity. + pub fn with_capacity(capacity: usize) -> Self { + Self(Inner::Empty { capacity }) + } + + /// Creates a new mask with all values set to `true`. + pub fn new_true(len: usize) -> Self { + Self(Inner::Constant { + value: true, + len, + capacity: len, + }) + } + + /// Creates a new mask with all values set to `false`. + pub fn new_false(len: usize) -> Self { + Self(Inner::Constant { + value: false, + len, + capacity: len, + }) + } + + /// Reserve capacity for at least `additional` more values to be appended. + pub fn reserve(&mut self, additional: usize) { + match &mut self.0 { + Inner::Empty { capacity } => { + *capacity += additional; + } + Inner::Constant { capacity, .. } => { + *capacity += additional; + } + Inner::Builder(bits) => { + bits.reserve(additional); + } + } + } + + /// Append n values to the mask. + pub fn append_n(&mut self, new_value: bool, n: usize) { + match &mut self.0 { + Inner::Empty { capacity } => { + self.0 = Inner::Constant { + value: new_value, + len: n, + capacity: (*capacity).max(n), + } + } + Inner::Constant { + value, + len, + capacity, + } => { + if *value == new_value { + // Same value, just increase length. + self.0 = Inner::Constant { + value: *value, + len: *len + n, + capacity: (*capacity).max(*len + n), + } + } else { + // Different value, need to allocate the bit buffer. + // Note: materialize() already appends the existing constant values + let bits = self.materialize(); + bits.append_n(new_value, n); + } + } + Inner::Builder(bits) => { + bits.append_n(new_value, n); + } + } + } + + /// Append a [`Mask`] to this mutable mask. + pub fn append_mask(&mut self, other: &Mask) { + match other { + Mask::AllTrue(len) => self.append_n(true, *len), + Mask::AllFalse(len) => self.append_n(false, *len), + Mask::Values(values) => { + let bitbuffer = BitBuffer::from(values.buffer.clone()); + self.materialize().append_buffer(&bitbuffer); + } + } + } + + /// Ensures that the internal bit buffer is allocated and returns a mutable reference to it. + fn materialize(&mut self) -> &mut BitBufferMut { + let needs_materialization = !matches!(self.0, Inner::Builder(_)); + + if needs_materialization { + let new_builder = match &self.0 { + Inner::Empty { capacity } => BitBufferMut::with_capacity(*capacity), + Inner::Constant { + value, + len, + capacity, + } => { + let required_capacity = (*capacity).max(*len); + let mut bits = BitBufferMut::with_capacity(required_capacity); + bits.append_n(*value, *len); + bits + } + Inner::Builder(_) => unreachable!(), + }; + self.0 = Inner::Builder(new_builder); + } + + match &mut self.0 { + Inner::Builder(bits) => bits, + _ => unreachable!(), + } + } + + /// Split-off the mask at the given index, returning a new mask with the + /// values from `at` to the end, and leaving `self` with the values from + /// the start to `at`. + pub fn split_off(&mut self, at: usize) -> Self { + assert!(at <= self.len(), "split_off index out of bounds"); + match &mut self.0 { + Inner::Empty { capacity } => { + let new_capacity = (*capacity).saturating_sub(at); + Self(Inner::Empty { + capacity: new_capacity, + }) + } + Inner::Constant { + value, + len, + capacity, + } => { + let new_len = len.sub(at); + *len = at; + let new_capacity = (*capacity).saturating_sub(at); + Self(Inner::Constant { + value: *value, + len: new_len, + capacity: new_capacity, + }) + } + Inner::Builder(bits) => { + let new_bits = bits.split_off(at); + Self(Inner::Builder(new_bits)) + } + } + } + + /// Absorb another mask into this one, appending its values. + pub fn unsplit(&mut self, other: Self) { + match other.0 { + Inner::Empty { .. } => { + // No work to do + } + Inner::Constant { value, len, .. } => { + self.append_n(value, len); + } + Inner::Builder(bits) => { + self.materialize().unsplit(bits); + } + } + } + + /// Freezes the mutable mask into an immutable one. + pub fn freeze(self) -> Mask { + match self.0 { + Inner::Empty { .. } => Mask::new_true(0), + Inner::Constant { value, len, .. } => { + if value { + Mask::new_true(len) + } else { + Mask::new_false(len) + } + } + Inner::Builder(bits) => Mask::from_buffer(bits.freeze().into()), + } + } + + /// Returns the logical length of the mask. + pub fn len(&self) -> usize { + match &self.0 { + Inner::Empty { .. } => 0, + Inner::Constant { len, .. } => *len, + Inner::Builder(bits) => bits.len(), + } + } + + /// Returns true if the mask is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl Mask { + /// Attempts to convert an immutable mask into a mutable one. + pub fn try_into_mut(self) -> Result { + match self { + Mask::AllTrue(len) => Ok(MaskMut::new_true(len)), + Mask::AllFalse(len) => Ok(MaskMut::new_false(len)), + Mask::Values(values) => { + // FIXME(ngates): we can never convert Arrow BooleanBuffer to ByteBufferMut, + // so we have to wait until we use BitBuffer internally in MaskValues. + Err(Mask::Values(values)) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_split_off_empty() { + let mut mask = MaskMut::with_capacity(10); + assert_eq!(mask.len(), 0); + + let other = mask.split_off(0); + assert_eq!(mask.len(), 0); + assert_eq!(other.len(), 0); + } + + #[test] + fn test_split_off_constant_true_at_zero() { + let mut mask = MaskMut::new_true(10); + let other = mask.split_off(0); + + assert_eq!(mask.len(), 0); + assert_eq!(other.len(), 10); + + let frozen = other.freeze(); + assert_eq!(frozen.true_count(), 10); + } + + #[test] + fn test_split_off_constant_true_at_end() { + let mut mask = MaskMut::new_true(10); + let other = mask.split_off(10); + + assert_eq!(mask.len(), 10); + assert_eq!(other.len(), 0); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 10); + } + + #[test] + fn test_split_off_constant_true_in_middle() { + let mut mask = MaskMut::new_true(10); + let other = mask.split_off(6); + + assert_eq!(mask.len(), 6); + assert_eq!(other.len(), 4); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 6); + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 4); + } + + #[test] + fn test_split_off_constant_false() { + let mut mask = MaskMut::new_false(20); + let other = mask.split_off(12); + + assert_eq!(mask.len(), 12); + assert_eq!(other.len(), 8); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 0); + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 0); + } + + // Note: Tests using BitBuffer operations are marked as ignored under miri + // because bitvec uses raw pointer operations that miri cannot verify. + #[test] + fn test_split_off_builder_at_byte_boundary() { + let mut mask = MaskMut::with_capacity(16); + // Create a pattern: 8 true, 8 false + mask.append_n(true, 8); + mask.append_n(false, 8); + + let mask_ptr = match &mask.0 { + Inner::Builder(bits) => bits.as_slice().as_ptr(), + _ => unreachable!(), + }; + + let other = mask.split_off(8); + + assert_eq!(mask.len(), 8); + assert_eq!(other.len(), 8); + + // Ensure the unsplit was zero-copy. + mask.unsplit(other); + let new_mask_ptr = match &mask.0 { + Inner::Builder(bits) => bits.as_slice().as_ptr(), + _ => unreachable!(), + }; + assert_eq!(mask_ptr, new_mask_ptr); + } + + #[test] + fn test_split_off_builder_not_byte_aligned() { + let mut mask = MaskMut::with_capacity(20); + // Create a pattern: 10 true, 10 false + mask.append_n(true, 10); + mask.append_n(false, 10); + + let other = mask.split_off(10); + + assert_eq!(mask.len(), 10); + assert_eq!(other.len(), 10); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 10); + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 0); + } + + #[test] + fn test_split_off_builder_mixed_pattern() { + let mut mask = MaskMut::with_capacity(15); + // Create pattern: TFTFTFTFTFTFTFT (alternating) + for i in 0..15 { + mask.append_n(i % 2 == 0, 1); + } + + let other = mask.split_off(7); + + assert_eq!(mask.len(), 7); + assert_eq!(other.len(), 8); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 4); // positions 0,2,4,6 + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 4); // positions 7,9,11,13 => 0,2,4,6 in split + } + + #[test] + fn test_unsplit_empty_with_empty() { + let mut mask = MaskMut::with_capacity(10); + let other = MaskMut::with_capacity(10); + + mask.unsplit(other); + assert_eq!(mask.len(), 0); + } + + #[test] + fn test_unsplit_empty_with_constant() { + let mut mask = MaskMut::with_capacity(10); + let other = MaskMut::new_true(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 5); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 5); + } + + #[test] + fn test_unsplit_constant_with_constant_same() { + let mut mask = MaskMut::new_true(5); + let other = MaskMut::new_true(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 10); + } + + #[test] + fn test_unsplit_constant_with_constant_different() { + let mut mask = MaskMut::new_true(5); + let other = MaskMut::new_false(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 5); + } + + #[test] + fn test_unsplit_constant_with_builder() { + let mut mask = MaskMut::new_true(5); + + let mut other = MaskMut::with_capacity(10); + other.append_n(true, 3); + other.append_n(false, 2); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 8); // 5 from first + 3 from second + } + + #[test] + fn test_unsplit_builder_with_constant() { + let mut mask = MaskMut::with_capacity(10); + mask.append_n(true, 3); + mask.append_n(false, 2); + + let other = MaskMut::new_true(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 8); // 3 from first + 5 from second + } + + #[test] + fn test_unsplit_builder_with_builder() { + let mut mask = MaskMut::with_capacity(10); + mask.append_n(true, 3); + mask.append_n(false, 2); + + let mut other = MaskMut::with_capacity(10); + other.append_n(false, 3); + other.append_n(true, 2); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 5); // 3 from first + 2 from second + } + + #[test] + // TODO(ngates): when mask uses BitBuffer internally, into_mut should succeed + #[should_panic] + fn test_round_trip_split_unsplit() { + let mut original = MaskMut::with_capacity(20); + // Pattern: 10 true, 10 false + original.append_n(true, 10); + original.append_n(false, 10); + + let original_frozen = original.freeze(); + let original_true_count = original_frozen.true_count(); + + // Convert back to mutable for split + let mut mask = original_frozen.try_into_mut().unwrap(); + + // Split at 10 + let other = mask.split_off(10); + + // Unsplit back together + mask.unsplit(other); + + assert_eq!(mask.len(), 20); + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), original_true_count); + } + + #[test] + #[should_panic(expected = "split_off index out of bounds")] + fn test_split_off_out_of_bounds() { + let mut mask = MaskMut::new_true(10); + let _ = mask.split_off(11); + } + + #[test] + fn test_split_off_builder_at_bit_1() { + let mut mask = MaskMut::with_capacity(16); + mask.append_n(true, 16); + + let other = mask.split_off(1); + + assert_eq!(mask.len(), 1); + assert_eq!(other.len(), 15); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 1); + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 15); + } + + #[test] + fn test_multiple_split_unsplit() { + let mut mask = MaskMut::new_true(30); + + // Split into 3 parts + let third = mask.split_off(20); // 20-30 + let second = mask.split_off(10); // 10-20 + // first is 0-10 + + assert_eq!(mask.len(), 10); + assert_eq!(second.len(), 10); + assert_eq!(third.len(), 10); + + // Recombine in order + mask.unsplit(second); + mask.unsplit(third); + + assert_eq!(mask.len(), 30); + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 30); + } +} diff --git a/vortex-vector/Cargo.toml b/vortex-vector/Cargo.toml new file mode 100644 index 00000000000..353f2ae1d0b --- /dev/null +++ b/vortex-vector/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "vortex-vector" +authors = { workspace = true } +categories = { workspace = true } +description = "Vortex in-memory canonical data format" +edition = { workspace = true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +readme = { workspace = true } +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[package.metadata.docs.rs] +all-features = true + +[lints] +workspace = true + +[dependencies] +vortex-buffer = { workspace = true } +vortex-dtype = { workspace = true } +vortex-error = { workspace = true } +vortex-mask = { workspace = true } +vortex-scalar = { workspace = true } diff --git a/vortex-vector/src/bool.rs b/vortex-vector/src/bool.rs new file mode 100644 index 00000000000..a46d25a9e56 --- /dev/null +++ b/vortex-vector/src/bool.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBuffer; +use vortex_dtype::DType; +use vortex_mask::Mask; + +use crate::ops::VectorOps; +use crate::{BoolVectorMut, Vector}; + +/// An immutable vector of boolean values. +pub struct BoolVector { + pub(super) dtype: DType, + pub(super) bits: BitBuffer, + pub(super) validity: Mask, +} + +impl From for Vector { + fn from(v: BoolVector) -> Self { + Self::Bool(v) + } +} + +impl VectorOps for BoolVector { + type Mutable = BoolVectorMut; + + fn len(&self) -> usize { + self.bits.len() + } + + fn dtype(&self) -> &DType { + &self.dtype + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + let bits = match self.bits.try_into_mut() { + Ok(bits) => bits, + Err(bits) => { + return Err(BoolVector { + dtype: self.dtype, + bits, + validity: self.validity, + }); + } + }; + + let validity = match self.validity.try_into_mut() { + Ok(validity) => validity, + Err(validity) => { + return Err(BoolVector { + dtype: self.dtype, + bits: bits.freeze(), + validity, + }); + } + }; + + Ok(BoolVectorMut { + dtype: self.dtype, + bits, + validity, + }) + } +} diff --git a/vortex-vector/src/bool_mut.rs b/vortex-vector/src/bool_mut.rs new file mode 100644 index 00000000000..627b5394bff --- /dev/null +++ b/vortex-vector/src/bool_mut.rs @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBufferMut; +use vortex_dtype::{DType, Nullability}; +use vortex_mask::MaskMut; + +use crate::BoolVector; +use crate::ops::VectorMutOps; + +/// A mutable vector of boolean values. +pub struct BoolVectorMut { + pub(super) dtype: DType, + pub(super) bits: BitBufferMut, + pub(super) validity: MaskMut, +} + +impl BoolVectorMut { + /// Create a new mutable boolean vector with the given capacity and nullability. + pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + Self { + dtype: DType::Bool(nullability), + bits: BitBufferMut::with_capacity(capacity), + validity: MaskMut::with_capacity(capacity), + } + } +} + +impl From for crate::VectorMut { + fn from(v: BoolVectorMut) -> Self { + Self::Bool(v) + } +} + +impl VectorMutOps for BoolVectorMut { + type Immutable = BoolVector; + + fn len(&self) -> usize { + self.bits.len() + } + + fn dtype(&self) -> &DType { + &self.dtype + } + + fn capacity(&self) -> usize { + self.bits.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.bits.reserve(additional); + self.validity.reserve(additional); + } + + fn split_off(&mut self, at: usize) -> Self { + BoolVectorMut { + dtype: self.dtype.clone(), + bits: self.bits.split_off(at), + validity: self.validity.split_off(at), + } + } + + fn unsplit(&mut self, other: Self) { + self.bits.unsplit(other.bits); + self.validity.unsplit(other.validity); + } + + fn extend_from_vector(&mut self, other: &BoolVector) { + self.bits.append_buffer(&other.bits); + self.validity.append_mask(&other.validity); + } + + fn freeze(self) -> Self::Immutable { + BoolVector { + dtype: self.dtype, + bits: self.bits.freeze(), + validity: self.validity.freeze(), + } + } +} diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs new file mode 100644 index 00000000000..7ad2912a0e7 --- /dev/null +++ b/vortex-vector/src/lib.rs @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Vector types for Vortex. + +#![deny(missing_docs)] + +use vortex_dtype::DType; + +use crate::ops::{VectorMutOps, VectorOps}; + +mod bool; +mod bool_mut; +mod null; +mod null_mut; +mod ops; +mod primitive; +mod primitive_mut; +mod pvector; + +pub use bool::*; +pub use bool_mut::*; +pub use null::*; +pub use null_mut::*; +pub use primitive::*; +pub use primitive_mut::*; +pub use pvector::*; +use vortex_error::vortex_panic; + +/// An enum over all vector types. +pub enum Vector { + /// Null + Null(NullVector), + /// Bool + Bool(BoolVector), + /// Primitive + Primitive(PVector), + // Decimal + // Decimal(DecimalVector), + // String + // String(StringVector), + // Binary + // Binary(BinaryVector), + // List + // List(ListVector), + // FixedList + // FixedList(FixedListVector), + // Struct + // Struct(StructVector), + // Extension + // Extension(ExtensionVector), +} + +/// An enum over all mutable vector types. +pub enum VectorMut { + /// Null + Null(NullVectorMut), + /// Bool + Bool(BoolVectorMut), + /// Primitive + Primitive(PVectorMut), +} + +impl VectorMut { + /// Create a new mutable vector with the given capacity and dtype. + pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { + use vortex_dtype::NativePType; + match dtype { + DType::Null => NullVectorMut::new(0).into(), + DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), + DType::Primitive(ptype, nullability) => { + PVectorMut::with_capacity(capacity, *ptype, *nullability).into() + } + _ => vortex_panic!("Unsupported dtype for VectorMut"), + } + } +} + +macro_rules! match_each_vector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + Vector::Null(v) => { + let $vec = v; + $body + } + Vector::Bool(v) => { + let $vec = v; + $body + } + Vector::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +macro_rules! match_each_vector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + VectorMut::Null(v) => { + let $vec = v; + $body + } + VectorMut::Bool(v) => { + let $vec = v; + $body + } + VectorMut::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +macro_rules! match_each_vector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), VectorMut::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), VectorMut::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), VectorMut::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => vortex_panic!("Mismatched vector types"), + } + }}; +} + +macro_rules! match_each_vector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), Vector::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), Vector::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), Vector::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => vortex_panic!("Mismatched vector types"), + } + }}; +} + +impl VectorOps for Vector { + type Mutable = VectorMut; + + fn len(&self) -> usize { + match_each_vector!(self, |v| { v.len() }) + } + + fn dtype(&self) -> &DType { + match_each_vector!(self, |v| { v.dtype() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_vector!(self, |v| { + v.try_into_mut().map(VectorMut::from).map_err(Vector::from) + }) + } +} + +impl VectorMutOps for VectorMut { + type Immutable = Vector; + + fn len(&self) -> usize { + match_each_vector_mut!(self, |v| { v.len() }) + } + + fn dtype(&self) -> &DType { + match_each_vector_mut!(self, |v| { v.dtype() }) + } + + fn capacity(&self) -> usize { + match_each_vector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_vector_mut!(self, |v| { v.reserve(additional) }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_vector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_vector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_vector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_vector_mut!(self, |v| { v.freeze().into() }) + } +} diff --git a/vortex-vector/src/null.rs b/vortex-vector/src/null.rs new file mode 100644 index 00000000000..b864aaabd00 --- /dev/null +++ b/vortex-vector/src/null.rs @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::DType; + +use crate::ops::VectorOps; +use crate::{NullVectorMut, Vector}; + +/// An immutable vector of null values. +pub struct NullVector { + pub(super) len: usize, +} + +impl NullVector { + /// Creates a new `NullVector` with the given length. + pub fn new(len: usize) -> Self { + Self { len } + } +} + +impl From for Vector { + fn from(v: NullVector) -> Self { + Self::Null(v) + } +} + +impl VectorOps for NullVector { + type Mutable = NullVectorMut; + + fn len(&self) -> usize { + self.len + } + + fn dtype(&self) -> &DType { + &DType::Null + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + Ok(NullVectorMut::new(self.len)) + } +} diff --git a/vortex-vector/src/null_mut.rs b/vortex-vector/src/null_mut.rs new file mode 100644 index 00000000000..6066e0f234b --- /dev/null +++ b/vortex-vector/src/null_mut.rs @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::DType; + +use crate::ops::VectorMutOps; +use crate::{NullVector, VectorMut}; + +/// A mutable vector of null values. +pub struct NullVectorMut { + pub(super) len: usize, +} + +impl NullVectorMut { + /// Creates a new `NullVectorMut` with the given length. + pub fn new(len: usize) -> Self { + Self { len } + } +} + +impl From for VectorMut { + fn from(v: NullVectorMut) -> Self { + Self::Null(v) + } +} + +impl VectorMutOps for NullVectorMut { + type Immutable = NullVector; + + fn len(&self) -> usize { + self.len + } + + fn dtype(&self) -> &DType { + &DType::Null + } + + fn capacity(&self) -> usize { + usize::MAX + } + + fn reserve(&mut self, _additional: usize) {} + + fn split_off(&mut self, at: usize) -> Self { + let new_len = self.len - at; + self.len = at; + NullVectorMut { len: new_len } + } + + fn unsplit(&mut self, other: Self) { + self.len += other.len; + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + self.len += other.len; + } + + fn freeze(self) -> Self::Immutable { + NullVector::new(self.len) + } +} diff --git a/vortex-vector/src/ops.rs b/vortex-vector/src/ops.rs new file mode 100644 index 00000000000..6ad122c7bcf --- /dev/null +++ b/vortex-vector/src/ops.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::DType; + +use crate::{Vector, VectorMut}; + +/// Common operations for immutable vectors. +pub trait VectorOps: Into { + type Mutable: VectorMutOps; + + /// Returns the length of the vector. + fn len(&self) -> usize; + + /// Returns the data type of the vector. + fn dtype(&self) -> &DType; + + /// Try to convert self into a mutable vector. + // + // If self is the only unique strong reference, this will succeed and return a mutable vector + // with the contents of self without copying. If self is not unique, this will fail and return + // self. + fn try_into_mut(self) -> Result + where + Self: Sized; +} + +/// Common operations for mutable vectors. +pub trait VectorMutOps: Into { + type Immutable: VectorOps; + + /// Returns the length of the vector. + fn len(&self) -> usize; + + /// Returns the data type of the vector. + fn dtype(&self) -> &DType; + + /// Returns the capacity of the vector. + fn capacity(&self) -> usize; + + /// Reserves capacity for at least `additional` more elements to be inserted. + fn reserve(&mut self, additional: usize); + + /// Splits the vector into two at the given index. + /// + /// Afterward, self contains elements `[0, at)`, and the returned vector contains elements + /// `[at, capacity)`. It’s guaranteed that the memory does not move, that is, the address of + /// self does not change, and the address of the returned slice is at bytes after that. + /// + /// This is an O(1) operation that just increases the reference count and sets a few indices. + fn split_off(&mut self, at: usize) -> Self; + + /// Absorbs a mutable vector that was previously split off. + /// + /// If the two vectors were previously contiguous and not mutated in a way that causes + /// re-allocation i.e., if other was created by calling split_off on this vector, then this is + /// an O(1) operation that just decreases a reference count and sets a few indices. + /// + // Otherwise, this method degenerates to self.extend_from_vector(other.as_ref()). + fn unsplit(&mut self, other: Self); + + /// Extends the vector by appending elements from another vector. + fn extend_from_vector(&mut self, other: &Self::Immutable); + + /// Freeze the vector into an immutable one. + fn freeze(self) -> Self::Immutable; +} diff --git a/vortex-vector/src/primitive.rs b/vortex-vector/src/primitive.rs new file mode 100644 index 00000000000..9a39f571c55 --- /dev/null +++ b/vortex-vector/src/primitive.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::Buffer; +use vortex_dtype::{DType, NativePType}; +use vortex_mask::Mask; + +use crate::ops::VectorOps; +use crate::{PVector, PrimitiveVectorMut, Vector}; + +/// An immutable vector of primitive values. +pub struct PrimitiveVector { + pub(super) dtype: DType, + pub(super) elements: Buffer, + pub(super) validity: Mask, +} + +impl From> for Vector { + fn from(v: PrimitiveVector) -> Self { + Self::Primitive(PVector::from(v)) + } +} + +impl VectorOps for PrimitiveVector { + type Mutable = PrimitiveVectorMut; + + /// Returns the length of the vector. + fn len(&self) -> usize { + self.elements.len() + } + + /// Returns the data type of the vector. + fn dtype(&self) -> &DType { + &self.dtype + } + + /// Try to convert self into a mutable vector. + fn try_into_mut(self) -> Result, Self> { + let elements = match self.elements.try_into_mut() { + Ok(elements) => elements, + Err(elements) => { + return Err(PrimitiveVector { + dtype: self.dtype, + elements, + validity: self.validity, + }); + } + }; + + let validity = match self.validity.try_into_mut() { + Ok(validity) => validity, + Err(validity) => { + return Err(PrimitiveVector { + dtype: self.dtype, + elements: elements.freeze(), + validity, + }); + } + }; + + Ok(PrimitiveVectorMut { + dtype: self.dtype, + elements, + validity, + }) + } +} diff --git a/vortex-vector/src/primitive_mut.rs b/vortex-vector/src/primitive_mut.rs new file mode 100644 index 00000000000..14ef69aeada --- /dev/null +++ b/vortex-vector/src/primitive_mut.rs @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BufferMut; +use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_mask::MaskMut; + +use crate::ops::VectorMutOps; +use crate::{PVectorMut, PrimitiveVector, VectorMut}; + +/// A mutable vector of primitive values. +pub struct PrimitiveVectorMut { + pub(super) dtype: DType, + pub(super) elements: BufferMut, + pub(super) validity: MaskMut, +} + +impl PrimitiveVectorMut { + /// Create a new mutable primitive vector with the given capacity and nullability. + pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + Self { + dtype: DType::Primitive(T::PTYPE, nullability), + elements: BufferMut::with_capacity(capacity), + validity: MaskMut::with_capacity(capacity), + } + } +} + +impl From> for VectorMut { + fn from(val: PrimitiveVectorMut) -> Self { + VectorMut::Primitive(PVectorMut::from(val)) + } +} + +impl VectorMutOps for PrimitiveVectorMut { + type Immutable = PrimitiveVector; + + fn len(&self) -> usize { + self.elements.len() + } + + fn dtype(&self) -> &DType { + &self.dtype + } + + fn capacity(&self) -> usize { + self.elements.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.elements.reserve(additional); + self.validity.reserve(additional); + } + + fn split_off(&mut self, at: usize) -> Self { + PrimitiveVectorMut { + dtype: self.dtype.clone(), + elements: self.elements.split_off(at), + validity: self.validity.split_off(at), + } + } + + fn unsplit(&mut self, other: Self) { + self.elements.unsplit(other.elements); + self.validity.unsplit(other.validity); + } + + /// Extends the vector by appending elements from another vector. + fn extend_from_vector(&mut self, other: &PrimitiveVector) { + self.elements.extend_from_slice(other.elements.as_slice()); + self.validity.append_mask(&other.validity); + } + + /// Freeze the vector into an immutable one. + fn freeze(self) -> PrimitiveVector { + PrimitiveVector { + dtype: self.dtype, + elements: self.elements.freeze(), + validity: self.validity.freeze(), + } + } +} diff --git a/vortex-vector/src/pvector.rs b/vortex-vector/src/pvector.rs new file mode 100644 index 00000000000..ca21d86e02c --- /dev/null +++ b/vortex-vector/src/pvector.rs @@ -0,0 +1,493 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::half::f16; +use vortex_dtype::{DType, NativePType, Nullability, PType, PTypeUpcast}; +use vortex_error::vortex_panic; + +use crate::ops::{VectorMutOps, VectorOps}; +use crate::{PrimitiveVector, PrimitiveVectorMut, Vector}; + +/// An enum over all primitive vector types. +pub enum PVector { + /// U8 + U8(PrimitiveVector), + /// U16 + U16(PrimitiveVector), + /// U32 + U32(PrimitiveVector), + /// U64 + U64(PrimitiveVector), + /// I8 + I8(PrimitiveVector), + /// I16 + I16(PrimitiveVector), + /// I32 + I32(PrimitiveVector), + /// I64 + I64(PrimitiveVector), + /// F16 + F16(PrimitiveVector), + /// F32 + F32(PrimitiveVector), + /// F64 + F64(PrimitiveVector), +} + +/// An enum over all mutable primitive vector types. +pub enum PVectorMut { + /// U8 + U8(PrimitiveVectorMut), + /// U16 + U16(PrimitiveVectorMut), + /// U32 + U32(PrimitiveVectorMut), + /// U64 + U64(PrimitiveVectorMut), + /// I8 + I8(PrimitiveVectorMut), + /// I16 + I16(PrimitiveVectorMut), + /// I32 + I32(PrimitiveVectorMut), + /// I64 + I64(PrimitiveVectorMut), + /// F16 + F16(PrimitiveVectorMut), + /// F32 + F32(PrimitiveVectorMut), + /// F64 + F64(PrimitiveVectorMut), +} + +impl PVectorMut { + /// Create a new mutable primitive vector with the given capacity, primitive type, and nullability. + pub fn with_capacity(capacity: usize, ptype: PType, nullability: Nullability) -> Self { + match ptype { + PType::U8 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::U16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::U32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::U64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::I8 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::I16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::I32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::I64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::F16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::F32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::F64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + } + } +} + +macro_rules! match_each_pvector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PVector::U8(v) => { + let $vec = v; + $body + } + PVector::U16(v) => { + let $vec = v; + $body + } + PVector::U32(v) => { + let $vec = v; + $body + } + PVector::U64(v) => { + let $vec = v; + $body + } + PVector::I8(v) => { + let $vec = v; + $body + } + PVector::I16(v) => { + let $vec = v; + $body + } + PVector::I32(v) => { + let $vec = v; + $body + } + PVector::I64(v) => { + let $vec = v; + $body + } + PVector::F16(v) => { + let $vec = v; + $body + } + PVector::F32(v) => { + let $vec = v; + $body + } + PVector::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +macro_rules! match_each_pvector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PVectorMut::U8(v) => { + let $vec = v; + $body + } + PVectorMut::U16(v) => { + let $vec = v; + $body + } + PVectorMut::U32(v) => { + let $vec = v; + $body + } + PVectorMut::U64(v) => { + let $vec = v; + $body + } + PVectorMut::I8(v) => { + let $vec = v; + $body + } + PVectorMut::I16(v) => { + let $vec = v; + $body + } + PVectorMut::I32(v) => { + let $vec = v; + $body + } + PVectorMut::I64(v) => { + let $vec = v; + $body + } + PVectorMut::F16(v) => { + let $vec = v; + $body + } + PVectorMut::F32(v) => { + let $vec = v; + $body + } + PVectorMut::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +macro_rules! match_each_pvector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PVectorMut::U8(a), PVectorMut::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U16(a), PVectorMut::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U32(a), PVectorMut::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U64(a), PVectorMut::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I8(a), PVectorMut::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I16(a), PVectorMut::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I32(a), PVectorMut::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I64(a), PVectorMut::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F16(a), PVectorMut::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F32(a), PVectorMut::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F64(a), PVectorMut::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => vortex_panic!("Mismatched primitive vector types"), + } + }}; +} + +macro_rules! match_each_pvector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PVectorMut::U8(a), PVector::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U16(a), PVector::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U32(a), PVector::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U64(a), PVector::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I8(a), PVector::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I16(a), PVector::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I32(a), PVector::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I64(a), PVector::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F16(a), PVector::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F32(a), PVector::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F64(a), PVector::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => vortex_panic!("Mismatched primitive vector types"), + } + }}; +} + +impl From for Vector { + fn from(v: PVector) -> Self { + Self::Primitive(v) + } +} + +impl VectorOps for PVector { + type Mutable = PVectorMut; + + fn len(&self) -> usize { + match_each_pvector!(self, |v| { v.len() }) + } + + fn dtype(&self) -> &DType { + match_each_pvector!(self, |v| { v.dtype() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_pvector!(self, |v| { + v.try_into_mut() + .map(PVectorMut::from) + .map_err(PVector::from) + }) + } +} + +impl From for crate::VectorMut { + fn from(v: PVectorMut) -> Self { + Self::Primitive(v) + } +} + +impl VectorMutOps for PVectorMut { + type Immutable = PVector; + + fn len(&self) -> usize { + match_each_pvector_mut!(self, |v| { v.len() }) + } + + fn dtype(&self) -> &DType { + match_each_pvector_mut!(self, |v| { v.dtype() }) + } + + fn capacity(&self) -> usize { + match_each_pvector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_pvector_mut!(self, |v| { v.reserve(additional) }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_pvector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_pvector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_pvector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_pvector_mut!(self, |v| { v.freeze().into() }) + } +} + +// From impls for PVector variants +impl From> for PVector { + fn from(v: PrimitiveVector) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PVector { + type Input = PrimitiveVector; + + fn from_u8(input: Self::Input) -> Self { + PVector::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PVector::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PVector::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PVector::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PVector::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PVector::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PVector::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PVector::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PVector::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PVector::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PVector::F64(input) + } +} + +// From impls for PVectorMut variants +impl From> for PVectorMut { + fn from(v: PrimitiveVectorMut) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PVectorMut { + type Input = PrimitiveVectorMut; + + fn from_u8(input: Self::Input) -> Self { + PVectorMut::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PVectorMut::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PVectorMut::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PVectorMut::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PVectorMut::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PVectorMut::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PVectorMut::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PVectorMut::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PVectorMut::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PVectorMut::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PVectorMut::F64(input) + } +} From eef7c49547bc4971b319a26c2c762d06cd9e249b Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 17 Oct 2025 17:35:43 -0400 Subject: [PATCH 02/13] reorganize modules Signed-off-by: Connor Tsui --- vortex-vector/src/bool/mod.rs | 22 + vortex-vector/src/{bool.rs => bool/vector.rs} | 54 +- vortex-vector/src/bool/vector_mut.rs | 100 ++++ vortex-vector/src/bool_mut.rs | 80 --- vortex-vector/src/lib.rs | 224 +------- vortex-vector/src/null/mod.rs | 22 + vortex-vector/src/{null.rs => null/vector.rs} | 23 +- .../src/{null_mut.rs => null/vector_mut.rs} | 52 +- vortex-vector/src/ops.rs | 67 --- vortex-vector/src/primitive.rs | 67 --- vortex-vector/src/primitive/generic.rs | 64 +++ vortex-vector/src/primitive/generic_mut.rs | 108 ++++ vortex-vector/src/primitive/macros.rs | 238 +++++++++ vortex-vector/src/primitive/mod.rs | 16 + vortex-vector/src/primitive/vector.rs | 177 +++++++ vortex-vector/src/primitive/vector_mut.rs | 105 ++++ vortex-vector/src/primitive_mut.rs | 82 --- vortex-vector/src/private.rs | 27 + vortex-vector/src/pvector.rs | 493 ------------------ vortex-vector/src/vector/macros.rs | 94 ++++ vortex-vector/src/vector/mod.rs | 62 +++ vortex-vector/src/vector/ops.rs | 174 +++++++ 22 files changed, 1287 insertions(+), 1064 deletions(-) create mode 100644 vortex-vector/src/bool/mod.rs rename vortex-vector/src/{bool.rs => bool/vector.rs} (50%) create mode 100644 vortex-vector/src/bool/vector_mut.rs delete mode 100644 vortex-vector/src/bool_mut.rs create mode 100644 vortex-vector/src/null/mod.rs rename vortex-vector/src/{null.rs => null/vector.rs} (72%) rename vortex-vector/src/{null_mut.rs => null/vector_mut.rs} (61%) delete mode 100644 vortex-vector/src/ops.rs delete mode 100644 vortex-vector/src/primitive.rs create mode 100644 vortex-vector/src/primitive/generic.rs create mode 100644 vortex-vector/src/primitive/generic_mut.rs create mode 100644 vortex-vector/src/primitive/macros.rs create mode 100644 vortex-vector/src/primitive/mod.rs create mode 100644 vortex-vector/src/primitive/vector.rs create mode 100644 vortex-vector/src/primitive/vector_mut.rs delete mode 100644 vortex-vector/src/primitive_mut.rs create mode 100644 vortex-vector/src/private.rs delete mode 100644 vortex-vector/src/pvector.rs create mode 100644 vortex-vector/src/vector/macros.rs create mode 100644 vortex-vector/src/vector/mod.rs create mode 100644 vortex-vector/src/vector/ops.rs diff --git a/vortex-vector/src/bool/mod.rs b/vortex-vector/src/bool/mod.rs new file mode 100644 index 00000000000..8541fc43397 --- /dev/null +++ b/vortex-vector/src/bool/mod.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod vector; +pub use vector::BoolVector; + +mod vector_mut; +pub use vector_mut::BoolVectorMut; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: BoolVector) -> Self { + Self::Bool(v) + } +} + +impl From for VectorMut { + fn from(v: BoolVectorMut) -> Self { + Self::Bool(v) + } +} diff --git a/vortex-vector/src/bool.rs b/vortex-vector/src/bool/vector.rs similarity index 50% rename from vortex-vector/src/bool.rs rename to vortex-vector/src/bool/vector.rs index a46d25a9e56..b63e05d993b 100644 --- a/vortex-vector/src/bool.rs +++ b/vortex-vector/src/bool/vector.rs @@ -2,34 +2,31 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_buffer::BitBuffer; -use vortex_dtype::DType; +use vortex_dtype::{DType, Nullability}; use vortex_mask::Mask; -use crate::ops::VectorOps; -use crate::{BoolVectorMut, Vector}; +use super::BoolVectorMut; +use crate::VectorOps; /// An immutable vector of boolean values. pub struct BoolVector { - pub(super) dtype: DType, pub(super) bits: BitBuffer, - pub(super) validity: Mask, -} - -impl From for Vector { - fn from(v: BoolVector) -> Self { - Self::Bool(v) - } + pub(super) validity: Option, } impl VectorOps for BoolVector { type Mutable = BoolVectorMut; - fn len(&self) -> usize { - self.bits.len() + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Bool(self.nullability()) } - fn dtype(&self) -> &DType { - &self.dtype + fn len(&self) -> usize { + self.bits.len() } fn try_into_mut(self) -> Result @@ -40,28 +37,25 @@ impl VectorOps for BoolVector { Ok(bits) => bits, Err(bits) => { return Err(BoolVector { - dtype: self.dtype, bits, validity: self.validity, }); } }; - let validity = match self.validity.try_into_mut() { - Ok(validity) => validity, - Err(validity) => { - return Err(BoolVector { - dtype: self.dtype, - bits: bits.freeze(), - validity, - }); - } + let validity = match self.validity { + Some(v) => match v.try_into_mut() { + Ok(v) => Some(v), + Err(v) => { + return Err(BoolVector { + bits: bits.freeze(), + validity: Some(v), + }); + } + }, + None => None, }; - Ok(BoolVectorMut { - dtype: self.dtype, - bits, - validity, - }) + Ok(BoolVectorMut { bits, validity }) } } diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs new file mode 100644 index 00000000000..43231f3dc40 --- /dev/null +++ b/vortex-vector/src/bool/vector_mut.rs @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBufferMut; +use vortex_dtype::{DType, Nullability}; +use vortex_mask::MaskMut; + +use super::BoolVector; +use crate::VectorMutOps; + +/// A mutable vector of boolean values. +pub struct BoolVectorMut { + pub(super) bits: BitBufferMut, + pub(super) validity: Option, +} + +impl BoolVectorMut { + /// Create a new mutable boolean vector with the given capacity and nullability. + pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + let validity = match nullability { + Nullability::NonNullable => None, + Nullability::Nullable => Some(MaskMut::with_capacity(capacity)), + }; + + Self { + bits: BitBufferMut::with_capacity(capacity), + validity, + } + } +} + +impl VectorMutOps for BoolVectorMut { + type Immutable = BoolVector; + + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Bool(self.nullability()) + } + + fn len(&self) -> usize { + self.bits.len() + } + + fn capacity(&self) -> usize { + self.bits.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.bits.reserve(additional); + if let Some(v) = self.validity.as_mut() { + v.reserve(additional); + } + } + + fn extend_from_vector(&mut self, other: &BoolVector) { + self.bits.append_buffer(&other.bits); + match (&mut self.validity, &other.validity) { + (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), + (Some(self_v), None) => self_v.append_n(true, other.bits.len()), + (None, Some(other_v)) => { + let mut new_validity = MaskMut::new_true(self.bits.len() - other.bits.len()); + new_validity.append_mask(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } + + fn freeze(self) -> Self::Immutable { + BoolVector { + bits: self.bits.freeze(), + validity: self.validity.map(|v| v.freeze()), + } + } + + fn split_off(&mut self, at: usize) -> Self { + BoolVectorMut { + bits: self.bits.split_off(at), + validity: self.validity.as_mut().map(|v| v.split_off(at)), + } + } + + fn unsplit(&mut self, other: Self) { + let other_len = other.bits.len(); + self.bits.unsplit(other.bits); + match (&mut self.validity, other.validity) { + (Some(self_v), Some(other_v)) => self_v.unsplit(other_v), + (Some(self_v), None) => self_v.append_n(true, other_len), + (None, Some(other_v)) => { + let mut new_validity = MaskMut::new_true(self.bits.len() - other_len); + new_validity.unsplit(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } +} diff --git a/vortex-vector/src/bool_mut.rs b/vortex-vector/src/bool_mut.rs deleted file mode 100644 index 627b5394bff..00000000000 --- a/vortex-vector/src/bool_mut.rs +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_buffer::BitBufferMut; -use vortex_dtype::{DType, Nullability}; -use vortex_mask::MaskMut; - -use crate::BoolVector; -use crate::ops::VectorMutOps; - -/// A mutable vector of boolean values. -pub struct BoolVectorMut { - pub(super) dtype: DType, - pub(super) bits: BitBufferMut, - pub(super) validity: MaskMut, -} - -impl BoolVectorMut { - /// Create a new mutable boolean vector with the given capacity and nullability. - pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { - Self { - dtype: DType::Bool(nullability), - bits: BitBufferMut::with_capacity(capacity), - validity: MaskMut::with_capacity(capacity), - } - } -} - -impl From for crate::VectorMut { - fn from(v: BoolVectorMut) -> Self { - Self::Bool(v) - } -} - -impl VectorMutOps for BoolVectorMut { - type Immutable = BoolVector; - - fn len(&self) -> usize { - self.bits.len() - } - - fn dtype(&self) -> &DType { - &self.dtype - } - - fn capacity(&self) -> usize { - self.bits.capacity() - } - - fn reserve(&mut self, additional: usize) { - self.bits.reserve(additional); - self.validity.reserve(additional); - } - - fn split_off(&mut self, at: usize) -> Self { - BoolVectorMut { - dtype: self.dtype.clone(), - bits: self.bits.split_off(at), - validity: self.validity.split_off(at), - } - } - - fn unsplit(&mut self, other: Self) { - self.bits.unsplit(other.bits); - self.validity.unsplit(other.validity); - } - - fn extend_from_vector(&mut self, other: &BoolVector) { - self.bits.append_buffer(&other.bits); - self.validity.append_mask(&other.validity); - } - - fn freeze(self) -> Self::Immutable { - BoolVector { - dtype: self.dtype, - bits: self.bits.freeze(), - validity: self.validity.freeze(), - } - } -} diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index 7ad2912a0e7..ce7ec5c6e90 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -1,222 +1,26 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Vector types for Vortex. +//! Mutable decompressed (canonical) vectors for Vortex. +//! +//! TODO docs. #![deny(missing_docs)] +// #![warn(clippy::missing_docs_in_private_items)] +// #![warn(clippy::missing_errors_doc)] +// #![warn(clippy::missing_panics_doc)] +// #![warn(clippy::missing_safety_doc)] -use vortex_dtype::DType; - -use crate::ops::{VectorMutOps, VectorOps}; +mod vector; +pub use vector::ops::{VectorMutOps, VectorOps}; +pub use vector::{Vector, VectorMut}; mod bool; -mod bool_mut; mod null; -mod null_mut; -mod ops; mod primitive; -mod primitive_mut; -mod pvector; - -pub use bool::*; -pub use bool_mut::*; -pub use null::*; -pub use null_mut::*; -pub use primitive::*; -pub use primitive_mut::*; -pub use pvector::*; -use vortex_error::vortex_panic; - -/// An enum over all vector types. -pub enum Vector { - /// Null - Null(NullVector), - /// Bool - Bool(BoolVector), - /// Primitive - Primitive(PVector), - // Decimal - // Decimal(DecimalVector), - // String - // String(StringVector), - // Binary - // Binary(BinaryVector), - // List - // List(ListVector), - // FixedList - // FixedList(FixedListVector), - // Struct - // Struct(StructVector), - // Extension - // Extension(ExtensionVector), -} - -/// An enum over all mutable vector types. -pub enum VectorMut { - /// Null - Null(NullVectorMut), - /// Bool - Bool(BoolVectorMut), - /// Primitive - Primitive(PVectorMut), -} - -impl VectorMut { - /// Create a new mutable vector with the given capacity and dtype. - pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { - use vortex_dtype::NativePType; - match dtype { - DType::Null => NullVectorMut::new(0).into(), - DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), - DType::Primitive(ptype, nullability) => { - PVectorMut::with_capacity(capacity, *ptype, *nullability).into() - } - _ => vortex_panic!("Unsupported dtype for VectorMut"), - } - } -} - -macro_rules! match_each_vector { - ($self:expr, | $vec:ident | $body:block) => {{ - match $self { - Vector::Null(v) => { - let $vec = v; - $body - } - Vector::Bool(v) => { - let $vec = v; - $body - } - Vector::Primitive(v) => { - let $vec = v; - $body - } - } - }}; -} - -macro_rules! match_each_vector_mut { - ($self:expr, | $vec:ident | $body:block) => {{ - match $self { - VectorMut::Null(v) => { - let $vec = v; - $body - } - VectorMut::Bool(v) => { - let $vec = v; - $body - } - VectorMut::Primitive(v) => { - let $vec = v; - $body - } - } - }}; -} - -macro_rules! match_each_vector_mut_pair { - ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ - match ($self, $other) { - (VectorMut::Null(a), VectorMut::Null(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (VectorMut::Bool(a), VectorMut::Bool(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (VectorMut::Primitive(a), VectorMut::Primitive(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - _ => vortex_panic!("Mismatched vector types"), - } - }}; -} - -macro_rules! match_each_vector_mut_immut_pair { - ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ - match ($self, $other) { - (VectorMut::Null(a), Vector::Null(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (VectorMut::Bool(a), Vector::Bool(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (VectorMut::Primitive(a), Vector::Primitive(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - _ => vortex_panic!("Mismatched vector types"), - } - }}; -} - -impl VectorOps for Vector { - type Mutable = VectorMut; - - fn len(&self) -> usize { - match_each_vector!(self, |v| { v.len() }) - } - - fn dtype(&self) -> &DType { - match_each_vector!(self, |v| { v.dtype() }) - } - - fn try_into_mut(self) -> Result - where - Self: Sized, - { - match_each_vector!(self, |v| { - v.try_into_mut().map(VectorMut::from).map_err(Vector::from) - }) - } -} - -impl VectorMutOps for VectorMut { - type Immutable = Vector; - - fn len(&self) -> usize { - match_each_vector_mut!(self, |v| { v.len() }) - } - - fn dtype(&self) -> &DType { - match_each_vector_mut!(self, |v| { v.dtype() }) - } - - fn capacity(&self) -> usize { - match_each_vector_mut!(self, |v| { v.capacity() }) - } - - fn reserve(&mut self, additional: usize) { - match_each_vector_mut!(self, |v| { v.reserve(additional) }) - } - - fn split_off(&mut self, at: usize) -> Self { - match_each_vector_mut!(self, |v| { v.split_off(at).into() }) - } - - fn unsplit(&mut self, other: Self) { - match_each_vector_mut_pair!(self, other, |a, b| { - a.unsplit(b); - }); - } - fn extend_from_vector(&mut self, other: &Self::Immutable) { - match_each_vector_mut_immut_pair!(self, other, |a, b| { - a.extend_from_vector(b); - }); - } +pub use bool::{BoolVector, BoolVectorMut}; +pub use null::{NullVector, NullVectorMut}; +pub use primitive::{GenericPVector, GenericPVectorMut, PrimitiveVector, PrimitiveVectorMut}; - fn freeze(self) -> Self::Immutable { - match_each_vector_mut!(self, |v| { v.freeze().into() }) - } -} +mod private; diff --git a/vortex-vector/src/null/mod.rs b/vortex-vector/src/null/mod.rs new file mode 100644 index 00000000000..ebb6f129a36 --- /dev/null +++ b/vortex-vector/src/null/mod.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod vector; +pub use vector::NullVector; + +mod vector_mut; +pub use vector_mut::NullVectorMut; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: NullVector) -> Self { + Self::Null(v) + } +} + +impl From for VectorMut { + fn from(v: NullVectorMut) -> Self { + Self::Null(v) + } +} diff --git a/vortex-vector/src/null.rs b/vortex-vector/src/null/vector.rs similarity index 72% rename from vortex-vector/src/null.rs rename to vortex-vector/src/null/vector.rs index b864aaabd00..a481ad442b5 100644 --- a/vortex-vector/src/null.rs +++ b/vortex-vector/src/null/vector.rs @@ -1,10 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_dtype::DType; +use vortex_dtype::{DType, Nullability}; -use crate::ops::VectorOps; -use crate::{NullVectorMut, Vector}; +use crate::{NullVectorMut, VectorOps}; /// An immutable vector of null values. pub struct NullVector { @@ -18,21 +17,19 @@ impl NullVector { } } -impl From for Vector { - fn from(v: NullVector) -> Self { - Self::Null(v) - } -} - impl VectorOps for NullVector { type Mutable = NullVectorMut; - fn len(&self) -> usize { - self.len + fn nullability(&self) -> Nullability { + Nullability::Nullable } - fn dtype(&self) -> &DType { - &DType::Null + fn dtype(&self) -> DType { + DType::Null + } + + fn len(&self) -> usize { + self.len } fn try_into_mut(self) -> Result diff --git a/vortex-vector/src/null_mut.rs b/vortex-vector/src/null/vector_mut.rs similarity index 61% rename from vortex-vector/src/null_mut.rs rename to vortex-vector/src/null/vector_mut.rs index 6066e0f234b..720dae2c9b5 100644 --- a/vortex-vector/src/null_mut.rs +++ b/vortex-vector/src/null/vector_mut.rs @@ -1,10 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_dtype::DType; +use vortex_dtype::{DType, Nullability}; -use crate::ops::VectorMutOps; -use crate::{NullVector, VectorMut}; +use super::NullVector; +use crate::VectorMutOps; /// A mutable vector of null values. pub struct NullVectorMut { @@ -18,30 +18,46 @@ impl NullVectorMut { } } -impl From for VectorMut { - fn from(v: NullVectorMut) -> Self { - Self::Null(v) - } -} - impl VectorMutOps for NullVectorMut { type Immutable = NullVector; - fn len(&self) -> usize { - self.len + fn nullability(&self) -> Nullability { + Nullability::Nullable + } + + fn dtype(&self) -> DType { + DType::Null } - fn dtype(&self) -> &DType { - &DType::Null + fn len(&self) -> usize { + self.len } fn capacity(&self) -> usize { usize::MAX } - fn reserve(&mut self, _additional: usize) {} + fn reserve(&mut self, _additional: usize) { + // We do not allocate memory for `NullVector`, so this is a no-op. + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + self.len += other.len; + } + + fn freeze(self) -> Self::Immutable { + NullVector::new(self.len) + } fn split_off(&mut self, at: usize) -> Self { + // TODO(connor): This is wrong (https://docs.rs/bytes/latest/src/bytes/bytes_mut.rs.html#320-335) + assert!( + at <= self.capacity(), + "split_off out of bounds: {:?} <= {:?}", + at, + self.capacity(), + ); + let new_len = self.len - at; self.len = at; NullVectorMut { len: new_len } @@ -50,12 +66,4 @@ impl VectorMutOps for NullVectorMut { fn unsplit(&mut self, other: Self) { self.len += other.len; } - - fn extend_from_vector(&mut self, other: &Self::Immutable) { - self.len += other.len; - } - - fn freeze(self) -> Self::Immutable { - NullVector::new(self.len) - } } diff --git a/vortex-vector/src/ops.rs b/vortex-vector/src/ops.rs deleted file mode 100644 index 6ad122c7bcf..00000000000 --- a/vortex-vector/src/ops.rs +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_dtype::DType; - -use crate::{Vector, VectorMut}; - -/// Common operations for immutable vectors. -pub trait VectorOps: Into { - type Mutable: VectorMutOps; - - /// Returns the length of the vector. - fn len(&self) -> usize; - - /// Returns the data type of the vector. - fn dtype(&self) -> &DType; - - /// Try to convert self into a mutable vector. - // - // If self is the only unique strong reference, this will succeed and return a mutable vector - // with the contents of self without copying. If self is not unique, this will fail and return - // self. - fn try_into_mut(self) -> Result - where - Self: Sized; -} - -/// Common operations for mutable vectors. -pub trait VectorMutOps: Into { - type Immutable: VectorOps; - - /// Returns the length of the vector. - fn len(&self) -> usize; - - /// Returns the data type of the vector. - fn dtype(&self) -> &DType; - - /// Returns the capacity of the vector. - fn capacity(&self) -> usize; - - /// Reserves capacity for at least `additional` more elements to be inserted. - fn reserve(&mut self, additional: usize); - - /// Splits the vector into two at the given index. - /// - /// Afterward, self contains elements `[0, at)`, and the returned vector contains elements - /// `[at, capacity)`. It’s guaranteed that the memory does not move, that is, the address of - /// self does not change, and the address of the returned slice is at bytes after that. - /// - /// This is an O(1) operation that just increases the reference count and sets a few indices. - fn split_off(&mut self, at: usize) -> Self; - - /// Absorbs a mutable vector that was previously split off. - /// - /// If the two vectors were previously contiguous and not mutated in a way that causes - /// re-allocation i.e., if other was created by calling split_off on this vector, then this is - /// an O(1) operation that just decreases a reference count and sets a few indices. - /// - // Otherwise, this method degenerates to self.extend_from_vector(other.as_ref()). - fn unsplit(&mut self, other: Self); - - /// Extends the vector by appending elements from another vector. - fn extend_from_vector(&mut self, other: &Self::Immutable); - - /// Freeze the vector into an immutable one. - fn freeze(self) -> Self::Immutable; -} diff --git a/vortex-vector/src/primitive.rs b/vortex-vector/src/primitive.rs deleted file mode 100644 index 9a39f571c55..00000000000 --- a/vortex-vector/src/primitive.rs +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_buffer::Buffer; -use vortex_dtype::{DType, NativePType}; -use vortex_mask::Mask; - -use crate::ops::VectorOps; -use crate::{PVector, PrimitiveVectorMut, Vector}; - -/// An immutable vector of primitive values. -pub struct PrimitiveVector { - pub(super) dtype: DType, - pub(super) elements: Buffer, - pub(super) validity: Mask, -} - -impl From> for Vector { - fn from(v: PrimitiveVector) -> Self { - Self::Primitive(PVector::from(v)) - } -} - -impl VectorOps for PrimitiveVector { - type Mutable = PrimitiveVectorMut; - - /// Returns the length of the vector. - fn len(&self) -> usize { - self.elements.len() - } - - /// Returns the data type of the vector. - fn dtype(&self) -> &DType { - &self.dtype - } - - /// Try to convert self into a mutable vector. - fn try_into_mut(self) -> Result, Self> { - let elements = match self.elements.try_into_mut() { - Ok(elements) => elements, - Err(elements) => { - return Err(PrimitiveVector { - dtype: self.dtype, - elements, - validity: self.validity, - }); - } - }; - - let validity = match self.validity.try_into_mut() { - Ok(validity) => validity, - Err(validity) => { - return Err(PrimitiveVector { - dtype: self.dtype, - elements: elements.freeze(), - validity, - }); - } - }; - - Ok(PrimitiveVectorMut { - dtype: self.dtype, - elements, - validity, - }) - } -} diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs new file mode 100644 index 00000000000..3ce671e1a5b --- /dev/null +++ b/vortex-vector/src/primitive/generic.rs @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::Buffer; +use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_mask::Mask; + +use crate::{GenericPVectorMut, PrimitiveVector, Vector, VectorOps}; + +/// An immutable vector of generic primitive values. +pub struct GenericPVector { + pub(super) elements: Buffer, + pub(super) validity: Option, +} + +impl From> for Vector { + fn from(v: GenericPVector) -> Self { + Self::Primitive(PrimitiveVector::from(v)) + } +} + +impl VectorOps for GenericPVector { + type Mutable = GenericPVectorMut; + + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Primitive(T::PTYPE, self.nullability()) + } + + fn len(&self) -> usize { + self.elements.len() + } + + /// Try to convert self into a mutable vector. + fn try_into_mut(self) -> Result, Self> { + let elements = match self.elements.try_into_mut() { + Ok(elements) => elements, + Err(elements) => { + return Err(GenericPVector { + elements, + validity: self.validity, + }); + } + }; + + let validity = match self.validity { + Some(v) => match v.try_into_mut() { + Ok(v) => Some(v), + Err(v) => { + return Err(GenericPVector { + elements: elements.freeze(), + validity: Some(v), + }); + } + }, + None => None, + }; + + Ok(GenericPVectorMut { elements, validity }) + } +} diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs new file mode 100644 index 00000000000..c2cd65681b4 --- /dev/null +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BufferMut; +use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_mask::MaskMut; + +use crate::{GenericPVector, PrimitiveVectorMut, VectorMut, VectorMutOps}; + +/// A mutable vector of primitive values. +pub struct GenericPVectorMut { + pub(super) elements: BufferMut, + pub(super) validity: Option, +} + +impl GenericPVectorMut { + /// Create a new mutable primitive vector with the given capacity and nullability. + pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + let validity = match nullability { + Nullability::NonNullable => None, + Nullability::Nullable => Some(MaskMut::with_capacity(capacity)), + }; + + Self { + elements: BufferMut::with_capacity(capacity), + validity, + } + } +} + +impl From> for VectorMut { + fn from(val: GenericPVectorMut) -> Self { + VectorMut::Primitive(PrimitiveVectorMut::from(val)) + } +} + +impl VectorMutOps for GenericPVectorMut { + type Immutable = GenericPVector; + + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Primitive(T::PTYPE, self.nullability()) + } + + fn len(&self) -> usize { + self.elements.len() + } + + fn capacity(&self) -> usize { + self.elements.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.elements.reserve(additional); + if let Some(v) = self.validity.as_mut() { + v.reserve(additional); + } + } + + /// Extends the vector by appending elements from another vector. + fn extend_from_vector(&mut self, other: &GenericPVector) { + self.elements.extend_from_slice(other.elements.as_slice()); + match (&mut self.validity, &other.validity) { + (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), + (Some(self_v), None) => self_v.append_n(true, other.elements.len()), + (None, Some(other_v)) => { + let mut new_validity = + MaskMut::new_true(self.elements.len() - other.elements.len()); + new_validity.append_mask(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } + + /// Freeze the vector into an immutable one. + fn freeze(self) -> GenericPVector { + GenericPVector { + elements: self.elements.freeze(), + validity: self.validity.map(|v| v.freeze()), + } + } + + fn split_off(&mut self, at: usize) -> Self { + GenericPVectorMut { + elements: self.elements.split_off(at), + validity: self.validity.as_mut().map(|v| v.split_off(at)), + } + } + + fn unsplit(&mut self, other: Self) { + let other_len = other.elements.len(); + self.elements.unsplit(other.elements); + match (&mut self.validity, other.validity) { + (Some(self_v), Some(other_v)) => self_v.unsplit(other_v), + (Some(self_v), None) => self_v.append_n(true, other_len), + (None, Some(other_v)) => { + let mut new_validity = MaskMut::new_true(self.elements.len() - other_len); + new_validity.unsplit(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } +} diff --git a/vortex-vector/src/primitive/macros.rs b/vortex-vector/src/primitive/macros.rs new file mode 100644 index 00000000000..8d9b5b0195b --- /dev/null +++ b/vortex-vector/src/primitive/macros.rs @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_pvector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PrimitiveVector::U8(v) => { + let $vec = v; + $body + } + PrimitiveVector::U16(v) => { + let $vec = v; + $body + } + PrimitiveVector::U32(v) => { + let $vec = v; + $body + } + PrimitiveVector::U64(v) => { + let $vec = v; + $body + } + PrimitiveVector::I8(v) => { + let $vec = v; + $body + } + PrimitiveVector::I16(v) => { + let $vec = v; + $body + } + PrimitiveVector::I32(v) => { + let $vec = v; + $body + } + PrimitiveVector::I64(v) => { + let $vec = v; + $body + } + PrimitiveVector::F16(v) => { + let $vec = v; + $body + } + PrimitiveVector::F32(v) => { + let $vec = v; + $body + } + PrimitiveVector::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_pvector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PrimitiveVectorMut::U8(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::U16(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::U32(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::U64(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I8(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I16(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I32(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I64(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::F16(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::F32(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_pvector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PrimitiveVectorMut::U8(a), PrimitiveVectorMut::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U16(a), PrimitiveVectorMut::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U32(a), PrimitiveVectorMut::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U64(a), PrimitiveVectorMut::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I8(a), PrimitiveVectorMut::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I16(a), PrimitiveVectorMut::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I32(a), PrimitiveVectorMut::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I64(a), PrimitiveVectorMut::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F16(a), PrimitiveVectorMut::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F32(a), PrimitiveVectorMut::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F64(a), PrimitiveVectorMut::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched primitive vector types"), + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_pvector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PrimitiveVectorMut::U8(a), PrimitiveVector::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U16(a), PrimitiveVector::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U32(a), PrimitiveVector::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U64(a), PrimitiveVector::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I8(a), PrimitiveVector::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I16(a), PrimitiveVector::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I32(a), PrimitiveVector::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I64(a), PrimitiveVector::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F16(a), PrimitiveVector::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F32(a), PrimitiveVector::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F64(a), PrimitiveVector::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched primitive vector types"), + } + }}; +} diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs new file mode 100644 index 00000000000..25c901aba14 --- /dev/null +++ b/vortex-vector/src/primitive/mod.rs @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod generic; +pub use generic::GenericPVector; + +mod generic_mut; +pub use generic_mut::GenericPVectorMut; + +mod vector; +pub use vector::PrimitiveVector; + +mod vector_mut; +pub use vector_mut::PrimitiveVectorMut; + +mod macros; diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs new file mode 100644 index 00000000000..2d803aeaca6 --- /dev/null +++ b/vortex-vector/src/primitive/vector.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::half::f16; +use vortex_dtype::{DType, NativePType, Nullability, PTypeUpcast}; + +use super::{GenericPVector, GenericPVectorMut, PrimitiveVectorMut}; +use crate::{Vector, VectorOps, match_each_pvector}; + +/// An enum over all primitive vector types. +pub enum PrimitiveVector { + /// U8 + U8(GenericPVector), + /// U16 + U16(GenericPVector), + /// U32 + U32(GenericPVector), + /// U64 + U64(GenericPVector), + /// I8 + I8(GenericPVector), + /// I16 + I16(GenericPVector), + /// I32 + I32(GenericPVector), + /// I64 + I64(GenericPVector), + /// F16 + F16(GenericPVector), + /// F32 + F32(GenericPVector), + /// F64 + F64(GenericPVector), +} + +impl From for Vector { + fn from(v: PrimitiveVector) -> Self { + Self::Primitive(v) + } +} + +impl VectorOps for PrimitiveVector { + type Mutable = PrimitiveVectorMut; + + fn nullability(&self) -> Nullability { + match_each_pvector!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_pvector!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_pvector!(self, |v| { v.len() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_pvector!(self, |v| { + v.try_into_mut() + .map(PrimitiveVectorMut::from) + .map_err(PrimitiveVector::from) + }) + } +} + +// From impls for PrimitiveVector variants +impl From> for PrimitiveVector { + fn from(v: GenericPVector) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PrimitiveVector { + type Input = GenericPVector; + + fn from_u8(input: Self::Input) -> Self { + PrimitiveVector::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PrimitiveVector::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PrimitiveVector::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PrimitiveVector::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PrimitiveVector::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PrimitiveVector::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PrimitiveVector::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PrimitiveVector::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PrimitiveVector::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PrimitiveVector::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PrimitiveVector::F64(input) + } +} + +// From impls for PrimitiveVectorMut variants +impl From> for PrimitiveVectorMut { + fn from(v: GenericPVectorMut) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PrimitiveVectorMut { + type Input = GenericPVectorMut; + + fn from_u8(input: Self::Input) -> Self { + PrimitiveVectorMut::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PrimitiveVectorMut::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PrimitiveVectorMut::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PrimitiveVectorMut::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PrimitiveVectorMut::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PrimitiveVectorMut::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PrimitiveVectorMut::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PrimitiveVectorMut::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PrimitiveVectorMut::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PrimitiveVectorMut::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PrimitiveVectorMut::F64(input) + } +} diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs new file mode 100644 index 00000000000..c2365a22eb7 --- /dev/null +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::half::f16; +use vortex_dtype::{DType, Nullability, PType}; + +use crate::{ + GenericPVectorMut, PrimitiveVector, VectorMut, VectorMutOps, match_each_pvector_mut, + match_each_pvector_mut_immut_pair, match_each_pvector_mut_pair, +}; + +/// An enum over all mutable primitive vector types. +pub enum PrimitiveVectorMut { + /// U8 + U8(GenericPVectorMut), + /// U16 + U16(GenericPVectorMut), + /// U32 + U32(GenericPVectorMut), + /// U64 + U64(GenericPVectorMut), + /// I8 + I8(GenericPVectorMut), + /// I16 + I16(GenericPVectorMut), + /// I32 + I32(GenericPVectorMut), + /// I64 + I64(GenericPVectorMut), + /// F16 + F16(GenericPVectorMut), + /// F32 + F32(GenericPVectorMut), + /// F64 + F64(GenericPVectorMut), +} + +impl PrimitiveVectorMut { + /// Create a new mutable primitive vector with the given capacity, primitive type, and nullability. + pub fn with_capacity(capacity: usize, ptype: PType, nullability: Nullability) -> Self { + match ptype { + PType::U8 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I8 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::F16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::F32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::F64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + } + } +} + +impl From for VectorMut { + fn from(v: PrimitiveVectorMut) -> Self { + Self::Primitive(v) + } +} + +impl VectorMutOps for PrimitiveVectorMut { + type Immutable = PrimitiveVector; + + fn nullability(&self) -> Nullability { + match_each_pvector_mut!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_pvector_mut!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_pvector_mut!(self, |v| { v.len() }) + } + + fn capacity(&self) -> usize { + match_each_pvector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_pvector_mut!(self, |v| { v.reserve(additional) }) + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_pvector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_pvector_mut!(self, |v| { v.freeze().into() }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_pvector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_pvector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } +} diff --git a/vortex-vector/src/primitive_mut.rs b/vortex-vector/src/primitive_mut.rs deleted file mode 100644 index 14ef69aeada..00000000000 --- a/vortex-vector/src/primitive_mut.rs +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_buffer::BufferMut; -use vortex_dtype::{DType, NativePType, Nullability}; -use vortex_mask::MaskMut; - -use crate::ops::VectorMutOps; -use crate::{PVectorMut, PrimitiveVector, VectorMut}; - -/// A mutable vector of primitive values. -pub struct PrimitiveVectorMut { - pub(super) dtype: DType, - pub(super) elements: BufferMut, - pub(super) validity: MaskMut, -} - -impl PrimitiveVectorMut { - /// Create a new mutable primitive vector with the given capacity and nullability. - pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { - Self { - dtype: DType::Primitive(T::PTYPE, nullability), - elements: BufferMut::with_capacity(capacity), - validity: MaskMut::with_capacity(capacity), - } - } -} - -impl From> for VectorMut { - fn from(val: PrimitiveVectorMut) -> Self { - VectorMut::Primitive(PVectorMut::from(val)) - } -} - -impl VectorMutOps for PrimitiveVectorMut { - type Immutable = PrimitiveVector; - - fn len(&self) -> usize { - self.elements.len() - } - - fn dtype(&self) -> &DType { - &self.dtype - } - - fn capacity(&self) -> usize { - self.elements.capacity() - } - - fn reserve(&mut self, additional: usize) { - self.elements.reserve(additional); - self.validity.reserve(additional); - } - - fn split_off(&mut self, at: usize) -> Self { - PrimitiveVectorMut { - dtype: self.dtype.clone(), - elements: self.elements.split_off(at), - validity: self.validity.split_off(at), - } - } - - fn unsplit(&mut self, other: Self) { - self.elements.unsplit(other.elements); - self.validity.unsplit(other.validity); - } - - /// Extends the vector by appending elements from another vector. - fn extend_from_vector(&mut self, other: &PrimitiveVector) { - self.elements.extend_from_slice(other.elements.as_slice()); - self.validity.append_mask(&other.validity); - } - - /// Freeze the vector into an immutable one. - fn freeze(self) -> PrimitiveVector { - PrimitiveVector { - dtype: self.dtype, - elements: self.elements.freeze(), - validity: self.validity.freeze(), - } - } -} diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs new file mode 100644 index 00000000000..b9aabca330a --- /dev/null +++ b/vortex-vector/src/private.rs @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! This private module contains the [`Sealed`] implementations for different [`Vector`] types. This +//! allows us to seal our [`VectorOps`] and [`VectorMutOps`] traits. +//! +//! Sealing these traits prevents external crates from implementing them while still allowing public +//! usage, which gives us the freedom to add new trait methods in the future without breaking +//! backward compatibility. + +use crate::*; + +pub trait Sealed {} + +impl Sealed for Vector {} +impl Sealed for VectorMut {} + +impl Sealed for NullVector {} +impl Sealed for NullVectorMut {} + +impl Sealed for BoolVector {} +impl Sealed for BoolVectorMut {} + +impl Sealed for PrimitiveVector {} +impl Sealed for PrimitiveVectorMut {} +impl Sealed for GenericPVector {} +impl Sealed for GenericPVectorMut {} diff --git a/vortex-vector/src/pvector.rs b/vortex-vector/src/pvector.rs deleted file mode 100644 index ca21d86e02c..00000000000 --- a/vortex-vector/src/pvector.rs +++ /dev/null @@ -1,493 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_dtype::half::f16; -use vortex_dtype::{DType, NativePType, Nullability, PType, PTypeUpcast}; -use vortex_error::vortex_panic; - -use crate::ops::{VectorMutOps, VectorOps}; -use crate::{PrimitiveVector, PrimitiveVectorMut, Vector}; - -/// An enum over all primitive vector types. -pub enum PVector { - /// U8 - U8(PrimitiveVector), - /// U16 - U16(PrimitiveVector), - /// U32 - U32(PrimitiveVector), - /// U64 - U64(PrimitiveVector), - /// I8 - I8(PrimitiveVector), - /// I16 - I16(PrimitiveVector), - /// I32 - I32(PrimitiveVector), - /// I64 - I64(PrimitiveVector), - /// F16 - F16(PrimitiveVector), - /// F32 - F32(PrimitiveVector), - /// F64 - F64(PrimitiveVector), -} - -/// An enum over all mutable primitive vector types. -pub enum PVectorMut { - /// U8 - U8(PrimitiveVectorMut), - /// U16 - U16(PrimitiveVectorMut), - /// U32 - U32(PrimitiveVectorMut), - /// U64 - U64(PrimitiveVectorMut), - /// I8 - I8(PrimitiveVectorMut), - /// I16 - I16(PrimitiveVectorMut), - /// I32 - I32(PrimitiveVectorMut), - /// I64 - I64(PrimitiveVectorMut), - /// F16 - F16(PrimitiveVectorMut), - /// F32 - F32(PrimitiveVectorMut), - /// F64 - F64(PrimitiveVectorMut), -} - -impl PVectorMut { - /// Create a new mutable primitive vector with the given capacity, primitive type, and nullability. - pub fn with_capacity(capacity: usize, ptype: PType, nullability: Nullability) -> Self { - match ptype { - PType::U8 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::U16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::U32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::U64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::I8 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::I16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::I32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::I64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::F16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::F32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::F64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - } - } -} - -macro_rules! match_each_pvector { - ($self:expr, | $vec:ident | $body:block) => {{ - match $self { - PVector::U8(v) => { - let $vec = v; - $body - } - PVector::U16(v) => { - let $vec = v; - $body - } - PVector::U32(v) => { - let $vec = v; - $body - } - PVector::U64(v) => { - let $vec = v; - $body - } - PVector::I8(v) => { - let $vec = v; - $body - } - PVector::I16(v) => { - let $vec = v; - $body - } - PVector::I32(v) => { - let $vec = v; - $body - } - PVector::I64(v) => { - let $vec = v; - $body - } - PVector::F16(v) => { - let $vec = v; - $body - } - PVector::F32(v) => { - let $vec = v; - $body - } - PVector::F64(v) => { - let $vec = v; - $body - } - } - }}; -} - -macro_rules! match_each_pvector_mut { - ($self:expr, | $vec:ident | $body:block) => {{ - match $self { - PVectorMut::U8(v) => { - let $vec = v; - $body - } - PVectorMut::U16(v) => { - let $vec = v; - $body - } - PVectorMut::U32(v) => { - let $vec = v; - $body - } - PVectorMut::U64(v) => { - let $vec = v; - $body - } - PVectorMut::I8(v) => { - let $vec = v; - $body - } - PVectorMut::I16(v) => { - let $vec = v; - $body - } - PVectorMut::I32(v) => { - let $vec = v; - $body - } - PVectorMut::I64(v) => { - let $vec = v; - $body - } - PVectorMut::F16(v) => { - let $vec = v; - $body - } - PVectorMut::F32(v) => { - let $vec = v; - $body - } - PVectorMut::F64(v) => { - let $vec = v; - $body - } - } - }}; -} - -macro_rules! match_each_pvector_mut_pair { - ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ - match ($self, $other) { - (PVectorMut::U8(a), PVectorMut::U8(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U16(a), PVectorMut::U16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U32(a), PVectorMut::U32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U64(a), PVectorMut::U64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I8(a), PVectorMut::I8(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I16(a), PVectorMut::I16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I32(a), PVectorMut::I32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I64(a), PVectorMut::I64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F16(a), PVectorMut::F16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F32(a), PVectorMut::F32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F64(a), PVectorMut::F64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - _ => vortex_panic!("Mismatched primitive vector types"), - } - }}; -} - -macro_rules! match_each_pvector_mut_immut_pair { - ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ - match ($self, $other) { - (PVectorMut::U8(a), PVector::U8(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U16(a), PVector::U16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U32(a), PVector::U32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U64(a), PVector::U64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I8(a), PVector::I8(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I16(a), PVector::I16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I32(a), PVector::I32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I64(a), PVector::I64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F16(a), PVector::F16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F32(a), PVector::F32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F64(a), PVector::F64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - _ => vortex_panic!("Mismatched primitive vector types"), - } - }}; -} - -impl From for Vector { - fn from(v: PVector) -> Self { - Self::Primitive(v) - } -} - -impl VectorOps for PVector { - type Mutable = PVectorMut; - - fn len(&self) -> usize { - match_each_pvector!(self, |v| { v.len() }) - } - - fn dtype(&self) -> &DType { - match_each_pvector!(self, |v| { v.dtype() }) - } - - fn try_into_mut(self) -> Result - where - Self: Sized, - { - match_each_pvector!(self, |v| { - v.try_into_mut() - .map(PVectorMut::from) - .map_err(PVector::from) - }) - } -} - -impl From for crate::VectorMut { - fn from(v: PVectorMut) -> Self { - Self::Primitive(v) - } -} - -impl VectorMutOps for PVectorMut { - type Immutable = PVector; - - fn len(&self) -> usize { - match_each_pvector_mut!(self, |v| { v.len() }) - } - - fn dtype(&self) -> &DType { - match_each_pvector_mut!(self, |v| { v.dtype() }) - } - - fn capacity(&self) -> usize { - match_each_pvector_mut!(self, |v| { v.capacity() }) - } - - fn reserve(&mut self, additional: usize) { - match_each_pvector_mut!(self, |v| { v.reserve(additional) }) - } - - fn split_off(&mut self, at: usize) -> Self { - match_each_pvector_mut!(self, |v| { v.split_off(at).into() }) - } - - fn unsplit(&mut self, other: Self) { - match_each_pvector_mut_pair!(self, other, |a, b| { - a.unsplit(b); - }); - } - - fn extend_from_vector(&mut self, other: &Self::Immutable) { - match_each_pvector_mut_immut_pair!(self, other, |a, b| { - a.extend_from_vector(b); - }); - } - - fn freeze(self) -> Self::Immutable { - match_each_pvector_mut!(self, |v| { v.freeze().into() }) - } -} - -// From impls for PVector variants -impl From> for PVector { - fn from(v: PrimitiveVector) -> Self { - T::upcast(v) - } -} - -impl PTypeUpcast for PVector { - type Input = PrimitiveVector; - - fn from_u8(input: Self::Input) -> Self { - PVector::U8(input) - } - - fn from_u16(input: Self::Input) -> Self { - PVector::U16(input) - } - - fn from_u32(input: Self::Input) -> Self { - PVector::U32(input) - } - - fn from_u64(input: Self::Input) -> Self { - PVector::U64(input) - } - - fn from_i8(input: Self::Input) -> Self { - PVector::I8(input) - } - - fn from_i16(input: Self::Input) -> Self { - PVector::I16(input) - } - - fn from_i32(input: Self::Input) -> Self { - PVector::I32(input) - } - - fn from_i64(input: Self::Input) -> Self { - PVector::I64(input) - } - - fn from_f16(input: Self::Input) -> Self { - PVector::F16(input) - } - - fn from_f32(input: Self::Input) -> Self { - PVector::F32(input) - } - - fn from_f64(input: Self::Input) -> Self { - PVector::F64(input) - } -} - -// From impls for PVectorMut variants -impl From> for PVectorMut { - fn from(v: PrimitiveVectorMut) -> Self { - T::upcast(v) - } -} - -impl PTypeUpcast for PVectorMut { - type Input = PrimitiveVectorMut; - - fn from_u8(input: Self::Input) -> Self { - PVectorMut::U8(input) - } - - fn from_u16(input: Self::Input) -> Self { - PVectorMut::U16(input) - } - - fn from_u32(input: Self::Input) -> Self { - PVectorMut::U32(input) - } - - fn from_u64(input: Self::Input) -> Self { - PVectorMut::U64(input) - } - - fn from_i8(input: Self::Input) -> Self { - PVectorMut::I8(input) - } - - fn from_i16(input: Self::Input) -> Self { - PVectorMut::I16(input) - } - - fn from_i32(input: Self::Input) -> Self { - PVectorMut::I32(input) - } - - fn from_i64(input: Self::Input) -> Self { - PVectorMut::I64(input) - } - - fn from_f16(input: Self::Input) -> Self { - PVectorMut::F16(input) - } - - fn from_f32(input: Self::Input) -> Self { - PVectorMut::F32(input) - } - - fn from_f64(input: Self::Input) -> Self { - PVectorMut::F64(input) - } -} diff --git a/vortex-vector/src/vector/macros.rs b/vortex-vector/src/vector/macros.rs new file mode 100644 index 00000000000..04e53544dc3 --- /dev/null +++ b/vortex-vector/src/vector/macros.rs @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + Vector::Null(v) => { + let $vec = v; + $body + } + Vector::Bool(v) => { + let $vec = v; + $body + } + Vector::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + VectorMut::Null(v) => { + let $vec = v; + $body + } + VectorMut::Bool(v) => { + let $vec = v; + $body + } + VectorMut::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), VectorMut::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), VectorMut::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), VectorMut::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched vector types"), + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), Vector::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), Vector::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), Vector::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched vector types"), + } + }}; +} diff --git a/vortex-vector/src/vector/mod.rs b/vortex-vector/src/vector/mod.rs new file mode 100644 index 00000000000..5ad76b6b174 --- /dev/null +++ b/vortex-vector/src/vector/mod.rs @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::DType; +use vortex_error::vortex_panic; + +use crate::{ + BoolVector, BoolVectorMut, NullVector, NullVectorMut, PrimitiveVector, PrimitiveVectorMut, +}; + +mod macros; +pub(super) mod ops; + +/// An enum over all vector types. +pub enum Vector { + /// Null + Null(NullVector), + /// Bool + Bool(BoolVector), + /// Primitive + /// + /// TODO(connor): Document that this is an enum, not a struct. + Primitive(PrimitiveVector), + // Decimal + // Decimal(DecimalVector), + // String + // String(StringVector), + // Binary + // Binary(BinaryVector), + // List + // List(ListVector), + // FixedList + // FixedList(FixedListVector), + // Struct + // Struct(StructVector), + // Extension + // Extension(ExtensionVector), +} + +/// An enum over all mutable vector types. +pub enum VectorMut { + /// Null + Null(NullVectorMut), + /// Bool + Bool(BoolVectorMut), + /// Primitive + Primitive(PrimitiveVectorMut), +} + +impl VectorMut { + /// Create a new mutable vector with the given capacity and dtype. + pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { + match dtype { + DType::Null => NullVectorMut::new(0).into(), + DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), + DType::Primitive(ptype, nullability) => { + PrimitiveVectorMut::with_capacity(capacity, *ptype, *nullability).into() + } + _ => vortex_panic!("Unsupported dtype for VectorMut"), + } + } +} diff --git a/vortex-vector/src/vector/ops.rs b/vortex-vector/src/vector/ops.rs new file mode 100644 index 00000000000..5585b4ffb5e --- /dev/null +++ b/vortex-vector/src/vector/ops.rs @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`VectorOps`] and [`VectorOpsMut`] for [`Vector`] and +//! [`VectorMut`], respecitively. + +use vortex_dtype::{DType, Nullability}; + +use crate::{ + Vector, VectorMut, match_each_vector, match_each_vector_mut, match_each_vector_mut_immut_pair, + match_each_vector_mut_pair, private, +}; + +/// Common operations for immutable vectors. +pub trait VectorOps: private::Sealed + Into { + /// The mutable equivalent of this immutable vector. + type Mutable: VectorMutOps; + + /// Returns the [`Nullability`] of the vector. + fn nullability(&self) -> Nullability; + + /// Returns the [`DType`] (or data type) of the vector. + fn dtype(&self) -> DType; + + /// Returns the number of elements in the vector, also referred to as its "length". + fn len(&self) -> usize; + + /// Returns `true` if the vector contains no elements. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Try to convert `self` into a mutable vector (implementing [`VectorMutOps`]). + /// + /// If `self` is the only unique strong reference (it effectively "owns" the buffer), this + /// method will succeed and return a mutable vector with the contents of `self` **without** + /// copying. + /// + /// If `self` is not unique, this will fail and return `self` back. + fn try_into_mut(self) -> Result + where + Self: Sized; +} + +impl VectorOps for Vector { + type Mutable = VectorMut; + + fn nullability(&self) -> Nullability { + match_each_vector!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_vector!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_vector!(self, |v| { v.len() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_vector!(self, |v| { + v.try_into_mut().map(VectorMut::from).map_err(Vector::from) + }) + } +} + +/// Common operations for mutable vectors. +pub trait VectorMutOps: private::Sealed + Into { + /// The immutable equivalent of this mutable vector. + type Immutable: VectorOps; + + /// Returns the [`Nullability`] of the vector. + fn nullability(&self) -> Nullability; + + /// Returns the [`DType`] (or data type) of the vector. + fn dtype(&self) -> DType; + + /// Returns the number of elements in the vector, also referred to as its "length". + fn len(&self) -> usize; + + /// Returns `true` if the vector contains no elements. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the total number of elements the vector can hold without reallocating. + fn capacity(&self) -> usize; + + /// Reserves capacity for at least `additional` more elements to be inserted in the given + /// vector. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, the capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + /// + /// Please let us know if you need `reserve_exact` functionality! + fn reserve(&mut self, additional: usize); + + /// Extends the vector by appending elements from another vector. + fn extend_from_vector(&mut self, other: &Self::Immutable); + + /// Converts `self` into an immutable vector. + fn freeze(self) -> Self::Immutable; + + /// Splits the vector into two at the given index. + /// + /// Afterward, `self` contains elements `[0, at)`, and the returned vector contains elements + /// `[at, capacity)`. It's guaranteed that the memory does not move, that is, the address of + /// `self` does not change, and the address of the returned slice is at bytes after that. + /// + /// This is an `O(1)` operation that just increases the reference count and sets a few indices. + /// + /// # Panics + /// + /// Panics if we try to split off more than the current capacity of the vector (if + /// `at > capacity`). + fn split_off(&mut self, at: usize) -> Self; + + /// Absorbs a mutable vector that was previously split off. + /// + /// If the two vectors were previously contiguous and not mutated in a way that causes + /// re-allocation i.e., if other was created by calling split_off on this vector, then this is + /// an O(1) operation that just decreases a reference count and sets a few indices. + /// + /// Otherwise, this method degenerates to `self.extend_from_vector(other.as_ref())`. + fn unsplit(&mut self, other: Self); +} + +impl VectorMutOps for VectorMut { + type Immutable = Vector; + + fn nullability(&self) -> Nullability { + match_each_vector_mut!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_vector_mut!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_vector_mut!(self, |v| { v.len() }) + } + + fn capacity(&self) -> usize { + match_each_vector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_vector_mut!(self, |v| { v.reserve(additional) }) + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_vector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_vector_mut!(self, |v| { v.freeze().into() }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_vector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_vector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } +} From 87543f73640008e5fdcd7e3076a123476b47a865 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 20 Oct 2025 12:06:58 -0400 Subject: [PATCH 03/13] add documentation Signed-off-by: Connor Tsui --- vortex-buffer/src/bit/buf.rs | 2 +- vortex-buffer/src/bit/buf_mut.rs | 22 ++++++ vortex-mask/src/lib.rs | 2 +- vortex-mask/src/mask_mut.rs | 2 + vortex-vector/src/bool/mod.rs | 2 + vortex-vector/src/bool/vector.rs | 14 ++++ vortex-vector/src/bool/vector_mut.rs | 22 +++++- vortex-vector/src/lib.rs | 17 +++-- vortex-vector/src/null/mod.rs | 2 + vortex-vector/src/null/vector.rs | 10 ++- vortex-vector/src/null/vector_mut.rs | 12 +++- vortex-vector/src/primitive/generic.rs | 17 +++-- vortex-vector/src/primitive/generic_mut.rs | 19 ++--- vortex-vector/src/primitive/mod.rs | 49 +++++++++++++ vortex-vector/src/primitive/vector.rs | 80 ++++------------------ vortex-vector/src/primitive/vector_mut.rs | 78 ++++++++++++++++++--- vortex-vector/src/private.rs | 1 + vortex-vector/src/vector/macros.rs | 2 + vortex-vector/src/vector/mod.rs | 34 +++++++-- vortex-vector/src/vector/ops.rs | 30 ++++---- 20 files changed, 300 insertions(+), 117 deletions(-) diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index e481f10c718..be431a4384a 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -11,7 +11,7 @@ use crate::bit::{ use crate::{Alignment, BitBufferMut, Buffer, BufferMut, ByteBuffer, buffer}; /// An immutable bitset stored as a packed byte buffer. -#[derive(Clone, Debug, Eq)] +#[derive(Debug, Clone, Eq)] pub struct BitBuffer { buffer: ByteBuffer, len: usize, diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index 9f83f65aa9b..d9fc075cfe2 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use arrow_buffer::bit_chunk_iterator::BitChunks; use bitvec::view::BitView; use crate::bit::{get_bit_unchecked, set_bit_unchecked, unset_bit_unchecked}; @@ -25,12 +26,26 @@ use crate::{BitBuffer, BufferMut, ByteBufferMut, buffer_mut}; /// ``` /// /// See also: [`BitBuffer`]. +#[derive(Debug, Clone, Eq)] pub struct BitBufferMut { buffer: ByteBufferMut, offset: usize, len: usize, } +impl PartialEq for BitBufferMut { + fn eq(&self, other: &Self) -> bool { + if self.len != other.len { + return false; + } + + self.chunks() + .iter() + .zip(other.chunks()) + .all(|(a, b)| a == b) + } +} + impl BitBufferMut { /// Create new bit buffer from given byte buffer and logical bit length pub fn from_buffer(buffer: ByteBufferMut, offset: usize, len: usize) -> Self { @@ -118,6 +133,13 @@ impl BitBufferMut { unsafe { get_bit_unchecked(self.buffer.as_ptr(), self.offset + index) } } + /// Access chunks of the underlying buffer as 8 byte chunks with a final trailer + /// + /// If you're performing operations on a single buffer, prefer [BitBuffer::unaligned_chunks] + pub fn chunks(&self) -> BitChunks<'_> { + BitChunks::new(self.buffer.as_slice(), self.offset, self.len) + } + /// Get the bit capacity of the buffer. #[inline(always)] pub fn capacity(&self) -> usize { diff --git a/vortex-mask/src/lib.rs b/vortex-mask/src/lib.rs index d68f8d2f8bc..d30e1b2948b 100644 --- a/vortex-mask/src/lib.rs +++ b/vortex-mask/src/lib.rs @@ -97,7 +97,7 @@ impl Eq for AllOr where T: Eq {} /// /// A [`Mask`] can be constructed from various representations, and converted to various /// others. Internally, these are cached. -#[derive(Clone, Debug)] +#[derive(Debug, Clone)] pub enum Mask { /// All values are included. AllTrue(usize), diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs index a6b28b0de37..d72403f54a2 100644 --- a/vortex-mask/src/mask_mut.rs +++ b/vortex-mask/src/mask_mut.rs @@ -8,8 +8,10 @@ use vortex_buffer::{BitBuffer, BitBufferMut}; use crate::Mask; /// A mutable mask, used for lazily allocating the bit buffer as required. +#[derive(Debug, Clone)] pub struct MaskMut(Inner); +#[derive(Debug, Clone)] enum Inner { /// Initially, the mask is empty but may have some capacity. Empty { capacity: usize }, diff --git a/vortex-vector/src/bool/mod.rs b/vortex-vector/src/bool/mod.rs index 8541fc43397..b5e5abddd1c 100644 --- a/vortex-vector/src/bool/mod.rs +++ b/vortex-vector/src/bool/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`BoolVector`] and [`BoolVectorMut`]. + mod vector; pub use vector::BoolVector; diff --git a/vortex-vector/src/bool/vector.rs b/vortex-vector/src/bool/vector.rs index b63e05d993b..54d31cc5572 100644 --- a/vortex-vector/src/bool/vector.rs +++ b/vortex-vector/src/bool/vector.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`BoolVector`]. + use vortex_buffer::BitBuffer; use vortex_dtype::{DType, Nullability}; use vortex_mask::Mask; @@ -9,6 +11,12 @@ use super::BoolVectorMut; use crate::VectorOps; /// An immutable vector of boolean values. +/// +/// Internally, the boolean values are stored as the bits of a [`BitBuffer`] plus an optional +/// [`Mask`] for null booleans. +/// +/// The mutable equivalent of this type is [`BoolVectorMut`]. +#[derive(Debug, Clone)] pub struct BoolVector { pub(super) bits: BitBuffer, pub(super) validity: Option, @@ -26,6 +34,12 @@ impl VectorOps for BoolVector { } fn len(&self) -> usize { + debug_assert!( + self.validity + .as_ref() + .is_none_or(|mask| mask.len() == self.bits.len()) + ); + self.bits.len() } diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs index 43231f3dc40..7fc539b1d15 100644 --- a/vortex-vector/src/bool/vector_mut.rs +++ b/vortex-vector/src/bool/vector_mut.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`BoolVectorMut`]. + use vortex_buffer::BitBufferMut; use vortex_dtype::{DType, Nullability}; use vortex_mask::MaskMut; @@ -9,13 +11,19 @@ use super::BoolVector; use crate::VectorMutOps; /// A mutable vector of boolean values. +/// +/// Internally, the boolean values are stored as the bits of a [`BitBufferMut`] plus an optional +/// [`MaskMut`] for null booleans. +/// +/// The immutable equivalent of this type is [`BoolVector`]. +#[derive(Debug, Clone)] pub struct BoolVectorMut { pub(super) bits: BitBufferMut, pub(super) validity: Option, } impl BoolVectorMut { - /// Create a new mutable boolean vector with the given capacity and nullability. + /// Creates a new mutable boolean vector with the given `capacity` and `nullability`. pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { let validity = match nullability { Nullability::NonNullable => None, @@ -41,6 +49,12 @@ impl VectorMutOps for BoolVectorMut { } fn len(&self) -> usize { + debug_assert!( + self.validity + .as_ref() + .is_none_or(|mask| mask.len() == self.bits.len()) + ); + self.bits.len() } @@ -50,6 +64,7 @@ impl VectorMutOps for BoolVectorMut { fn reserve(&mut self, additional: usize) { self.bits.reserve(additional); + if let Some(v) = self.validity.as_mut() { v.reserve(additional); } @@ -57,6 +72,8 @@ impl VectorMutOps for BoolVectorMut { fn extend_from_vector(&mut self, other: &BoolVector) { self.bits.append_buffer(&other.bits); + + // TODO(connor): We must `other`'s nullability in relation to `self`. match (&mut self.validity, &other.validity) { (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), (Some(self_v), None) => self_v.append_n(true, other.bits.len()), @@ -84,8 +101,11 @@ impl VectorMutOps for BoolVectorMut { } fn unsplit(&mut self, other: Self) { + // TODO(connor): We must check `other`'s nullability in relation to `self`. + let other_len = other.bits.len(); self.bits.unsplit(other.bits); + match (&mut self.validity, other.validity) { (Some(self_v), Some(other_v)) => self_v.unsplit(other_v), (Some(self_v), None) => self_v.append_n(true, other_len), diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index ce7ec5c6e90..86c76dfd074 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -3,13 +3,20 @@ //! Mutable decompressed (canonical) vectors for Vortex. //! -//! TODO docs. +//! TODO(connor) More docs. + +// TODO(connor) +// - Document everything +// - Figure out correct panic propagation +// - Figure out exact semantics of `split_off` w.r.t. length of capacity +// - Fix bugs in implementations +// - Add tests +// - Figure out error semantics on ops traits +// - Implement PartialEq and Eq for vectors +// - Add stubs for remaining vector variants +// - Potentially add `TryFrom<Vector> for Vector` or some other conversion method #![deny(missing_docs)] -// #![warn(clippy::missing_docs_in_private_items)] -// #![warn(clippy::missing_errors_doc)] -// #![warn(clippy::missing_panics_doc)] -// #![warn(clippy::missing_safety_doc)] mod vector; pub use vector::ops::{VectorMutOps, VectorOps}; diff --git a/vortex-vector/src/null/mod.rs b/vortex-vector/src/null/mod.rs index ebb6f129a36..4f9de101c94 100644 --- a/vortex-vector/src/null/mod.rs +++ b/vortex-vector/src/null/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`NullVector`] and [`NullVectorMut`]. + mod vector; pub use vector::NullVector; diff --git a/vortex-vector/src/null/vector.rs b/vortex-vector/src/null/vector.rs index a481ad442b5..8782cc3c6a8 100644 --- a/vortex-vector/src/null/vector.rs +++ b/vortex-vector/src/null/vector.rs @@ -1,17 +1,25 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`NullVector`]. + use vortex_dtype::{DType, Nullability}; use crate::{NullVectorMut, VectorOps}; /// An immutable vector of null values. +/// +/// Since a "null" value does not require any data storage, the nulls are stored internally with a +/// single `length` counter. +/// +/// The mutable equivalent of this type is [`NullVectorMut`]. +#[derive(Debug, Clone, Copy)] pub struct NullVector { pub(super) len: usize, } impl NullVector { - /// Creates a new `NullVector` with the given length. + /// Creates a new immutable vector of nulls with the given length. pub fn new(len: usize) -> Self { Self { len } } diff --git a/vortex-vector/src/null/vector_mut.rs b/vortex-vector/src/null/vector_mut.rs index 720dae2c9b5..46a55da7f93 100644 --- a/vortex-vector/src/null/vector_mut.rs +++ b/vortex-vector/src/null/vector_mut.rs @@ -1,18 +1,26 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`NullVectorMut`]. + use vortex_dtype::{DType, Nullability}; use super::NullVector; use crate::VectorMutOps; /// A mutable vector of null values. +/// +/// Since a "null" value does not require any data storage, the nulls are stored internally with a +/// single `length` counter. +/// +/// The immutable equivalent of this type is [`NullVector`]. +#[derive(Debug, Clone, Copy)] pub struct NullVectorMut { pub(super) len: usize, } impl NullVectorMut { - /// Creates a new `NullVectorMut` with the given length. + /// Creates a new mutable vector of nulls with the given length. pub fn new(len: usize) -> Self { Self { len } } @@ -50,7 +58,6 @@ impl VectorMutOps for NullVectorMut { } fn split_off(&mut self, at: usize) -> Self { - // TODO(connor): This is wrong (https://docs.rs/bytes/latest/src/bytes/bytes_mut.rs.html#320-335) assert!( at <= self.capacity(), "split_off out of bounds: {:?} <= {:?}", @@ -58,6 +65,7 @@ impl VectorMutOps for NullVectorMut { self.capacity(), ); + // TODO(connor): This is wrong (https://docs.rs/bytes/latest/src/bytes/bytes_mut.rs.html#320-335) let new_len = self.len - at; self.len = at; NullVectorMut { len: new_len } diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index 3ce671e1a5b..7ed4048de28 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -1,24 +1,27 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`GenericPVector`]. + use vortex_buffer::Buffer; use vortex_dtype::{DType, NativePType, Nullability}; use vortex_mask::Mask; -use crate::{GenericPVectorMut, PrimitiveVector, Vector, VectorOps}; +use crate::{GenericPVectorMut, VectorOps}; /// An immutable vector of generic primitive values. +/// +/// `T` is expected to be bound by [`NativePType`], which templates an internal [`Buffer`] that +/// stores the elements of the vector. Additionally, an optional [`Mask`] is stored to track null +/// primitive elements. +/// +/// The mutable equivalent of this type is [`GenericPVectorMut`]. +#[derive(Debug, Clone)] pub struct GenericPVector { pub(super) elements: Buffer, pub(super) validity: Option, } -impl From> for Vector { - fn from(v: GenericPVector) -> Self { - Self::Primitive(PrimitiveVector::from(v)) - } -} - impl VectorOps for GenericPVector { type Mutable = GenericPVectorMut; diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index c2cd65681b4..314f2819263 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -1,13 +1,22 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`GenericPVectorMut`]. + use vortex_buffer::BufferMut; use vortex_dtype::{DType, NativePType, Nullability}; use vortex_mask::MaskMut; -use crate::{GenericPVector, PrimitiveVectorMut, VectorMut, VectorMutOps}; +use crate::{GenericPVector, VectorMutOps}; -/// A mutable vector of primitive values. +/// A mutable vector of generic primitive values. +/// +/// `T` is expected to be bound by [`NativePType`], which templates an internal [`BufferMut`] +/// that stores the elements of the vector. Additionally, an optional [`MaskMut`] is stored to track +/// null primitive elements. +/// +/// The immutable equivalent of this type is [`GenericPVector`]. +#[derive(Debug, Clone)] pub struct GenericPVectorMut { pub(super) elements: BufferMut, pub(super) validity: Option, @@ -28,12 +37,6 @@ impl GenericPVectorMut { } } -impl From> for VectorMut { - fn from(val: GenericPVectorMut) -> Self { - VectorMut::Primitive(PrimitiveVectorMut::from(val)) - } -} - impl VectorMutOps for GenericPVectorMut { type Immutable = GenericPVector; diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs index 25c901aba14..a317e3df7b2 100644 --- a/vortex-vector/src/primitive/mod.rs +++ b/vortex-vector/src/primitive/mod.rs @@ -1,6 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definitions and implementations of native primitive vector types. +//! +//! The types that hold data are [`GenericPVector`] and [`GenericPVectorMut`], which are generic +//! over types `T` that implement [`NativePType`] (which are just the integer and floating-point +//! types that are native to Rust plus [`f16`]). +//! +//! [`PrimitiveVector`] and [`PrimitiveVectorMut`] are enums that wrap all of the different possible +//! [`GenericPVector`]s. There are several macros defined in this crate to make working with these +//! primitive vector types easier. +//! +//! [`NativePType`]: vortex_dtype::NativePType +//! [`f16`]: vortex_dtype::half::f16 + mod generic; pub use generic::GenericPVector; @@ -13,4 +26,40 @@ pub use vector::PrimitiveVector; mod vector_mut; pub use vector_mut::PrimitiveVectorMut; +/// Helper macros for working with the different variants of [`PrimitiveVector`] and +/// [`PrimitiveVectorMut`]. +/// +/// All macros are exported at the crate level with `#[macro_use]`. mod macros; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Vector Conversions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +use vortex_dtype::NativePType; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: PrimitiveVector) -> Self { + Self::Primitive(v) + } +} + +impl From> for Vector { + fn from(v: GenericPVector) -> Self { + Self::Primitive(PrimitiveVector::from(v)) + } +} + +impl From for VectorMut { + fn from(v: PrimitiveVectorMut) -> Self { + Self::Primitive(v) + } +} + +impl From> for VectorMut { + fn from(val: GenericPVectorMut) -> Self { + Self::Primitive(PrimitiveVectorMut::from(val)) + } +} diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs index 2d803aeaca6..cbb720b4312 100644 --- a/vortex-vector/src/primitive/vector.rs +++ b/vortex-vector/src/primitive/vector.rs @@ -1,13 +1,21 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`PrimitiveVector`]. + use vortex_dtype::half::f16; use vortex_dtype::{DType, NativePType, Nullability, PTypeUpcast}; -use super::{GenericPVector, GenericPVectorMut, PrimitiveVectorMut}; -use crate::{Vector, VectorOps, match_each_pvector}; +use super::{GenericPVector, PrimitiveVectorMut}; +use crate::{VectorOps, match_each_pvector}; -/// An enum over all primitive vector types. +/// An immutable vector of primitive values. +/// +/// `PrimitiveVector` is represented by an enum over all possible [`GenericPVector`] types (which +/// are templated by the types that implement [`NativePType`]). +/// +/// The mutable equivalent of this type is [`PrimitiveVectorMut`]. +#[derive(Debug, Clone)] pub enum PrimitiveVector { /// U8 U8(GenericPVector), @@ -33,12 +41,6 @@ pub enum PrimitiveVector { F64(GenericPVector), } -impl From for Vector { - fn from(v: PrimitiveVector) -> Self { - Self::Primitive(v) - } -} - impl VectorOps for PrimitiveVector { type Mutable = PrimitiveVectorMut; @@ -66,7 +68,10 @@ impl VectorOps for PrimitiveVector { } } -// From impls for PrimitiveVector variants +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Upcast Conversion +//////////////////////////////////////////////////////////////////////////////////////////////////// + impl From> for PrimitiveVector { fn from(v: GenericPVector) -> Self { T::upcast(v) @@ -120,58 +125,3 @@ impl PTypeUpcast for PrimitiveVector { PrimitiveVector::F64(input) } } - -// From impls for PrimitiveVectorMut variants -impl From> for PrimitiveVectorMut { - fn from(v: GenericPVectorMut) -> Self { - T::upcast(v) - } -} - -impl PTypeUpcast for PrimitiveVectorMut { - type Input = GenericPVectorMut; - - fn from_u8(input: Self::Input) -> Self { - PrimitiveVectorMut::U8(input) - } - - fn from_u16(input: Self::Input) -> Self { - PrimitiveVectorMut::U16(input) - } - - fn from_u32(input: Self::Input) -> Self { - PrimitiveVectorMut::U32(input) - } - - fn from_u64(input: Self::Input) -> Self { - PrimitiveVectorMut::U64(input) - } - - fn from_i8(input: Self::Input) -> Self { - PrimitiveVectorMut::I8(input) - } - - fn from_i16(input: Self::Input) -> Self { - PrimitiveVectorMut::I16(input) - } - - fn from_i32(input: Self::Input) -> Self { - PrimitiveVectorMut::I32(input) - } - - fn from_i64(input: Self::Input) -> Self { - PrimitiveVectorMut::I64(input) - } - - fn from_f16(input: Self::Input) -> Self { - PrimitiveVectorMut::F16(input) - } - - fn from_f32(input: Self::Input) -> Self { - PrimitiveVectorMut::F32(input) - } - - fn from_f64(input: Self::Input) -> Self { - PrimitiveVectorMut::F64(input) - } -} diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs index c2365a22eb7..cf78dc67990 100644 --- a/vortex-vector/src/primitive/vector_mut.rs +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -1,15 +1,23 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`PrimitiveVectorMut`]. + use vortex_dtype::half::f16; -use vortex_dtype::{DType, Nullability, PType}; +use vortex_dtype::{DType, NativePType, Nullability, PType, PTypeUpcast}; use crate::{ - GenericPVectorMut, PrimitiveVector, VectorMut, VectorMutOps, match_each_pvector_mut, + GenericPVectorMut, PrimitiveVector, VectorMutOps, match_each_pvector_mut, match_each_pvector_mut_immut_pair, match_each_pvector_mut_pair, }; -/// An enum over all mutable primitive vector types. +/// A mutable vector of primitive values. +/// +/// `PrimitiveVector` is represented by an enum over all possible [`GenericPVectorMut`] types (which +/// are templated by the types that implement [`NativePType`]). +/// +/// The immutable equivalent of this type is [`PrimitiveVector`]. +#[derive(Debug, Clone)] pub enum PrimitiveVectorMut { /// U8 U8(GenericPVectorMut), @@ -54,12 +62,6 @@ impl PrimitiveVectorMut { } } -impl From for VectorMut { - fn from(v: PrimitiveVectorMut) -> Self { - Self::Primitive(v) - } -} - impl VectorMutOps for PrimitiveVectorMut { type Immutable = PrimitiveVector; @@ -103,3 +105,61 @@ impl VectorMutOps for PrimitiveVectorMut { }); } } + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Upcast Conversion +//////////////////////////////////////////////////////////////////////////////////////////////////// + +impl From> for PrimitiveVectorMut { + fn from(v: GenericPVectorMut) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PrimitiveVectorMut { + type Input = GenericPVectorMut; + + fn from_u8(input: Self::Input) -> Self { + PrimitiveVectorMut::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PrimitiveVectorMut::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PrimitiveVectorMut::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PrimitiveVectorMut::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PrimitiveVectorMut::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PrimitiveVectorMut::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PrimitiveVectorMut::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PrimitiveVectorMut::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PrimitiveVectorMut::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PrimitiveVectorMut::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PrimitiveVectorMut::F64(input) + } +} diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs index b9aabca330a..6948a1f3c3d 100644 --- a/vortex-vector/src/private.rs +++ b/vortex-vector/src/private.rs @@ -10,6 +10,7 @@ use crate::*; +/// A private trait for sealing implementations of other traits. pub trait Sealed {} impl Sealed for Vector {} diff --git a/vortex-vector/src/vector/macros.rs b/vortex-vector/src/vector/macros.rs index 04e53544dc3..1d8f259cfcc 100644 --- a/vortex-vector/src/vector/macros.rs +++ b/vortex-vector/src/vector/macros.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +// TODO(connor): Finish implementing the rest of the macros. + /// TODO(connor): Write docs. #[macro_export] macro_rules! match_each_vector { diff --git a/vortex-vector/src/vector/mod.rs b/vortex-vector/src/vector/mod.rs index 5ad76b6b174..62d881c3a79 100644 --- a/vortex-vector/src/vector/mod.rs +++ b/vortex-vector/src/vector/mod.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition of the [`Vector`] and [`VectorMut`] types, which represent fully decompressed +//! (canonical) array data. + use vortex_dtype::DType; use vortex_error::vortex_panic; @@ -8,10 +11,24 @@ use crate::{ BoolVector, BoolVectorMut, NullVector, NullVectorMut, PrimitiveVector, PrimitiveVectorMut, }; +/// Helper macros for working with the different variants of [`Vector`] and [`VectorMut`]. +/// +/// All macros are exported at the crate level with `#[macro_use]`. mod macros; + +/// Definition and implementation of [`VectorOps`](ops::VectorOps) and +/// [`VectorMutOps`](ops::VectorMutOps) for [`Vector`] and [`VectorMut`], respecitively. pub(super) mod ops; -/// An enum over all vector types. +/// An enum over all kinds of immutable vectors, which represent fully decompressed (canonical) +/// array data. +/// +/// Most of the behavior of `Vector` is described by the [`VectorOps`] trait. +/// +/// The mutable equivalent of this type is [`VectorMut`], which implements. +/// +/// [`VectorOps`]: crate::VectorOps +#[derive(Debug, Clone)] pub enum Vector { /// Null Null(NullVector), @@ -19,7 +36,8 @@ pub enum Vector { Bool(BoolVector), /// Primitive /// - /// TODO(connor): Document that this is an enum, not a struct. + /// TODO(connor): Document that this is an enum, not a struct (to represent all possible + /// primitive native generics). Primitive(PrimitiveVector), // Decimal // Decimal(DecimalVector), @@ -37,7 +55,15 @@ pub enum Vector { // Extension(ExtensionVector), } -/// An enum over all mutable vector types. +/// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array +/// data. +/// +/// Most of the behavior of `VectorMut` is described by the [`VectorMutOps`] trait. +/// +/// The immutable equivalent of this type is [`Vector`]. +/// +/// [`VectorMutOps`]: crate::VectorMutOps +#[derive(Debug, Clone)] pub enum VectorMut { /// Null Null(NullVectorMut), @@ -51,7 +77,7 @@ impl VectorMut { /// Create a new mutable vector with the given capacity and dtype. pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { match dtype { - DType::Null => NullVectorMut::new(0).into(), + DType::Null => NullVectorMut::new(0).into(), // `NullVector` has `usize::MAX` capacity. DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), DType::Primitive(ptype, nullability) => { PrimitiveVectorMut::with_capacity(capacity, *ptype, *nullability).into() diff --git a/vortex-vector/src/vector/ops.rs b/vortex-vector/src/vector/ops.rs index 5585b4ffb5e..e995aa52409 100644 --- a/vortex-vector/src/vector/ops.rs +++ b/vortex-vector/src/vector/ops.rs @@ -1,9 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Definition and implementation of [`VectorOps`] and [`VectorOpsMut`] for [`Vector`] and -//! [`VectorMut`], respecitively. - use vortex_dtype::{DType, Nullability}; use crate::{ @@ -11,7 +8,7 @@ use crate::{ match_each_vector_mut_pair, private, }; -/// Common operations for immutable vectors. +/// Common operations for immutable vectors (all the variants of [`Vector`]). pub trait VectorOps: private::Sealed + Into { /// The mutable equivalent of this immutable vector. type Mutable: VectorMutOps; @@ -30,13 +27,15 @@ pub trait VectorOps: private::Sealed + Into { self.len() == 0 } - /// Try to convert `self` into a mutable vector (implementing [`VectorMutOps`]). + /// Tries to convert `self` into a mutable vector (implementing [`VectorMutOps`]). + /// + /// This method will only succeed if `self` is the only unique strong reference (it effectively + /// "owns" the buffer). If this is true, this method will return a mutable vector with the + /// contents of `self` **without** any copying of data. /// - /// If `self` is the only unique strong reference (it effectively "owns" the buffer), this - /// method will succeed and return a mutable vector with the contents of `self` **without** - /// copying. + /// # Errors /// - /// If `self` is not unique, this will fail and return `self` back. + /// If `self` is not unique, this will fail and return `self` back to the caller. fn try_into_mut(self) -> Result where Self: Sized; @@ -67,7 +66,7 @@ impl VectorOps for Vector { } } -/// Common operations for mutable vectors. +/// Common operations for mutable vectors (all the variants of [`VectorMut`]). pub trait VectorMutOps: private::Sealed + Into { /// The immutable equivalent of this mutable vector. type Immutable: VectorOps; @@ -100,6 +99,9 @@ pub trait VectorMutOps: private::Sealed + Into { fn reserve(&mut self, additional: usize); /// Extends the vector by appending elements from another vector. + /// + /// TODO(connor): Document semantics of what happens if `self` is non-nullable and `other` is + /// nullable (should panic?). fn extend_from_vector(&mut self, other: &Self::Immutable); /// Converts `self` into an immutable vector. @@ -122,10 +124,12 @@ pub trait VectorMutOps: private::Sealed + Into { /// Absorbs a mutable vector that was previously split off. /// /// If the two vectors were previously contiguous and not mutated in a way that causes - /// re-allocation i.e., if other was created by calling split_off on this vector, then this is - /// an O(1) operation that just decreases a reference count and sets a few indices. + /// re-allocation i.e., if other was created by calling [`split_off()`] on this vector, then + /// this is an `O(1)` operation (simply decreases a reference count and sets a few indices). + /// + /// Otherwise, this method falls back to `self.extend_from_vector(other)`. /// - /// Otherwise, this method degenerates to `self.extend_from_vector(other.as_ref())`. + /// [`split_off()`]: Self::split_off fn unsplit(&mut self, other: Self); } From a94cabd4f7d9950f7ca42f4f8369eb6f5c1fc2cb Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 20 Oct 2025 16:15:41 -0400 Subject: [PATCH 04/13] add `is_nullable_then` Signed-off-by: Connor Tsui --- vortex-dtype/src/nullability.rs | 69 ++++++++++++++++++++-- vortex-vector/src/bool/vector_mut.rs | 5 +- vortex-vector/src/primitive/generic_mut.rs | 5 +- 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/vortex-dtype/src/nullability.rs b/vortex-dtype/src/nullability.rs index 96678ef2bd9..175701354d1 100644 --- a/vortex-dtype/src/nullability.rs +++ b/vortex-dtype/src/nullability.rs @@ -15,13 +15,72 @@ pub enum Nullability { } impl Nullability { - /// A self-describing displayed form. + /// Returns `Some(f())` if the the nullability is [`Nullable`](Self::Nullable), otherwise + /// returns `None`. /// - /// The usual Display renders [Nullability::NonNullable] as the empty string. - pub fn verbose_display(&self) -> impl Display { + /// # Examples + /// + /// ``` + /// use vortex_dtype::Nullability::*; + /// + /// assert_eq!(NonNullable.is_nullable_then(|| 0), None); + /// assert_eq!(Nullable.is_nullable_then(|| 0), Some(0)); + /// ``` + /// + /// ``` + /// # use vortex_dtype::Nullability::*; + /// # + /// let mut a = 0; + /// + /// Nullable.is_nullable_then(|| { a += 1; }); + /// NonNullable.is_nullable_then(|| { a += 1; }); + /// + /// // `a` is incremented once because the closure is evaluated lazily by `is_nullable_then`. + /// assert_eq!(a, 1); + /// ```c + /// + /// Inspired by the [`bool::then`] function. + pub fn is_nullable_then T>(self, f: F) -> Option { + match self { + Nullability::NonNullable => None, + Nullability::Nullable => Some(f()), + } + } + + /// Returns `Some(t)` if the the nullability is [`Nullable`](Self::Nullable), otherwise returns + /// `None`. + /// + /// Arguments passed to `is_nullable_then_some` are eagerly evaluated; if you are passing the + /// result of a function call, it is recommended to use [`then`](bool::then), which is lazily + /// evaluated. + /// + /// # Examples + /// + /// ``` + /// use vortex_dtype::Nullability::*; + /// + /// assert_eq!(NonNullable.is_nullable_then_some(0), None); + /// assert_eq!(Nullable.is_nullable_then_some(0), Some(0)); + /// ``` + /// + /// ``` + /// # use vortex_dtype::Nullability::*; + /// # + /// let mut a = 0; + /// let mut function_with_side_effects = || { a += 1; }; + /// + /// Nullable.is_nullable_then_some(function_with_side_effects()); + /// NonNullable.is_nullable_then_some(function_with_side_effects()); + /// + /// // `a` is incremented twice because the value passed to `then_some` is evaluated eagerly. + /// assert_eq!(a, 2); + /// ``` + /// + /// Inspired by the [`bool::then_some`] function. + pub fn is_nullable_then_some(self, t: T) -> Option { match self { - Nullability::NonNullable => "NonNullable", - Nullability::Nullable => "Nullable", + Nullability::NonNullable => None, + Nullability::Nullable => Some(t), } } } diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs index 7fc539b1d15..8a1cca87262 100644 --- a/vortex-vector/src/bool/vector_mut.rs +++ b/vortex-vector/src/bool/vector_mut.rs @@ -25,10 +25,7 @@ pub struct BoolVectorMut { impl BoolVectorMut { /// Creates a new mutable boolean vector with the given `capacity` and `nullability`. pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { - let validity = match nullability { - Nullability::NonNullable => None, - Nullability::Nullable => Some(MaskMut::with_capacity(capacity)), - }; + let validity = nullability.is_nullable_then(|| MaskMut::with_capacity(capacity)); Self { bits: BitBufferMut::with_capacity(capacity), diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index 314f2819263..3c010fc5de6 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -25,10 +25,7 @@ pub struct GenericPVectorMut { impl GenericPVectorMut { /// Create a new mutable primitive vector with the given capacity and nullability. pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { - let validity = match nullability { - Nullability::NonNullable => None, - Nullability::Nullable => Some(MaskMut::with_capacity(capacity)), - }; + let validity = nullability.is_nullable_then(|| MaskMut::with_capacity(capacity)); Self { elements: BufferMut::with_capacity(capacity), From 39d85dae7dfd615637b685f4ffd8fcced0b9f4a6 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 20 Oct 2025 16:30:15 -0400 Subject: [PATCH 05/13] restructure modules and files again Signed-off-by: Connor Tsui --- vortex-dtype/src/nullability.rs | 2 +- vortex-vector/src/lib.rs | 26 +++++-- vortex-vector/src/{vector => }/macros.rs | 0 vortex-vector/src/{vector => }/ops.rs | 74 +------------------- vortex-vector/src/vector.rs | 68 ++++++++++++++++++ vortex-vector/src/vector/mod.rs | 88 ------------------------ vortex-vector/src/vector_mut.rs | 86 +++++++++++++++++++++++ 7 files changed, 176 insertions(+), 168 deletions(-) rename vortex-vector/src/{vector => }/macros.rs (100%) rename vortex-vector/src/{vector => }/ops.rs (69%) create mode 100644 vortex-vector/src/vector.rs delete mode 100644 vortex-vector/src/vector/mod.rs create mode 100644 vortex-vector/src/vector_mut.rs diff --git a/vortex-dtype/src/nullability.rs b/vortex-dtype/src/nullability.rs index 175701354d1..a44821f47e0 100644 --- a/vortex-dtype/src/nullability.rs +++ b/vortex-dtype/src/nullability.rs @@ -37,7 +37,7 @@ impl Nullability { /// /// // `a` is incremented once because the closure is evaluated lazily by `is_nullable_then`. /// assert_eq!(a, 1); - /// ```c + /// ``` /// /// Inspired by the [`bool::then`] function. pub fn is_nullable_then T>(self, f: F) -> Option { diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index 86c76dfd074..2fa69fa9457 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -1,11 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Mutable decompressed (canonical) vectors for Vortex. -//! -//! TODO(connor) More docs. +//! Immutable and mutable decompressed (canonical) vectors for Vortex. +// TODO(connor): More docs -// TODO(connor) +// TODO(connor): // - Document everything // - Figure out correct panic propagation // - Figure out exact semantics of `split_off` w.r.t. length of capacity @@ -19,8 +18,18 @@ #![deny(missing_docs)] mod vector; -pub use vector::ops::{VectorMutOps, VectorOps}; -pub use vector::{Vector, VectorMut}; +pub use vector::Vector; + +mod vector_mut; +pub use vector_mut::VectorMut; + +/// Definition and implementation of [`VectorOps`] and [`VectorMutOps`] for [`Vector`] and +/// [`VectorMut`], respectively. +/// +/// [`VectorOps`]: ops::VectorOps +/// [`VectorMutOps`]: ops::VectorMutOps +mod ops; +pub use ops::{VectorMutOps, VectorOps}; mod bool; mod null; @@ -30,4 +39,9 @@ pub use bool::{BoolVector, BoolVectorMut}; pub use null::{NullVector, NullVectorMut}; pub use primitive::{GenericPVector, GenericPVectorMut, PrimitiveVector, PrimitiveVectorMut}; +/// Helper macros for working with the different variants of [`Vector`] and [`VectorMut`]. +/// +/// All macros are exported at the crate level with `#[macro_use]`. +mod macros; + mod private; diff --git a/vortex-vector/src/vector/macros.rs b/vortex-vector/src/macros.rs similarity index 100% rename from vortex-vector/src/vector/macros.rs rename to vortex-vector/src/macros.rs diff --git a/vortex-vector/src/vector/ops.rs b/vortex-vector/src/ops.rs similarity index 69% rename from vortex-vector/src/vector/ops.rs rename to vortex-vector/src/ops.rs index e995aa52409..5e240c8f6b0 100644 --- a/vortex-vector/src/vector/ops.rs +++ b/vortex-vector/src/ops.rs @@ -3,10 +3,7 @@ use vortex_dtype::{DType, Nullability}; -use crate::{ - Vector, VectorMut, match_each_vector, match_each_vector_mut, match_each_vector_mut_immut_pair, - match_each_vector_mut_pair, private, -}; +use crate::{Vector, VectorMut, private}; /// Common operations for immutable vectors (all the variants of [`Vector`]). pub trait VectorOps: private::Sealed + Into { @@ -41,31 +38,6 @@ pub trait VectorOps: private::Sealed + Into { Self: Sized; } -impl VectorOps for Vector { - type Mutable = VectorMut; - - fn nullability(&self) -> Nullability { - match_each_vector!(self, |v| { v.nullability() }) - } - - fn dtype(&self) -> DType { - match_each_vector!(self, |v| { v.dtype() }) - } - - fn len(&self) -> usize { - match_each_vector!(self, |v| { v.len() }) - } - - fn try_into_mut(self) -> Result - where - Self: Sized, - { - match_each_vector!(self, |v| { - v.try_into_mut().map(VectorMut::from).map_err(Vector::from) - }) - } -} - /// Common operations for mutable vectors (all the variants of [`VectorMut`]). pub trait VectorMutOps: private::Sealed + Into { /// The immutable equivalent of this mutable vector. @@ -132,47 +104,3 @@ pub trait VectorMutOps: private::Sealed + Into { /// [`split_off()`]: Self::split_off fn unsplit(&mut self, other: Self); } - -impl VectorMutOps for VectorMut { - type Immutable = Vector; - - fn nullability(&self) -> Nullability { - match_each_vector_mut!(self, |v| { v.nullability() }) - } - - fn dtype(&self) -> DType { - match_each_vector_mut!(self, |v| { v.dtype() }) - } - - fn len(&self) -> usize { - match_each_vector_mut!(self, |v| { v.len() }) - } - - fn capacity(&self) -> usize { - match_each_vector_mut!(self, |v| { v.capacity() }) - } - - fn reserve(&mut self, additional: usize) { - match_each_vector_mut!(self, |v| { v.reserve(additional) }) - } - - fn extend_from_vector(&mut self, other: &Self::Immutable) { - match_each_vector_mut_immut_pair!(self, other, |a, b| { - a.extend_from_vector(b); - }); - } - - fn freeze(self) -> Self::Immutable { - match_each_vector_mut!(self, |v| { v.freeze().into() }) - } - - fn split_off(&mut self, at: usize) -> Self { - match_each_vector_mut!(self, |v| { v.split_off(at).into() }) - } - - fn unsplit(&mut self, other: Self) { - match_each_vector_mut_pair!(self, other, |a, b| { - a.unsplit(b); - }); - } -} diff --git a/vortex-vector/src/vector.rs b/vortex-vector/src/vector.rs new file mode 100644 index 00000000000..2dd8b4e11c2 --- /dev/null +++ b/vortex-vector/src/vector.rs @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition of the [`Vector`] type, which represent immutable and fully decompressed (canonical) +//! array data. + +use vortex_dtype::{DType, Nullability}; + +use crate::{BoolVector, NullVector, PrimitiveVector, VectorMut, VectorOps, match_each_vector}; + +/// An enum over all kinds of immutable vectors, which represent fully decompressed (canonical) +/// array data. +/// +/// Most of the behavior of `Vector` is described by the [`VectorOps`] trait. +/// +/// The mutable equivalent of this type is [`VectorMut`], which implements. +/// +/// [`VectorOps`]: crate::VectorOps +#[derive(Debug, Clone)] +pub enum Vector { + /// Null + Null(NullVector), + /// Bool + Bool(BoolVector), + // TODO(connor): Document that this is an enum, not a struct (to represent all possible + // primitive native generics). + /// Primitive + Primitive(PrimitiveVector), + // Decimal + // Decimal(DecimalVector), + // String + // String(StringVector), + // Binary + // Binary(BinaryVector), + // List + // List(ListVector), + // FixedList + // FixedList(FixedListVector), + // Struct + // Struct(StructVector), + // Extension + // Extension(ExtensionVector), +} + +impl VectorOps for Vector { + type Mutable = VectorMut; + + fn nullability(&self) -> Nullability { + match_each_vector!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_vector!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_vector!(self, |v| { v.len() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_vector!(self, |v| { + v.try_into_mut().map(VectorMut::from).map_err(Vector::from) + }) + } +} diff --git a/vortex-vector/src/vector/mod.rs b/vortex-vector/src/vector/mod.rs deleted file mode 100644 index 62d881c3a79..00000000000 --- a/vortex-vector/src/vector/mod.rs +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Definition of the [`Vector`] and [`VectorMut`] types, which represent fully decompressed -//! (canonical) array data. - -use vortex_dtype::DType; -use vortex_error::vortex_panic; - -use crate::{ - BoolVector, BoolVectorMut, NullVector, NullVectorMut, PrimitiveVector, PrimitiveVectorMut, -}; - -/// Helper macros for working with the different variants of [`Vector`] and [`VectorMut`]. -/// -/// All macros are exported at the crate level with `#[macro_use]`. -mod macros; - -/// Definition and implementation of [`VectorOps`](ops::VectorOps) and -/// [`VectorMutOps`](ops::VectorMutOps) for [`Vector`] and [`VectorMut`], respecitively. -pub(super) mod ops; - -/// An enum over all kinds of immutable vectors, which represent fully decompressed (canonical) -/// array data. -/// -/// Most of the behavior of `Vector` is described by the [`VectorOps`] trait. -/// -/// The mutable equivalent of this type is [`VectorMut`], which implements. -/// -/// [`VectorOps`]: crate::VectorOps -#[derive(Debug, Clone)] -pub enum Vector { - /// Null - Null(NullVector), - /// Bool - Bool(BoolVector), - /// Primitive - /// - /// TODO(connor): Document that this is an enum, not a struct (to represent all possible - /// primitive native generics). - Primitive(PrimitiveVector), - // Decimal - // Decimal(DecimalVector), - // String - // String(StringVector), - // Binary - // Binary(BinaryVector), - // List - // List(ListVector), - // FixedList - // FixedList(FixedListVector), - // Struct - // Struct(StructVector), - // Extension - // Extension(ExtensionVector), -} - -/// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array -/// data. -/// -/// Most of the behavior of `VectorMut` is described by the [`VectorMutOps`] trait. -/// -/// The immutable equivalent of this type is [`Vector`]. -/// -/// [`VectorMutOps`]: crate::VectorMutOps -#[derive(Debug, Clone)] -pub enum VectorMut { - /// Null - Null(NullVectorMut), - /// Bool - Bool(BoolVectorMut), - /// Primitive - Primitive(PrimitiveVectorMut), -} - -impl VectorMut { - /// Create a new mutable vector with the given capacity and dtype. - pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { - match dtype { - DType::Null => NullVectorMut::new(0).into(), // `NullVector` has `usize::MAX` capacity. - DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), - DType::Primitive(ptype, nullability) => { - PrimitiveVectorMut::with_capacity(capacity, *ptype, *nullability).into() - } - _ => vortex_panic!("Unsupported dtype for VectorMut"), - } - } -} diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs new file mode 100644 index 00000000000..fc26a533cab --- /dev/null +++ b/vortex-vector/src/vector_mut.rs @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::{DType, Nullability}; +use vortex_error::vortex_panic; + +use crate::{ + BoolVectorMut, NullVectorMut, PrimitiveVectorMut, Vector, VectorMutOps, match_each_vector_mut, + match_each_vector_mut_immut_pair, match_each_vector_mut_pair, +}; + +/// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array +/// data. +/// +/// Most of the behavior of `VectorMut` is described by the [`VectorMutOps`] trait. +/// +/// The immutable equivalent of this type is [`Vector`]. +/// +/// [`VectorMutOps`]: crate::VectorMutOps +#[derive(Debug, Clone)] +pub enum VectorMut { + /// Null + Null(NullVectorMut), + /// Bool + Bool(BoolVectorMut), + /// Primitive + Primitive(PrimitiveVectorMut), +} + +impl VectorMut { + /// Create a new mutable vector with the given capacity and dtype. + pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { + match dtype { + DType::Null => NullVectorMut::new(0).into(), // `NullVector` has `usize::MAX` capacity. + DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), + DType::Primitive(ptype, nullability) => { + PrimitiveVectorMut::with_capacity(capacity, *ptype, *nullability).into() + } + _ => vortex_panic!("Unsupported dtype for VectorMut"), + } + } +} + +impl VectorMutOps for VectorMut { + type Immutable = Vector; + + fn nullability(&self) -> Nullability { + match_each_vector_mut!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_vector_mut!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_vector_mut!(self, |v| { v.len() }) + } + + fn capacity(&self) -> usize { + match_each_vector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_vector_mut!(self, |v| { v.reserve(additional) }) + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_vector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_vector_mut!(self, |v| { v.freeze().into() }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_vector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_vector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } +} From ba7b4136e896bee8fb804ee550807bdbbcbddb83 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 20 Oct 2025 16:57:48 -0400 Subject: [PATCH 06/13] fix `PartialEq` impl for `BitBuffer` Signed-off-by: Connor Tsui --- vortex-buffer/src/bit/buf.rs | 51 ++++++++++++++++++++++++++++++-- vortex-buffer/src/bit/buf_mut.rs | 50 +++++++++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 4 deletions(-) diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index be431a4384a..260a4cd6815 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -25,8 +25,8 @@ impl PartialEq for BitBuffer { } self.chunks() - .iter() - .zip(other.chunks()) + .iter_padded() + .zip(other.chunks().iter_padded()) .all(|(a, b)| a == b) } } @@ -450,4 +450,51 @@ mod tests { } } } + + #[test] + fn test_partial_eq_regression_under_64_bits() { + // Regression test: chunks().iter() returns empty for buffers < 64 bits, + // causing all such buffers to incorrectly compare as equal! + + // Create two 32-bit buffers with completely different content + let buf1 = BitBuffer::new_set(32); // All bits set + let buf2 = BitBuffer::new_unset(32); // All bits unset + + // Verify they have different bits + for i in 0..32 { + assert_ne!(buf1.value(i), buf2.value(i), "Bit {} should differ", i); + } + + // With the buggy implementation using iter(), these would incorrectly be equal + // because chunks().iter() returns empty for both! + assert_ne!( + buf1, buf2, + "Buffers with different bits should not be equal" + ); + } + + #[test] + fn test_partial_eq_regression_over_64_bits() { + // Test with 65 bits where the issue is in the remainder + let buf1 = { + let mut bytes = vec![0xFF; 9]; + bytes[8] = 0x01; // 65th bit set, rest of byte clear + BitBuffer::new(ByteBuffer::from(bytes), 65) + }; + + let buf2 = { + let mut bytes = vec![0xFF; 9]; + bytes[8] = 0x00; // 65th bit clear + BitBuffer::new(ByteBuffer::from(bytes), 65) + }; + + // Bit 64 differs + assert_ne!(buf1.value(64), buf2.value(64)); + + // Should not be equal + assert_ne!( + buf1, buf2, + "Buffers with different bit 64 should not be equal" + ); + } } diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index d9fc075cfe2..2dc7deae125 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -40,8 +40,8 @@ impl PartialEq for BitBufferMut { } self.chunks() - .iter() - .zip(other.chunks()) + .iter_padded() + .zip(other.chunks().iter_padded()) .all(|(a, b)| a == b) } } @@ -897,4 +897,50 @@ mod tests { assert_eq!(frozen.offset(), 3); assert_eq!(frozen.len(), 6); } + + #[test] + fn test_partial_eq_regression_under_64_bits_mut() { + // Regression test: chunks().iter() returns empty for buffers < 64 bits + let buf1 = BitBufferMut::new_set(32); // All bits set + let buf2 = BitBufferMut::new_unset(32); // All bits unset + + // Verify they have different bits + for i in 0..32 { + assert_ne!(buf1.value(i), buf2.value(i), "Bit {} should differ", i); + } + + assert_ne!( + buf1, buf2, + "Buffers with different bits should not be equal" + ); + } + + #[test] + fn test_partial_eq_regression_over_64_bits_mut() { + // Test with 65 bits where the issue is in the remainder + let buf1 = { + let mut bytes = BufferMut::zeroed(9); + for i in 0..8 { + bytes.as_mut_slice()[i] = 0xFF; + } + bytes.as_mut_slice()[8] = 0x01; // 65th bit set + BitBufferMut::from_buffer(bytes, 0, 65) + }; + + let buf2 = { + let mut bytes = BufferMut::zeroed(9); + for i in 0..8 { + bytes.as_mut_slice()[i] = 0xFF; + } + bytes.as_mut_slice()[8] = 0x00; // 65th bit clear + BitBufferMut::from_buffer(bytes, 0, 65) + }; + + // Bit 64 differs + assert_ne!(buf1.value(64), buf2.value(64)); + assert_ne!( + buf1, buf2, + "Buffers with different bit 64 should not be equal" + ); + } } From 8ce17ce9352154ba6dba167aaaad89dd1f3df642 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 20 Oct 2025 17:22:57 -0400 Subject: [PATCH 07/13] clean up Signed-off-by: Connor Tsui --- vortex-dtype/src/nullability.rs | 65 +++------------------- vortex-vector/src/bool/vector_mut.rs | 12 +++- vortex-vector/src/ops.rs | 21 ++++++- vortex-vector/src/primitive/generic_mut.rs | 12 +++- 4 files changed, 45 insertions(+), 65 deletions(-) diff --git a/vortex-dtype/src/nullability.rs b/vortex-dtype/src/nullability.rs index a44821f47e0..bbabfe46dba 100644 --- a/vortex-dtype/src/nullability.rs +++ b/vortex-dtype/src/nullability.rs @@ -15,72 +15,21 @@ pub enum Nullability { } impl Nullability { - /// Returns `Some(f())` if the the nullability is [`Nullable`](Self::Nullable), otherwise - /// returns `None`. + /// Returns `true` if the nullability is [`Nullable`](Self::Nullable), otherwise returns + /// `false`. /// /// # Examples /// /// ``` /// use vortex_dtype::Nullability::*; /// - /// assert_eq!(NonNullable.is_nullable_then(|| 0), None); - /// assert_eq!(Nullable.is_nullable_then(|| 0), Some(0)); + /// assert!(!NonNullable.is_nullable()); + /// assert!(Nullable.is_nullable()); /// ``` - /// - /// ``` - /// # use vortex_dtype::Nullability::*; - /// # - /// let mut a = 0; - /// - /// Nullable.is_nullable_then(|| { a += 1; }); - /// NonNullable.is_nullable_then(|| { a += 1; }); - /// - /// // `a` is incremented once because the closure is evaluated lazily by `is_nullable_then`. - /// assert_eq!(a, 1); - /// ``` - /// - /// Inspired by the [`bool::then`] function. - pub fn is_nullable_then T>(self, f: F) -> Option { - match self { - Nullability::NonNullable => None, - Nullability::Nullable => Some(f()), - } - } - - /// Returns `Some(t)` if the the nullability is [`Nullable`](Self::Nullable), otherwise returns - /// `None`. - /// - /// Arguments passed to `is_nullable_then_some` are eagerly evaluated; if you are passing the - /// result of a function call, it is recommended to use [`then`](bool::then), which is lazily - /// evaluated. - /// - /// # Examples - /// - /// ``` - /// use vortex_dtype::Nullability::*; - /// - /// assert_eq!(NonNullable.is_nullable_then_some(0), None); - /// assert_eq!(Nullable.is_nullable_then_some(0), Some(0)); - /// ``` - /// - /// ``` - /// # use vortex_dtype::Nullability::*; - /// # - /// let mut a = 0; - /// let mut function_with_side_effects = || { a += 1; }; - /// - /// Nullable.is_nullable_then_some(function_with_side_effects()); - /// NonNullable.is_nullable_then_some(function_with_side_effects()); - /// - /// // `a` is incremented twice because the value passed to `then_some` is evaluated eagerly. - /// assert_eq!(a, 2); - /// ``` - /// - /// Inspired by the [`bool::then_some`] function. - pub fn is_nullable_then_some(self, t: T) -> Option { + pub fn is_nullable(&self) -> bool { match self { - Nullability::NonNullable => None, - Nullability::Nullable => Some(t), + Nullability::NonNullable => false, + Nullability::Nullable => true, } } } diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs index 8a1cca87262..159781d75a2 100644 --- a/vortex-vector/src/bool/vector_mut.rs +++ b/vortex-vector/src/bool/vector_mut.rs @@ -8,7 +8,7 @@ use vortex_dtype::{DType, Nullability}; use vortex_mask::MaskMut; use super::BoolVector; -use crate::VectorMutOps; +use crate::{VectorMutOps, VectorOps}; /// A mutable vector of boolean values. /// @@ -25,7 +25,9 @@ pub struct BoolVectorMut { impl BoolVectorMut { /// Creates a new mutable boolean vector with the given `capacity` and `nullability`. pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { - let validity = nullability.is_nullable_then(|| MaskMut::with_capacity(capacity)); + let validity = nullability + .is_nullable() + .then(|| MaskMut::with_capacity(capacity)); Self { bits: BitBufferMut::with_capacity(capacity), @@ -68,9 +70,13 @@ impl VectorMutOps for BoolVectorMut { } fn extend_from_vector(&mut self, other: &BoolVector) { + assert!( + self.is_nullable() || !other.is_nullable(), + "tried to extend a non-nullable `BoolVector` with a nullable vector" + ); + self.bits.append_buffer(&other.bits); - // TODO(connor): We must `other`'s nullability in relation to `self`. match (&mut self.validity, &other.validity) { (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), (Some(self_v), None) => self_v.append_n(true, other.bits.len()), diff --git a/vortex-vector/src/ops.rs b/vortex-vector/src/ops.rs index 5e240c8f6b0..3dc1501d4b9 100644 --- a/vortex-vector/src/ops.rs +++ b/vortex-vector/src/ops.rs @@ -13,6 +13,13 @@ pub trait VectorOps: private::Sealed + Into { /// Returns the [`Nullability`] of the vector. fn nullability(&self) -> Nullability; + /// Returns `true` if the nullability of this vector is [`Nullable`]. + /// + /// [`Nullable`]: Nullability::Nullable + fn is_nullable(&self) -> bool { + self.nullability().is_nullable() + } + /// Returns the [`DType`] (or data type) of the vector. fn dtype(&self) -> DType; @@ -46,6 +53,13 @@ pub trait VectorMutOps: private::Sealed + Into { /// Returns the [`Nullability`] of the vector. fn nullability(&self) -> Nullability; + /// Returns `true` if the nullability of this vector is [`Nullable`]. + /// + /// [`Nullable`]: Nullability::Nullable + fn is_nullable(&self) -> bool { + self.nullability().is_nullable() + } + /// Returns the [`DType`] (or data type) of the vector. fn dtype(&self) -> DType; @@ -72,8 +86,10 @@ pub trait VectorMutOps: private::Sealed + Into { /// Extends the vector by appending elements from another vector. /// - /// TODO(connor): Document semantics of what happens if `self` is non-nullable and `other` is - /// nullable (should panic?). + /// # Panics + /// + /// If `self` is a non-nullable vector, and `other` is a nullable vector, implementors should + /// ensure that this function panics. fn extend_from_vector(&mut self, other: &Self::Immutable); /// Converts `self` into an immutable vector. @@ -93,6 +109,7 @@ pub trait VectorMutOps: private::Sealed + Into { /// `at > capacity`). fn split_off(&mut self, at: usize) -> Self; + // TODO(connor): Should this panic if other has a different nullability? /// Absorbs a mutable vector that was previously split off. /// /// If the two vectors were previously contiguous and not mutated in a way that causes diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index 3c010fc5de6..aca03e7ff55 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -7,7 +7,7 @@ use vortex_buffer::BufferMut; use vortex_dtype::{DType, NativePType, Nullability}; use vortex_mask::MaskMut; -use crate::{GenericPVector, VectorMutOps}; +use crate::{GenericPVector, VectorMutOps, VectorOps}; /// A mutable vector of generic primitive values. /// @@ -25,7 +25,9 @@ pub struct GenericPVectorMut { impl GenericPVectorMut { /// Create a new mutable primitive vector with the given capacity and nullability. pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { - let validity = nullability.is_nullable_then(|| MaskMut::with_capacity(capacity)); + let validity = nullability + .is_nullable() + .then(|| MaskMut::with_capacity(capacity)); Self { elements: BufferMut::with_capacity(capacity), @@ -62,7 +64,13 @@ impl VectorMutOps for GenericPVectorMut { /// Extends the vector by appending elements from another vector. fn extend_from_vector(&mut self, other: &GenericPVector) { + assert!( + self.is_nullable() || !other.is_nullable(), + "tried to extend a non-nullable `GenericPVector` with a nullable vector" + ); + self.elements.extend_from_slice(other.elements.as_slice()); + match (&mut self.validity, &other.validity) { (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), (Some(self_v), None) => self_v.append_n(true, other.elements.len()), From 6a978d34133069983c6a79f368e076b5ef0c587f Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 21 Oct 2025 09:32:04 -0400 Subject: [PATCH 08/13] fix split off bug Signed-off-by: Connor Tsui --- vortex-vector/src/null/vector_mut.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vortex-vector/src/null/vector_mut.rs b/vortex-vector/src/null/vector_mut.rs index 46a55da7f93..e14699bb9c9 100644 --- a/vortex-vector/src/null/vector_mut.rs +++ b/vortex-vector/src/null/vector_mut.rs @@ -65,9 +65,8 @@ impl VectorMutOps for NullVectorMut { self.capacity(), ); - // TODO(connor): This is wrong (https://docs.rs/bytes/latest/src/bytes/bytes_mut.rs.html#320-335) - let new_len = self.len - at; - self.len = at; + let new_len = self.len.saturating_sub(at); + self.len = std::cmp::min(self.len, at); NullVectorMut { len: new_len } } From 7c8c719693890ee3816c4fc31071fbf76f21c9f0 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 21 Oct 2025 10:18:06 -0400 Subject: [PATCH 09/13] clean up vortex-buffer test Signed-off-by: Connor Tsui --- vortex-buffer/src/bit/buf.rs | 60 ++++++++++---------------------- vortex-buffer/src/bit/buf_mut.rs | 48 ++----------------------- 2 files changed, 21 insertions(+), 87 deletions(-) diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index 260a4cd6815..728be1446b3 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -14,8 +14,8 @@ use crate::{Alignment, BitBufferMut, Buffer, BufferMut, ByteBuffer, buffer}; #[derive(Debug, Clone, Eq)] pub struct BitBuffer { buffer: ByteBuffer, - len: usize, offset: usize, + len: usize, } impl PartialEq for BitBuffer { @@ -48,10 +48,10 @@ impl BitBuffer { } } - /// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view, starting at the - /// given `offset` (in bits). + /// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view, starting at + /// the given `offset` (in bits). /// - /// Panics if the buffer is not large enough to hold `len` bits or if the offset is greater than + /// Panics if the buffer is not large enough to hold `len` bits after the offset. pub fn new_with_offset(buffer: ByteBuffer, len: usize, offset: usize) -> Self { assert!( len.saturating_add(offset) <= buffer.len().saturating_mul(8), @@ -61,8 +61,8 @@ impl BitBuffer { Self { buffer, - len, offset, + len, } } @@ -452,49 +452,27 @@ mod tests { } #[test] - fn test_partial_eq_regression_under_64_bits() { - // Regression test: chunks().iter() returns empty for buffers < 64 bits, - // causing all such buffers to incorrectly compare as equal! - - // Create two 32-bit buffers with completely different content - let buf1 = BitBuffer::new_set(32); // All bits set - let buf2 = BitBuffer::new_unset(32); // All bits unset + fn test_padded_equaltiy() { + let buf1 = BitBuffer::new_set(64); // All bits set. + let buf2 = BitBuffer::collect_bool(64, |x| x < 32); // First half set, other half unset. - // Verify they have different bits for i in 0..32 { + assert_eq!(buf1.value(i), buf2.value(i), "Bit {} should be the same", i); + } + + for i in 32..64 { assert_ne!(buf1.value(i), buf2.value(i), "Bit {} should differ", i); } - // With the buggy implementation using iter(), these would incorrectly be equal - // because chunks().iter() returns empty for both! - assert_ne!( - buf1, buf2, - "Buffers with different bits should not be equal" + assert_eq!( + buf1.slice(0..32), + buf2.slice(0..32), + "Buffer slices with same bits should be equal (`PartialEq` needs `iter_padded()`)" ); - } - - #[test] - fn test_partial_eq_regression_over_64_bits() { - // Test with 65 bits where the issue is in the remainder - let buf1 = { - let mut bytes = vec![0xFF; 9]; - bytes[8] = 0x01; // 65th bit set, rest of byte clear - BitBuffer::new(ByteBuffer::from(bytes), 65) - }; - - let buf2 = { - let mut bytes = vec![0xFF; 9]; - bytes[8] = 0x00; // 65th bit clear - BitBuffer::new(ByteBuffer::from(bytes), 65) - }; - - // Bit 64 differs - assert_ne!(buf1.value(64), buf2.value(64)); - - // Should not be equal assert_ne!( - buf1, buf2, - "Buffers with different bit 64 should not be equal" + buf1.slice(32..64), + buf2.slice(32..64), + "Buffer slices with different bits should not be equal (`PartialEq` needs `iter_padded()`)" ); } } diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index 2dc7deae125..6ede630b0e9 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +// TODO(connor): The API of `BitBufferMut` should probably share more methods with `BitBuffer`. + use arrow_buffer::bit_chunk_iterator::BitChunks; use bitvec::view::BitView; @@ -897,50 +899,4 @@ mod tests { assert_eq!(frozen.offset(), 3); assert_eq!(frozen.len(), 6); } - - #[test] - fn test_partial_eq_regression_under_64_bits_mut() { - // Regression test: chunks().iter() returns empty for buffers < 64 bits - let buf1 = BitBufferMut::new_set(32); // All bits set - let buf2 = BitBufferMut::new_unset(32); // All bits unset - - // Verify they have different bits - for i in 0..32 { - assert_ne!(buf1.value(i), buf2.value(i), "Bit {} should differ", i); - } - - assert_ne!( - buf1, buf2, - "Buffers with different bits should not be equal" - ); - } - - #[test] - fn test_partial_eq_regression_over_64_bits_mut() { - // Test with 65 bits where the issue is in the remainder - let buf1 = { - let mut bytes = BufferMut::zeroed(9); - for i in 0..8 { - bytes.as_mut_slice()[i] = 0xFF; - } - bytes.as_mut_slice()[8] = 0x01; // 65th bit set - BitBufferMut::from_buffer(bytes, 0, 65) - }; - - let buf2 = { - let mut bytes = BufferMut::zeroed(9); - for i in 0..8 { - bytes.as_mut_slice()[i] = 0xFF; - } - bytes.as_mut_slice()[8] = 0x00; // 65th bit clear - BitBufferMut::from_buffer(bytes, 0, 65) - }; - - // Bit 64 differs - assert_ne!(buf1.value(64), buf2.value(64)); - assert_ne!( - buf1, buf2, - "Buffers with different bit 64 should not be equal" - ); - } } From b7ce53fe5791a2751496bcd06fa68c216c9f9a0d Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 21 Oct 2025 10:20:15 -0400 Subject: [PATCH 10/13] add `append_nulls` method to vector Also addresses comments. Signed-off-by: Connor Tsui --- vortex-vector/src/bool/vector.rs | 3 ++- vortex-vector/src/bool/vector_mut.rs | 14 +++++++++++++- vortex-vector/src/lib.rs | 8 -------- vortex-vector/src/macros.rs | 4 ++++ vortex-vector/src/null/vector_mut.rs | 4 ++++ vortex-vector/src/ops.rs | 13 +++++++++++++ vortex-vector/src/primitive/generic.rs | 3 ++- vortex-vector/src/primitive/generic_mut.rs | 14 +++++++++++++- vortex-vector/src/primitive/macros.rs | 8 ++++++++ vortex-vector/src/primitive/mod.rs | 8 -------- vortex-vector/src/primitive/vector.rs | 4 ---- vortex-vector/src/primitive/vector_mut.rs | 8 ++++---- vortex-vector/src/vector_mut.rs | 4 ++++ 13 files changed, 67 insertions(+), 28 deletions(-) diff --git a/vortex-vector/src/bool/vector.rs b/vortex-vector/src/bool/vector.rs index 54d31cc5572..2a11d8dcfdb 100644 --- a/vortex-vector/src/bool/vector.rs +++ b/vortex-vector/src/bool/vector.rs @@ -13,7 +13,8 @@ use crate::VectorOps; /// An immutable vector of boolean values. /// /// Internally, the boolean values are stored as the bits of a [`BitBuffer`] plus an optional -/// [`Mask`] for null booleans. +/// [`Mask`] for null booleans (where `true` represents a _valid_ boolean and `false` represents a +/// `null` boolean). /// /// The mutable equivalent of this type is [`BoolVectorMut`]. #[derive(Debug, Clone)] diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs index 159781d75a2..26cbf33cd41 100644 --- a/vortex-vector/src/bool/vector_mut.rs +++ b/vortex-vector/src/bool/vector_mut.rs @@ -5,6 +5,7 @@ use vortex_buffer::BitBufferMut; use vortex_dtype::{DType, Nullability}; +use vortex_error::vortex_panic; use vortex_mask::MaskMut; use super::BoolVector; @@ -13,7 +14,8 @@ use crate::{VectorMutOps, VectorOps}; /// A mutable vector of boolean values. /// /// Internally, the boolean values are stored as the bits of a [`BitBufferMut`] plus an optional -/// [`MaskMut`] for null booleans. +/// [`MaskMut`] for null booleans (where `true` represents a _valid_ boolean and `false` represents +/// a `null` boolean). /// /// The immutable equivalent of this type is [`BoolVector`]. #[derive(Debug, Clone)] @@ -89,6 +91,16 @@ impl VectorMutOps for BoolVectorMut { } } + fn append_nulls(&mut self, n: usize) { + let Some(mask) = &mut self.validity else { + vortex_panic!("tried to append nulls to a non-nullable vector") + }; + + mask.append_n(false, n); + + self.bits.append_n(false, n); + } + fn freeze(self) -> Self::Immutable { BoolVector { bits: self.bits.freeze(), diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index 2fa69fa9457..60e110fae6f 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -23,11 +23,6 @@ pub use vector::Vector; mod vector_mut; pub use vector_mut::VectorMut; -/// Definition and implementation of [`VectorOps`] and [`VectorMutOps`] for [`Vector`] and -/// [`VectorMut`], respectively. -/// -/// [`VectorOps`]: ops::VectorOps -/// [`VectorMutOps`]: ops::VectorMutOps mod ops; pub use ops::{VectorMutOps, VectorOps}; @@ -39,9 +34,6 @@ pub use bool::{BoolVector, BoolVectorMut}; pub use null::{NullVector, NullVectorMut}; pub use primitive::{GenericPVector, GenericPVectorMut, PrimitiveVector, PrimitiveVectorMut}; -/// Helper macros for working with the different variants of [`Vector`] and [`VectorMut`]. -/// -/// All macros are exported at the crate level with `#[macro_use]`. mod macros; mod private; diff --git a/vortex-vector/src/macros.rs b/vortex-vector/src/macros.rs index 1d8f259cfcc..ea02ec70033 100644 --- a/vortex-vector/src/macros.rs +++ b/vortex-vector/src/macros.rs @@ -1,6 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Helper macros for working with the different variants of [`Vector`] and [`VectorMut`]. +//! +//! All macros are exported at the crate level with `#[macro_use]`. + // TODO(connor): Finish implementing the rest of the macros. /// TODO(connor): Write docs. diff --git a/vortex-vector/src/null/vector_mut.rs b/vortex-vector/src/null/vector_mut.rs index e14699bb9c9..4cfe4f3d4ba 100644 --- a/vortex-vector/src/null/vector_mut.rs +++ b/vortex-vector/src/null/vector_mut.rs @@ -53,6 +53,10 @@ impl VectorMutOps for NullVectorMut { self.len += other.len; } + fn append_nulls(&mut self, n: usize) { + self.len += n; + } + fn freeze(self) -> Self::Immutable { NullVector::new(self.len) } diff --git a/vortex-vector/src/ops.rs b/vortex-vector/src/ops.rs index 3dc1501d4b9..76936f21dfd 100644 --- a/vortex-vector/src/ops.rs +++ b/vortex-vector/src/ops.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`VectorOps`] and [`VectorMutOps`] for [`Vector`] and +//! [`VectorMut`], respectively. + use vortex_dtype::{DType, Nullability}; use crate::{Vector, VectorMut, private}; @@ -92,6 +95,16 @@ pub trait VectorMutOps: private::Sealed + Into { /// ensure that this function panics. fn extend_from_vector(&mut self, other: &Self::Immutable); + /// Appends `n` null elements to the vector (if it is nullable). + /// + /// Implementors should ensure that they correctly append "null" or garbage values to their + /// elements in addition to adding nulls to their validity mask. + /// + /// # Panics + /// + /// If `self` is a non-nullable vector, implementors should ensure that this function panics. + fn append_nulls(&mut self, n: usize); + /// Converts `self` into an immutable vector. fn freeze(self) -> Self::Immutable; diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index 7ed4048de28..d31246b8811 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -13,7 +13,8 @@ use crate::{GenericPVectorMut, VectorOps}; /// /// `T` is expected to be bound by [`NativePType`], which templates an internal [`Buffer`] that /// stores the elements of the vector. Additionally, an optional [`Mask`] is stored to track null -/// primitive elements. +/// primitive elements (where `true` represents a _valid_ primitive and `false` represents a `null` +/// primitive). /// /// The mutable equivalent of this type is [`GenericPVectorMut`]. #[derive(Debug, Clone)] diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index aca03e7ff55..89cf57892f3 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -5,6 +5,7 @@ use vortex_buffer::BufferMut; use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_error::vortex_panic; use vortex_mask::MaskMut; use crate::{GenericPVector, VectorMutOps, VectorOps}; @@ -13,7 +14,8 @@ use crate::{GenericPVector, VectorMutOps, VectorOps}; /// /// `T` is expected to be bound by [`NativePType`], which templates an internal [`BufferMut`] /// that stores the elements of the vector. Additionally, an optional [`MaskMut`] is stored to track -/// null primitive elements. +/// null primitive elements (where `true` represents a _valid_ element and `false` represents a +/// `null` element). /// /// The immutable equivalent of this type is [`GenericPVector`]. #[derive(Debug, Clone)] @@ -84,6 +86,16 @@ impl VectorMutOps for GenericPVectorMut { } } + fn append_nulls(&mut self, n: usize) { + let Some(mask) = &mut self.validity else { + vortex_panic!("tried to append nulls to a non-nullable vector") + }; + + mask.append_n(false, n); + + self.elements.push_n(T::zero(), n) + } + /// Freeze the vector into an immutable one. fn freeze(self) -> GenericPVector { GenericPVector { diff --git a/vortex-vector/src/primitive/macros.rs b/vortex-vector/src/primitive/macros.rs index 8d9b5b0195b..01e82a7fdf5 100644 --- a/vortex-vector/src/primitive/macros.rs +++ b/vortex-vector/src/primitive/macros.rs @@ -1,6 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Helper macros for working with the different variants of [`PrimitiveVector`] and +//! [`PrimitiveVectorMut`]. +//! +//! All macros are exported at the crate level with `#[macro_use]`. +//! +//! [`PrimitiveVector`]: crate::PrimitiveVector +//! [`PrimitiveVectorMut`]: crate::PrimitiveVectorMut + /// TODO(connor): Write docs. #[macro_export] macro_rules! match_each_pvector { diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs index a317e3df7b2..080c83af4d7 100644 --- a/vortex-vector/src/primitive/mod.rs +++ b/vortex-vector/src/primitive/mod.rs @@ -26,16 +26,8 @@ pub use vector::PrimitiveVector; mod vector_mut; pub use vector_mut::PrimitiveVectorMut; -/// Helper macros for working with the different variants of [`PrimitiveVector`] and -/// [`PrimitiveVectorMut`]. -/// -/// All macros are exported at the crate level with `#[macro_use]`. mod macros; -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Vector Conversions -//////////////////////////////////////////////////////////////////////////////////////////////////// - use vortex_dtype::NativePType; use crate::{Vector, VectorMut}; diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs index cbb720b4312..2fc2a888b89 100644 --- a/vortex-vector/src/primitive/vector.rs +++ b/vortex-vector/src/primitive/vector.rs @@ -68,10 +68,6 @@ impl VectorOps for PrimitiveVector { } } -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Upcast Conversion -//////////////////////////////////////////////////////////////////////////////////////////////////// - impl From> for PrimitiveVector { fn from(v: GenericPVector) -> Self { T::upcast(v) diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs index cf78dc67990..47f930c4623 100644 --- a/vortex-vector/src/primitive/vector_mut.rs +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -91,6 +91,10 @@ impl VectorMutOps for PrimitiveVectorMut { }); } + fn append_nulls(&mut self, n: usize) { + match_each_pvector_mut!(self, |v| { v.append_nulls(n) }) + } + fn freeze(self) -> Self::Immutable { match_each_pvector_mut!(self, |v| { v.freeze().into() }) } @@ -106,10 +110,6 @@ impl VectorMutOps for PrimitiveVectorMut { } } -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Upcast Conversion -//////////////////////////////////////////////////////////////////////////////////////////////////// - impl From> for PrimitiveVectorMut { fn from(v: GenericPVectorMut) -> Self { T::upcast(v) diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index fc26a533cab..e3b7f5514c1 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -70,6 +70,10 @@ impl VectorMutOps for VectorMut { }); } + fn append_nulls(&mut self, n: usize) { + match_each_vector_mut!(self, |v| { v.append_nulls(n) }) + } + fn freeze(self) -> Self::Immutable { match_each_vector_mut!(self, |v| { v.freeze().into() }) } From 9d1415c7776f77565a60550e6a4fc86f681c0a75 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 21 Oct 2025 11:03:29 -0400 Subject: [PATCH 11/13] remove dtype from vector traits Signed-off-by: Connor Tsui --- vortex-vector/src/bool/vector.rs | 6 +----- vortex-vector/src/bool/vector_mut.rs | 6 +----- vortex-vector/src/null/vector.rs | 6 +----- vortex-vector/src/null/vector_mut.rs | 6 +----- vortex-vector/src/ops.rs | 8 +------- vortex-vector/src/primitive/generic.rs | 6 +----- vortex-vector/src/primitive/generic_mut.rs | 6 +----- vortex-vector/src/primitive/vector.rs | 6 +----- vortex-vector/src/primitive/vector_mut.rs | 6 +----- vortex-vector/src/vector.rs | 8 +------- vortex-vector/src/vector_mut.rs | 4 ---- 11 files changed, 10 insertions(+), 58 deletions(-) diff --git a/vortex-vector/src/bool/vector.rs b/vortex-vector/src/bool/vector.rs index 2a11d8dcfdb..cee4257612a 100644 --- a/vortex-vector/src/bool/vector.rs +++ b/vortex-vector/src/bool/vector.rs @@ -4,7 +4,7 @@ //! Definition and implementation of [`BoolVector`]. use vortex_buffer::BitBuffer; -use vortex_dtype::{DType, Nullability}; +use vortex_dtype::Nullability; use vortex_mask::Mask; use super::BoolVectorMut; @@ -30,10 +30,6 @@ impl VectorOps for BoolVector { Nullability::from(self.validity.is_some()) } - fn dtype(&self) -> DType { - DType::Bool(self.nullability()) - } - fn len(&self) -> usize { debug_assert!( self.validity diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs index 26cbf33cd41..70cd4480eab 100644 --- a/vortex-vector/src/bool/vector_mut.rs +++ b/vortex-vector/src/bool/vector_mut.rs @@ -4,7 +4,7 @@ //! Definition and implementation of [`BoolVectorMut`]. use vortex_buffer::BitBufferMut; -use vortex_dtype::{DType, Nullability}; +use vortex_dtype::Nullability; use vortex_error::vortex_panic; use vortex_mask::MaskMut; @@ -45,10 +45,6 @@ impl VectorMutOps for BoolVectorMut { Nullability::from(self.validity.is_some()) } - fn dtype(&self) -> DType { - DType::Bool(self.nullability()) - } - fn len(&self) -> usize { debug_assert!( self.validity diff --git a/vortex-vector/src/null/vector.rs b/vortex-vector/src/null/vector.rs index 8782cc3c6a8..056f4b59bc2 100644 --- a/vortex-vector/src/null/vector.rs +++ b/vortex-vector/src/null/vector.rs @@ -3,7 +3,7 @@ //! Definition and implementation of [`NullVector`]. -use vortex_dtype::{DType, Nullability}; +use vortex_dtype::Nullability; use crate::{NullVectorMut, VectorOps}; @@ -32,10 +32,6 @@ impl VectorOps for NullVector { Nullability::Nullable } - fn dtype(&self) -> DType { - DType::Null - } - fn len(&self) -> usize { self.len } diff --git a/vortex-vector/src/null/vector_mut.rs b/vortex-vector/src/null/vector_mut.rs index 4cfe4f3d4ba..229a3683b61 100644 --- a/vortex-vector/src/null/vector_mut.rs +++ b/vortex-vector/src/null/vector_mut.rs @@ -3,7 +3,7 @@ //! Definition and implementation of [`NullVectorMut`]. -use vortex_dtype::{DType, Nullability}; +use vortex_dtype::Nullability; use super::NullVector; use crate::VectorMutOps; @@ -33,10 +33,6 @@ impl VectorMutOps for NullVectorMut { Nullability::Nullable } - fn dtype(&self) -> DType { - DType::Null - } - fn len(&self) -> usize { self.len } diff --git a/vortex-vector/src/ops.rs b/vortex-vector/src/ops.rs index 76936f21dfd..32bdd0e5f0b 100644 --- a/vortex-vector/src/ops.rs +++ b/vortex-vector/src/ops.rs @@ -4,7 +4,7 @@ //! Definition and implementation of [`VectorOps`] and [`VectorMutOps`] for [`Vector`] and //! [`VectorMut`], respectively. -use vortex_dtype::{DType, Nullability}; +use vortex_dtype::Nullability; use crate::{Vector, VectorMut, private}; @@ -23,9 +23,6 @@ pub trait VectorOps: private::Sealed + Into { self.nullability().is_nullable() } - /// Returns the [`DType`] (or data type) of the vector. - fn dtype(&self) -> DType; - /// Returns the number of elements in the vector, also referred to as its "length". fn len(&self) -> usize; @@ -63,9 +60,6 @@ pub trait VectorMutOps: private::Sealed + Into { self.nullability().is_nullable() } - /// Returns the [`DType`] (or data type) of the vector. - fn dtype(&self) -> DType; - /// Returns the number of elements in the vector, also referred to as its "length". fn len(&self) -> usize; diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index d31246b8811..51b994e67e0 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -4,7 +4,7 @@ //! Definition and implementation of [`GenericPVector`]. use vortex_buffer::Buffer; -use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_dtype::{NativePType, Nullability}; use vortex_mask::Mask; use crate::{GenericPVectorMut, VectorOps}; @@ -30,10 +30,6 @@ impl VectorOps for GenericPVector { Nullability::from(self.validity.is_some()) } - fn dtype(&self) -> DType { - DType::Primitive(T::PTYPE, self.nullability()) - } - fn len(&self) -> usize { self.elements.len() } diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index 89cf57892f3..097972f0d94 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -4,7 +4,7 @@ //! Definition and implementation of [`GenericPVectorMut`]. use vortex_buffer::BufferMut; -use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_dtype::{NativePType, Nullability}; use vortex_error::vortex_panic; use vortex_mask::MaskMut; @@ -45,10 +45,6 @@ impl VectorMutOps for GenericPVectorMut { Nullability::from(self.validity.is_some()) } - fn dtype(&self) -> DType { - DType::Primitive(T::PTYPE, self.nullability()) - } - fn len(&self) -> usize { self.elements.len() } diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs index 2fc2a888b89..5b687189a60 100644 --- a/vortex-vector/src/primitive/vector.rs +++ b/vortex-vector/src/primitive/vector.rs @@ -4,7 +4,7 @@ //! Definition and implementation of [`PrimitiveVector`]. use vortex_dtype::half::f16; -use vortex_dtype::{DType, NativePType, Nullability, PTypeUpcast}; +use vortex_dtype::{NativePType, Nullability, PTypeUpcast}; use super::{GenericPVector, PrimitiveVectorMut}; use crate::{VectorOps, match_each_pvector}; @@ -48,10 +48,6 @@ impl VectorOps for PrimitiveVector { match_each_pvector!(self, |v| { v.nullability() }) } - fn dtype(&self) -> DType { - match_each_pvector!(self, |v| { v.dtype() }) - } - fn len(&self) -> usize { match_each_pvector!(self, |v| { v.len() }) } diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs index 47f930c4623..3e7f6afa60e 100644 --- a/vortex-vector/src/primitive/vector_mut.rs +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -4,7 +4,7 @@ //! Definition and implementation of [`PrimitiveVectorMut`]. use vortex_dtype::half::f16; -use vortex_dtype::{DType, NativePType, Nullability, PType, PTypeUpcast}; +use vortex_dtype::{NativePType, Nullability, PType, PTypeUpcast}; use crate::{ GenericPVectorMut, PrimitiveVector, VectorMutOps, match_each_pvector_mut, @@ -69,10 +69,6 @@ impl VectorMutOps for PrimitiveVectorMut { match_each_pvector_mut!(self, |v| { v.nullability() }) } - fn dtype(&self) -> DType { - match_each_pvector_mut!(self, |v| { v.dtype() }) - } - fn len(&self) -> usize { match_each_pvector_mut!(self, |v| { v.len() }) } diff --git a/vortex-vector/src/vector.rs b/vortex-vector/src/vector.rs index 2dd8b4e11c2..e857aa3cd2c 100644 --- a/vortex-vector/src/vector.rs +++ b/vortex-vector/src/vector.rs @@ -4,7 +4,7 @@ //! Definition of the [`Vector`] type, which represent immutable and fully decompressed (canonical) //! array data. -use vortex_dtype::{DType, Nullability}; +use vortex_dtype::Nullability; use crate::{BoolVector, NullVector, PrimitiveVector, VectorMut, VectorOps, match_each_vector}; @@ -14,8 +14,6 @@ use crate::{BoolVector, NullVector, PrimitiveVector, VectorMut, VectorOps, match /// Most of the behavior of `Vector` is described by the [`VectorOps`] trait. /// /// The mutable equivalent of this type is [`VectorMut`], which implements. -/// -/// [`VectorOps`]: crate::VectorOps #[derive(Debug, Clone)] pub enum Vector { /// Null @@ -49,10 +47,6 @@ impl VectorOps for Vector { match_each_vector!(self, |v| { v.nullability() }) } - fn dtype(&self) -> DType { - match_each_vector!(self, |v| { v.dtype() }) - } - fn len(&self) -> usize { match_each_vector!(self, |v| { v.len() }) } diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index e3b7f5514c1..a1188abe0ff 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -48,10 +48,6 @@ impl VectorMutOps for VectorMut { match_each_vector_mut!(self, |v| { v.nullability() }) } - fn dtype(&self) -> DType { - match_each_vector_mut!(self, |v| { v.dtype() }) - } - fn len(&self) -> usize { match_each_vector_mut!(self, |v| { v.len() }) } From 65448e5fb0fd732eafad5d1cacbdcca58f6758f9 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 21 Oct 2025 11:11:43 -0400 Subject: [PATCH 12/13] remove `Generic_` Signed-off-by: Connor Tsui --- vortex-vector/src/lib.rs | 2 +- vortex-vector/src/primitive/generic.rs | 16 +++---- vortex-vector/src/primitive/generic_mut.rs | 18 ++++---- vortex-vector/src/primitive/mod.rs | 12 ++--- vortex-vector/src/primitive/vector.rs | 30 ++++++------- vortex-vector/src/primitive/vector_mut.rs | 52 +++++++++++----------- vortex-vector/src/private.rs | 4 +- 7 files changed, 67 insertions(+), 67 deletions(-) diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index 60e110fae6f..759686eab1c 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -32,7 +32,7 @@ mod primitive; pub use bool::{BoolVector, BoolVectorMut}; pub use null::{NullVector, NullVectorMut}; -pub use primitive::{GenericPVector, GenericPVectorMut, PrimitiveVector, PrimitiveVectorMut}; +pub use primitive::{PVector, PVectorMut, PrimitiveVector, PrimitiveVectorMut}; mod macros; diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index 51b994e67e0..7ec296a1bab 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -7,7 +7,7 @@ use vortex_buffer::Buffer; use vortex_dtype::{NativePType, Nullability}; use vortex_mask::Mask; -use crate::{GenericPVectorMut, VectorOps}; +use crate::{PVectorMut, VectorOps}; /// An immutable vector of generic primitive values. /// @@ -18,13 +18,13 @@ use crate::{GenericPVectorMut, VectorOps}; /// /// The mutable equivalent of this type is [`GenericPVectorMut`]. #[derive(Debug, Clone)] -pub struct GenericPVector { +pub struct PVector { pub(super) elements: Buffer, pub(super) validity: Option, } -impl VectorOps for GenericPVector { - type Mutable = GenericPVectorMut; +impl VectorOps for PVector { + type Mutable = PVectorMut; fn nullability(&self) -> Nullability { Nullability::from(self.validity.is_some()) @@ -35,11 +35,11 @@ impl VectorOps for GenericPVector { } /// Try to convert self into a mutable vector. - fn try_into_mut(self) -> Result, Self> { + fn try_into_mut(self) -> Result, Self> { let elements = match self.elements.try_into_mut() { Ok(elements) => elements, Err(elements) => { - return Err(GenericPVector { + return Err(PVector { elements, validity: self.validity, }); @@ -50,7 +50,7 @@ impl VectorOps for GenericPVector { Some(v) => match v.try_into_mut() { Ok(v) => Some(v), Err(v) => { - return Err(GenericPVector { + return Err(PVector { elements: elements.freeze(), validity: Some(v), }); @@ -59,6 +59,6 @@ impl VectorOps for GenericPVector { None => None, }; - Ok(GenericPVectorMut { elements, validity }) + Ok(PVectorMut { elements, validity }) } } diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index 097972f0d94..e2769ca2362 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -8,7 +8,7 @@ use vortex_dtype::{NativePType, Nullability}; use vortex_error::vortex_panic; use vortex_mask::MaskMut; -use crate::{GenericPVector, VectorMutOps, VectorOps}; +use crate::{PVector, VectorMutOps, VectorOps}; /// A mutable vector of generic primitive values. /// @@ -19,12 +19,12 @@ use crate::{GenericPVector, VectorMutOps, VectorOps}; /// /// The immutable equivalent of this type is [`GenericPVector`]. #[derive(Debug, Clone)] -pub struct GenericPVectorMut { +pub struct PVectorMut { pub(super) elements: BufferMut, pub(super) validity: Option, } -impl GenericPVectorMut { +impl PVectorMut { /// Create a new mutable primitive vector with the given capacity and nullability. pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { let validity = nullability @@ -38,8 +38,8 @@ impl GenericPVectorMut { } } -impl VectorMutOps for GenericPVectorMut { - type Immutable = GenericPVector; +impl VectorMutOps for PVectorMut { + type Immutable = PVector; fn nullability(&self) -> Nullability { Nullability::from(self.validity.is_some()) @@ -61,7 +61,7 @@ impl VectorMutOps for GenericPVectorMut { } /// Extends the vector by appending elements from another vector. - fn extend_from_vector(&mut self, other: &GenericPVector) { + fn extend_from_vector(&mut self, other: &PVector) { assert!( self.is_nullable() || !other.is_nullable(), "tried to extend a non-nullable `GenericPVector` with a nullable vector" @@ -93,15 +93,15 @@ impl VectorMutOps for GenericPVectorMut { } /// Freeze the vector into an immutable one. - fn freeze(self) -> GenericPVector { - GenericPVector { + fn freeze(self) -> PVector { + PVector { elements: self.elements.freeze(), validity: self.validity.map(|v| v.freeze()), } } fn split_off(&mut self, at: usize) -> Self { - GenericPVectorMut { + PVectorMut { elements: self.elements.split_off(at), validity: self.validity.as_mut().map(|v| v.split_off(at)), } diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs index 080c83af4d7..4676beaf571 100644 --- a/vortex-vector/src/primitive/mod.rs +++ b/vortex-vector/src/primitive/mod.rs @@ -15,10 +15,10 @@ //! [`f16`]: vortex_dtype::half::f16 mod generic; -pub use generic::GenericPVector; +pub use generic::PVector; mod generic_mut; -pub use generic_mut::GenericPVectorMut; +pub use generic_mut::PVectorMut; mod vector; pub use vector::PrimitiveVector; @@ -38,8 +38,8 @@ impl From for Vector { } } -impl From> for Vector { - fn from(v: GenericPVector) -> Self { +impl From> for Vector { + fn from(v: PVector) -> Self { Self::Primitive(PrimitiveVector::from(v)) } } @@ -50,8 +50,8 @@ impl From for VectorMut { } } -impl From> for VectorMut { - fn from(val: GenericPVectorMut) -> Self { +impl From> for VectorMut { + fn from(val: PVectorMut) -> Self { Self::Primitive(PrimitiveVectorMut::from(val)) } } diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs index 5b687189a60..416c0ac56a5 100644 --- a/vortex-vector/src/primitive/vector.rs +++ b/vortex-vector/src/primitive/vector.rs @@ -6,7 +6,7 @@ use vortex_dtype::half::f16; use vortex_dtype::{NativePType, Nullability, PTypeUpcast}; -use super::{GenericPVector, PrimitiveVectorMut}; +use super::{PVector, PrimitiveVectorMut}; use crate::{VectorOps, match_each_pvector}; /// An immutable vector of primitive values. @@ -18,27 +18,27 @@ use crate::{VectorOps, match_each_pvector}; #[derive(Debug, Clone)] pub enum PrimitiveVector { /// U8 - U8(GenericPVector), + U8(PVector), /// U16 - U16(GenericPVector), + U16(PVector), /// U32 - U32(GenericPVector), + U32(PVector), /// U64 - U64(GenericPVector), + U64(PVector), /// I8 - I8(GenericPVector), + I8(PVector), /// I16 - I16(GenericPVector), + I16(PVector), /// I32 - I32(GenericPVector), + I32(PVector), /// I64 - I64(GenericPVector), + I64(PVector), /// F16 - F16(GenericPVector), + F16(PVector), /// F32 - F32(GenericPVector), + F32(PVector), /// F64 - F64(GenericPVector), + F64(PVector), } impl VectorOps for PrimitiveVector { @@ -64,14 +64,14 @@ impl VectorOps for PrimitiveVector { } } -impl From> for PrimitiveVector { - fn from(v: GenericPVector) -> Self { +impl From> for PrimitiveVector { + fn from(v: PVector) -> Self { T::upcast(v) } } impl PTypeUpcast for PrimitiveVector { - type Input = GenericPVector; + type Input = PVector; fn from_u8(input: Self::Input) -> Self { PrimitiveVector::U8(input) diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs index 3e7f6afa60e..37197ea9eb7 100644 --- a/vortex-vector/src/primitive/vector_mut.rs +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -7,7 +7,7 @@ use vortex_dtype::half::f16; use vortex_dtype::{NativePType, Nullability, PType, PTypeUpcast}; use crate::{ - GenericPVectorMut, PrimitiveVector, VectorMutOps, match_each_pvector_mut, + PVectorMut, PrimitiveVector, VectorMutOps, match_each_pvector_mut, match_each_pvector_mut_immut_pair, match_each_pvector_mut_pair, }; @@ -20,44 +20,44 @@ use crate::{ #[derive(Debug, Clone)] pub enum PrimitiveVectorMut { /// U8 - U8(GenericPVectorMut), + U8(PVectorMut), /// U16 - U16(GenericPVectorMut), + U16(PVectorMut), /// U32 - U32(GenericPVectorMut), + U32(PVectorMut), /// U64 - U64(GenericPVectorMut), + U64(PVectorMut), /// I8 - I8(GenericPVectorMut), + I8(PVectorMut), /// I16 - I16(GenericPVectorMut), + I16(PVectorMut), /// I32 - I32(GenericPVectorMut), + I32(PVectorMut), /// I64 - I64(GenericPVectorMut), + I64(PVectorMut), /// F16 - F16(GenericPVectorMut), + F16(PVectorMut), /// F32 - F32(GenericPVectorMut), + F32(PVectorMut), /// F64 - F64(GenericPVectorMut), + F64(PVectorMut), } impl PrimitiveVectorMut { /// Create a new mutable primitive vector with the given capacity, primitive type, and nullability. pub fn with_capacity(capacity: usize, ptype: PType, nullability: Nullability) -> Self { match ptype { - PType::U8 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::U16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::U32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::U64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::I8 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::I16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::I32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::I64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::F16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::F32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), - PType::F64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U8 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::U16 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::U32 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::U64 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::I8 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::I16 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::I32 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::I64 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::F16 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::F32 => PVectorMut::::with_capacity(capacity, nullability).into(), + PType::F64 => PVectorMut::::with_capacity(capacity, nullability).into(), } } } @@ -106,14 +106,14 @@ impl VectorMutOps for PrimitiveVectorMut { } } -impl From> for PrimitiveVectorMut { - fn from(v: GenericPVectorMut) -> Self { +impl From> for PrimitiveVectorMut { + fn from(v: PVectorMut) -> Self { T::upcast(v) } } impl PTypeUpcast for PrimitiveVectorMut { - type Input = GenericPVectorMut; + type Input = PVectorMut; fn from_u8(input: Self::Input) -> Self { PrimitiveVectorMut::U8(input) diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs index 6948a1f3c3d..0721138bda4 100644 --- a/vortex-vector/src/private.rs +++ b/vortex-vector/src/private.rs @@ -24,5 +24,5 @@ impl Sealed for BoolVectorMut {} impl Sealed for PrimitiveVector {} impl Sealed for PrimitiveVectorMut {} -impl Sealed for GenericPVector {} -impl Sealed for GenericPVectorMut {} +impl Sealed for PVector {} +impl Sealed for PVectorMut {} From 912cb78248f219dab4b1df9896d2143c1edf4ca6 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Tue, 21 Oct 2025 11:27:08 -0400 Subject: [PATCH 13/13] fix nullability Signed-off-by: Connor Tsui --- vortex-vector/src/bool/vector_mut.rs | 9 ++++++--- vortex-vector/src/macros.rs | 3 ++- vortex-vector/src/ops.rs | 3 +-- vortex-vector/src/primitive/generic.rs | 4 ++-- vortex-vector/src/primitive/generic_mut.rs | 13 ++++++++----- vortex-vector/src/primitive/mod.rs | 8 ++++---- vortex-vector/src/primitive/vector.rs | 4 ++-- vortex-vector/src/primitive/vector_mut.rs | 4 ++-- 8 files changed, 27 insertions(+), 21 deletions(-) diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs index 70cd4480eab..35a4a3c764e 100644 --- a/vortex-vector/src/bool/vector_mut.rs +++ b/vortex-vector/src/bool/vector_mut.rs @@ -68,9 +68,12 @@ impl VectorMutOps for BoolVectorMut { } fn extend_from_vector(&mut self, other: &BoolVector) { - assert!( - self.is_nullable() || !other.is_nullable(), - "tried to extend a non-nullable `BoolVector` with a nullable vector" + assert_eq!( + self.nullability(), + other.nullability(), + "tried to extend a vector with nullability {} with another vector with nullability {}", + self.nullability(), + other.nullability(), ); self.bits.append_buffer(&other.bits); diff --git a/vortex-vector/src/macros.rs b/vortex-vector/src/macros.rs index ea02ec70033..48d93f8c338 100644 --- a/vortex-vector/src/macros.rs +++ b/vortex-vector/src/macros.rs @@ -1,7 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Helper macros for working with the different variants of [`Vector`] and [`VectorMut`]. +//! Helper macros for working with the different variants of [`Vector`](crate::Vector) and +//! [`VectorMut`](crate::VectorMut). //! //! All macros are exported at the crate level with `#[macro_use]`. diff --git a/vortex-vector/src/ops.rs b/vortex-vector/src/ops.rs index 32bdd0e5f0b..79704075bcb 100644 --- a/vortex-vector/src/ops.rs +++ b/vortex-vector/src/ops.rs @@ -85,8 +85,7 @@ pub trait VectorMutOps: private::Sealed + Into { /// /// # Panics /// - /// If `self` is a non-nullable vector, and `other` is a nullable vector, implementors should - /// ensure that this function panics. + /// Panics if `other` does not have the same nullability as `self`. fn extend_from_vector(&mut self, other: &Self::Immutable); /// Appends `n` null elements to the vector (if it is nullable). diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index 7ec296a1bab..e26900b6a2f 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Definition and implementation of [`GenericPVector`]. +//! Definition and implementation of [`PVector`]. use vortex_buffer::Buffer; use vortex_dtype::{NativePType, Nullability}; @@ -16,7 +16,7 @@ use crate::{PVectorMut, VectorOps}; /// primitive elements (where `true` represents a _valid_ primitive and `false` represents a `null` /// primitive). /// -/// The mutable equivalent of this type is [`GenericPVectorMut`]. +/// The mutable equivalent of this type is [`PVectorMut`]. #[derive(Debug, Clone)] pub struct PVector { pub(super) elements: Buffer, diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index e2769ca2362..085050d0bd3 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Definition and implementation of [`GenericPVectorMut`]. +//! Definition and implementation of [`PVectorMut`]. use vortex_buffer::BufferMut; use vortex_dtype::{NativePType, Nullability}; @@ -17,7 +17,7 @@ use crate::{PVector, VectorMutOps, VectorOps}; /// null primitive elements (where `true` represents a _valid_ element and `false` represents a /// `null` element). /// -/// The immutable equivalent of this type is [`GenericPVector`]. +/// The immutable equivalent of this type is [`PVector`]. #[derive(Debug, Clone)] pub struct PVectorMut { pub(super) elements: BufferMut, @@ -62,9 +62,12 @@ impl VectorMutOps for PVectorMut { /// Extends the vector by appending elements from another vector. fn extend_from_vector(&mut self, other: &PVector) { - assert!( - self.is_nullable() || !other.is_nullable(), - "tried to extend a non-nullable `GenericPVector` with a nullable vector" + assert_eq!( + self.nullability(), + other.nullability(), + "tried to extend a vector with nullability {} with another vector with nullability {}", + self.nullability(), + other.nullability(), ); self.elements.extend_from_slice(other.elements.as_slice()); diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs index 4676beaf571..df6965e507f 100644 --- a/vortex-vector/src/primitive/mod.rs +++ b/vortex-vector/src/primitive/mod.rs @@ -3,12 +3,12 @@ //! Definitions and implementations of native primitive vector types. //! -//! The types that hold data are [`GenericPVector`] and [`GenericPVectorMut`], which are generic -//! over types `T` that implement [`NativePType`] (which are just the integer and floating-point -//! types that are native to Rust plus [`f16`]). +//! The types that hold data are [`PVector`] and [`PVectorMut`], which are generic over types `T` +//! that implement [`NativePType`] (which are just the integer and floating-point types that are +//! native to Rust plus [`f16`]). //! //! [`PrimitiveVector`] and [`PrimitiveVectorMut`] are enums that wrap all of the different possible -//! [`GenericPVector`]s. There are several macros defined in this crate to make working with these +//! [`PVector`]s. There are several macros defined in this crate to make working with these //! primitive vector types easier. //! //! [`NativePType`]: vortex_dtype::NativePType diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs index 416c0ac56a5..aebac12d664 100644 --- a/vortex-vector/src/primitive/vector.rs +++ b/vortex-vector/src/primitive/vector.rs @@ -11,8 +11,8 @@ use crate::{VectorOps, match_each_pvector}; /// An immutable vector of primitive values. /// -/// `PrimitiveVector` is represented by an enum over all possible [`GenericPVector`] types (which -/// are templated by the types that implement [`NativePType`]). +/// `PrimitiveVector` is represented by an enum over all possible [`PVector`] types (which are +/// templated by the types that implement [`NativePType`]). /// /// The mutable equivalent of this type is [`PrimitiveVectorMut`]. #[derive(Debug, Clone)] diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs index 37197ea9eb7..dba16d3e6ed 100644 --- a/vortex-vector/src/primitive/vector_mut.rs +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -13,8 +13,8 @@ use crate::{ /// A mutable vector of primitive values. /// -/// `PrimitiveVector` is represented by an enum over all possible [`GenericPVectorMut`] types (which -/// are templated by the types that implement [`NativePType`]). +/// `PrimitiveVector` is represented by an enum over all possible [`PVectorMut`] types (which are +/// templated by the types that implement [`NativePType`]). /// /// The immutable equivalent of this type is [`PrimitiveVector`]. #[derive(Debug, Clone)]