From 0f7d8f2a26a2306c491e7ee82eae6fee2ba8d88b Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 15 Oct 2025 14:04:46 -0400 Subject: [PATCH 1/3] add vortex-vector Signed-off-by: Nicholas Gates Signed-off-by: Connor Tsui --- Cargo.lock | 12 + Cargo.toml | 1 + vortex-buffer/src/bit/buf.rs | 8 + vortex-buffer/src/bit/buf_mut.rs | 57 ++++ vortex-buffer/src/buffer.rs | 60 ---- vortex-buffer/src/buffer_mut.rs | 54 +++ vortex-dtype/src/ptype.rs | 50 ++- vortex-mask/Cargo.toml | 1 + vortex-mask/src/lib.rs | 3 + vortex-mask/src/mask_mut.rs | 532 +++++++++++++++++++++++++++++ vortex-vector/Cargo.toml | 27 ++ vortex-vector/src/bool.rs | 67 ++++ vortex-vector/src/bool_mut.rs | 80 +++++ vortex-vector/src/lib.rs | 222 ++++++++++++ vortex-vector/src/null.rs | 44 +++ vortex-vector/src/null_mut.rs | 61 ++++ vortex-vector/src/ops.rs | 67 ++++ vortex-vector/src/primitive.rs | 67 ++++ vortex-vector/src/primitive_mut.rs | 82 +++++ vortex-vector/src/pvector.rs | 493 ++++++++++++++++++++++++++ 20 files changed, 1916 insertions(+), 72 deletions(-) create mode 100644 vortex-mask/src/mask_mut.rs create mode 100644 vortex-vector/Cargo.toml create mode 100644 vortex-vector/src/bool.rs create mode 100644 vortex-vector/src/bool_mut.rs create mode 100644 vortex-vector/src/lib.rs create mode 100644 vortex-vector/src/null.rs create mode 100644 vortex-vector/src/null_mut.rs create mode 100644 vortex-vector/src/ops.rs create mode 100644 vortex-vector/src/primitive.rs create mode 100644 vortex-vector/src/primitive_mut.rs create mode 100644 vortex-vector/src/pvector.rs diff --git a/Cargo.lock b/Cargo.lock index dbe7fa8d4f3..bd54a657679 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9124,6 +9124,7 @@ dependencies = [ "arrow-buffer", "itertools 0.14.0", "rstest", + "vortex-buffer", "vortex-error", ] @@ -9314,6 +9315,17 @@ dependencies = [ "hashbrown 0.16.0", ] +[[package]] +name = "vortex-vector" +version = "0.1.0" +dependencies = [ + "vortex-buffer", + "vortex-dtype", + "vortex-error", + "vortex-mask", + "vortex-scalar", +] + [[package]] name = "vortex-zigzag" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index d4e23ec5563..a8e26e97c54 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ members = [ "vortex-scalar", "vortex-tui", "vortex-utils", + "vortex-vector", "xtask", "vortex-gpu", ] diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index 4d18ccb2b3e..e481f10c718 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -277,6 +277,14 @@ impl BitBuffer { self.buffer.slice(word_start..word_end) } + /// Attempt to convert this `BitBuffer` into a mutable version. + pub fn try_into_mut(self) -> Result { + match self.buffer.try_into_mut() { + Ok(buffer) => Ok(BitBufferMut::from_buffer(buffer, self.offset, self.len)), + Err(buffer) => Err(BitBuffer::new_with_offset(buffer, self.len, self.offset)), + } + } + /// Get a mutable version of this `BitBuffer` along with bit offset in the first byte. /// /// If the caller doesn't hold only reference to the underlying buffer, a copy is created. diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index ae0d54d6222..9f83f65aa9b 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -362,6 +362,63 @@ impl BitBufferMut { self.len += bit_len; } + /// Splits the bit buffer into two at the given index. + /// + /// Afterward, self contains elements `[0, at)`, and the returned buffer contains elements + /// `[at, capacity)`. + /// + /// Unlike bytes, if the split position is not on a byte-boundary this operation will copy + /// data into the result type, and mutate self. + pub fn split_off(&mut self, at: usize) -> Self { + assert!(at <= self.len, "index {at} exceeds len {}", self.len); + + let new_offset = self.offset; + let new_len = self.len - at; + + // If we are splitting on a byte boundary, we can just slice the buffer + if (self.offset + at) % 8 == 0 { + let byte_pos = (self.offset + at) / 8; + let new_buffer = self.buffer.split_off(byte_pos); + self.len = at; + return Self { + buffer: new_buffer, + offset: new_offset, + len: new_len, + }; + } + + // Otherwise, we need to copy bits into a new buffer + let mut new_buffer = BitBufferMut::with_capacity(new_len); + for i in 0..new_len { + let value = self.value(at + i); + new_buffer.append(value); + } + + // Truncate self to the split position + self.truncate(at); + + new_buffer + } + + /// Absorbs a mutable buffer that was previously split off. + /// + /// If the two buffers were previously contiguous and not mutated in a way that causes + /// re-allocation i.e., if other was created by calling split_off on this buffer, then this is + /// an O(1) operation that just decreases a reference count and sets a few indices. + /// + /// Otherwise, this method degenerates to self.append_buffer(&other). + pub fn unsplit(&mut self, other: Self) { + if (self.offset + self.len) % 8 == 0 && other.offset == 0 { + // We are aligned and can just append the buffers + self.buffer.unsplit(other.buffer); + self.len += other.len; + return; + } + + // Otherwise, we need to append the bits one by one + self.append_buffer(&other.freeze()) + } + /// Freeze the buffer in its current state into an immutable `BoolBuffer`. pub fn freeze(self) -> BitBuffer { BitBuffer::new_with_offset(self.buffer.freeze(), self.len, self.offset) diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index 4c5752d70d3..faa48ed9cad 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -447,66 +447,6 @@ impl Buffer { vortex_panic!("Buffer is not aligned to requested alignment {}", alignment) } } - - /// Align the buffer to alignment of U - pub fn align_to(mut self) -> (Buffer, Buffer, Buffer) { - let offset = self.as_ptr().align_offset(align_of::()); - if offset > self.len() { - ( - self, - Buffer::empty_aligned(Alignment::of::()), - Buffer::empty_aligned(Alignment::of::()), - ) - } else { - let left = self.bytes.split_to(offset); - self.length -= offset; - let (us_len, _) = self.align_to_offsets::(); - let trailer = self.bytes.split_off(us_len * size_of::()); - ( - Buffer::from_bytes_aligned(left, Alignment::of::()), - Buffer::from_bytes_aligned(self.bytes, Alignment::of::()), - Buffer::from_bytes_aligned(trailer, Alignment::of::()), - ) - } - } - - /// Adapted from standard library slice::align_to_offsets - /// Function to calculate lengths of the middle and trailing slice for `align_to`. - fn align_to_offsets(&self) -> (usize, usize) { - // What we're going to do about `rest` is figure out what multiple of `U`s we can put in the - // lowest number of `T`s. And how many `T`s we need for each such "multiple". - // - // Consider for example T=u8 U=u16. Then we can put 1 U in 2 Ts. Simple. Now, consider - // for example a case where size_of:: = 16, size_of:: = 24. We can put 2 Us in - // place of every 3 Ts in the `rest` slice. A bit more complicated. - // - // Formula to calculate this is: - // - // Us = lcm(size_of::, size_of::) / size_of:: - // Ts = lcm(size_of::, size_of::) / size_of:: - // - // Expanded and simplified: - // - // Us = size_of:: / gcd(size_of::, size_of::) - // Ts = size_of:: / gcd(size_of::, size_of::) - // - // Luckily since all this is constant-evaluated... performance here matters not! - const fn gcd(a: usize, b: usize) -> usize { - if b == 0 { a } else { gcd(b, a % b) } - } - - // Explicitly wrap the function call in a const block so it gets - // constant-evaluated even in debug mode. - let gcd: usize = const { gcd(size_of::(), size_of::()) }; - let ts: usize = size_of::() / gcd; - let us: usize = size_of::() / gcd; - - // Armed with this knowledge, we can find how many `U`s we can fit! - let us_len = self.len() / ts * us; - // And how many `T`s will be in the trailing slice! - let ts_len = self.len() % ts; - (us_len, ts_len) - } } /// An iterator over Buffer elements. diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs index 6b782817666..c28f7b7150f 100644 --- a/vortex-buffer/src/buffer_mut.rs +++ b/vortex-buffer/src/buffer_mut.rs @@ -328,6 +328,60 @@ impl BufferMut { self.length += slice.len(); } + /// Splits the buffer into two at the given index. + /// + /// Afterward, self contains elements `[0, at)`, and the returned buffer contains elements + /// `[at, capacity)`. It’s guaranteed that the memory does not move, that is, the address of + /// self does not change, and the address of the returned slice is at bytes after that. + /// + /// This is an O(1) operation that just increases the reference count and sets a few indices. + /// + /// Panics if either half would have a length that is not a multiple of the alignment. + pub fn split_off(&mut self, at: usize) -> Self { + if at > self.len() { + vortex_panic!("Cannot split buffer of length {} at {}", self.len(), at); + } + + let bytes_at = at * size_of::(); + if !bytes_at.is_multiple_of(*self.alignment) { + vortex_panic!( + "Cannot split buffer at {}, resulting alignment is not {}", + at, + self.alignment + ); + } + + let new_bytes = self.bytes.split_off(bytes_at); + let new_length = self.length - at; + self.length = at; + + BufferMut { + bytes: new_bytes, + length: new_length, + alignment: self.alignment, + _marker: Default::default(), + } + } + + /// Absorbs a mutable buffer that was previously split off. + /// + /// If the two buffers were previously contiguous and not mutated in a way that causes + /// re-allocation i.e., if other was created by calling split_off on this buffer, then this is + /// an O(1) operation that just decreases a reference count and sets a few indices. + /// + /// Otherwise, this method degenerates to self.extend_from_slice(other.as_ref()). + pub fn unsplit(&mut self, other: Self) { + if self.alignment != other.alignment { + vortex_panic!( + "Cannot unsplit buffers with different alignments: {} and {}", + self.alignment, + other.alignment + ); + } + self.bytes.unsplit(other.bytes); + self.length += other.length; + } + /// Freeze the `BufferMut` into a `Buffer`. pub fn freeze(self) -> Buffer { Buffer { diff --git a/vortex-dtype/src/ptype.rs b/vortex-dtype/src/ptype.rs index b5ea5ad32d7..5af9bfee515 100644 --- a/vortex-dtype/src/ptype.rs +++ b/vortex-dtype/src/ptype.rs @@ -143,15 +143,18 @@ pub trait NativePType: fn is_eq(self, other: Self) -> bool; /// Downcast the provided object to a type-specific instance. - fn downcast(visitor: &V) -> V::Output; + fn downcast(visitor: &V) -> V::Output; /// Downcast the provided object to a type-specific instance. - fn downcast_mut(visitor: &mut V) -> V::Output; + fn downcast_mut(visitor: &mut V) -> V::Output; + + /// Upcast a type-specific instance to a generic instance. + fn upcast(input: V::Input) -> V; } /// A visitor trait for converting a `NativePType` to another parameterized type. #[allow(missing_docs)] // Kind of obvious. -pub trait PTypeVisitor { +pub trait PTypeDowncast { type Output; fn as_u8(&self) -> Self::Output; @@ -167,19 +170,19 @@ pub trait PTypeVisitor { fn as_f64(&self) -> Self::Output; } -/// Extension trait to provide generic downcasting for [`PTypeVisitor`]. -pub trait PTypeVisitorExt: PTypeVisitor { +/// Extension trait to provide generic downcasting for [`PTypeDowncast`]. +pub trait PTypeDowncastExt: PTypeDowncast { /// Downcast the object to a specific primitive type. fn as_primitive(&self) -> Self::Output { T::downcast(self) } } -impl PTypeVisitorExt for T {} +impl PTypeDowncastExt for T {} /// A visitor trait for converting a `NativePType` to another mutable parameterized type. #[allow(missing_docs)] // Kind of obvious.. -pub trait PTypeVisitorMut { +pub trait PTypeDowncastMut { type Output; fn as_u8(&mut self) -> Self::Output; @@ -195,8 +198,8 @@ pub trait PTypeVisitorMut { fn as_f64(&mut self) -> Self::Output; } -/// Extension trait to provide generic downcasting for [`PTypeVisitorMut`]. -pub trait PTypeVisitorMutExt: PTypeVisitorMut { +/// Extension trait to provide generic downcasting for [`PTypeDowncastMut`]. +pub trait PTypeDowncastMutExt: PTypeDowncastMut { /// Downcast the object to a specific primitive type. fn as_primitive_mut(&mut self) -> Self::Output { T::downcast_mut(self) @@ -206,17 +209,40 @@ pub trait PTypeVisitorMutExt: PTypeVisitorMut { macro_rules! impl_ptype_downcast { ($T:ty) => { #[inline] - fn downcast(visitor: &V) -> V::Output { + fn downcast(visitor: &V) -> V::Output { paste::paste! { visitor.[]() } } #[inline] - fn downcast_mut(visitor: &mut V) -> V::Output { + fn downcast_mut(visitor: &mut V) -> V::Output { paste::paste! { visitor.[]() } } + + #[inline] + fn upcast(input: V::Input) -> V { + paste::paste! { V::[](input) } + } }; } +/// A visitor trait for converting a generic `NativePType` into a non-parameterized type. +#[allow(missing_docs)] // Kind of obvious. +pub trait PTypeUpcast { + type Input; + + fn from_u8(input: Self::Input) -> Self; + fn from_u16(input: Self::Input) -> Self; + fn from_u32(input: Self::Input) -> Self; + fn from_u64(input: Self::Input) -> Self; + fn from_i8(input: Self::Input) -> Self; + fn from_i16(input: Self::Input) -> Self; + fn from_i32(input: Self::Input) -> Self; + fn from_i64(input: Self::Input) -> Self; + fn from_f16(input: Self::Input) -> Self; + fn from_f32(input: Self::Input) -> Self; + fn from_f64(input: Self::Input) -> Self; +} + macro_rules! native_ptype { ($T:ty, $ptype:tt) => { impl crate::NativeDType for $T { @@ -253,7 +279,7 @@ macro_rules! native_ptype { }; } -impl PTypeVisitorMutExt for T {} +impl PTypeDowncastMutExt for T {} macro_rules! native_float_ptype { ($T:ty, $ptype:tt) => { diff --git a/vortex-mask/Cargo.toml b/vortex-mask/Cargo.toml index 40894d71165..ccf43593c91 100644 --- a/vortex-mask/Cargo.toml +++ b/vortex-mask/Cargo.toml @@ -16,6 +16,7 @@ version = { workspace = true } [dependencies] arrow-buffer = { workspace = true } itertools = { workspace = true } +vortex-buffer = { workspace = true, features = ["arrow"] } vortex-error = { workspace = true } [dev-dependencies] diff --git a/vortex-mask/src/lib.rs b/vortex-mask/src/lib.rs index 24a3f6168b3..d68f8d2f8bc 100644 --- a/vortex-mask/src/lib.rs +++ b/vortex-mask/src/lib.rs @@ -3,10 +3,12 @@ //! A mask is a set of sorted unique positive integers. #![deny(missing_docs)] + mod bitops; mod eq; mod intersect_by_rank; mod iter_bools; +mod mask_mut; #[cfg(test)] mod tests; @@ -18,6 +20,7 @@ use std::sync::{Arc, OnceLock}; use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; use itertools::Itertools; +pub use mask_mut::*; use vortex_error::{VortexResult, vortex_panic}; /// Represents a set of values that are all included, all excluded, or some mixture of both. diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs new file mode 100644 index 00000000000..a6b28b0de37 --- /dev/null +++ b/vortex-mask/src/mask_mut.rs @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Sub; + +use vortex_buffer::{BitBuffer, BitBufferMut}; + +use crate::Mask; + +/// A mutable mask, used for lazily allocating the bit buffer as required. +pub struct MaskMut(Inner); + +enum Inner { + /// Initially, the mask is empty but may have some capacity. + Empty { capacity: usize }, + /// When the first value is pushed, the mask becomes constant. + Constant { + value: bool, + len: usize, + capacity: usize, + }, + /// When the first non-constant value is written, we allocate the bit buffer and switch + /// into the builder state. + Builder(BitBufferMut), +} + +impl MaskMut { + /// Creates a new empty mask with the default capacity. + pub fn with_capacity(capacity: usize) -> Self { + Self(Inner::Empty { capacity }) + } + + /// Creates a new mask with all values set to `true`. + pub fn new_true(len: usize) -> Self { + Self(Inner::Constant { + value: true, + len, + capacity: len, + }) + } + + /// Creates a new mask with all values set to `false`. + pub fn new_false(len: usize) -> Self { + Self(Inner::Constant { + value: false, + len, + capacity: len, + }) + } + + /// Reserve capacity for at least `additional` more values to be appended. + pub fn reserve(&mut self, additional: usize) { + match &mut self.0 { + Inner::Empty { capacity } => { + *capacity += additional; + } + Inner::Constant { capacity, .. } => { + *capacity += additional; + } + Inner::Builder(bits) => { + bits.reserve(additional); + } + } + } + + /// Append n values to the mask. + pub fn append_n(&mut self, new_value: bool, n: usize) { + match &mut self.0 { + Inner::Empty { capacity } => { + self.0 = Inner::Constant { + value: new_value, + len: n, + capacity: (*capacity).max(n), + } + } + Inner::Constant { + value, + len, + capacity, + } => { + if *value == new_value { + // Same value, just increase length. + self.0 = Inner::Constant { + value: *value, + len: *len + n, + capacity: (*capacity).max(*len + n), + } + } else { + // Different value, need to allocate the bit buffer. + // Note: materialize() already appends the existing constant values + let bits = self.materialize(); + bits.append_n(new_value, n); + } + } + Inner::Builder(bits) => { + bits.append_n(new_value, n); + } + } + } + + /// Append a [`Mask`] to this mutable mask. + pub fn append_mask(&mut self, other: &Mask) { + match other { + Mask::AllTrue(len) => self.append_n(true, *len), + Mask::AllFalse(len) => self.append_n(false, *len), + Mask::Values(values) => { + let bitbuffer = BitBuffer::from(values.buffer.clone()); + self.materialize().append_buffer(&bitbuffer); + } + } + } + + /// Ensures that the internal bit buffer is allocated and returns a mutable reference to it. + fn materialize(&mut self) -> &mut BitBufferMut { + let needs_materialization = !matches!(self.0, Inner::Builder(_)); + + if needs_materialization { + let new_builder = match &self.0 { + Inner::Empty { capacity } => BitBufferMut::with_capacity(*capacity), + Inner::Constant { + value, + len, + capacity, + } => { + let required_capacity = (*capacity).max(*len); + let mut bits = BitBufferMut::with_capacity(required_capacity); + bits.append_n(*value, *len); + bits + } + Inner::Builder(_) => unreachable!(), + }; + self.0 = Inner::Builder(new_builder); + } + + match &mut self.0 { + Inner::Builder(bits) => bits, + _ => unreachable!(), + } + } + + /// Split-off the mask at the given index, returning a new mask with the + /// values from `at` to the end, and leaving `self` with the values from + /// the start to `at`. + pub fn split_off(&mut self, at: usize) -> Self { + assert!(at <= self.len(), "split_off index out of bounds"); + match &mut self.0 { + Inner::Empty { capacity } => { + let new_capacity = (*capacity).saturating_sub(at); + Self(Inner::Empty { + capacity: new_capacity, + }) + } + Inner::Constant { + value, + len, + capacity, + } => { + let new_len = len.sub(at); + *len = at; + let new_capacity = (*capacity).saturating_sub(at); + Self(Inner::Constant { + value: *value, + len: new_len, + capacity: new_capacity, + }) + } + Inner::Builder(bits) => { + let new_bits = bits.split_off(at); + Self(Inner::Builder(new_bits)) + } + } + } + + /// Absorb another mask into this one, appending its values. + pub fn unsplit(&mut self, other: Self) { + match other.0 { + Inner::Empty { .. } => { + // No work to do + } + Inner::Constant { value, len, .. } => { + self.append_n(value, len); + } + Inner::Builder(bits) => { + self.materialize().unsplit(bits); + } + } + } + + /// Freezes the mutable mask into an immutable one. + pub fn freeze(self) -> Mask { + match self.0 { + Inner::Empty { .. } => Mask::new_true(0), + Inner::Constant { value, len, .. } => { + if value { + Mask::new_true(len) + } else { + Mask::new_false(len) + } + } + Inner::Builder(bits) => Mask::from_buffer(bits.freeze().into()), + } + } + + /// Returns the logical length of the mask. + pub fn len(&self) -> usize { + match &self.0 { + Inner::Empty { .. } => 0, + Inner::Constant { len, .. } => *len, + Inner::Builder(bits) => bits.len(), + } + } + + /// Returns true if the mask is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl Mask { + /// Attempts to convert an immutable mask into a mutable one. + pub fn try_into_mut(self) -> Result { + match self { + Mask::AllTrue(len) => Ok(MaskMut::new_true(len)), + Mask::AllFalse(len) => Ok(MaskMut::new_false(len)), + Mask::Values(values) => { + // FIXME(ngates): we can never convert Arrow BooleanBuffer to ByteBufferMut, + // so we have to wait until we use BitBuffer internally in MaskValues. + Err(Mask::Values(values)) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_split_off_empty() { + let mut mask = MaskMut::with_capacity(10); + assert_eq!(mask.len(), 0); + + let other = mask.split_off(0); + assert_eq!(mask.len(), 0); + assert_eq!(other.len(), 0); + } + + #[test] + fn test_split_off_constant_true_at_zero() { + let mut mask = MaskMut::new_true(10); + let other = mask.split_off(0); + + assert_eq!(mask.len(), 0); + assert_eq!(other.len(), 10); + + let frozen = other.freeze(); + assert_eq!(frozen.true_count(), 10); + } + + #[test] + fn test_split_off_constant_true_at_end() { + let mut mask = MaskMut::new_true(10); + let other = mask.split_off(10); + + assert_eq!(mask.len(), 10); + assert_eq!(other.len(), 0); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 10); + } + + #[test] + fn test_split_off_constant_true_in_middle() { + let mut mask = MaskMut::new_true(10); + let other = mask.split_off(6); + + assert_eq!(mask.len(), 6); + assert_eq!(other.len(), 4); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 6); + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 4); + } + + #[test] + fn test_split_off_constant_false() { + let mut mask = MaskMut::new_false(20); + let other = mask.split_off(12); + + assert_eq!(mask.len(), 12); + assert_eq!(other.len(), 8); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 0); + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 0); + } + + // Note: Tests using BitBuffer operations are marked as ignored under miri + // because bitvec uses raw pointer operations that miri cannot verify. + #[test] + fn test_split_off_builder_at_byte_boundary() { + let mut mask = MaskMut::with_capacity(16); + // Create a pattern: 8 true, 8 false + mask.append_n(true, 8); + mask.append_n(false, 8); + + let mask_ptr = match &mask.0 { + Inner::Builder(bits) => bits.as_slice().as_ptr(), + _ => unreachable!(), + }; + + let other = mask.split_off(8); + + assert_eq!(mask.len(), 8); + assert_eq!(other.len(), 8); + + // Ensure the unsplit was zero-copy. + mask.unsplit(other); + let new_mask_ptr = match &mask.0 { + Inner::Builder(bits) => bits.as_slice().as_ptr(), + _ => unreachable!(), + }; + assert_eq!(mask_ptr, new_mask_ptr); + } + + #[test] + fn test_split_off_builder_not_byte_aligned() { + let mut mask = MaskMut::with_capacity(20); + // Create a pattern: 10 true, 10 false + mask.append_n(true, 10); + mask.append_n(false, 10); + + let other = mask.split_off(10); + + assert_eq!(mask.len(), 10); + assert_eq!(other.len(), 10); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 10); + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 0); + } + + #[test] + fn test_split_off_builder_mixed_pattern() { + let mut mask = MaskMut::with_capacity(15); + // Create pattern: TFTFTFTFTFTFTFT (alternating) + for i in 0..15 { + mask.append_n(i % 2 == 0, 1); + } + + let other = mask.split_off(7); + + assert_eq!(mask.len(), 7); + assert_eq!(other.len(), 8); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 4); // positions 0,2,4,6 + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 4); // positions 7,9,11,13 => 0,2,4,6 in split + } + + #[test] + fn test_unsplit_empty_with_empty() { + let mut mask = MaskMut::with_capacity(10); + let other = MaskMut::with_capacity(10); + + mask.unsplit(other); + assert_eq!(mask.len(), 0); + } + + #[test] + fn test_unsplit_empty_with_constant() { + let mut mask = MaskMut::with_capacity(10); + let other = MaskMut::new_true(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 5); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 5); + } + + #[test] + fn test_unsplit_constant_with_constant_same() { + let mut mask = MaskMut::new_true(5); + let other = MaskMut::new_true(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 10); + } + + #[test] + fn test_unsplit_constant_with_constant_different() { + let mut mask = MaskMut::new_true(5); + let other = MaskMut::new_false(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 5); + } + + #[test] + fn test_unsplit_constant_with_builder() { + let mut mask = MaskMut::new_true(5); + + let mut other = MaskMut::with_capacity(10); + other.append_n(true, 3); + other.append_n(false, 2); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 8); // 5 from first + 3 from second + } + + #[test] + fn test_unsplit_builder_with_constant() { + let mut mask = MaskMut::with_capacity(10); + mask.append_n(true, 3); + mask.append_n(false, 2); + + let other = MaskMut::new_true(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 8); // 3 from first + 5 from second + } + + #[test] + fn test_unsplit_builder_with_builder() { + let mut mask = MaskMut::with_capacity(10); + mask.append_n(true, 3); + mask.append_n(false, 2); + + let mut other = MaskMut::with_capacity(10); + other.append_n(false, 3); + other.append_n(true, 2); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 5); // 3 from first + 2 from second + } + + #[test] + // TODO(ngates): when mask uses BitBuffer internally, into_mut should succeed + #[should_panic] + fn test_round_trip_split_unsplit() { + let mut original = MaskMut::with_capacity(20); + // Pattern: 10 true, 10 false + original.append_n(true, 10); + original.append_n(false, 10); + + let original_frozen = original.freeze(); + let original_true_count = original_frozen.true_count(); + + // Convert back to mutable for split + let mut mask = original_frozen.try_into_mut().unwrap(); + + // Split at 10 + let other = mask.split_off(10); + + // Unsplit back together + mask.unsplit(other); + + assert_eq!(mask.len(), 20); + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), original_true_count); + } + + #[test] + #[should_panic(expected = "split_off index out of bounds")] + fn test_split_off_out_of_bounds() { + let mut mask = MaskMut::new_true(10); + let _ = mask.split_off(11); + } + + #[test] + fn test_split_off_builder_at_bit_1() { + let mut mask = MaskMut::with_capacity(16); + mask.append_n(true, 16); + + let other = mask.split_off(1); + + assert_eq!(mask.len(), 1); + assert_eq!(other.len(), 15); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 1); + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 15); + } + + #[test] + fn test_multiple_split_unsplit() { + let mut mask = MaskMut::new_true(30); + + // Split into 3 parts + let third = mask.split_off(20); // 20-30 + let second = mask.split_off(10); // 10-20 + // first is 0-10 + + assert_eq!(mask.len(), 10); + assert_eq!(second.len(), 10); + assert_eq!(third.len(), 10); + + // Recombine in order + mask.unsplit(second); + mask.unsplit(third); + + assert_eq!(mask.len(), 30); + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 30); + } +} diff --git a/vortex-vector/Cargo.toml b/vortex-vector/Cargo.toml new file mode 100644 index 00000000000..353f2ae1d0b --- /dev/null +++ b/vortex-vector/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "vortex-vector" +authors = { workspace = true } +categories = { workspace = true } +description = "Vortex in-memory canonical data format" +edition = { workspace = true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +readme = { workspace = true } +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[package.metadata.docs.rs] +all-features = true + +[lints] +workspace = true + +[dependencies] +vortex-buffer = { workspace = true } +vortex-dtype = { workspace = true } +vortex-error = { workspace = true } +vortex-mask = { workspace = true } +vortex-scalar = { workspace = true } diff --git a/vortex-vector/src/bool.rs b/vortex-vector/src/bool.rs new file mode 100644 index 00000000000..a46d25a9e56 --- /dev/null +++ b/vortex-vector/src/bool.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBuffer; +use vortex_dtype::DType; +use vortex_mask::Mask; + +use crate::ops::VectorOps; +use crate::{BoolVectorMut, Vector}; + +/// An immutable vector of boolean values. +pub struct BoolVector { + pub(super) dtype: DType, + pub(super) bits: BitBuffer, + pub(super) validity: Mask, +} + +impl From for Vector { + fn from(v: BoolVector) -> Self { + Self::Bool(v) + } +} + +impl VectorOps for BoolVector { + type Mutable = BoolVectorMut; + + fn len(&self) -> usize { + self.bits.len() + } + + fn dtype(&self) -> &DType { + &self.dtype + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + let bits = match self.bits.try_into_mut() { + Ok(bits) => bits, + Err(bits) => { + return Err(BoolVector { + dtype: self.dtype, + bits, + validity: self.validity, + }); + } + }; + + let validity = match self.validity.try_into_mut() { + Ok(validity) => validity, + Err(validity) => { + return Err(BoolVector { + dtype: self.dtype, + bits: bits.freeze(), + validity, + }); + } + }; + + Ok(BoolVectorMut { + dtype: self.dtype, + bits, + validity, + }) + } +} diff --git a/vortex-vector/src/bool_mut.rs b/vortex-vector/src/bool_mut.rs new file mode 100644 index 00000000000..627b5394bff --- /dev/null +++ b/vortex-vector/src/bool_mut.rs @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBufferMut; +use vortex_dtype::{DType, Nullability}; +use vortex_mask::MaskMut; + +use crate::BoolVector; +use crate::ops::VectorMutOps; + +/// A mutable vector of boolean values. +pub struct BoolVectorMut { + pub(super) dtype: DType, + pub(super) bits: BitBufferMut, + pub(super) validity: MaskMut, +} + +impl BoolVectorMut { + /// Create a new mutable boolean vector with the given capacity and nullability. + pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + Self { + dtype: DType::Bool(nullability), + bits: BitBufferMut::with_capacity(capacity), + validity: MaskMut::with_capacity(capacity), + } + } +} + +impl From for crate::VectorMut { + fn from(v: BoolVectorMut) -> Self { + Self::Bool(v) + } +} + +impl VectorMutOps for BoolVectorMut { + type Immutable = BoolVector; + + fn len(&self) -> usize { + self.bits.len() + } + + fn dtype(&self) -> &DType { + &self.dtype + } + + fn capacity(&self) -> usize { + self.bits.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.bits.reserve(additional); + self.validity.reserve(additional); + } + + fn split_off(&mut self, at: usize) -> Self { + BoolVectorMut { + dtype: self.dtype.clone(), + bits: self.bits.split_off(at), + validity: self.validity.split_off(at), + } + } + + fn unsplit(&mut self, other: Self) { + self.bits.unsplit(other.bits); + self.validity.unsplit(other.validity); + } + + fn extend_from_vector(&mut self, other: &BoolVector) { + self.bits.append_buffer(&other.bits); + self.validity.append_mask(&other.validity); + } + + fn freeze(self) -> Self::Immutable { + BoolVector { + dtype: self.dtype, + bits: self.bits.freeze(), + validity: self.validity.freeze(), + } + } +} diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs new file mode 100644 index 00000000000..7ad2912a0e7 --- /dev/null +++ b/vortex-vector/src/lib.rs @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Vector types for Vortex. + +#![deny(missing_docs)] + +use vortex_dtype::DType; + +use crate::ops::{VectorMutOps, VectorOps}; + +mod bool; +mod bool_mut; +mod null; +mod null_mut; +mod ops; +mod primitive; +mod primitive_mut; +mod pvector; + +pub use bool::*; +pub use bool_mut::*; +pub use null::*; +pub use null_mut::*; +pub use primitive::*; +pub use primitive_mut::*; +pub use pvector::*; +use vortex_error::vortex_panic; + +/// An enum over all vector types. +pub enum Vector { + /// Null + Null(NullVector), + /// Bool + Bool(BoolVector), + /// Primitive + Primitive(PVector), + // Decimal + // Decimal(DecimalVector), + // String + // String(StringVector), + // Binary + // Binary(BinaryVector), + // List + // List(ListVector), + // FixedList + // FixedList(FixedListVector), + // Struct + // Struct(StructVector), + // Extension + // Extension(ExtensionVector), +} + +/// An enum over all mutable vector types. +pub enum VectorMut { + /// Null + Null(NullVectorMut), + /// Bool + Bool(BoolVectorMut), + /// Primitive + Primitive(PVectorMut), +} + +impl VectorMut { + /// Create a new mutable vector with the given capacity and dtype. + pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { + use vortex_dtype::NativePType; + match dtype { + DType::Null => NullVectorMut::new(0).into(), + DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), + DType::Primitive(ptype, nullability) => { + PVectorMut::with_capacity(capacity, *ptype, *nullability).into() + } + _ => vortex_panic!("Unsupported dtype for VectorMut"), + } + } +} + +macro_rules! match_each_vector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + Vector::Null(v) => { + let $vec = v; + $body + } + Vector::Bool(v) => { + let $vec = v; + $body + } + Vector::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +macro_rules! match_each_vector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + VectorMut::Null(v) => { + let $vec = v; + $body + } + VectorMut::Bool(v) => { + let $vec = v; + $body + } + VectorMut::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +macro_rules! match_each_vector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), VectorMut::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), VectorMut::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), VectorMut::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => vortex_panic!("Mismatched vector types"), + } + }}; +} + +macro_rules! match_each_vector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), Vector::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), Vector::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), Vector::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => vortex_panic!("Mismatched vector types"), + } + }}; +} + +impl VectorOps for Vector { + type Mutable = VectorMut; + + fn len(&self) -> usize { + match_each_vector!(self, |v| { v.len() }) + } + + fn dtype(&self) -> &DType { + match_each_vector!(self, |v| { v.dtype() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_vector!(self, |v| { + v.try_into_mut().map(VectorMut::from).map_err(Vector::from) + }) + } +} + +impl VectorMutOps for VectorMut { + type Immutable = Vector; + + fn len(&self) -> usize { + match_each_vector_mut!(self, |v| { v.len() }) + } + + fn dtype(&self) -> &DType { + match_each_vector_mut!(self, |v| { v.dtype() }) + } + + fn capacity(&self) -> usize { + match_each_vector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_vector_mut!(self, |v| { v.reserve(additional) }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_vector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_vector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_vector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_vector_mut!(self, |v| { v.freeze().into() }) + } +} diff --git a/vortex-vector/src/null.rs b/vortex-vector/src/null.rs new file mode 100644 index 00000000000..b864aaabd00 --- /dev/null +++ b/vortex-vector/src/null.rs @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::DType; + +use crate::ops::VectorOps; +use crate::{NullVectorMut, Vector}; + +/// An immutable vector of null values. +pub struct NullVector { + pub(super) len: usize, +} + +impl NullVector { + /// Creates a new `NullVector` with the given length. + pub fn new(len: usize) -> Self { + Self { len } + } +} + +impl From for Vector { + fn from(v: NullVector) -> Self { + Self::Null(v) + } +} + +impl VectorOps for NullVector { + type Mutable = NullVectorMut; + + fn len(&self) -> usize { + self.len + } + + fn dtype(&self) -> &DType { + &DType::Null + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + Ok(NullVectorMut::new(self.len)) + } +} diff --git a/vortex-vector/src/null_mut.rs b/vortex-vector/src/null_mut.rs new file mode 100644 index 00000000000..6066e0f234b --- /dev/null +++ b/vortex-vector/src/null_mut.rs @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::DType; + +use crate::ops::VectorMutOps; +use crate::{NullVector, VectorMut}; + +/// A mutable vector of null values. +pub struct NullVectorMut { + pub(super) len: usize, +} + +impl NullVectorMut { + /// Creates a new `NullVectorMut` with the given length. + pub fn new(len: usize) -> Self { + Self { len } + } +} + +impl From for VectorMut { + fn from(v: NullVectorMut) -> Self { + Self::Null(v) + } +} + +impl VectorMutOps for NullVectorMut { + type Immutable = NullVector; + + fn len(&self) -> usize { + self.len + } + + fn dtype(&self) -> &DType { + &DType::Null + } + + fn capacity(&self) -> usize { + usize::MAX + } + + fn reserve(&mut self, _additional: usize) {} + + fn split_off(&mut self, at: usize) -> Self { + let new_len = self.len - at; + self.len = at; + NullVectorMut { len: new_len } + } + + fn unsplit(&mut self, other: Self) { + self.len += other.len; + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + self.len += other.len; + } + + fn freeze(self) -> Self::Immutable { + NullVector::new(self.len) + } +} diff --git a/vortex-vector/src/ops.rs b/vortex-vector/src/ops.rs new file mode 100644 index 00000000000..6ad122c7bcf --- /dev/null +++ b/vortex-vector/src/ops.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::DType; + +use crate::{Vector, VectorMut}; + +/// Common operations for immutable vectors. +pub trait VectorOps: Into { + type Mutable: VectorMutOps; + + /// Returns the length of the vector. + fn len(&self) -> usize; + + /// Returns the data type of the vector. + fn dtype(&self) -> &DType; + + /// Try to convert self into a mutable vector. + // + // If self is the only unique strong reference, this will succeed and return a mutable vector + // with the contents of self without copying. If self is not unique, this will fail and return + // self. + fn try_into_mut(self) -> Result + where + Self: Sized; +} + +/// Common operations for mutable vectors. +pub trait VectorMutOps: Into { + type Immutable: VectorOps; + + /// Returns the length of the vector. + fn len(&self) -> usize; + + /// Returns the data type of the vector. + fn dtype(&self) -> &DType; + + /// Returns the capacity of the vector. + fn capacity(&self) -> usize; + + /// Reserves capacity for at least `additional` more elements to be inserted. + fn reserve(&mut self, additional: usize); + + /// Splits the vector into two at the given index. + /// + /// Afterward, self contains elements `[0, at)`, and the returned vector contains elements + /// `[at, capacity)`. It’s guaranteed that the memory does not move, that is, the address of + /// self does not change, and the address of the returned slice is at bytes after that. + /// + /// This is an O(1) operation that just increases the reference count and sets a few indices. + fn split_off(&mut self, at: usize) -> Self; + + /// Absorbs a mutable vector that was previously split off. + /// + /// If the two vectors were previously contiguous and not mutated in a way that causes + /// re-allocation i.e., if other was created by calling split_off on this vector, then this is + /// an O(1) operation that just decreases a reference count and sets a few indices. + /// + // Otherwise, this method degenerates to self.extend_from_vector(other.as_ref()). + fn unsplit(&mut self, other: Self); + + /// Extends the vector by appending elements from another vector. + fn extend_from_vector(&mut self, other: &Self::Immutable); + + /// Freeze the vector into an immutable one. + fn freeze(self) -> Self::Immutable; +} diff --git a/vortex-vector/src/primitive.rs b/vortex-vector/src/primitive.rs new file mode 100644 index 00000000000..9a39f571c55 --- /dev/null +++ b/vortex-vector/src/primitive.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::Buffer; +use vortex_dtype::{DType, NativePType}; +use vortex_mask::Mask; + +use crate::ops::VectorOps; +use crate::{PVector, PrimitiveVectorMut, Vector}; + +/// An immutable vector of primitive values. +pub struct PrimitiveVector { + pub(super) dtype: DType, + pub(super) elements: Buffer, + pub(super) validity: Mask, +} + +impl From> for Vector { + fn from(v: PrimitiveVector) -> Self { + Self::Primitive(PVector::from(v)) + } +} + +impl VectorOps for PrimitiveVector { + type Mutable = PrimitiveVectorMut; + + /// Returns the length of the vector. + fn len(&self) -> usize { + self.elements.len() + } + + /// Returns the data type of the vector. + fn dtype(&self) -> &DType { + &self.dtype + } + + /// Try to convert self into a mutable vector. + fn try_into_mut(self) -> Result, Self> { + let elements = match self.elements.try_into_mut() { + Ok(elements) => elements, + Err(elements) => { + return Err(PrimitiveVector { + dtype: self.dtype, + elements, + validity: self.validity, + }); + } + }; + + let validity = match self.validity.try_into_mut() { + Ok(validity) => validity, + Err(validity) => { + return Err(PrimitiveVector { + dtype: self.dtype, + elements: elements.freeze(), + validity, + }); + } + }; + + Ok(PrimitiveVectorMut { + dtype: self.dtype, + elements, + validity, + }) + } +} diff --git a/vortex-vector/src/primitive_mut.rs b/vortex-vector/src/primitive_mut.rs new file mode 100644 index 00000000000..14ef69aeada --- /dev/null +++ b/vortex-vector/src/primitive_mut.rs @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BufferMut; +use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_mask::MaskMut; + +use crate::ops::VectorMutOps; +use crate::{PVectorMut, PrimitiveVector, VectorMut}; + +/// A mutable vector of primitive values. +pub struct PrimitiveVectorMut { + pub(super) dtype: DType, + pub(super) elements: BufferMut, + pub(super) validity: MaskMut, +} + +impl PrimitiveVectorMut { + /// Create a new mutable primitive vector with the given capacity and nullability. + pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + Self { + dtype: DType::Primitive(T::PTYPE, nullability), + elements: BufferMut::with_capacity(capacity), + validity: MaskMut::with_capacity(capacity), + } + } +} + +impl From> for VectorMut { + fn from(val: PrimitiveVectorMut) -> Self { + VectorMut::Primitive(PVectorMut::from(val)) + } +} + +impl VectorMutOps for PrimitiveVectorMut { + type Immutable = PrimitiveVector; + + fn len(&self) -> usize { + self.elements.len() + } + + fn dtype(&self) -> &DType { + &self.dtype + } + + fn capacity(&self) -> usize { + self.elements.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.elements.reserve(additional); + self.validity.reserve(additional); + } + + fn split_off(&mut self, at: usize) -> Self { + PrimitiveVectorMut { + dtype: self.dtype.clone(), + elements: self.elements.split_off(at), + validity: self.validity.split_off(at), + } + } + + fn unsplit(&mut self, other: Self) { + self.elements.unsplit(other.elements); + self.validity.unsplit(other.validity); + } + + /// Extends the vector by appending elements from another vector. + fn extend_from_vector(&mut self, other: &PrimitiveVector) { + self.elements.extend_from_slice(other.elements.as_slice()); + self.validity.append_mask(&other.validity); + } + + /// Freeze the vector into an immutable one. + fn freeze(self) -> PrimitiveVector { + PrimitiveVector { + dtype: self.dtype, + elements: self.elements.freeze(), + validity: self.validity.freeze(), + } + } +} diff --git a/vortex-vector/src/pvector.rs b/vortex-vector/src/pvector.rs new file mode 100644 index 00000000000..ca21d86e02c --- /dev/null +++ b/vortex-vector/src/pvector.rs @@ -0,0 +1,493 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::half::f16; +use vortex_dtype::{DType, NativePType, Nullability, PType, PTypeUpcast}; +use vortex_error::vortex_panic; + +use crate::ops::{VectorMutOps, VectorOps}; +use crate::{PrimitiveVector, PrimitiveVectorMut, Vector}; + +/// An enum over all primitive vector types. +pub enum PVector { + /// U8 + U8(PrimitiveVector), + /// U16 + U16(PrimitiveVector), + /// U32 + U32(PrimitiveVector), + /// U64 + U64(PrimitiveVector), + /// I8 + I8(PrimitiveVector), + /// I16 + I16(PrimitiveVector), + /// I32 + I32(PrimitiveVector), + /// I64 + I64(PrimitiveVector), + /// F16 + F16(PrimitiveVector), + /// F32 + F32(PrimitiveVector), + /// F64 + F64(PrimitiveVector), +} + +/// An enum over all mutable primitive vector types. +pub enum PVectorMut { + /// U8 + U8(PrimitiveVectorMut), + /// U16 + U16(PrimitiveVectorMut), + /// U32 + U32(PrimitiveVectorMut), + /// U64 + U64(PrimitiveVectorMut), + /// I8 + I8(PrimitiveVectorMut), + /// I16 + I16(PrimitiveVectorMut), + /// I32 + I32(PrimitiveVectorMut), + /// I64 + I64(PrimitiveVectorMut), + /// F16 + F16(PrimitiveVectorMut), + /// F32 + F32(PrimitiveVectorMut), + /// F64 + F64(PrimitiveVectorMut), +} + +impl PVectorMut { + /// Create a new mutable primitive vector with the given capacity, primitive type, and nullability. + pub fn with_capacity(capacity: usize, ptype: PType, nullability: Nullability) -> Self { + match ptype { + PType::U8 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::U16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::U32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::U64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::I8 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::I16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::I32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::I64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::F16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::F32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + PType::F64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), + } + } +} + +macro_rules! match_each_pvector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PVector::U8(v) => { + let $vec = v; + $body + } + PVector::U16(v) => { + let $vec = v; + $body + } + PVector::U32(v) => { + let $vec = v; + $body + } + PVector::U64(v) => { + let $vec = v; + $body + } + PVector::I8(v) => { + let $vec = v; + $body + } + PVector::I16(v) => { + let $vec = v; + $body + } + PVector::I32(v) => { + let $vec = v; + $body + } + PVector::I64(v) => { + let $vec = v; + $body + } + PVector::F16(v) => { + let $vec = v; + $body + } + PVector::F32(v) => { + let $vec = v; + $body + } + PVector::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +macro_rules! match_each_pvector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PVectorMut::U8(v) => { + let $vec = v; + $body + } + PVectorMut::U16(v) => { + let $vec = v; + $body + } + PVectorMut::U32(v) => { + let $vec = v; + $body + } + PVectorMut::U64(v) => { + let $vec = v; + $body + } + PVectorMut::I8(v) => { + let $vec = v; + $body + } + PVectorMut::I16(v) => { + let $vec = v; + $body + } + PVectorMut::I32(v) => { + let $vec = v; + $body + } + PVectorMut::I64(v) => { + let $vec = v; + $body + } + PVectorMut::F16(v) => { + let $vec = v; + $body + } + PVectorMut::F32(v) => { + let $vec = v; + $body + } + PVectorMut::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +macro_rules! match_each_pvector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PVectorMut::U8(a), PVectorMut::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U16(a), PVectorMut::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U32(a), PVectorMut::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U64(a), PVectorMut::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I8(a), PVectorMut::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I16(a), PVectorMut::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I32(a), PVectorMut::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I64(a), PVectorMut::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F16(a), PVectorMut::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F32(a), PVectorMut::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F64(a), PVectorMut::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => vortex_panic!("Mismatched primitive vector types"), + } + }}; +} + +macro_rules! match_each_pvector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PVectorMut::U8(a), PVector::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U16(a), PVector::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U32(a), PVector::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::U64(a), PVector::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I8(a), PVector::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I16(a), PVector::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I32(a), PVector::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::I64(a), PVector::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F16(a), PVector::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F32(a), PVector::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PVectorMut::F64(a), PVector::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => vortex_panic!("Mismatched primitive vector types"), + } + }}; +} + +impl From for Vector { + fn from(v: PVector) -> Self { + Self::Primitive(v) + } +} + +impl VectorOps for PVector { + type Mutable = PVectorMut; + + fn len(&self) -> usize { + match_each_pvector!(self, |v| { v.len() }) + } + + fn dtype(&self) -> &DType { + match_each_pvector!(self, |v| { v.dtype() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_pvector!(self, |v| { + v.try_into_mut() + .map(PVectorMut::from) + .map_err(PVector::from) + }) + } +} + +impl From for crate::VectorMut { + fn from(v: PVectorMut) -> Self { + Self::Primitive(v) + } +} + +impl VectorMutOps for PVectorMut { + type Immutable = PVector; + + fn len(&self) -> usize { + match_each_pvector_mut!(self, |v| { v.len() }) + } + + fn dtype(&self) -> &DType { + match_each_pvector_mut!(self, |v| { v.dtype() }) + } + + fn capacity(&self) -> usize { + match_each_pvector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_pvector_mut!(self, |v| { v.reserve(additional) }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_pvector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_pvector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_pvector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_pvector_mut!(self, |v| { v.freeze().into() }) + } +} + +// From impls for PVector variants +impl From> for PVector { + fn from(v: PrimitiveVector) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PVector { + type Input = PrimitiveVector; + + fn from_u8(input: Self::Input) -> Self { + PVector::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PVector::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PVector::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PVector::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PVector::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PVector::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PVector::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PVector::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PVector::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PVector::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PVector::F64(input) + } +} + +// From impls for PVectorMut variants +impl From> for PVectorMut { + fn from(v: PrimitiveVectorMut) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PVectorMut { + type Input = PrimitiveVectorMut; + + fn from_u8(input: Self::Input) -> Self { + PVectorMut::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PVectorMut::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PVectorMut::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PVectorMut::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PVectorMut::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PVectorMut::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PVectorMut::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PVectorMut::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PVectorMut::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PVectorMut::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PVectorMut::F64(input) + } +} From 1414b1c7c8e607e9eefa51a061f57fd1bc83578e Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 17 Oct 2025 17:35:43 -0400 Subject: [PATCH 2/3] reorganize modules Signed-off-by: Connor Tsui --- vortex-vector/src/bool/mod.rs | 22 + vortex-vector/src/{bool.rs => bool/vector.rs} | 54 +- vortex-vector/src/bool/vector_mut.rs | 100 ++++ vortex-vector/src/bool_mut.rs | 80 --- vortex-vector/src/lib.rs | 224 +------- vortex-vector/src/null/mod.rs | 22 + vortex-vector/src/{null.rs => null/vector.rs} | 23 +- .../src/{null_mut.rs => null/vector_mut.rs} | 52 +- vortex-vector/src/ops.rs | 67 --- vortex-vector/src/primitive.rs | 67 --- vortex-vector/src/primitive/generic.rs | 64 +++ vortex-vector/src/primitive/generic_mut.rs | 108 ++++ vortex-vector/src/primitive/macros.rs | 238 +++++++++ vortex-vector/src/primitive/mod.rs | 16 + vortex-vector/src/primitive/vector.rs | 177 +++++++ vortex-vector/src/primitive/vector_mut.rs | 105 ++++ vortex-vector/src/primitive_mut.rs | 82 --- vortex-vector/src/private.rs | 27 + vortex-vector/src/pvector.rs | 493 ------------------ vortex-vector/src/vector/macros.rs | 94 ++++ vortex-vector/src/vector/mod.rs | 62 +++ vortex-vector/src/vector/ops.rs | 174 +++++++ 22 files changed, 1287 insertions(+), 1064 deletions(-) create mode 100644 vortex-vector/src/bool/mod.rs rename vortex-vector/src/{bool.rs => bool/vector.rs} (50%) create mode 100644 vortex-vector/src/bool/vector_mut.rs delete mode 100644 vortex-vector/src/bool_mut.rs create mode 100644 vortex-vector/src/null/mod.rs rename vortex-vector/src/{null.rs => null/vector.rs} (72%) rename vortex-vector/src/{null_mut.rs => null/vector_mut.rs} (61%) delete mode 100644 vortex-vector/src/ops.rs delete mode 100644 vortex-vector/src/primitive.rs create mode 100644 vortex-vector/src/primitive/generic.rs create mode 100644 vortex-vector/src/primitive/generic_mut.rs create mode 100644 vortex-vector/src/primitive/macros.rs create mode 100644 vortex-vector/src/primitive/mod.rs create mode 100644 vortex-vector/src/primitive/vector.rs create mode 100644 vortex-vector/src/primitive/vector_mut.rs delete mode 100644 vortex-vector/src/primitive_mut.rs create mode 100644 vortex-vector/src/private.rs delete mode 100644 vortex-vector/src/pvector.rs create mode 100644 vortex-vector/src/vector/macros.rs create mode 100644 vortex-vector/src/vector/mod.rs create mode 100644 vortex-vector/src/vector/ops.rs diff --git a/vortex-vector/src/bool/mod.rs b/vortex-vector/src/bool/mod.rs new file mode 100644 index 00000000000..8541fc43397 --- /dev/null +++ b/vortex-vector/src/bool/mod.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod vector; +pub use vector::BoolVector; + +mod vector_mut; +pub use vector_mut::BoolVectorMut; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: BoolVector) -> Self { + Self::Bool(v) + } +} + +impl From for VectorMut { + fn from(v: BoolVectorMut) -> Self { + Self::Bool(v) + } +} diff --git a/vortex-vector/src/bool.rs b/vortex-vector/src/bool/vector.rs similarity index 50% rename from vortex-vector/src/bool.rs rename to vortex-vector/src/bool/vector.rs index a46d25a9e56..b63e05d993b 100644 --- a/vortex-vector/src/bool.rs +++ b/vortex-vector/src/bool/vector.rs @@ -2,34 +2,31 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_buffer::BitBuffer; -use vortex_dtype::DType; +use vortex_dtype::{DType, Nullability}; use vortex_mask::Mask; -use crate::ops::VectorOps; -use crate::{BoolVectorMut, Vector}; +use super::BoolVectorMut; +use crate::VectorOps; /// An immutable vector of boolean values. pub struct BoolVector { - pub(super) dtype: DType, pub(super) bits: BitBuffer, - pub(super) validity: Mask, -} - -impl From for Vector { - fn from(v: BoolVector) -> Self { - Self::Bool(v) - } + pub(super) validity: Option, } impl VectorOps for BoolVector { type Mutable = BoolVectorMut; - fn len(&self) -> usize { - self.bits.len() + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Bool(self.nullability()) } - fn dtype(&self) -> &DType { - &self.dtype + fn len(&self) -> usize { + self.bits.len() } fn try_into_mut(self) -> Result @@ -40,28 +37,25 @@ impl VectorOps for BoolVector { Ok(bits) => bits, Err(bits) => { return Err(BoolVector { - dtype: self.dtype, bits, validity: self.validity, }); } }; - let validity = match self.validity.try_into_mut() { - Ok(validity) => validity, - Err(validity) => { - return Err(BoolVector { - dtype: self.dtype, - bits: bits.freeze(), - validity, - }); - } + let validity = match self.validity { + Some(v) => match v.try_into_mut() { + Ok(v) => Some(v), + Err(v) => { + return Err(BoolVector { + bits: bits.freeze(), + validity: Some(v), + }); + } + }, + None => None, }; - Ok(BoolVectorMut { - dtype: self.dtype, - bits, - validity, - }) + Ok(BoolVectorMut { bits, validity }) } } diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs new file mode 100644 index 00000000000..43231f3dc40 --- /dev/null +++ b/vortex-vector/src/bool/vector_mut.rs @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBufferMut; +use vortex_dtype::{DType, Nullability}; +use vortex_mask::MaskMut; + +use super::BoolVector; +use crate::VectorMutOps; + +/// A mutable vector of boolean values. +pub struct BoolVectorMut { + pub(super) bits: BitBufferMut, + pub(super) validity: Option, +} + +impl BoolVectorMut { + /// Create a new mutable boolean vector with the given capacity and nullability. + pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + let validity = match nullability { + Nullability::NonNullable => None, + Nullability::Nullable => Some(MaskMut::with_capacity(capacity)), + }; + + Self { + bits: BitBufferMut::with_capacity(capacity), + validity, + } + } +} + +impl VectorMutOps for BoolVectorMut { + type Immutable = BoolVector; + + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Bool(self.nullability()) + } + + fn len(&self) -> usize { + self.bits.len() + } + + fn capacity(&self) -> usize { + self.bits.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.bits.reserve(additional); + if let Some(v) = self.validity.as_mut() { + v.reserve(additional); + } + } + + fn extend_from_vector(&mut self, other: &BoolVector) { + self.bits.append_buffer(&other.bits); + match (&mut self.validity, &other.validity) { + (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), + (Some(self_v), None) => self_v.append_n(true, other.bits.len()), + (None, Some(other_v)) => { + let mut new_validity = MaskMut::new_true(self.bits.len() - other.bits.len()); + new_validity.append_mask(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } + + fn freeze(self) -> Self::Immutable { + BoolVector { + bits: self.bits.freeze(), + validity: self.validity.map(|v| v.freeze()), + } + } + + fn split_off(&mut self, at: usize) -> Self { + BoolVectorMut { + bits: self.bits.split_off(at), + validity: self.validity.as_mut().map(|v| v.split_off(at)), + } + } + + fn unsplit(&mut self, other: Self) { + let other_len = other.bits.len(); + self.bits.unsplit(other.bits); + match (&mut self.validity, other.validity) { + (Some(self_v), Some(other_v)) => self_v.unsplit(other_v), + (Some(self_v), None) => self_v.append_n(true, other_len), + (None, Some(other_v)) => { + let mut new_validity = MaskMut::new_true(self.bits.len() - other_len); + new_validity.unsplit(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } +} diff --git a/vortex-vector/src/bool_mut.rs b/vortex-vector/src/bool_mut.rs deleted file mode 100644 index 627b5394bff..00000000000 --- a/vortex-vector/src/bool_mut.rs +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_buffer::BitBufferMut; -use vortex_dtype::{DType, Nullability}; -use vortex_mask::MaskMut; - -use crate::BoolVector; -use crate::ops::VectorMutOps; - -/// A mutable vector of boolean values. -pub struct BoolVectorMut { - pub(super) dtype: DType, - pub(super) bits: BitBufferMut, - pub(super) validity: MaskMut, -} - -impl BoolVectorMut { - /// Create a new mutable boolean vector with the given capacity and nullability. - pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { - Self { - dtype: DType::Bool(nullability), - bits: BitBufferMut::with_capacity(capacity), - validity: MaskMut::with_capacity(capacity), - } - } -} - -impl From for crate::VectorMut { - fn from(v: BoolVectorMut) -> Self { - Self::Bool(v) - } -} - -impl VectorMutOps for BoolVectorMut { - type Immutable = BoolVector; - - fn len(&self) -> usize { - self.bits.len() - } - - fn dtype(&self) -> &DType { - &self.dtype - } - - fn capacity(&self) -> usize { - self.bits.capacity() - } - - fn reserve(&mut self, additional: usize) { - self.bits.reserve(additional); - self.validity.reserve(additional); - } - - fn split_off(&mut self, at: usize) -> Self { - BoolVectorMut { - dtype: self.dtype.clone(), - bits: self.bits.split_off(at), - validity: self.validity.split_off(at), - } - } - - fn unsplit(&mut self, other: Self) { - self.bits.unsplit(other.bits); - self.validity.unsplit(other.validity); - } - - fn extend_from_vector(&mut self, other: &BoolVector) { - self.bits.append_buffer(&other.bits); - self.validity.append_mask(&other.validity); - } - - fn freeze(self) -> Self::Immutable { - BoolVector { - dtype: self.dtype, - bits: self.bits.freeze(), - validity: self.validity.freeze(), - } - } -} diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index 7ad2912a0e7..ce7ec5c6e90 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -1,222 +1,26 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Vector types for Vortex. +//! Mutable decompressed (canonical) vectors for Vortex. +//! +//! TODO docs. #![deny(missing_docs)] +// #![warn(clippy::missing_docs_in_private_items)] +// #![warn(clippy::missing_errors_doc)] +// #![warn(clippy::missing_panics_doc)] +// #![warn(clippy::missing_safety_doc)] -use vortex_dtype::DType; - -use crate::ops::{VectorMutOps, VectorOps}; +mod vector; +pub use vector::ops::{VectorMutOps, VectorOps}; +pub use vector::{Vector, VectorMut}; mod bool; -mod bool_mut; mod null; -mod null_mut; -mod ops; mod primitive; -mod primitive_mut; -mod pvector; - -pub use bool::*; -pub use bool_mut::*; -pub use null::*; -pub use null_mut::*; -pub use primitive::*; -pub use primitive_mut::*; -pub use pvector::*; -use vortex_error::vortex_panic; - -/// An enum over all vector types. -pub enum Vector { - /// Null - Null(NullVector), - /// Bool - Bool(BoolVector), - /// Primitive - Primitive(PVector), - // Decimal - // Decimal(DecimalVector), - // String - // String(StringVector), - // Binary - // Binary(BinaryVector), - // List - // List(ListVector), - // FixedList - // FixedList(FixedListVector), - // Struct - // Struct(StructVector), - // Extension - // Extension(ExtensionVector), -} - -/// An enum over all mutable vector types. -pub enum VectorMut { - /// Null - Null(NullVectorMut), - /// Bool - Bool(BoolVectorMut), - /// Primitive - Primitive(PVectorMut), -} - -impl VectorMut { - /// Create a new mutable vector with the given capacity and dtype. - pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { - use vortex_dtype::NativePType; - match dtype { - DType::Null => NullVectorMut::new(0).into(), - DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), - DType::Primitive(ptype, nullability) => { - PVectorMut::with_capacity(capacity, *ptype, *nullability).into() - } - _ => vortex_panic!("Unsupported dtype for VectorMut"), - } - } -} - -macro_rules! match_each_vector { - ($self:expr, | $vec:ident | $body:block) => {{ - match $self { - Vector::Null(v) => { - let $vec = v; - $body - } - Vector::Bool(v) => { - let $vec = v; - $body - } - Vector::Primitive(v) => { - let $vec = v; - $body - } - } - }}; -} - -macro_rules! match_each_vector_mut { - ($self:expr, | $vec:ident | $body:block) => {{ - match $self { - VectorMut::Null(v) => { - let $vec = v; - $body - } - VectorMut::Bool(v) => { - let $vec = v; - $body - } - VectorMut::Primitive(v) => { - let $vec = v; - $body - } - } - }}; -} - -macro_rules! match_each_vector_mut_pair { - ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ - match ($self, $other) { - (VectorMut::Null(a), VectorMut::Null(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (VectorMut::Bool(a), VectorMut::Bool(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (VectorMut::Primitive(a), VectorMut::Primitive(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - _ => vortex_panic!("Mismatched vector types"), - } - }}; -} - -macro_rules! match_each_vector_mut_immut_pair { - ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ - match ($self, $other) { - (VectorMut::Null(a), Vector::Null(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (VectorMut::Bool(a), Vector::Bool(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (VectorMut::Primitive(a), Vector::Primitive(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - _ => vortex_panic!("Mismatched vector types"), - } - }}; -} - -impl VectorOps for Vector { - type Mutable = VectorMut; - - fn len(&self) -> usize { - match_each_vector!(self, |v| { v.len() }) - } - - fn dtype(&self) -> &DType { - match_each_vector!(self, |v| { v.dtype() }) - } - - fn try_into_mut(self) -> Result - where - Self: Sized, - { - match_each_vector!(self, |v| { - v.try_into_mut().map(VectorMut::from).map_err(Vector::from) - }) - } -} - -impl VectorMutOps for VectorMut { - type Immutable = Vector; - - fn len(&self) -> usize { - match_each_vector_mut!(self, |v| { v.len() }) - } - - fn dtype(&self) -> &DType { - match_each_vector_mut!(self, |v| { v.dtype() }) - } - - fn capacity(&self) -> usize { - match_each_vector_mut!(self, |v| { v.capacity() }) - } - - fn reserve(&mut self, additional: usize) { - match_each_vector_mut!(self, |v| { v.reserve(additional) }) - } - - fn split_off(&mut self, at: usize) -> Self { - match_each_vector_mut!(self, |v| { v.split_off(at).into() }) - } - - fn unsplit(&mut self, other: Self) { - match_each_vector_mut_pair!(self, other, |a, b| { - a.unsplit(b); - }); - } - fn extend_from_vector(&mut self, other: &Self::Immutable) { - match_each_vector_mut_immut_pair!(self, other, |a, b| { - a.extend_from_vector(b); - }); - } +pub use bool::{BoolVector, BoolVectorMut}; +pub use null::{NullVector, NullVectorMut}; +pub use primitive::{GenericPVector, GenericPVectorMut, PrimitiveVector, PrimitiveVectorMut}; - fn freeze(self) -> Self::Immutable { - match_each_vector_mut!(self, |v| { v.freeze().into() }) - } -} +mod private; diff --git a/vortex-vector/src/null/mod.rs b/vortex-vector/src/null/mod.rs new file mode 100644 index 00000000000..ebb6f129a36 --- /dev/null +++ b/vortex-vector/src/null/mod.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod vector; +pub use vector::NullVector; + +mod vector_mut; +pub use vector_mut::NullVectorMut; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: NullVector) -> Self { + Self::Null(v) + } +} + +impl From for VectorMut { + fn from(v: NullVectorMut) -> Self { + Self::Null(v) + } +} diff --git a/vortex-vector/src/null.rs b/vortex-vector/src/null/vector.rs similarity index 72% rename from vortex-vector/src/null.rs rename to vortex-vector/src/null/vector.rs index b864aaabd00..a481ad442b5 100644 --- a/vortex-vector/src/null.rs +++ b/vortex-vector/src/null/vector.rs @@ -1,10 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_dtype::DType; +use vortex_dtype::{DType, Nullability}; -use crate::ops::VectorOps; -use crate::{NullVectorMut, Vector}; +use crate::{NullVectorMut, VectorOps}; /// An immutable vector of null values. pub struct NullVector { @@ -18,21 +17,19 @@ impl NullVector { } } -impl From for Vector { - fn from(v: NullVector) -> Self { - Self::Null(v) - } -} - impl VectorOps for NullVector { type Mutable = NullVectorMut; - fn len(&self) -> usize { - self.len + fn nullability(&self) -> Nullability { + Nullability::Nullable } - fn dtype(&self) -> &DType { - &DType::Null + fn dtype(&self) -> DType { + DType::Null + } + + fn len(&self) -> usize { + self.len } fn try_into_mut(self) -> Result diff --git a/vortex-vector/src/null_mut.rs b/vortex-vector/src/null/vector_mut.rs similarity index 61% rename from vortex-vector/src/null_mut.rs rename to vortex-vector/src/null/vector_mut.rs index 6066e0f234b..720dae2c9b5 100644 --- a/vortex-vector/src/null_mut.rs +++ b/vortex-vector/src/null/vector_mut.rs @@ -1,10 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_dtype::DType; +use vortex_dtype::{DType, Nullability}; -use crate::ops::VectorMutOps; -use crate::{NullVector, VectorMut}; +use super::NullVector; +use crate::VectorMutOps; /// A mutable vector of null values. pub struct NullVectorMut { @@ -18,30 +18,46 @@ impl NullVectorMut { } } -impl From for VectorMut { - fn from(v: NullVectorMut) -> Self { - Self::Null(v) - } -} - impl VectorMutOps for NullVectorMut { type Immutable = NullVector; - fn len(&self) -> usize { - self.len + fn nullability(&self) -> Nullability { + Nullability::Nullable + } + + fn dtype(&self) -> DType { + DType::Null } - fn dtype(&self) -> &DType { - &DType::Null + fn len(&self) -> usize { + self.len } fn capacity(&self) -> usize { usize::MAX } - fn reserve(&mut self, _additional: usize) {} + fn reserve(&mut self, _additional: usize) { + // We do not allocate memory for `NullVector`, so this is a no-op. + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + self.len += other.len; + } + + fn freeze(self) -> Self::Immutable { + NullVector::new(self.len) + } fn split_off(&mut self, at: usize) -> Self { + // TODO(connor): This is wrong (https://docs.rs/bytes/latest/src/bytes/bytes_mut.rs.html#320-335) + assert!( + at <= self.capacity(), + "split_off out of bounds: {:?} <= {:?}", + at, + self.capacity(), + ); + let new_len = self.len - at; self.len = at; NullVectorMut { len: new_len } @@ -50,12 +66,4 @@ impl VectorMutOps for NullVectorMut { fn unsplit(&mut self, other: Self) { self.len += other.len; } - - fn extend_from_vector(&mut self, other: &Self::Immutable) { - self.len += other.len; - } - - fn freeze(self) -> Self::Immutable { - NullVector::new(self.len) - } } diff --git a/vortex-vector/src/ops.rs b/vortex-vector/src/ops.rs deleted file mode 100644 index 6ad122c7bcf..00000000000 --- a/vortex-vector/src/ops.rs +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_dtype::DType; - -use crate::{Vector, VectorMut}; - -/// Common operations for immutable vectors. -pub trait VectorOps: Into { - type Mutable: VectorMutOps; - - /// Returns the length of the vector. - fn len(&self) -> usize; - - /// Returns the data type of the vector. - fn dtype(&self) -> &DType; - - /// Try to convert self into a mutable vector. - // - // If self is the only unique strong reference, this will succeed and return a mutable vector - // with the contents of self without copying. If self is not unique, this will fail and return - // self. - fn try_into_mut(self) -> Result - where - Self: Sized; -} - -/// Common operations for mutable vectors. -pub trait VectorMutOps: Into { - type Immutable: VectorOps; - - /// Returns the length of the vector. - fn len(&self) -> usize; - - /// Returns the data type of the vector. - fn dtype(&self) -> &DType; - - /// Returns the capacity of the vector. - fn capacity(&self) -> usize; - - /// Reserves capacity for at least `additional` more elements to be inserted. - fn reserve(&mut self, additional: usize); - - /// Splits the vector into two at the given index. - /// - /// Afterward, self contains elements `[0, at)`, and the returned vector contains elements - /// `[at, capacity)`. It’s guaranteed that the memory does not move, that is, the address of - /// self does not change, and the address of the returned slice is at bytes after that. - /// - /// This is an O(1) operation that just increases the reference count and sets a few indices. - fn split_off(&mut self, at: usize) -> Self; - - /// Absorbs a mutable vector that was previously split off. - /// - /// If the two vectors were previously contiguous and not mutated in a way that causes - /// re-allocation i.e., if other was created by calling split_off on this vector, then this is - /// an O(1) operation that just decreases a reference count and sets a few indices. - /// - // Otherwise, this method degenerates to self.extend_from_vector(other.as_ref()). - fn unsplit(&mut self, other: Self); - - /// Extends the vector by appending elements from another vector. - fn extend_from_vector(&mut self, other: &Self::Immutable); - - /// Freeze the vector into an immutable one. - fn freeze(self) -> Self::Immutable; -} diff --git a/vortex-vector/src/primitive.rs b/vortex-vector/src/primitive.rs deleted file mode 100644 index 9a39f571c55..00000000000 --- a/vortex-vector/src/primitive.rs +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_buffer::Buffer; -use vortex_dtype::{DType, NativePType}; -use vortex_mask::Mask; - -use crate::ops::VectorOps; -use crate::{PVector, PrimitiveVectorMut, Vector}; - -/// An immutable vector of primitive values. -pub struct PrimitiveVector { - pub(super) dtype: DType, - pub(super) elements: Buffer, - pub(super) validity: Mask, -} - -impl From> for Vector { - fn from(v: PrimitiveVector) -> Self { - Self::Primitive(PVector::from(v)) - } -} - -impl VectorOps for PrimitiveVector { - type Mutable = PrimitiveVectorMut; - - /// Returns the length of the vector. - fn len(&self) -> usize { - self.elements.len() - } - - /// Returns the data type of the vector. - fn dtype(&self) -> &DType { - &self.dtype - } - - /// Try to convert self into a mutable vector. - fn try_into_mut(self) -> Result, Self> { - let elements = match self.elements.try_into_mut() { - Ok(elements) => elements, - Err(elements) => { - return Err(PrimitiveVector { - dtype: self.dtype, - elements, - validity: self.validity, - }); - } - }; - - let validity = match self.validity.try_into_mut() { - Ok(validity) => validity, - Err(validity) => { - return Err(PrimitiveVector { - dtype: self.dtype, - elements: elements.freeze(), - validity, - }); - } - }; - - Ok(PrimitiveVectorMut { - dtype: self.dtype, - elements, - validity, - }) - } -} diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs new file mode 100644 index 00000000000..3ce671e1a5b --- /dev/null +++ b/vortex-vector/src/primitive/generic.rs @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::Buffer; +use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_mask::Mask; + +use crate::{GenericPVectorMut, PrimitiveVector, Vector, VectorOps}; + +/// An immutable vector of generic primitive values. +pub struct GenericPVector { + pub(super) elements: Buffer, + pub(super) validity: Option, +} + +impl From> for Vector { + fn from(v: GenericPVector) -> Self { + Self::Primitive(PrimitiveVector::from(v)) + } +} + +impl VectorOps for GenericPVector { + type Mutable = GenericPVectorMut; + + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Primitive(T::PTYPE, self.nullability()) + } + + fn len(&self) -> usize { + self.elements.len() + } + + /// Try to convert self into a mutable vector. + fn try_into_mut(self) -> Result, Self> { + let elements = match self.elements.try_into_mut() { + Ok(elements) => elements, + Err(elements) => { + return Err(GenericPVector { + elements, + validity: self.validity, + }); + } + }; + + let validity = match self.validity { + Some(v) => match v.try_into_mut() { + Ok(v) => Some(v), + Err(v) => { + return Err(GenericPVector { + elements: elements.freeze(), + validity: Some(v), + }); + } + }, + None => None, + }; + + Ok(GenericPVectorMut { elements, validity }) + } +} diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs new file mode 100644 index 00000000000..c2cd65681b4 --- /dev/null +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BufferMut; +use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_mask::MaskMut; + +use crate::{GenericPVector, PrimitiveVectorMut, VectorMut, VectorMutOps}; + +/// A mutable vector of primitive values. +pub struct GenericPVectorMut { + pub(super) elements: BufferMut, + pub(super) validity: Option, +} + +impl GenericPVectorMut { + /// Create a new mutable primitive vector with the given capacity and nullability. + pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + let validity = match nullability { + Nullability::NonNullable => None, + Nullability::Nullable => Some(MaskMut::with_capacity(capacity)), + }; + + Self { + elements: BufferMut::with_capacity(capacity), + validity, + } + } +} + +impl From> for VectorMut { + fn from(val: GenericPVectorMut) -> Self { + VectorMut::Primitive(PrimitiveVectorMut::from(val)) + } +} + +impl VectorMutOps for GenericPVectorMut { + type Immutable = GenericPVector; + + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Primitive(T::PTYPE, self.nullability()) + } + + fn len(&self) -> usize { + self.elements.len() + } + + fn capacity(&self) -> usize { + self.elements.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.elements.reserve(additional); + if let Some(v) = self.validity.as_mut() { + v.reserve(additional); + } + } + + /// Extends the vector by appending elements from another vector. + fn extend_from_vector(&mut self, other: &GenericPVector) { + self.elements.extend_from_slice(other.elements.as_slice()); + match (&mut self.validity, &other.validity) { + (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), + (Some(self_v), None) => self_v.append_n(true, other.elements.len()), + (None, Some(other_v)) => { + let mut new_validity = + MaskMut::new_true(self.elements.len() - other.elements.len()); + new_validity.append_mask(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } + + /// Freeze the vector into an immutable one. + fn freeze(self) -> GenericPVector { + GenericPVector { + elements: self.elements.freeze(), + validity: self.validity.map(|v| v.freeze()), + } + } + + fn split_off(&mut self, at: usize) -> Self { + GenericPVectorMut { + elements: self.elements.split_off(at), + validity: self.validity.as_mut().map(|v| v.split_off(at)), + } + } + + fn unsplit(&mut self, other: Self) { + let other_len = other.elements.len(); + self.elements.unsplit(other.elements); + match (&mut self.validity, other.validity) { + (Some(self_v), Some(other_v)) => self_v.unsplit(other_v), + (Some(self_v), None) => self_v.append_n(true, other_len), + (None, Some(other_v)) => { + let mut new_validity = MaskMut::new_true(self.elements.len() - other_len); + new_validity.unsplit(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } +} diff --git a/vortex-vector/src/primitive/macros.rs b/vortex-vector/src/primitive/macros.rs new file mode 100644 index 00000000000..8d9b5b0195b --- /dev/null +++ b/vortex-vector/src/primitive/macros.rs @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_pvector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PrimitiveVector::U8(v) => { + let $vec = v; + $body + } + PrimitiveVector::U16(v) => { + let $vec = v; + $body + } + PrimitiveVector::U32(v) => { + let $vec = v; + $body + } + PrimitiveVector::U64(v) => { + let $vec = v; + $body + } + PrimitiveVector::I8(v) => { + let $vec = v; + $body + } + PrimitiveVector::I16(v) => { + let $vec = v; + $body + } + PrimitiveVector::I32(v) => { + let $vec = v; + $body + } + PrimitiveVector::I64(v) => { + let $vec = v; + $body + } + PrimitiveVector::F16(v) => { + let $vec = v; + $body + } + PrimitiveVector::F32(v) => { + let $vec = v; + $body + } + PrimitiveVector::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_pvector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PrimitiveVectorMut::U8(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::U16(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::U32(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::U64(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I8(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I16(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I32(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I64(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::F16(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::F32(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_pvector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PrimitiveVectorMut::U8(a), PrimitiveVectorMut::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U16(a), PrimitiveVectorMut::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U32(a), PrimitiveVectorMut::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U64(a), PrimitiveVectorMut::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I8(a), PrimitiveVectorMut::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I16(a), PrimitiveVectorMut::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I32(a), PrimitiveVectorMut::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I64(a), PrimitiveVectorMut::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F16(a), PrimitiveVectorMut::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F32(a), PrimitiveVectorMut::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F64(a), PrimitiveVectorMut::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched primitive vector types"), + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_pvector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PrimitiveVectorMut::U8(a), PrimitiveVector::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U16(a), PrimitiveVector::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U32(a), PrimitiveVector::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U64(a), PrimitiveVector::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I8(a), PrimitiveVector::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I16(a), PrimitiveVector::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I32(a), PrimitiveVector::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I64(a), PrimitiveVector::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F16(a), PrimitiveVector::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F32(a), PrimitiveVector::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F64(a), PrimitiveVector::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched primitive vector types"), + } + }}; +} diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs new file mode 100644 index 00000000000..25c901aba14 --- /dev/null +++ b/vortex-vector/src/primitive/mod.rs @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod generic; +pub use generic::GenericPVector; + +mod generic_mut; +pub use generic_mut::GenericPVectorMut; + +mod vector; +pub use vector::PrimitiveVector; + +mod vector_mut; +pub use vector_mut::PrimitiveVectorMut; + +mod macros; diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs new file mode 100644 index 00000000000..2d803aeaca6 --- /dev/null +++ b/vortex-vector/src/primitive/vector.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::half::f16; +use vortex_dtype::{DType, NativePType, Nullability, PTypeUpcast}; + +use super::{GenericPVector, GenericPVectorMut, PrimitiveVectorMut}; +use crate::{Vector, VectorOps, match_each_pvector}; + +/// An enum over all primitive vector types. +pub enum PrimitiveVector { + /// U8 + U8(GenericPVector), + /// U16 + U16(GenericPVector), + /// U32 + U32(GenericPVector), + /// U64 + U64(GenericPVector), + /// I8 + I8(GenericPVector), + /// I16 + I16(GenericPVector), + /// I32 + I32(GenericPVector), + /// I64 + I64(GenericPVector), + /// F16 + F16(GenericPVector), + /// F32 + F32(GenericPVector), + /// F64 + F64(GenericPVector), +} + +impl From for Vector { + fn from(v: PrimitiveVector) -> Self { + Self::Primitive(v) + } +} + +impl VectorOps for PrimitiveVector { + type Mutable = PrimitiveVectorMut; + + fn nullability(&self) -> Nullability { + match_each_pvector!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_pvector!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_pvector!(self, |v| { v.len() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_pvector!(self, |v| { + v.try_into_mut() + .map(PrimitiveVectorMut::from) + .map_err(PrimitiveVector::from) + }) + } +} + +// From impls for PrimitiveVector variants +impl From> for PrimitiveVector { + fn from(v: GenericPVector) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PrimitiveVector { + type Input = GenericPVector; + + fn from_u8(input: Self::Input) -> Self { + PrimitiveVector::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PrimitiveVector::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PrimitiveVector::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PrimitiveVector::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PrimitiveVector::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PrimitiveVector::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PrimitiveVector::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PrimitiveVector::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PrimitiveVector::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PrimitiveVector::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PrimitiveVector::F64(input) + } +} + +// From impls for PrimitiveVectorMut variants +impl From> for PrimitiveVectorMut { + fn from(v: GenericPVectorMut) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PrimitiveVectorMut { + type Input = GenericPVectorMut; + + fn from_u8(input: Self::Input) -> Self { + PrimitiveVectorMut::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PrimitiveVectorMut::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PrimitiveVectorMut::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PrimitiveVectorMut::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PrimitiveVectorMut::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PrimitiveVectorMut::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PrimitiveVectorMut::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PrimitiveVectorMut::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PrimitiveVectorMut::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PrimitiveVectorMut::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PrimitiveVectorMut::F64(input) + } +} diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs new file mode 100644 index 00000000000..c2365a22eb7 --- /dev/null +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::half::f16; +use vortex_dtype::{DType, Nullability, PType}; + +use crate::{ + GenericPVectorMut, PrimitiveVector, VectorMut, VectorMutOps, match_each_pvector_mut, + match_each_pvector_mut_immut_pair, match_each_pvector_mut_pair, +}; + +/// An enum over all mutable primitive vector types. +pub enum PrimitiveVectorMut { + /// U8 + U8(GenericPVectorMut), + /// U16 + U16(GenericPVectorMut), + /// U32 + U32(GenericPVectorMut), + /// U64 + U64(GenericPVectorMut), + /// I8 + I8(GenericPVectorMut), + /// I16 + I16(GenericPVectorMut), + /// I32 + I32(GenericPVectorMut), + /// I64 + I64(GenericPVectorMut), + /// F16 + F16(GenericPVectorMut), + /// F32 + F32(GenericPVectorMut), + /// F64 + F64(GenericPVectorMut), +} + +impl PrimitiveVectorMut { + /// Create a new mutable primitive vector with the given capacity, primitive type, and nullability. + pub fn with_capacity(capacity: usize, ptype: PType, nullability: Nullability) -> Self { + match ptype { + PType::U8 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I8 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::F16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::F32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::F64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + } + } +} + +impl From for VectorMut { + fn from(v: PrimitiveVectorMut) -> Self { + Self::Primitive(v) + } +} + +impl VectorMutOps for PrimitiveVectorMut { + type Immutable = PrimitiveVector; + + fn nullability(&self) -> Nullability { + match_each_pvector_mut!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_pvector_mut!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_pvector_mut!(self, |v| { v.len() }) + } + + fn capacity(&self) -> usize { + match_each_pvector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_pvector_mut!(self, |v| { v.reserve(additional) }) + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_pvector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_pvector_mut!(self, |v| { v.freeze().into() }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_pvector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_pvector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } +} diff --git a/vortex-vector/src/primitive_mut.rs b/vortex-vector/src/primitive_mut.rs deleted file mode 100644 index 14ef69aeada..00000000000 --- a/vortex-vector/src/primitive_mut.rs +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_buffer::BufferMut; -use vortex_dtype::{DType, NativePType, Nullability}; -use vortex_mask::MaskMut; - -use crate::ops::VectorMutOps; -use crate::{PVectorMut, PrimitiveVector, VectorMut}; - -/// A mutable vector of primitive values. -pub struct PrimitiveVectorMut { - pub(super) dtype: DType, - pub(super) elements: BufferMut, - pub(super) validity: MaskMut, -} - -impl PrimitiveVectorMut { - /// Create a new mutable primitive vector with the given capacity and nullability. - pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { - Self { - dtype: DType::Primitive(T::PTYPE, nullability), - elements: BufferMut::with_capacity(capacity), - validity: MaskMut::with_capacity(capacity), - } - } -} - -impl From> for VectorMut { - fn from(val: PrimitiveVectorMut) -> Self { - VectorMut::Primitive(PVectorMut::from(val)) - } -} - -impl VectorMutOps for PrimitiveVectorMut { - type Immutable = PrimitiveVector; - - fn len(&self) -> usize { - self.elements.len() - } - - fn dtype(&self) -> &DType { - &self.dtype - } - - fn capacity(&self) -> usize { - self.elements.capacity() - } - - fn reserve(&mut self, additional: usize) { - self.elements.reserve(additional); - self.validity.reserve(additional); - } - - fn split_off(&mut self, at: usize) -> Self { - PrimitiveVectorMut { - dtype: self.dtype.clone(), - elements: self.elements.split_off(at), - validity: self.validity.split_off(at), - } - } - - fn unsplit(&mut self, other: Self) { - self.elements.unsplit(other.elements); - self.validity.unsplit(other.validity); - } - - /// Extends the vector by appending elements from another vector. - fn extend_from_vector(&mut self, other: &PrimitiveVector) { - self.elements.extend_from_slice(other.elements.as_slice()); - self.validity.append_mask(&other.validity); - } - - /// Freeze the vector into an immutable one. - fn freeze(self) -> PrimitiveVector { - PrimitiveVector { - dtype: self.dtype, - elements: self.elements.freeze(), - validity: self.validity.freeze(), - } - } -} diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs new file mode 100644 index 00000000000..b9aabca330a --- /dev/null +++ b/vortex-vector/src/private.rs @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! This private module contains the [`Sealed`] implementations for different [`Vector`] types. This +//! allows us to seal our [`VectorOps`] and [`VectorMutOps`] traits. +//! +//! Sealing these traits prevents external crates from implementing them while still allowing public +//! usage, which gives us the freedom to add new trait methods in the future without breaking +//! backward compatibility. + +use crate::*; + +pub trait Sealed {} + +impl Sealed for Vector {} +impl Sealed for VectorMut {} + +impl Sealed for NullVector {} +impl Sealed for NullVectorMut {} + +impl Sealed for BoolVector {} +impl Sealed for BoolVectorMut {} + +impl Sealed for PrimitiveVector {} +impl Sealed for PrimitiveVectorMut {} +impl Sealed for GenericPVector {} +impl Sealed for GenericPVectorMut {} diff --git a/vortex-vector/src/pvector.rs b/vortex-vector/src/pvector.rs deleted file mode 100644 index ca21d86e02c..00000000000 --- a/vortex-vector/src/pvector.rs +++ /dev/null @@ -1,493 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_dtype::half::f16; -use vortex_dtype::{DType, NativePType, Nullability, PType, PTypeUpcast}; -use vortex_error::vortex_panic; - -use crate::ops::{VectorMutOps, VectorOps}; -use crate::{PrimitiveVector, PrimitiveVectorMut, Vector}; - -/// An enum over all primitive vector types. -pub enum PVector { - /// U8 - U8(PrimitiveVector), - /// U16 - U16(PrimitiveVector), - /// U32 - U32(PrimitiveVector), - /// U64 - U64(PrimitiveVector), - /// I8 - I8(PrimitiveVector), - /// I16 - I16(PrimitiveVector), - /// I32 - I32(PrimitiveVector), - /// I64 - I64(PrimitiveVector), - /// F16 - F16(PrimitiveVector), - /// F32 - F32(PrimitiveVector), - /// F64 - F64(PrimitiveVector), -} - -/// An enum over all mutable primitive vector types. -pub enum PVectorMut { - /// U8 - U8(PrimitiveVectorMut), - /// U16 - U16(PrimitiveVectorMut), - /// U32 - U32(PrimitiveVectorMut), - /// U64 - U64(PrimitiveVectorMut), - /// I8 - I8(PrimitiveVectorMut), - /// I16 - I16(PrimitiveVectorMut), - /// I32 - I32(PrimitiveVectorMut), - /// I64 - I64(PrimitiveVectorMut), - /// F16 - F16(PrimitiveVectorMut), - /// F32 - F32(PrimitiveVectorMut), - /// F64 - F64(PrimitiveVectorMut), -} - -impl PVectorMut { - /// Create a new mutable primitive vector with the given capacity, primitive type, and nullability. - pub fn with_capacity(capacity: usize, ptype: PType, nullability: Nullability) -> Self { - match ptype { - PType::U8 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::U16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::U32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::U64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::I8 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::I16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::I32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::I64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::F16 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::F32 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - PType::F64 => PrimitiveVectorMut::::with_capacity(capacity, nullability).into(), - } - } -} - -macro_rules! match_each_pvector { - ($self:expr, | $vec:ident | $body:block) => {{ - match $self { - PVector::U8(v) => { - let $vec = v; - $body - } - PVector::U16(v) => { - let $vec = v; - $body - } - PVector::U32(v) => { - let $vec = v; - $body - } - PVector::U64(v) => { - let $vec = v; - $body - } - PVector::I8(v) => { - let $vec = v; - $body - } - PVector::I16(v) => { - let $vec = v; - $body - } - PVector::I32(v) => { - let $vec = v; - $body - } - PVector::I64(v) => { - let $vec = v; - $body - } - PVector::F16(v) => { - let $vec = v; - $body - } - PVector::F32(v) => { - let $vec = v; - $body - } - PVector::F64(v) => { - let $vec = v; - $body - } - } - }}; -} - -macro_rules! match_each_pvector_mut { - ($self:expr, | $vec:ident | $body:block) => {{ - match $self { - PVectorMut::U8(v) => { - let $vec = v; - $body - } - PVectorMut::U16(v) => { - let $vec = v; - $body - } - PVectorMut::U32(v) => { - let $vec = v; - $body - } - PVectorMut::U64(v) => { - let $vec = v; - $body - } - PVectorMut::I8(v) => { - let $vec = v; - $body - } - PVectorMut::I16(v) => { - let $vec = v; - $body - } - PVectorMut::I32(v) => { - let $vec = v; - $body - } - PVectorMut::I64(v) => { - let $vec = v; - $body - } - PVectorMut::F16(v) => { - let $vec = v; - $body - } - PVectorMut::F32(v) => { - let $vec = v; - $body - } - PVectorMut::F64(v) => { - let $vec = v; - $body - } - } - }}; -} - -macro_rules! match_each_pvector_mut_pair { - ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ - match ($self, $other) { - (PVectorMut::U8(a), PVectorMut::U8(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U16(a), PVectorMut::U16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U32(a), PVectorMut::U32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U64(a), PVectorMut::U64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I8(a), PVectorMut::I8(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I16(a), PVectorMut::I16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I32(a), PVectorMut::I32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I64(a), PVectorMut::I64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F16(a), PVectorMut::F16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F32(a), PVectorMut::F32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F64(a), PVectorMut::F64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - _ => vortex_panic!("Mismatched primitive vector types"), - } - }}; -} - -macro_rules! match_each_pvector_mut_immut_pair { - ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ - match ($self, $other) { - (PVectorMut::U8(a), PVector::U8(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U16(a), PVector::U16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U32(a), PVector::U32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::U64(a), PVector::U64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I8(a), PVector::I8(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I16(a), PVector::I16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I32(a), PVector::I32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::I64(a), PVector::I64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F16(a), PVector::F16(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F32(a), PVector::F32(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - (PVectorMut::F64(a), PVector::F64(b)) => { - let $vec = a; - let $vec_other = b; - $body - } - _ => vortex_panic!("Mismatched primitive vector types"), - } - }}; -} - -impl From for Vector { - fn from(v: PVector) -> Self { - Self::Primitive(v) - } -} - -impl VectorOps for PVector { - type Mutable = PVectorMut; - - fn len(&self) -> usize { - match_each_pvector!(self, |v| { v.len() }) - } - - fn dtype(&self) -> &DType { - match_each_pvector!(self, |v| { v.dtype() }) - } - - fn try_into_mut(self) -> Result - where - Self: Sized, - { - match_each_pvector!(self, |v| { - v.try_into_mut() - .map(PVectorMut::from) - .map_err(PVector::from) - }) - } -} - -impl From for crate::VectorMut { - fn from(v: PVectorMut) -> Self { - Self::Primitive(v) - } -} - -impl VectorMutOps for PVectorMut { - type Immutable = PVector; - - fn len(&self) -> usize { - match_each_pvector_mut!(self, |v| { v.len() }) - } - - fn dtype(&self) -> &DType { - match_each_pvector_mut!(self, |v| { v.dtype() }) - } - - fn capacity(&self) -> usize { - match_each_pvector_mut!(self, |v| { v.capacity() }) - } - - fn reserve(&mut self, additional: usize) { - match_each_pvector_mut!(self, |v| { v.reserve(additional) }) - } - - fn split_off(&mut self, at: usize) -> Self { - match_each_pvector_mut!(self, |v| { v.split_off(at).into() }) - } - - fn unsplit(&mut self, other: Self) { - match_each_pvector_mut_pair!(self, other, |a, b| { - a.unsplit(b); - }); - } - - fn extend_from_vector(&mut self, other: &Self::Immutable) { - match_each_pvector_mut_immut_pair!(self, other, |a, b| { - a.extend_from_vector(b); - }); - } - - fn freeze(self) -> Self::Immutable { - match_each_pvector_mut!(self, |v| { v.freeze().into() }) - } -} - -// From impls for PVector variants -impl From> for PVector { - fn from(v: PrimitiveVector) -> Self { - T::upcast(v) - } -} - -impl PTypeUpcast for PVector { - type Input = PrimitiveVector; - - fn from_u8(input: Self::Input) -> Self { - PVector::U8(input) - } - - fn from_u16(input: Self::Input) -> Self { - PVector::U16(input) - } - - fn from_u32(input: Self::Input) -> Self { - PVector::U32(input) - } - - fn from_u64(input: Self::Input) -> Self { - PVector::U64(input) - } - - fn from_i8(input: Self::Input) -> Self { - PVector::I8(input) - } - - fn from_i16(input: Self::Input) -> Self { - PVector::I16(input) - } - - fn from_i32(input: Self::Input) -> Self { - PVector::I32(input) - } - - fn from_i64(input: Self::Input) -> Self { - PVector::I64(input) - } - - fn from_f16(input: Self::Input) -> Self { - PVector::F16(input) - } - - fn from_f32(input: Self::Input) -> Self { - PVector::F32(input) - } - - fn from_f64(input: Self::Input) -> Self { - PVector::F64(input) - } -} - -// From impls for PVectorMut variants -impl From> for PVectorMut { - fn from(v: PrimitiveVectorMut) -> Self { - T::upcast(v) - } -} - -impl PTypeUpcast for PVectorMut { - type Input = PrimitiveVectorMut; - - fn from_u8(input: Self::Input) -> Self { - PVectorMut::U8(input) - } - - fn from_u16(input: Self::Input) -> Self { - PVectorMut::U16(input) - } - - fn from_u32(input: Self::Input) -> Self { - PVectorMut::U32(input) - } - - fn from_u64(input: Self::Input) -> Self { - PVectorMut::U64(input) - } - - fn from_i8(input: Self::Input) -> Self { - PVectorMut::I8(input) - } - - fn from_i16(input: Self::Input) -> Self { - PVectorMut::I16(input) - } - - fn from_i32(input: Self::Input) -> Self { - PVectorMut::I32(input) - } - - fn from_i64(input: Self::Input) -> Self { - PVectorMut::I64(input) - } - - fn from_f16(input: Self::Input) -> Self { - PVectorMut::F16(input) - } - - fn from_f32(input: Self::Input) -> Self { - PVectorMut::F32(input) - } - - fn from_f64(input: Self::Input) -> Self { - PVectorMut::F64(input) - } -} diff --git a/vortex-vector/src/vector/macros.rs b/vortex-vector/src/vector/macros.rs new file mode 100644 index 00000000000..04e53544dc3 --- /dev/null +++ b/vortex-vector/src/vector/macros.rs @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + Vector::Null(v) => { + let $vec = v; + $body + } + Vector::Bool(v) => { + let $vec = v; + $body + } + Vector::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + VectorMut::Null(v) => { + let $vec = v; + $body + } + VectorMut::Bool(v) => { + let $vec = v; + $body + } + VectorMut::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), VectorMut::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), VectorMut::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), VectorMut::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched vector types"), + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), Vector::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), Vector::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), Vector::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched vector types"), + } + }}; +} diff --git a/vortex-vector/src/vector/mod.rs b/vortex-vector/src/vector/mod.rs new file mode 100644 index 00000000000..5ad76b6b174 --- /dev/null +++ b/vortex-vector/src/vector/mod.rs @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::DType; +use vortex_error::vortex_panic; + +use crate::{ + BoolVector, BoolVectorMut, NullVector, NullVectorMut, PrimitiveVector, PrimitiveVectorMut, +}; + +mod macros; +pub(super) mod ops; + +/// An enum over all vector types. +pub enum Vector { + /// Null + Null(NullVector), + /// Bool + Bool(BoolVector), + /// Primitive + /// + /// TODO(connor): Document that this is an enum, not a struct. + Primitive(PrimitiveVector), + // Decimal + // Decimal(DecimalVector), + // String + // String(StringVector), + // Binary + // Binary(BinaryVector), + // List + // List(ListVector), + // FixedList + // FixedList(FixedListVector), + // Struct + // Struct(StructVector), + // Extension + // Extension(ExtensionVector), +} + +/// An enum over all mutable vector types. +pub enum VectorMut { + /// Null + Null(NullVectorMut), + /// Bool + Bool(BoolVectorMut), + /// Primitive + Primitive(PrimitiveVectorMut), +} + +impl VectorMut { + /// Create a new mutable vector with the given capacity and dtype. + pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { + match dtype { + DType::Null => NullVectorMut::new(0).into(), + DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), + DType::Primitive(ptype, nullability) => { + PrimitiveVectorMut::with_capacity(capacity, *ptype, *nullability).into() + } + _ => vortex_panic!("Unsupported dtype for VectorMut"), + } + } +} diff --git a/vortex-vector/src/vector/ops.rs b/vortex-vector/src/vector/ops.rs new file mode 100644 index 00000000000..5585b4ffb5e --- /dev/null +++ b/vortex-vector/src/vector/ops.rs @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`VectorOps`] and [`VectorOpsMut`] for [`Vector`] and +//! [`VectorMut`], respecitively. + +use vortex_dtype::{DType, Nullability}; + +use crate::{ + Vector, VectorMut, match_each_vector, match_each_vector_mut, match_each_vector_mut_immut_pair, + match_each_vector_mut_pair, private, +}; + +/// Common operations for immutable vectors. +pub trait VectorOps: private::Sealed + Into { + /// The mutable equivalent of this immutable vector. + type Mutable: VectorMutOps; + + /// Returns the [`Nullability`] of the vector. + fn nullability(&self) -> Nullability; + + /// Returns the [`DType`] (or data type) of the vector. + fn dtype(&self) -> DType; + + /// Returns the number of elements in the vector, also referred to as its "length". + fn len(&self) -> usize; + + /// Returns `true` if the vector contains no elements. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Try to convert `self` into a mutable vector (implementing [`VectorMutOps`]). + /// + /// If `self` is the only unique strong reference (it effectively "owns" the buffer), this + /// method will succeed and return a mutable vector with the contents of `self` **without** + /// copying. + /// + /// If `self` is not unique, this will fail and return `self` back. + fn try_into_mut(self) -> Result + where + Self: Sized; +} + +impl VectorOps for Vector { + type Mutable = VectorMut; + + fn nullability(&self) -> Nullability { + match_each_vector!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_vector!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_vector!(self, |v| { v.len() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_vector!(self, |v| { + v.try_into_mut().map(VectorMut::from).map_err(Vector::from) + }) + } +} + +/// Common operations for mutable vectors. +pub trait VectorMutOps: private::Sealed + Into { + /// The immutable equivalent of this mutable vector. + type Immutable: VectorOps; + + /// Returns the [`Nullability`] of the vector. + fn nullability(&self) -> Nullability; + + /// Returns the [`DType`] (or data type) of the vector. + fn dtype(&self) -> DType; + + /// Returns the number of elements in the vector, also referred to as its "length". + fn len(&self) -> usize; + + /// Returns `true` if the vector contains no elements. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the total number of elements the vector can hold without reallocating. + fn capacity(&self) -> usize; + + /// Reserves capacity for at least `additional` more elements to be inserted in the given + /// vector. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, the capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. + /// + /// Please let us know if you need `reserve_exact` functionality! + fn reserve(&mut self, additional: usize); + + /// Extends the vector by appending elements from another vector. + fn extend_from_vector(&mut self, other: &Self::Immutable); + + /// Converts `self` into an immutable vector. + fn freeze(self) -> Self::Immutable; + + /// Splits the vector into two at the given index. + /// + /// Afterward, `self` contains elements `[0, at)`, and the returned vector contains elements + /// `[at, capacity)`. It's guaranteed that the memory does not move, that is, the address of + /// `self` does not change, and the address of the returned slice is at bytes after that. + /// + /// This is an `O(1)` operation that just increases the reference count and sets a few indices. + /// + /// # Panics + /// + /// Panics if we try to split off more than the current capacity of the vector (if + /// `at > capacity`). + fn split_off(&mut self, at: usize) -> Self; + + /// Absorbs a mutable vector that was previously split off. + /// + /// If the two vectors were previously contiguous and not mutated in a way that causes + /// re-allocation i.e., if other was created by calling split_off on this vector, then this is + /// an O(1) operation that just decreases a reference count and sets a few indices. + /// + /// Otherwise, this method degenerates to `self.extend_from_vector(other.as_ref())`. + fn unsplit(&mut self, other: Self); +} + +impl VectorMutOps for VectorMut { + type Immutable = Vector; + + fn nullability(&self) -> Nullability { + match_each_vector_mut!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_vector_mut!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_vector_mut!(self, |v| { v.len() }) + } + + fn capacity(&self) -> usize { + match_each_vector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_vector_mut!(self, |v| { v.reserve(additional) }) + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_vector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_vector_mut!(self, |v| { v.freeze().into() }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_vector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_vector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } +} From 4480153def37427e48a1cf666842c6c3b5121c8d Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 20 Oct 2025 12:06:58 -0400 Subject: [PATCH 3/3] add documentation Signed-off-by: Connor Tsui --- vortex-buffer/src/bit/buf.rs | 2 +- vortex-buffer/src/bit/buf_mut.rs | 22 ++++++ vortex-mask/src/lib.rs | 2 +- vortex-mask/src/mask_mut.rs | 2 + vortex-vector/src/bool/mod.rs | 2 + vortex-vector/src/bool/vector.rs | 14 ++++ vortex-vector/src/bool/vector_mut.rs | 22 +++++- vortex-vector/src/lib.rs | 17 +++-- vortex-vector/src/null/mod.rs | 2 + vortex-vector/src/null/vector.rs | 10 ++- vortex-vector/src/null/vector_mut.rs | 12 +++- vortex-vector/src/primitive/generic.rs | 17 +++-- vortex-vector/src/primitive/generic_mut.rs | 19 ++--- vortex-vector/src/primitive/mod.rs | 49 +++++++++++++ vortex-vector/src/primitive/vector.rs | 80 ++++------------------ vortex-vector/src/primitive/vector_mut.rs | 78 ++++++++++++++++++--- vortex-vector/src/private.rs | 1 + vortex-vector/src/vector/macros.rs | 2 + vortex-vector/src/vector/mod.rs | 34 +++++++-- vortex-vector/src/vector/ops.rs | 30 ++++---- 20 files changed, 300 insertions(+), 117 deletions(-) diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index e481f10c718..be431a4384a 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -11,7 +11,7 @@ use crate::bit::{ use crate::{Alignment, BitBufferMut, Buffer, BufferMut, ByteBuffer, buffer}; /// An immutable bitset stored as a packed byte buffer. -#[derive(Clone, Debug, Eq)] +#[derive(Debug, Clone, Eq)] pub struct BitBuffer { buffer: ByteBuffer, len: usize, diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index 9f83f65aa9b..d9fc075cfe2 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use arrow_buffer::bit_chunk_iterator::BitChunks; use bitvec::view::BitView; use crate::bit::{get_bit_unchecked, set_bit_unchecked, unset_bit_unchecked}; @@ -25,12 +26,26 @@ use crate::{BitBuffer, BufferMut, ByteBufferMut, buffer_mut}; /// ``` /// /// See also: [`BitBuffer`]. +#[derive(Debug, Clone, Eq)] pub struct BitBufferMut { buffer: ByteBufferMut, offset: usize, len: usize, } +impl PartialEq for BitBufferMut { + fn eq(&self, other: &Self) -> bool { + if self.len != other.len { + return false; + } + + self.chunks() + .iter() + .zip(other.chunks()) + .all(|(a, b)| a == b) + } +} + impl BitBufferMut { /// Create new bit buffer from given byte buffer and logical bit length pub fn from_buffer(buffer: ByteBufferMut, offset: usize, len: usize) -> Self { @@ -118,6 +133,13 @@ impl BitBufferMut { unsafe { get_bit_unchecked(self.buffer.as_ptr(), self.offset + index) } } + /// Access chunks of the underlying buffer as 8 byte chunks with a final trailer + /// + /// If you're performing operations on a single buffer, prefer [BitBuffer::unaligned_chunks] + pub fn chunks(&self) -> BitChunks<'_> { + BitChunks::new(self.buffer.as_slice(), self.offset, self.len) + } + /// Get the bit capacity of the buffer. #[inline(always)] pub fn capacity(&self) -> usize { diff --git a/vortex-mask/src/lib.rs b/vortex-mask/src/lib.rs index d68f8d2f8bc..d30e1b2948b 100644 --- a/vortex-mask/src/lib.rs +++ b/vortex-mask/src/lib.rs @@ -97,7 +97,7 @@ impl Eq for AllOr where T: Eq {} /// /// A [`Mask`] can be constructed from various representations, and converted to various /// others. Internally, these are cached. -#[derive(Clone, Debug)] +#[derive(Debug, Clone)] pub enum Mask { /// All values are included. AllTrue(usize), diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs index a6b28b0de37..d72403f54a2 100644 --- a/vortex-mask/src/mask_mut.rs +++ b/vortex-mask/src/mask_mut.rs @@ -8,8 +8,10 @@ use vortex_buffer::{BitBuffer, BitBufferMut}; use crate::Mask; /// A mutable mask, used for lazily allocating the bit buffer as required. +#[derive(Debug, Clone)] pub struct MaskMut(Inner); +#[derive(Debug, Clone)] enum Inner { /// Initially, the mask is empty but may have some capacity. Empty { capacity: usize }, diff --git a/vortex-vector/src/bool/mod.rs b/vortex-vector/src/bool/mod.rs index 8541fc43397..b5e5abddd1c 100644 --- a/vortex-vector/src/bool/mod.rs +++ b/vortex-vector/src/bool/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`BoolVector`] and [`BoolVectorMut`]. + mod vector; pub use vector::BoolVector; diff --git a/vortex-vector/src/bool/vector.rs b/vortex-vector/src/bool/vector.rs index b63e05d993b..54d31cc5572 100644 --- a/vortex-vector/src/bool/vector.rs +++ b/vortex-vector/src/bool/vector.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`BoolVector`]. + use vortex_buffer::BitBuffer; use vortex_dtype::{DType, Nullability}; use vortex_mask::Mask; @@ -9,6 +11,12 @@ use super::BoolVectorMut; use crate::VectorOps; /// An immutable vector of boolean values. +/// +/// Internally, the boolean values are stored as the bits of a [`BitBuffer`] plus an optional +/// [`Mask`] for null booleans. +/// +/// The mutable equivalent of this type is [`BoolVectorMut`]. +#[derive(Debug, Clone)] pub struct BoolVector { pub(super) bits: BitBuffer, pub(super) validity: Option, @@ -26,6 +34,12 @@ impl VectorOps for BoolVector { } fn len(&self) -> usize { + debug_assert!( + self.validity + .as_ref() + .is_none_or(|mask| mask.len() == self.bits.len()) + ); + self.bits.len() } diff --git a/vortex-vector/src/bool/vector_mut.rs b/vortex-vector/src/bool/vector_mut.rs index 43231f3dc40..7fc539b1d15 100644 --- a/vortex-vector/src/bool/vector_mut.rs +++ b/vortex-vector/src/bool/vector_mut.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`BoolVectorMut`]. + use vortex_buffer::BitBufferMut; use vortex_dtype::{DType, Nullability}; use vortex_mask::MaskMut; @@ -9,13 +11,19 @@ use super::BoolVector; use crate::VectorMutOps; /// A mutable vector of boolean values. +/// +/// Internally, the boolean values are stored as the bits of a [`BitBufferMut`] plus an optional +/// [`MaskMut`] for null booleans. +/// +/// The immutable equivalent of this type is [`BoolVector`]. +#[derive(Debug, Clone)] pub struct BoolVectorMut { pub(super) bits: BitBufferMut, pub(super) validity: Option, } impl BoolVectorMut { - /// Create a new mutable boolean vector with the given capacity and nullability. + /// Creates a new mutable boolean vector with the given `capacity` and `nullability`. pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { let validity = match nullability { Nullability::NonNullable => None, @@ -41,6 +49,12 @@ impl VectorMutOps for BoolVectorMut { } fn len(&self) -> usize { + debug_assert!( + self.validity + .as_ref() + .is_none_or(|mask| mask.len() == self.bits.len()) + ); + self.bits.len() } @@ -50,6 +64,7 @@ impl VectorMutOps for BoolVectorMut { fn reserve(&mut self, additional: usize) { self.bits.reserve(additional); + if let Some(v) = self.validity.as_mut() { v.reserve(additional); } @@ -57,6 +72,8 @@ impl VectorMutOps for BoolVectorMut { fn extend_from_vector(&mut self, other: &BoolVector) { self.bits.append_buffer(&other.bits); + + // TODO(connor): We must `other`'s nullability in relation to `self`. match (&mut self.validity, &other.validity) { (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), (Some(self_v), None) => self_v.append_n(true, other.bits.len()), @@ -84,8 +101,11 @@ impl VectorMutOps for BoolVectorMut { } fn unsplit(&mut self, other: Self) { + // TODO(connor): We must check `other`'s nullability in relation to `self`. + let other_len = other.bits.len(); self.bits.unsplit(other.bits); + match (&mut self.validity, other.validity) { (Some(self_v), Some(other_v)) => self_v.unsplit(other_v), (Some(self_v), None) => self_v.append_n(true, other_len), diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs index ce7ec5c6e90..86c76dfd074 100644 --- a/vortex-vector/src/lib.rs +++ b/vortex-vector/src/lib.rs @@ -3,13 +3,20 @@ //! Mutable decompressed (canonical) vectors for Vortex. //! -//! TODO docs. +//! TODO(connor) More docs. + +// TODO(connor) +// - Document everything +// - Figure out correct panic propagation +// - Figure out exact semantics of `split_off` w.r.t. length of capacity +// - Fix bugs in implementations +// - Add tests +// - Figure out error semantics on ops traits +// - Implement PartialEq and Eq for vectors +// - Add stubs for remaining vector variants +// - Potentially add `TryFrom<Vector> for Vector` or some other conversion method #![deny(missing_docs)] -// #![warn(clippy::missing_docs_in_private_items)] -// #![warn(clippy::missing_errors_doc)] -// #![warn(clippy::missing_panics_doc)] -// #![warn(clippy::missing_safety_doc)] mod vector; pub use vector::ops::{VectorMutOps, VectorOps}; diff --git a/vortex-vector/src/null/mod.rs b/vortex-vector/src/null/mod.rs index ebb6f129a36..4f9de101c94 100644 --- a/vortex-vector/src/null/mod.rs +++ b/vortex-vector/src/null/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`NullVector`] and [`NullVectorMut`]. + mod vector; pub use vector::NullVector; diff --git a/vortex-vector/src/null/vector.rs b/vortex-vector/src/null/vector.rs index a481ad442b5..8782cc3c6a8 100644 --- a/vortex-vector/src/null/vector.rs +++ b/vortex-vector/src/null/vector.rs @@ -1,17 +1,25 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`NullVector`]. + use vortex_dtype::{DType, Nullability}; use crate::{NullVectorMut, VectorOps}; /// An immutable vector of null values. +/// +/// Since a "null" value does not require any data storage, the nulls are stored internally with a +/// single `length` counter. +/// +/// The mutable equivalent of this type is [`NullVectorMut`]. +#[derive(Debug, Clone, Copy)] pub struct NullVector { pub(super) len: usize, } impl NullVector { - /// Creates a new `NullVector` with the given length. + /// Creates a new immutable vector of nulls with the given length. pub fn new(len: usize) -> Self { Self { len } } diff --git a/vortex-vector/src/null/vector_mut.rs b/vortex-vector/src/null/vector_mut.rs index 720dae2c9b5..46a55da7f93 100644 --- a/vortex-vector/src/null/vector_mut.rs +++ b/vortex-vector/src/null/vector_mut.rs @@ -1,18 +1,26 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`NullVectorMut`]. + use vortex_dtype::{DType, Nullability}; use super::NullVector; use crate::VectorMutOps; /// A mutable vector of null values. +/// +/// Since a "null" value does not require any data storage, the nulls are stored internally with a +/// single `length` counter. +/// +/// The immutable equivalent of this type is [`NullVector`]. +#[derive(Debug, Clone, Copy)] pub struct NullVectorMut { pub(super) len: usize, } impl NullVectorMut { - /// Creates a new `NullVectorMut` with the given length. + /// Creates a new mutable vector of nulls with the given length. pub fn new(len: usize) -> Self { Self { len } } @@ -50,7 +58,6 @@ impl VectorMutOps for NullVectorMut { } fn split_off(&mut self, at: usize) -> Self { - // TODO(connor): This is wrong (https://docs.rs/bytes/latest/src/bytes/bytes_mut.rs.html#320-335) assert!( at <= self.capacity(), "split_off out of bounds: {:?} <= {:?}", @@ -58,6 +65,7 @@ impl VectorMutOps for NullVectorMut { self.capacity(), ); + // TODO(connor): This is wrong (https://docs.rs/bytes/latest/src/bytes/bytes_mut.rs.html#320-335) let new_len = self.len - at; self.len = at; NullVectorMut { len: new_len } diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index 3ce671e1a5b..7ed4048de28 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -1,24 +1,27 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`GenericPVector`]. + use vortex_buffer::Buffer; use vortex_dtype::{DType, NativePType, Nullability}; use vortex_mask::Mask; -use crate::{GenericPVectorMut, PrimitiveVector, Vector, VectorOps}; +use crate::{GenericPVectorMut, VectorOps}; /// An immutable vector of generic primitive values. +/// +/// `T` is expected to be bound by [`NativePType`], which templates an internal [`Buffer`] that +/// stores the elements of the vector. Additionally, an optional [`Mask`] is stored to track null +/// primitive elements. +/// +/// The mutable equivalent of this type is [`GenericPVectorMut`]. +#[derive(Debug, Clone)] pub struct GenericPVector { pub(super) elements: Buffer, pub(super) validity: Option, } -impl From> for Vector { - fn from(v: GenericPVector) -> Self { - Self::Primitive(PrimitiveVector::from(v)) - } -} - impl VectorOps for GenericPVector { type Mutable = GenericPVectorMut; diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs index c2cd65681b4..314f2819263 100644 --- a/vortex-vector/src/primitive/generic_mut.rs +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -1,13 +1,22 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`GenericPVectorMut`]. + use vortex_buffer::BufferMut; use vortex_dtype::{DType, NativePType, Nullability}; use vortex_mask::MaskMut; -use crate::{GenericPVector, PrimitiveVectorMut, VectorMut, VectorMutOps}; +use crate::{GenericPVector, VectorMutOps}; -/// A mutable vector of primitive values. +/// A mutable vector of generic primitive values. +/// +/// `T` is expected to be bound by [`NativePType`], which templates an internal [`BufferMut`] +/// that stores the elements of the vector. Additionally, an optional [`MaskMut`] is stored to track +/// null primitive elements. +/// +/// The immutable equivalent of this type is [`GenericPVector`]. +#[derive(Debug, Clone)] pub struct GenericPVectorMut { pub(super) elements: BufferMut, pub(super) validity: Option, @@ -28,12 +37,6 @@ impl GenericPVectorMut { } } -impl From> for VectorMut { - fn from(val: GenericPVectorMut) -> Self { - VectorMut::Primitive(PrimitiveVectorMut::from(val)) - } -} - impl VectorMutOps for GenericPVectorMut { type Immutable = GenericPVector; diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs index 25c901aba14..a317e3df7b2 100644 --- a/vortex-vector/src/primitive/mod.rs +++ b/vortex-vector/src/primitive/mod.rs @@ -1,6 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definitions and implementations of native primitive vector types. +//! +//! The types that hold data are [`GenericPVector`] and [`GenericPVectorMut`], which are generic +//! over types `T` that implement [`NativePType`] (which are just the integer and floating-point +//! types that are native to Rust plus [`f16`]). +//! +//! [`PrimitiveVector`] and [`PrimitiveVectorMut`] are enums that wrap all of the different possible +//! [`GenericPVector`]s. There are several macros defined in this crate to make working with these +//! primitive vector types easier. +//! +//! [`NativePType`]: vortex_dtype::NativePType +//! [`f16`]: vortex_dtype::half::f16 + mod generic; pub use generic::GenericPVector; @@ -13,4 +26,40 @@ pub use vector::PrimitiveVector; mod vector_mut; pub use vector_mut::PrimitiveVectorMut; +/// Helper macros for working with the different variants of [`PrimitiveVector`] and +/// [`PrimitiveVectorMut`]. +/// +/// All macros are exported at the crate level with `#[macro_use]`. mod macros; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Vector Conversions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +use vortex_dtype::NativePType; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: PrimitiveVector) -> Self { + Self::Primitive(v) + } +} + +impl From> for Vector { + fn from(v: GenericPVector) -> Self { + Self::Primitive(PrimitiveVector::from(v)) + } +} + +impl From for VectorMut { + fn from(v: PrimitiveVectorMut) -> Self { + Self::Primitive(v) + } +} + +impl From> for VectorMut { + fn from(val: GenericPVectorMut) -> Self { + Self::Primitive(PrimitiveVectorMut::from(val)) + } +} diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs index 2d803aeaca6..cbb720b4312 100644 --- a/vortex-vector/src/primitive/vector.rs +++ b/vortex-vector/src/primitive/vector.rs @@ -1,13 +1,21 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`PrimitiveVector`]. + use vortex_dtype::half::f16; use vortex_dtype::{DType, NativePType, Nullability, PTypeUpcast}; -use super::{GenericPVector, GenericPVectorMut, PrimitiveVectorMut}; -use crate::{Vector, VectorOps, match_each_pvector}; +use super::{GenericPVector, PrimitiveVectorMut}; +use crate::{VectorOps, match_each_pvector}; -/// An enum over all primitive vector types. +/// An immutable vector of primitive values. +/// +/// `PrimitiveVector` is represented by an enum over all possible [`GenericPVector`] types (which +/// are templated by the types that implement [`NativePType`]). +/// +/// The mutable equivalent of this type is [`PrimitiveVectorMut`]. +#[derive(Debug, Clone)] pub enum PrimitiveVector { /// U8 U8(GenericPVector), @@ -33,12 +41,6 @@ pub enum PrimitiveVector { F64(GenericPVector), } -impl From for Vector { - fn from(v: PrimitiveVector) -> Self { - Self::Primitive(v) - } -} - impl VectorOps for PrimitiveVector { type Mutable = PrimitiveVectorMut; @@ -66,7 +68,10 @@ impl VectorOps for PrimitiveVector { } } -// From impls for PrimitiveVector variants +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Upcast Conversion +//////////////////////////////////////////////////////////////////////////////////////////////////// + impl From> for PrimitiveVector { fn from(v: GenericPVector) -> Self { T::upcast(v) @@ -120,58 +125,3 @@ impl PTypeUpcast for PrimitiveVector { PrimitiveVector::F64(input) } } - -// From impls for PrimitiveVectorMut variants -impl From> for PrimitiveVectorMut { - fn from(v: GenericPVectorMut) -> Self { - T::upcast(v) - } -} - -impl PTypeUpcast for PrimitiveVectorMut { - type Input = GenericPVectorMut; - - fn from_u8(input: Self::Input) -> Self { - PrimitiveVectorMut::U8(input) - } - - fn from_u16(input: Self::Input) -> Self { - PrimitiveVectorMut::U16(input) - } - - fn from_u32(input: Self::Input) -> Self { - PrimitiveVectorMut::U32(input) - } - - fn from_u64(input: Self::Input) -> Self { - PrimitiveVectorMut::U64(input) - } - - fn from_i8(input: Self::Input) -> Self { - PrimitiveVectorMut::I8(input) - } - - fn from_i16(input: Self::Input) -> Self { - PrimitiveVectorMut::I16(input) - } - - fn from_i32(input: Self::Input) -> Self { - PrimitiveVectorMut::I32(input) - } - - fn from_i64(input: Self::Input) -> Self { - PrimitiveVectorMut::I64(input) - } - - fn from_f16(input: Self::Input) -> Self { - PrimitiveVectorMut::F16(input) - } - - fn from_f32(input: Self::Input) -> Self { - PrimitiveVectorMut::F32(input) - } - - fn from_f64(input: Self::Input) -> Self { - PrimitiveVectorMut::F64(input) - } -} diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs index c2365a22eb7..cf78dc67990 100644 --- a/vortex-vector/src/primitive/vector_mut.rs +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -1,15 +1,23 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`PrimitiveVectorMut`]. + use vortex_dtype::half::f16; -use vortex_dtype::{DType, Nullability, PType}; +use vortex_dtype::{DType, NativePType, Nullability, PType, PTypeUpcast}; use crate::{ - GenericPVectorMut, PrimitiveVector, VectorMut, VectorMutOps, match_each_pvector_mut, + GenericPVectorMut, PrimitiveVector, VectorMutOps, match_each_pvector_mut, match_each_pvector_mut_immut_pair, match_each_pvector_mut_pair, }; -/// An enum over all mutable primitive vector types. +/// A mutable vector of primitive values. +/// +/// `PrimitiveVector` is represented by an enum over all possible [`GenericPVectorMut`] types (which +/// are templated by the types that implement [`NativePType`]). +/// +/// The immutable equivalent of this type is [`PrimitiveVector`]. +#[derive(Debug, Clone)] pub enum PrimitiveVectorMut { /// U8 U8(GenericPVectorMut), @@ -54,12 +62,6 @@ impl PrimitiveVectorMut { } } -impl From for VectorMut { - fn from(v: PrimitiveVectorMut) -> Self { - Self::Primitive(v) - } -} - impl VectorMutOps for PrimitiveVectorMut { type Immutable = PrimitiveVector; @@ -103,3 +105,61 @@ impl VectorMutOps for PrimitiveVectorMut { }); } } + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Upcast Conversion +//////////////////////////////////////////////////////////////////////////////////////////////////// + +impl From> for PrimitiveVectorMut { + fn from(v: GenericPVectorMut) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PrimitiveVectorMut { + type Input = GenericPVectorMut; + + fn from_u8(input: Self::Input) -> Self { + PrimitiveVectorMut::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PrimitiveVectorMut::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PrimitiveVectorMut::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PrimitiveVectorMut::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PrimitiveVectorMut::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PrimitiveVectorMut::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PrimitiveVectorMut::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PrimitiveVectorMut::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PrimitiveVectorMut::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PrimitiveVectorMut::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PrimitiveVectorMut::F64(input) + } +} diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs index b9aabca330a..6948a1f3c3d 100644 --- a/vortex-vector/src/private.rs +++ b/vortex-vector/src/private.rs @@ -10,6 +10,7 @@ use crate::*; +/// A private trait for sealing implementations of other traits. pub trait Sealed {} impl Sealed for Vector {} diff --git a/vortex-vector/src/vector/macros.rs b/vortex-vector/src/vector/macros.rs index 04e53544dc3..1d8f259cfcc 100644 --- a/vortex-vector/src/vector/macros.rs +++ b/vortex-vector/src/vector/macros.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +// TODO(connor): Finish implementing the rest of the macros. + /// TODO(connor): Write docs. #[macro_export] macro_rules! match_each_vector { diff --git a/vortex-vector/src/vector/mod.rs b/vortex-vector/src/vector/mod.rs index 5ad76b6b174..62d881c3a79 100644 --- a/vortex-vector/src/vector/mod.rs +++ b/vortex-vector/src/vector/mod.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition of the [`Vector`] and [`VectorMut`] types, which represent fully decompressed +//! (canonical) array data. + use vortex_dtype::DType; use vortex_error::vortex_panic; @@ -8,10 +11,24 @@ use crate::{ BoolVector, BoolVectorMut, NullVector, NullVectorMut, PrimitiveVector, PrimitiveVectorMut, }; +/// Helper macros for working with the different variants of [`Vector`] and [`VectorMut`]. +/// +/// All macros are exported at the crate level with `#[macro_use]`. mod macros; + +/// Definition and implementation of [`VectorOps`](ops::VectorOps) and +/// [`VectorMutOps`](ops::VectorMutOps) for [`Vector`] and [`VectorMut`], respecitively. pub(super) mod ops; -/// An enum over all vector types. +/// An enum over all kinds of immutable vectors, which represent fully decompressed (canonical) +/// array data. +/// +/// Most of the behavior of `Vector` is described by the [`VectorOps`] trait. +/// +/// The mutable equivalent of this type is [`VectorMut`], which implements. +/// +/// [`VectorOps`]: crate::VectorOps +#[derive(Debug, Clone)] pub enum Vector { /// Null Null(NullVector), @@ -19,7 +36,8 @@ pub enum Vector { Bool(BoolVector), /// Primitive /// - /// TODO(connor): Document that this is an enum, not a struct. + /// TODO(connor): Document that this is an enum, not a struct (to represent all possible + /// primitive native generics). Primitive(PrimitiveVector), // Decimal // Decimal(DecimalVector), @@ -37,7 +55,15 @@ pub enum Vector { // Extension(ExtensionVector), } -/// An enum over all mutable vector types. +/// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array +/// data. +/// +/// Most of the behavior of `VectorMut` is described by the [`VectorMutOps`] trait. +/// +/// The immutable equivalent of this type is [`Vector`]. +/// +/// [`VectorMutOps`]: crate::VectorMutOps +#[derive(Debug, Clone)] pub enum VectorMut { /// Null Null(NullVectorMut), @@ -51,7 +77,7 @@ impl VectorMut { /// Create a new mutable vector with the given capacity and dtype. pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { match dtype { - DType::Null => NullVectorMut::new(0).into(), + DType::Null => NullVectorMut::new(0).into(), // `NullVector` has `usize::MAX` capacity. DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), DType::Primitive(ptype, nullability) => { PrimitiveVectorMut::with_capacity(capacity, *ptype, *nullability).into() diff --git a/vortex-vector/src/vector/ops.rs b/vortex-vector/src/vector/ops.rs index 5585b4ffb5e..e995aa52409 100644 --- a/vortex-vector/src/vector/ops.rs +++ b/vortex-vector/src/vector/ops.rs @@ -1,9 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Definition and implementation of [`VectorOps`] and [`VectorOpsMut`] for [`Vector`] and -//! [`VectorMut`], respecitively. - use vortex_dtype::{DType, Nullability}; use crate::{ @@ -11,7 +8,7 @@ use crate::{ match_each_vector_mut_pair, private, }; -/// Common operations for immutable vectors. +/// Common operations for immutable vectors (all the variants of [`Vector`]). pub trait VectorOps: private::Sealed + Into { /// The mutable equivalent of this immutable vector. type Mutable: VectorMutOps; @@ -30,13 +27,15 @@ pub trait VectorOps: private::Sealed + Into { self.len() == 0 } - /// Try to convert `self` into a mutable vector (implementing [`VectorMutOps`]). + /// Tries to convert `self` into a mutable vector (implementing [`VectorMutOps`]). + /// + /// This method will only succeed if `self` is the only unique strong reference (it effectively + /// "owns" the buffer). If this is true, this method will return a mutable vector with the + /// contents of `self` **without** any copying of data. /// - /// If `self` is the only unique strong reference (it effectively "owns" the buffer), this - /// method will succeed and return a mutable vector with the contents of `self` **without** - /// copying. + /// # Errors /// - /// If `self` is not unique, this will fail and return `self` back. + /// If `self` is not unique, this will fail and return `self` back to the caller. fn try_into_mut(self) -> Result where Self: Sized; @@ -67,7 +66,7 @@ impl VectorOps for Vector { } } -/// Common operations for mutable vectors. +/// Common operations for mutable vectors (all the variants of [`VectorMut`]). pub trait VectorMutOps: private::Sealed + Into { /// The immutable equivalent of this mutable vector. type Immutable: VectorOps; @@ -100,6 +99,9 @@ pub trait VectorMutOps: private::Sealed + Into { fn reserve(&mut self, additional: usize); /// Extends the vector by appending elements from another vector. + /// + /// TODO(connor): Document semantics of what happens if `self` is non-nullable and `other` is + /// nullable (should panic?). fn extend_from_vector(&mut self, other: &Self::Immutable); /// Converts `self` into an immutable vector. @@ -122,10 +124,12 @@ pub trait VectorMutOps: private::Sealed + Into { /// Absorbs a mutable vector that was previously split off. /// /// If the two vectors were previously contiguous and not mutated in a way that causes - /// re-allocation i.e., if other was created by calling split_off on this vector, then this is - /// an O(1) operation that just decreases a reference count and sets a few indices. + /// re-allocation i.e., if other was created by calling [`split_off()`] on this vector, then + /// this is an `O(1)` operation (simply decreases a reference count and sets a few indices). + /// + /// Otherwise, this method falls back to `self.extend_from_vector(other)`. /// - /// Otherwise, this method degenerates to `self.extend_from_vector(other.as_ref())`. + /// [`split_off()`]: Self::split_off fn unsplit(&mut self, other: Self); }