/// Attempt to convert this `BitBuffer` into a mutable version.
///
/// The conversion is delegated to the underlying byte buffer's `try_into_mut`.
///
/// # Errors
///
/// If the byte buffer cannot be converted, the bytes it hands back are wrapped into a
/// `BitBuffer` again with the same length and bit offset, and that value is returned as the
/// `Err` variant so the caller does not lose the data.
pub fn try_into_mut(self) -> Result {
    match self.buffer.try_into_mut() {
        // Argument order here is `(buffer, offset, len)` ...
        Ok(buffer) => Ok(BitBufferMut::from_buffer(buffer, self.offset, self.len)),
        // ... whereas `new_with_offset` is called with `(buffer, len, offset)`.
        Err(buffer) => Err(BitBuffer::new_with_offset(buffer, self.len, self.offset)),
    }
}
diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index ae0d54d6222..d9fc075cfe2 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use arrow_buffer::bit_chunk_iterator::BitChunks; use bitvec::view::BitView; use crate::bit::{get_bit_unchecked, set_bit_unchecked, unset_bit_unchecked}; @@ -25,12 +26,26 @@ use crate::{BitBuffer, BufferMut, ByteBufferMut, buffer_mut}; /// ``` /// /// See also: [`BitBuffer`]. +#[derive(Debug, Clone, Eq)] pub struct BitBufferMut { buffer: ByteBufferMut, offset: usize, len: usize, } +impl PartialEq for BitBufferMut { + fn eq(&self, other: &Self) -> bool { + if self.len != other.len { + return false; + } + + self.chunks() + .iter() + .zip(other.chunks()) + .all(|(a, b)| a == b) + } +} + impl BitBufferMut { /// Create new bit buffer from given byte buffer and logical bit length pub fn from_buffer(buffer: ByteBufferMut, offset: usize, len: usize) -> Self { @@ -118,6 +133,13 @@ impl BitBufferMut { unsafe { get_bit_unchecked(self.buffer.as_ptr(), self.offset + index) } } + /// Access chunks of the underlying buffer as 8 byte chunks with a final trailer + /// + /// If you're performing operations on a single buffer, prefer [BitBuffer::unaligned_chunks] + pub fn chunks(&self) -> BitChunks<'_> { + BitChunks::new(self.buffer.as_slice(), self.offset, self.len) + } + /// Get the bit capacity of the buffer. #[inline(always)] pub fn capacity(&self) -> usize { @@ -362,6 +384,63 @@ impl BitBufferMut { self.len += bit_len; } + /// Splits the bit buffer into two at the given index. + /// + /// Afterward, self contains elements `[0, at)`, and the returned buffer contains elements + /// `[at, capacity)`. + /// + /// Unlike bytes, if the split position is not on a byte-boundary this operation will copy + /// data into the result type, and mutate self. 
+ pub fn split_off(&mut self, at: usize) -> Self { + assert!(at <= self.len, "index {at} exceeds len {}", self.len); + + let new_offset = self.offset; + let new_len = self.len - at; + + // If we are splitting on a byte boundary, we can just slice the buffer + if (self.offset + at) % 8 == 0 { + let byte_pos = (self.offset + at) / 8; + let new_buffer = self.buffer.split_off(byte_pos); + self.len = at; + return Self { + buffer: new_buffer, + offset: new_offset, + len: new_len, + }; + } + + // Otherwise, we need to copy bits into a new buffer + let mut new_buffer = BitBufferMut::with_capacity(new_len); + for i in 0..new_len { + let value = self.value(at + i); + new_buffer.append(value); + } + + // Truncate self to the split position + self.truncate(at); + + new_buffer + } + + /// Absorbs a mutable buffer that was previously split off. + /// + /// If the two buffers were previously contiguous and not mutated in a way that causes + /// re-allocation i.e., if other was created by calling split_off on this buffer, then this is + /// an O(1) operation that just decreases a reference count and sets a few indices. + /// + /// Otherwise, this method degenerates to self.append_buffer(&other). + pub fn unsplit(&mut self, other: Self) { + if (self.offset + self.len) % 8 == 0 && other.offset == 0 { + // We are aligned and can just append the buffers + self.buffer.unsplit(other.buffer); + self.len += other.len; + return; + } + + // Otherwise, we need to append the bits one by one + self.append_buffer(&other.freeze()) + } + /// Freeze the buffer in its current state into an immutable `BoolBuffer`. 
/// Freeze the buffer in its current state into an immutable [`BitBuffer`].
///
/// The underlying byte buffer is frozen and the current length and bit offset are passed
/// along unchanged.
pub fn freeze(self) -> BitBuffer {
    BitBuffer::new_with_offset(self.buffer.freeze(), self.len, self.offset)
}
- // - // Formula to calculate this is: - // - // Us = lcm(size_of::, size_of::) / size_of:: - // Ts = lcm(size_of::, size_of::) / size_of:: - // - // Expanded and simplified: - // - // Us = size_of:: / gcd(size_of::, size_of::) - // Ts = size_of:: / gcd(size_of::, size_of::) - // - // Luckily since all this is constant-evaluated... performance here matters not! - const fn gcd(a: usize, b: usize) -> usize { - if b == 0 { a } else { gcd(b, a % b) } - } - - // Explicitly wrap the function call in a const block so it gets - // constant-evaluated even in debug mode. - let gcd: usize = const { gcd(size_of::(), size_of::()) }; - let ts: usize = size_of::() / gcd; - let us: usize = size_of::() / gcd; - - // Armed with this knowledge, we can find how many `U`s we can fit! - let us_len = self.len() / ts * us; - // And how many `T`s will be in the trailing slice! - let ts_len = self.len() % ts; - (us_len, ts_len) - } } /// An iterator over Buffer elements. diff --git a/vortex-buffer/src/buffer_mut.rs b/vortex-buffer/src/buffer_mut.rs index 6b782817666..c28f7b7150f 100644 --- a/vortex-buffer/src/buffer_mut.rs +++ b/vortex-buffer/src/buffer_mut.rs @@ -328,6 +328,60 @@ impl BufferMut { self.length += slice.len(); } + /// Splits the buffer into two at the given index. + /// + /// Afterward, self contains elements `[0, at)`, and the returned buffer contains elements + /// `[at, capacity)`. It’s guaranteed that the memory does not move, that is, the address of + /// self does not change, and the address of the returned slice is at bytes after that. + /// + /// This is an O(1) operation that just increases the reference count and sets a few indices. + /// + /// Panics if either half would have a length that is not a multiple of the alignment. 
/// Splits the buffer into two at the given element index.
///
/// Afterward, `self` has length `at` and the returned buffer has the remaining
/// `old_len - at` elements. The split itself is performed by the underlying byte storage at
/// byte offset `at * size_of` of the element type, and the returned buffer carries the same
/// alignment as `self`.
///
/// NOTE(review): the original comment describes this as an O(1), non-moving operation that
/// only adjusts a reference count; that depends on the byte storage's `split_off` contract,
/// which is not visible here — confirm.
///
/// # Panics
///
/// Panics if `at` is greater than the current length, or if the byte offset of the split
/// point is not a multiple of the buffer's alignment.
pub fn split_off(&mut self, at: usize) -> Self {
    if at > self.len() {
        vortex_panic!("Cannot split buffer of length {} at {}", self.len(), at);
    }

    // Translate the element index into a byte offset and make sure the second half would
    // still start on an aligned address.
    let bytes_at = at * size_of::();
    if !bytes_at.is_multiple_of(*self.alignment) {
        vortex_panic!(
            "Cannot split buffer at {}, resulting alignment is not {}",
            at,
            self.alignment
        );
    }

    let new_bytes = self.bytes.split_off(bytes_at);
    let new_length = self.length - at;
    self.length = at;

    BufferMut {
        bytes: new_bytes,
        length: new_length,
        alignment: self.alignment,
        _marker: Default::default(),
    }
}

/// Absorbs a mutable buffer that was previously split off.
///
/// The byte-level work is delegated to the underlying storage's `unsplit`; afterwards
/// `self`'s length has grown by `other`'s length.
///
/// NOTE(review): the original comment states this is O(1) when the two buffers were
/// contiguous and otherwise falls back to copying — that is a property of the byte storage
/// and is not visible here; confirm.
///
/// # Panics
///
/// Panics if the two buffers have different alignments.
pub fn unsplit(&mut self, other: Self) {
    if self.alignment != other.alignment {
        vortex_panic!(
            "Cannot unsplit buffers with different alignments: {} and {}",
            self.alignment,
            other.alignment
        );
    }
    self.bytes.unsplit(other.bytes);
    self.length += other.length;
}
- fn downcast_mut(visitor: &mut V) -> V::Output; + fn downcast_mut(visitor: &mut V) -> V::Output; + + /// Upcast a type-specific instance to a generic instance. + fn upcast(input: V::Input) -> V; } /// A visitor trait for converting a `NativePType` to another parameterized type. #[allow(missing_docs)] // Kind of obvious. -pub trait PTypeVisitor { +pub trait PTypeDowncast { type Output; fn as_u8(&self) -> Self::Output; @@ -167,19 +170,19 @@ pub trait PTypeVisitor { fn as_f64(&self) -> Self::Output; } -/// Extension trait to provide generic downcasting for [`PTypeVisitor`]. -pub trait PTypeVisitorExt: PTypeVisitor { +/// Extension trait to provide generic downcasting for [`PTypeDowncast`]. +pub trait PTypeDowncastExt: PTypeDowncast { /// Downcast the object to a specific primitive type. fn as_primitive(&self) -> Self::Output { T::downcast(self) } } -impl PTypeVisitorExt for T {} +impl PTypeDowncastExt for T {} /// A visitor trait for converting a `NativePType` to another mutable parameterized type. #[allow(missing_docs)] // Kind of obvious.. -pub trait PTypeVisitorMut { +pub trait PTypeDowncastMut { type Output; fn as_u8(&mut self) -> Self::Output; @@ -195,8 +198,8 @@ pub trait PTypeVisitorMut { fn as_f64(&mut self) -> Self::Output; } -/// Extension trait to provide generic downcasting for [`PTypeVisitorMut`]. -pub trait PTypeVisitorMutExt: PTypeVisitorMut { +/// Extension trait to provide generic downcasting for [`PTypeDowncastMut`]. +pub trait PTypeDowncastMutExt: PTypeDowncastMut { /// Downcast the object to a specific primitive type. fn as_primitive_mut(&mut self) -> Self::Output { T::downcast_mut(self) @@ -206,17 +209,40 @@ pub trait PTypeVisitorMutExt: PTypeVisitorMut { macro_rules! impl_ptype_downcast { ($T:ty) => { #[inline] - fn downcast(visitor: &V) -> V::Output { + fn downcast(visitor: &V) -> V::Output { paste::paste! 
/// A visitor trait for converting a generic `NativePType` into a non-parameterized type.
///
/// There is one constructor per primitive type, and every constructor builds `Self` from
/// the same associated [`PTypeUpcast::Input`] type.
///
/// NOTE(review): presumably `NativePType::upcast` picks the constructor matching the native
/// type it is invoked for — confirm against the `impl_ptype_downcast` macro.
#[allow(missing_docs)] // Kind of obvious.
pub trait PTypeUpcast {
    // The value every `from_*` constructor receives.
    type Input;

    // Unsigned integers.
    fn from_u8(input: Self::Input) -> Self;
    fn from_u16(input: Self::Input) -> Self;
    fn from_u32(input: Self::Input) -> Self;
    fn from_u64(input: Self::Input) -> Self;
    // Signed integers.
    fn from_i8(input: Self::Input) -> Self;
    fn from_i16(input: Self::Input) -> Self;
    fn from_i32(input: Self::Input) -> Self;
    fn from_i64(input: Self::Input) -> Self;
    // Floating point.
    fn from_f16(input: Self::Input) -> Self;
    fn from_f32(input: Self::Input) -> Self;
    fn from_f64(input: Self::Input) -> Self;
}
#![deny(missing_docs)] + mod bitops; mod eq; mod intersect_by_rank; mod iter_bools; +mod mask_mut; #[cfg(test)] mod tests; @@ -18,6 +20,7 @@ use std::sync::{Arc, OnceLock}; use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer}; use itertools::Itertools; +pub use mask_mut::*; use vortex_error::{VortexResult, vortex_panic}; /// Represents a set of values that are all included, all excluded, or some mixture of both. @@ -94,7 +97,7 @@ impl Eq for AllOr where T: Eq {} /// /// A [`Mask`] can be constructed from various representations, and converted to various /// others. Internally, these are cached. -#[derive(Clone, Debug)] +#[derive(Debug, Clone)] pub enum Mask { /// All values are included. AllTrue(usize), diff --git a/vortex-mask/src/mask_mut.rs b/vortex-mask/src/mask_mut.rs new file mode 100644 index 00000000000..d72403f54a2 --- /dev/null +++ b/vortex-mask/src/mask_mut.rs @@ -0,0 +1,534 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Sub; + +use vortex_buffer::{BitBuffer, BitBufferMut}; + +use crate::Mask; + +/// A mutable mask, used for lazily allocating the bit buffer as required. +#[derive(Debug, Clone)] +pub struct MaskMut(Inner); + +#[derive(Debug, Clone)] +enum Inner { + /// Initially, the mask is empty but may have some capacity. + Empty { capacity: usize }, + /// When the first value is pushed, the mask becomes constant. + Constant { + value: bool, + len: usize, + capacity: usize, + }, + /// When the first non-constant value is written, we allocate the bit buffer and switch + /// into the builder state. + Builder(BitBufferMut), +} + +impl MaskMut { + /// Creates a new empty mask with the default capacity. + pub fn with_capacity(capacity: usize) -> Self { + Self(Inner::Empty { capacity }) + } + + /// Creates a new mask with all values set to `true`. 
+ pub fn new_true(len: usize) -> Self { + Self(Inner::Constant { + value: true, + len, + capacity: len, + }) + } + + /// Creates a new mask with all values set to `false`. + pub fn new_false(len: usize) -> Self { + Self(Inner::Constant { + value: false, + len, + capacity: len, + }) + } + + /// Reserve capacity for at least `additional` more values to be appended. + pub fn reserve(&mut self, additional: usize) { + match &mut self.0 { + Inner::Empty { capacity } => { + *capacity += additional; + } + Inner::Constant { capacity, .. } => { + *capacity += additional; + } + Inner::Builder(bits) => { + bits.reserve(additional); + } + } + } + + /// Append n values to the mask. + pub fn append_n(&mut self, new_value: bool, n: usize) { + match &mut self.0 { + Inner::Empty { capacity } => { + self.0 = Inner::Constant { + value: new_value, + len: n, + capacity: (*capacity).max(n), + } + } + Inner::Constant { + value, + len, + capacity, + } => { + if *value == new_value { + // Same value, just increase length. + self.0 = Inner::Constant { + value: *value, + len: *len + n, + capacity: (*capacity).max(*len + n), + } + } else { + // Different value, need to allocate the bit buffer. + // Note: materialize() already appends the existing constant values + let bits = self.materialize(); + bits.append_n(new_value, n); + } + } + Inner::Builder(bits) => { + bits.append_n(new_value, n); + } + } + } + + /// Append a [`Mask`] to this mutable mask. + pub fn append_mask(&mut self, other: &Mask) { + match other { + Mask::AllTrue(len) => self.append_n(true, *len), + Mask::AllFalse(len) => self.append_n(false, *len), + Mask::Values(values) => { + let bitbuffer = BitBuffer::from(values.buffer.clone()); + self.materialize().append_buffer(&bitbuffer); + } + } + } + + /// Ensures that the internal bit buffer is allocated and returns a mutable reference to it. 
+ fn materialize(&mut self) -> &mut BitBufferMut { + let needs_materialization = !matches!(self.0, Inner::Builder(_)); + + if needs_materialization { + let new_builder = match &self.0 { + Inner::Empty { capacity } => BitBufferMut::with_capacity(*capacity), + Inner::Constant { + value, + len, + capacity, + } => { + let required_capacity = (*capacity).max(*len); + let mut bits = BitBufferMut::with_capacity(required_capacity); + bits.append_n(*value, *len); + bits + } + Inner::Builder(_) => unreachable!(), + }; + self.0 = Inner::Builder(new_builder); + } + + match &mut self.0 { + Inner::Builder(bits) => bits, + _ => unreachable!(), + } + } + + /// Split-off the mask at the given index, returning a new mask with the + /// values from `at` to the end, and leaving `self` with the values from + /// the start to `at`. + pub fn split_off(&mut self, at: usize) -> Self { + assert!(at <= self.len(), "split_off index out of bounds"); + match &mut self.0 { + Inner::Empty { capacity } => { + let new_capacity = (*capacity).saturating_sub(at); + Self(Inner::Empty { + capacity: new_capacity, + }) + } + Inner::Constant { + value, + len, + capacity, + } => { + let new_len = len.sub(at); + *len = at; + let new_capacity = (*capacity).saturating_sub(at); + Self(Inner::Constant { + value: *value, + len: new_len, + capacity: new_capacity, + }) + } + Inner::Builder(bits) => { + let new_bits = bits.split_off(at); + Self(Inner::Builder(new_bits)) + } + } + } + + /// Absorb another mask into this one, appending its values. + pub fn unsplit(&mut self, other: Self) { + match other.0 { + Inner::Empty { .. } => { + // No work to do + } + Inner::Constant { value, len, .. } => { + self.append_n(value, len); + } + Inner::Builder(bits) => { + self.materialize().unsplit(bits); + } + } + } + + /// Freezes the mutable mask into an immutable one. + pub fn freeze(self) -> Mask { + match self.0 { + Inner::Empty { .. } => Mask::new_true(0), + Inner::Constant { value, len, .. 
} => { + if value { + Mask::new_true(len) + } else { + Mask::new_false(len) + } + } + Inner::Builder(bits) => Mask::from_buffer(bits.freeze().into()), + } + } + + /// Returns the logical length of the mask. + pub fn len(&self) -> usize { + match &self.0 { + Inner::Empty { .. } => 0, + Inner::Constant { len, .. } => *len, + Inner::Builder(bits) => bits.len(), + } + } + + /// Returns true if the mask is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl Mask { + /// Attempts to convert an immutable mask into a mutable one. + pub fn try_into_mut(self) -> Result { + match self { + Mask::AllTrue(len) => Ok(MaskMut::new_true(len)), + Mask::AllFalse(len) => Ok(MaskMut::new_false(len)), + Mask::Values(values) => { + // FIXME(ngates): we can never convert Arrow BooleanBuffer to ByteBufferMut, + // so we have to wait until we use BitBuffer internally in MaskValues. + Err(Mask::Values(values)) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_split_off_empty() { + let mut mask = MaskMut::with_capacity(10); + assert_eq!(mask.len(), 0); + + let other = mask.split_off(0); + assert_eq!(mask.len(), 0); + assert_eq!(other.len(), 0); + } + + #[test] + fn test_split_off_constant_true_at_zero() { + let mut mask = MaskMut::new_true(10); + let other = mask.split_off(0); + + assert_eq!(mask.len(), 0); + assert_eq!(other.len(), 10); + + let frozen = other.freeze(); + assert_eq!(frozen.true_count(), 10); + } + + #[test] + fn test_split_off_constant_true_at_end() { + let mut mask = MaskMut::new_true(10); + let other = mask.split_off(10); + + assert_eq!(mask.len(), 10); + assert_eq!(other.len(), 0); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 10); + } + + #[test] + fn test_split_off_constant_true_in_middle() { + let mut mask = MaskMut::new_true(10); + let other = mask.split_off(6); + + assert_eq!(mask.len(), 6); + assert_eq!(other.len(), 4); + + let frozen_first = mask.freeze(); + 
// NOTE(review): an earlier version of this comment said that tests using BitBuffer
// operations are marked as ignored under miri (because bitvec uses raw pointer operations
// that miri cannot verify), but no `#[cfg_attr(miri, ignore)]` attribute is present on this
// test — confirm whether the attribute or the comment is the one that is out of date.
//
// Splits a materialized mask exactly on a byte boundary and checks that re-joining the two
// halves leaves the data at the same address.
#[test]
fn test_split_off_builder_at_byte_boundary() {
    let mut mask = MaskMut::with_capacity(16);
    // Create a pattern: 8 true, 8 false
    mask.append_n(true, 8);
    mask.append_n(false, 8);

    // Remember where the bits live before splitting.
    let mask_ptr = match &mask.0 {
        Inner::Builder(bits) => bits.as_slice().as_ptr(),
        _ => unreachable!(),
    };

    let other = mask.split_off(8);

    assert_eq!(mask.len(), 8);
    assert_eq!(other.len(), 8);

    // Ensure the unsplit was zero-copy.
    mask.unsplit(other);
    let new_mask_ptr = match &mask.0 {
        Inner::Builder(bits) => bits.as_slice().as_ptr(),
        _ => unreachable!(),
    };
    assert_eq!(mask_ptr, new_mask_ptr);
}
mask = MaskMut::new_true(5); + let other = MaskMut::new_false(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 5); + } + + #[test] + fn test_unsplit_constant_with_builder() { + let mut mask = MaskMut::new_true(5); + + let mut other = MaskMut::with_capacity(10); + other.append_n(true, 3); + other.append_n(false, 2); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 8); // 5 from first + 3 from second + } + + #[test] + fn test_unsplit_builder_with_constant() { + let mut mask = MaskMut::with_capacity(10); + mask.append_n(true, 3); + mask.append_n(false, 2); + + let other = MaskMut::new_true(5); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 8); // 3 from first + 5 from second + } + + #[test] + fn test_unsplit_builder_with_builder() { + let mut mask = MaskMut::with_capacity(10); + mask.append_n(true, 3); + mask.append_n(false, 2); + + let mut other = MaskMut::with_capacity(10); + other.append_n(false, 3); + other.append_n(true, 2); + + mask.unsplit(other); + assert_eq!(mask.len(), 10); + + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 5); // 3 from first + 2 from second + } + + #[test] + // TODO(ngates): when mask uses BitBuffer internally, into_mut should succeed + #[should_panic] + fn test_round_trip_split_unsplit() { + let mut original = MaskMut::with_capacity(20); + // Pattern: 10 true, 10 false + original.append_n(true, 10); + original.append_n(false, 10); + + let original_frozen = original.freeze(); + let original_true_count = original_frozen.true_count(); + + // Convert back to mutable for split + let mut mask = original_frozen.try_into_mut().unwrap(); + + // Split at 10 + let other = mask.split_off(10); + + // Unsplit back together + mask.unsplit(other); + + assert_eq!(mask.len(), 20); + let frozen = mask.freeze(); + 
assert_eq!(frozen.true_count(), original_true_count); + } + + #[test] + #[should_panic(expected = "split_off index out of bounds")] + fn test_split_off_out_of_bounds() { + let mut mask = MaskMut::new_true(10); + let _ = mask.split_off(11); + } + + #[test] + fn test_split_off_builder_at_bit_1() { + let mut mask = MaskMut::with_capacity(16); + mask.append_n(true, 16); + + let other = mask.split_off(1); + + assert_eq!(mask.len(), 1); + assert_eq!(other.len(), 15); + + let frozen_first = mask.freeze(); + assert_eq!(frozen_first.true_count(), 1); + + let frozen_second = other.freeze(); + assert_eq!(frozen_second.true_count(), 15); + } + + #[test] + fn test_multiple_split_unsplit() { + let mut mask = MaskMut::new_true(30); + + // Split into 3 parts + let third = mask.split_off(20); // 20-30 + let second = mask.split_off(10); // 10-20 + // first is 0-10 + + assert_eq!(mask.len(), 10); + assert_eq!(second.len(), 10); + assert_eq!(third.len(), 10); + + // Recombine in order + mask.unsplit(second); + mask.unsplit(third); + + assert_eq!(mask.len(), 30); + let frozen = mask.freeze(); + assert_eq!(frozen.true_count(), 30); + } +} diff --git a/vortex-vector/Cargo.toml b/vortex-vector/Cargo.toml new file mode 100644 index 00000000000..353f2ae1d0b --- /dev/null +++ b/vortex-vector/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "vortex-vector" +authors = { workspace = true } +categories = { workspace = true } +description = "Vortex in-memory canonical data format" +edition = { workspace = true } +homepage = { workspace = true } +include = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +readme = { workspace = true } +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[package.metadata.docs.rs] +all-features = true + +[lints] +workspace = true + +[dependencies] +vortex-buffer = { workspace = true } +vortex-dtype = { workspace = true } +vortex-error = { workspace = true } +vortex-mask = { 
workspace = true } +vortex-scalar = { workspace = true } diff --git a/vortex-vector/src/bool/mod.rs b/vortex-vector/src/bool/mod.rs new file mode 100644 index 00000000000..b5e5abddd1c --- /dev/null +++ b/vortex-vector/src/bool/mod.rs @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`BoolVector`] and [`BoolVectorMut`]. + +mod vector; +pub use vector::BoolVector; + +mod vector_mut; +pub use vector_mut::BoolVectorMut; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: BoolVector) -> Self { + Self::Bool(v) + } +} + +impl From for VectorMut { + fn from(v: BoolVectorMut) -> Self { + Self::Bool(v) + } +} diff --git a/vortex-vector/src/bool/vector.rs b/vortex-vector/src/bool/vector.rs new file mode 100644 index 00000000000..54d31cc5572 --- /dev/null +++ b/vortex-vector/src/bool/vector.rs @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`BoolVector`]. + +use vortex_buffer::BitBuffer; +use vortex_dtype::{DType, Nullability}; +use vortex_mask::Mask; + +use super::BoolVectorMut; +use crate::VectorOps; + +/// An immutable vector of boolean values. +/// +/// Internally, the boolean values are stored as the bits of a [`BitBuffer`] plus an optional +/// [`Mask`] for null booleans. +/// +/// The mutable equivalent of this type is [`BoolVectorMut`]. 
#[derive(Debug, Clone)]
pub struct BoolVector {
    // The boolean values, stored as bits.
    pub(super) bits: BitBuffer,
    // Optional validity mask; `nullability()` is derived from whether this is `Some`.
    pub(super) validity: Option,
}

impl VectorOps for BoolVector {
    type Mutable = BoolVectorMut;

    /// Derives the nullability from the presence of a validity mask.
    fn nullability(&self) -> Nullability {
        Nullability::from(self.validity.is_some())
    }

    /// Always a boolean dtype, carrying this vector's nullability.
    fn dtype(&self) -> DType {
        DType::Bool(self.nullability())
    }

    /// Returns the number of bits stored.
    ///
    /// In debug builds this also checks that the validity mask, if present, has the same
    /// length as the bits.
    fn len(&self) -> usize {
        debug_assert!(
            self.validity
                .as_ref()
                .is_none_or(|mask| mask.len() == self.bits.len())
        );

        self.bits.len()
    }

    /// Attempts to convert this vector into its mutable counterpart.
    ///
    /// Both the bits and (if present) the validity mask must be convertible. On failure the
    /// vector is handed back unchanged in content as the `Err` value.
    fn try_into_mut(self) -> Result
    where
        Self: Sized,
    {
        // First try the bits; on failure nothing has been consumed yet besides `self.bits`,
        // which is returned to us in the error.
        let bits = match self.bits.try_into_mut() {
            Ok(bits) => bits,
            Err(bits) => {
                return Err(BoolVector {
                    bits,
                    validity: self.validity,
                });
            }
        };

        let validity = match self.validity {
            Some(v) => match v.try_into_mut() {
                Ok(v) => Some(v),
                Err(v) => {
                    // The bits were already made mutable, so freeze them again to rebuild
                    // the immutable vector for the caller.
                    return Err(BoolVector {
                        bits: bits.freeze(),
                        validity: Some(v),
                    });
                }
            },
            None => None,
        };

        Ok(BoolVectorMut { bits, validity })
    }
}
+ pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + let validity = match nullability { + Nullability::NonNullable => None, + Nullability::Nullable => Some(MaskMut::with_capacity(capacity)), + }; + + Self { + bits: BitBufferMut::with_capacity(capacity), + validity, + } + } +} + +impl VectorMutOps for BoolVectorMut { + type Immutable = BoolVector; + + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Bool(self.nullability()) + } + + fn len(&self) -> usize { + debug_assert!( + self.validity + .as_ref() + .is_none_or(|mask| mask.len() == self.bits.len()) + ); + + self.bits.len() + } + + fn capacity(&self) -> usize { + self.bits.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.bits.reserve(additional); + + if let Some(v) = self.validity.as_mut() { + v.reserve(additional); + } + } + + fn extend_from_vector(&mut self, other: &BoolVector) { + self.bits.append_buffer(&other.bits); + + // TODO(connor): We must check `other`'s nullability in relation to `self`. + match (&mut self.validity, &other.validity) { + (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), + (Some(self_v), None) => self_v.append_n(true, other.bits.len()), + (None, Some(other_v)) => { + let mut new_validity = MaskMut::new_true(self.bits.len() - other.bits.len()); + new_validity.append_mask(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } + + fn freeze(self) -> Self::Immutable { + BoolVector { + bits: self.bits.freeze(), + validity: self.validity.map(|v| v.freeze()), + } + } + + fn split_off(&mut self, at: usize) -> Self { + BoolVectorMut { + bits: self.bits.split_off(at), + validity: self.validity.as_mut().map(|v| v.split_off(at)), + } + } + + fn unsplit(&mut self, other: Self) { + // TODO(connor): We must check `other`'s nullability in relation to `self`.
+ + let other_len = other.bits.len(); + self.bits.unsplit(other.bits); + + match (&mut self.validity, other.validity) { + (Some(self_v), Some(other_v)) => self_v.unsplit(other_v), + (Some(self_v), None) => self_v.append_n(true, other_len), + (None, Some(other_v)) => { + let mut new_validity = MaskMut::new_true(self.bits.len() - other_len); + new_validity.unsplit(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } +} diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs new file mode 100644 index 00000000000..86c76dfd074 --- /dev/null +++ b/vortex-vector/src/lib.rs @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Mutable decompressed (canonical) vectors for Vortex. +//! +//! TODO(connor) More docs. + +// TODO(connor) +// - Document everything +// - Figure out correct panic propagation +// - Figure out exact semantics of `split_off` w.r.t. length of capacity +// - Fix bugs in implementations +// - Add tests +// - Figure out error semantics on ops traits +// - Implement PartialEq and Eq for vectors +// - Add stubs for remaining vector variants +// - Potentially add `TryFrom<Vector> for Vector` or some other conversion method + +#![deny(missing_docs)] + +mod vector; +pub use vector::ops::{VectorMutOps, VectorOps}; +pub use vector::{Vector, VectorMut}; + +mod bool; +mod null; +mod primitive; + +pub use bool::{BoolVector, BoolVectorMut}; +pub use null::{NullVector, NullVectorMut}; +pub use primitive::{GenericPVector, GenericPVectorMut, PrimitiveVector, PrimitiveVectorMut}; + +mod private; diff --git a/vortex-vector/src/null/mod.rs b/vortex-vector/src/null/mod.rs new file mode 100644 index 00000000000..4f9de101c94 --- /dev/null +++ b/vortex-vector/src/null/mod.rs @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`NullVector`] and [`NullVectorMut`]. 
+ +mod vector; +pub use vector::NullVector; + +mod vector_mut; +pub use vector_mut::NullVectorMut; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: NullVector) -> Self { + Self::Null(v) + } +} + +impl From for VectorMut { + fn from(v: NullVectorMut) -> Self { + Self::Null(v) + } +} diff --git a/vortex-vector/src/null/vector.rs b/vortex-vector/src/null/vector.rs new file mode 100644 index 00000000000..8782cc3c6a8 --- /dev/null +++ b/vortex-vector/src/null/vector.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`NullVector`]. + +use vortex_dtype::{DType, Nullability}; + +use crate::{NullVectorMut, VectorOps}; + +/// An immutable vector of null values. +/// +/// Since a "null" value does not require any data storage, the nulls are stored internally with a +/// single `length` counter. +/// +/// The mutable equivalent of this type is [`NullVectorMut`]. +#[derive(Debug, Clone, Copy)] +pub struct NullVector { + pub(super) len: usize, +} + +impl NullVector { + /// Creates a new immutable vector of nulls with the given length. + pub fn new(len: usize) -> Self { + Self { len } + } +} + +impl VectorOps for NullVector { + type Mutable = NullVectorMut; + + fn nullability(&self) -> Nullability { + Nullability::Nullable + } + + fn dtype(&self) -> DType { + DType::Null + } + + fn len(&self) -> usize { + self.len + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + Ok(NullVectorMut::new(self.len)) + } +} diff --git a/vortex-vector/src/null/vector_mut.rs b/vortex-vector/src/null/vector_mut.rs new file mode 100644 index 00000000000..46a55da7f93 --- /dev/null +++ b/vortex-vector/src/null/vector_mut.rs @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`NullVectorMut`]. 
+ +use vortex_dtype::{DType, Nullability}; + +use super::NullVector; +use crate::VectorMutOps; + +/// A mutable vector of null values. +/// +/// Since a "null" value does not require any data storage, the nulls are stored internally with a +/// single `length` counter. +/// +/// The immutable equivalent of this type is [`NullVector`]. +#[derive(Debug, Clone, Copy)] +pub struct NullVectorMut { + pub(super) len: usize, +} + +impl NullVectorMut { + /// Creates a new mutable vector of nulls with the given length. + pub fn new(len: usize) -> Self { + Self { len } + } +} + +impl VectorMutOps for NullVectorMut { + type Immutable = NullVector; + + fn nullability(&self) -> Nullability { + Nullability::Nullable + } + + fn dtype(&self) -> DType { + DType::Null + } + + fn len(&self) -> usize { + self.len + } + + fn capacity(&self) -> usize { + usize::MAX + } + + fn reserve(&mut self, _additional: usize) { + // We do not allocate memory for `NullVector`, so this is a no-op. + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + self.len += other.len; + } + + fn freeze(self) -> Self::Immutable { + NullVector::new(self.len) + } + + fn split_off(&mut self, at: usize) -> Self { + assert!( + at <= self.capacity(), + "split_off out of bounds: {:?} <= {:?}", + at, + self.capacity(), + ); + + // TODO(connor): This is wrong (https://docs.rs/bytes/latest/src/bytes/bytes_mut.rs.html#320-335) + let new_len = self.len - at; + self.len = at; + NullVectorMut { len: new_len } + } + + fn unsplit(&mut self, other: Self) { + self.len += other.len; + } +} diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs new file mode 100644 index 00000000000..7ed4048de28 --- /dev/null +++ b/vortex-vector/src/primitive/generic.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`GenericPVector`]. 
+ +use vortex_buffer::Buffer; +use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_mask::Mask; + +use crate::{GenericPVectorMut, VectorOps}; + +/// An immutable vector of generic primitive values. +/// +/// `T` is expected to be bound by [`NativePType`], which templates an internal [`Buffer`] that +/// stores the elements of the vector. Additionally, an optional [`Mask`] is stored to track null +/// primitive elements. +/// +/// The mutable equivalent of this type is [`GenericPVectorMut`]. +#[derive(Debug, Clone)] +pub struct GenericPVector { + pub(super) elements: Buffer, + pub(super) validity: Option, +} + +impl VectorOps for GenericPVector { + type Mutable = GenericPVectorMut; + + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Primitive(T::PTYPE, self.nullability()) + } + + fn len(&self) -> usize { + self.elements.len() + } + + /// Try to convert self into a mutable vector. + fn try_into_mut(self) -> Result, Self> { + let elements = match self.elements.try_into_mut() { + Ok(elements) => elements, + Err(elements) => { + return Err(GenericPVector { + elements, + validity: self.validity, + }); + } + }; + + let validity = match self.validity { + Some(v) => match v.try_into_mut() { + Ok(v) => Some(v), + Err(v) => { + return Err(GenericPVector { + elements: elements.freeze(), + validity: Some(v), + }); + } + }, + None => None, + }; + + Ok(GenericPVectorMut { elements, validity }) + } +} diff --git a/vortex-vector/src/primitive/generic_mut.rs b/vortex-vector/src/primitive/generic_mut.rs new file mode 100644 index 00000000000..314f2819263 --- /dev/null +++ b/vortex-vector/src/primitive/generic_mut.rs @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`GenericPVectorMut`]. 
+ +use vortex_buffer::BufferMut; +use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_mask::MaskMut; + +use crate::{GenericPVector, VectorMutOps}; + +/// A mutable vector of generic primitive values. +/// +/// `T` is expected to be bound by [`NativePType`], which templates an internal [`BufferMut`] +/// that stores the elements of the vector. Additionally, an optional [`MaskMut`] is stored to track +/// null primitive elements. +/// +/// The immutable equivalent of this type is [`GenericPVector`]. +#[derive(Debug, Clone)] +pub struct GenericPVectorMut { + pub(super) elements: BufferMut, + pub(super) validity: Option, +} + +impl GenericPVectorMut { + /// Create a new mutable primitive vector with the given capacity and nullability. + pub fn with_capacity(capacity: usize, nullability: Nullability) -> Self { + let validity = match nullability { + Nullability::NonNullable => None, + Nullability::Nullable => Some(MaskMut::with_capacity(capacity)), + }; + + Self { + elements: BufferMut::with_capacity(capacity), + validity, + } + } +} + +impl VectorMutOps for GenericPVectorMut { + type Immutable = GenericPVector; + + fn nullability(&self) -> Nullability { + Nullability::from(self.validity.is_some()) + } + + fn dtype(&self) -> DType { + DType::Primitive(T::PTYPE, self.nullability()) + } + + fn len(&self) -> usize { + self.elements.len() + } + + fn capacity(&self) -> usize { + self.elements.capacity() + } + + fn reserve(&mut self, additional: usize) { + self.elements.reserve(additional); + if let Some(v) = self.validity.as_mut() { + v.reserve(additional); + } + } + + /// Extends the vector by appending elements from another vector. 
+ fn extend_from_vector(&mut self, other: &GenericPVector) { + self.elements.extend_from_slice(other.elements.as_slice()); + match (&mut self.validity, &other.validity) { + (Some(self_v), Some(other_v)) => self_v.append_mask(other_v), + (Some(self_v), None) => self_v.append_n(true, other.elements.len()), + (None, Some(other_v)) => { + let mut new_validity = + MaskMut::new_true(self.elements.len() - other.elements.len()); + new_validity.append_mask(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } + + /// Freeze the vector into an immutable one. + fn freeze(self) -> GenericPVector { + GenericPVector { + elements: self.elements.freeze(), + validity: self.validity.map(|v| v.freeze()), + } + } + + fn split_off(&mut self, at: usize) -> Self { + GenericPVectorMut { + elements: self.elements.split_off(at), + validity: self.validity.as_mut().map(|v| v.split_off(at)), + } + } + + fn unsplit(&mut self, other: Self) { + let other_len = other.elements.len(); + self.elements.unsplit(other.elements); + match (&mut self.validity, other.validity) { + (Some(self_v), Some(other_v)) => self_v.unsplit(other_v), + (Some(self_v), None) => self_v.append_n(true, other_len), + (None, Some(other_v)) => { + let mut new_validity = MaskMut::new_true(self.elements.len() - other_len); + new_validity.unsplit(other_v); + self.validity = Some(new_validity); + } + (None, None) => {} + } + } +} diff --git a/vortex-vector/src/primitive/macros.rs b/vortex-vector/src/primitive/macros.rs new file mode 100644 index 00000000000..8d9b5b0195b --- /dev/null +++ b/vortex-vector/src/primitive/macros.rs @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! 
match_each_pvector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PrimitiveVector::U8(v) => { + let $vec = v; + $body + } + PrimitiveVector::U16(v) => { + let $vec = v; + $body + } + PrimitiveVector::U32(v) => { + let $vec = v; + $body + } + PrimitiveVector::U64(v) => { + let $vec = v; + $body + } + PrimitiveVector::I8(v) => { + let $vec = v; + $body + } + PrimitiveVector::I16(v) => { + let $vec = v; + $body + } + PrimitiveVector::I32(v) => { + let $vec = v; + $body + } + PrimitiveVector::I64(v) => { + let $vec = v; + $body + } + PrimitiveVector::F16(v) => { + let $vec = v; + $body + } + PrimitiveVector::F32(v) => { + let $vec = v; + $body + } + PrimitiveVector::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_pvector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + PrimitiveVectorMut::U8(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::U16(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::U32(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::U64(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I8(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I16(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I32(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::I64(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::F16(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::F32(v) => { + let $vec = v; + $body + } + PrimitiveVectorMut::F64(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! 
match_each_pvector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PrimitiveVectorMut::U8(a), PrimitiveVectorMut::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U16(a), PrimitiveVectorMut::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U32(a), PrimitiveVectorMut::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U64(a), PrimitiveVectorMut::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I8(a), PrimitiveVectorMut::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I16(a), PrimitiveVectorMut::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I32(a), PrimitiveVectorMut::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I64(a), PrimitiveVectorMut::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F16(a), PrimitiveVectorMut::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F32(a), PrimitiveVectorMut::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F64(a), PrimitiveVectorMut::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched primitive vector types"), + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! 
match_each_pvector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (PrimitiveVectorMut::U8(a), PrimitiveVector::U8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U16(a), PrimitiveVector::U16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U32(a), PrimitiveVector::U32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::U64(a), PrimitiveVector::U64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I8(a), PrimitiveVector::I8(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I16(a), PrimitiveVector::I16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I32(a), PrimitiveVector::I32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::I64(a), PrimitiveVector::I64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F16(a), PrimitiveVector::F16(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F32(a), PrimitiveVector::F32(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (PrimitiveVectorMut::F64(a), PrimitiveVector::F64(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched primitive vector types"), + } + }}; +} diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs new file mode 100644 index 00000000000..a317e3df7b2 --- /dev/null +++ b/vortex-vector/src/primitive/mod.rs @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definitions and implementations of native primitive vector types. +//! +//! The types that hold data are [`GenericPVector`] and [`GenericPVectorMut`], which are generic +//! 
over types `T` that implement [`NativePType`] (which are just the integer and floating-point +//! types that are native to Rust plus [`f16`]). +//! +//! [`PrimitiveVector`] and [`PrimitiveVectorMut`] are enums that wrap all of the different possible +//! [`GenericPVector`]s. There are several macros defined in this crate to make working with these +//! primitive vector types easier. +//! +//! [`NativePType`]: vortex_dtype::NativePType +//! [`f16`]: vortex_dtype::half::f16 + +mod generic; +pub use generic::GenericPVector; + +mod generic_mut; +pub use generic_mut::GenericPVectorMut; + +mod vector; +pub use vector::PrimitiveVector; + +mod vector_mut; +pub use vector_mut::PrimitiveVectorMut; + +/// Helper macros for working with the different variants of [`PrimitiveVector`] and +/// [`PrimitiveVectorMut`]. +/// +/// All macros are exported at the crate level with `#[macro_use]`. +mod macros; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Vector Conversions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +use vortex_dtype::NativePType; + +use crate::{Vector, VectorMut}; + +impl From for Vector { + fn from(v: PrimitiveVector) -> Self { + Self::Primitive(v) + } +} + +impl From> for Vector { + fn from(v: GenericPVector) -> Self { + Self::Primitive(PrimitiveVector::from(v)) + } +} + +impl From for VectorMut { + fn from(v: PrimitiveVectorMut) -> Self { + Self::Primitive(v) + } +} + +impl From> for VectorMut { + fn from(val: GenericPVectorMut) -> Self { + Self::Primitive(PrimitiveVectorMut::from(val)) + } +} diff --git a/vortex-vector/src/primitive/vector.rs b/vortex-vector/src/primitive/vector.rs new file mode 100644 index 00000000000..cbb720b4312 --- /dev/null +++ b/vortex-vector/src/primitive/vector.rs @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! 
Definition and implementation of [`PrimitiveVector`]. + +use vortex_dtype::half::f16; +use vortex_dtype::{DType, NativePType, Nullability, PTypeUpcast}; + +use super::{GenericPVector, PrimitiveVectorMut}; +use crate::{VectorOps, match_each_pvector}; + +/// An immutable vector of primitive values. +/// +/// `PrimitiveVector` is represented by an enum over all possible [`GenericPVector`] types (which +/// are templated by the types that implement [`NativePType`]). +/// +/// The mutable equivalent of this type is [`PrimitiveVectorMut`]. +#[derive(Debug, Clone)] +pub enum PrimitiveVector { + /// U8 + U8(GenericPVector), + /// U16 + U16(GenericPVector), + /// U32 + U32(GenericPVector), + /// U64 + U64(GenericPVector), + /// I8 + I8(GenericPVector), + /// I16 + I16(GenericPVector), + /// I32 + I32(GenericPVector), + /// I64 + I64(GenericPVector), + /// F16 + F16(GenericPVector), + /// F32 + F32(GenericPVector), + /// F64 + F64(GenericPVector), +} + +impl VectorOps for PrimitiveVector { + type Mutable = PrimitiveVectorMut; + + fn nullability(&self) -> Nullability { + match_each_pvector!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_pvector!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_pvector!(self, |v| { v.len() }) + } + + fn try_into_mut(self) -> Result + where + Self: Sized, + { + match_each_pvector!(self, |v| { + v.try_into_mut() + .map(PrimitiveVectorMut::from) + .map_err(PrimitiveVector::from) + }) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Upcast Conversion +//////////////////////////////////////////////////////////////////////////////////////////////////// + +impl From> for PrimitiveVector { + fn from(v: GenericPVector) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PrimitiveVector { + type Input = GenericPVector; + + fn from_u8(input: Self::Input) -> Self { + PrimitiveVector::U8(input) + } + + fn from_u16(input: Self::Input) 
-> Self { + PrimitiveVector::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PrimitiveVector::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PrimitiveVector::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PrimitiveVector::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PrimitiveVector::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PrimitiveVector::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PrimitiveVector::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PrimitiveVector::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PrimitiveVector::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PrimitiveVector::F64(input) + } +} diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs new file mode 100644 index 00000000000..cf78dc67990 --- /dev/null +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition and implementation of [`PrimitiveVectorMut`]. + +use vortex_dtype::half::f16; +use vortex_dtype::{DType, NativePType, Nullability, PType, PTypeUpcast}; + +use crate::{ + GenericPVectorMut, PrimitiveVector, VectorMutOps, match_each_pvector_mut, + match_each_pvector_mut_immut_pair, match_each_pvector_mut_pair, +}; + +/// A mutable vector of primitive values. +/// +/// `PrimitiveVectorMut` is represented by an enum over all possible [`GenericPVectorMut`] types +/// (which are templated by the types that implement [`NativePType`]). +/// +/// The immutable equivalent of this type is [`PrimitiveVector`].
+#[derive(Debug, Clone)] +pub enum PrimitiveVectorMut { + /// U8 + U8(GenericPVectorMut), + /// U16 + U16(GenericPVectorMut), + /// U32 + U32(GenericPVectorMut), + /// U64 + U64(GenericPVectorMut), + /// I8 + I8(GenericPVectorMut), + /// I16 + I16(GenericPVectorMut), + /// I32 + I32(GenericPVectorMut), + /// I64 + I64(GenericPVectorMut), + /// F16 + F16(GenericPVectorMut), + /// F32 + F32(GenericPVectorMut), + /// F64 + F64(GenericPVectorMut), +} + +impl PrimitiveVectorMut { + /// Create a new mutable primitive vector with the given capacity, primitive type, and nullability. + pub fn with_capacity(capacity: usize, ptype: PType, nullability: Nullability) -> Self { + match ptype { + PType::U8 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::U64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I8 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::I64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::F16 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::F32 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + PType::F64 => GenericPVectorMut::::with_capacity(capacity, nullability).into(), + } + } +} + +impl VectorMutOps for PrimitiveVectorMut { + type Immutable = PrimitiveVector; + + fn nullability(&self) -> Nullability { + match_each_pvector_mut!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_pvector_mut!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_pvector_mut!(self, |v| { v.len() }) + } + + fn capacity(&self) -> usize { + 
match_each_pvector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_pvector_mut!(self, |v| { v.reserve(additional) }) + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + match_each_pvector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + match_each_pvector_mut!(self, |v| { v.freeze().into() }) + } + + fn split_off(&mut self, at: usize) -> Self { + match_each_pvector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + match_each_pvector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Upcast Conversion +//////////////////////////////////////////////////////////////////////////////////////////////////// + +impl From> for PrimitiveVectorMut { + fn from(v: GenericPVectorMut) -> Self { + T::upcast(v) + } +} + +impl PTypeUpcast for PrimitiveVectorMut { + type Input = GenericPVectorMut; + + fn from_u8(input: Self::Input) -> Self { + PrimitiveVectorMut::U8(input) + } + + fn from_u16(input: Self::Input) -> Self { + PrimitiveVectorMut::U16(input) + } + + fn from_u32(input: Self::Input) -> Self { + PrimitiveVectorMut::U32(input) + } + + fn from_u64(input: Self::Input) -> Self { + PrimitiveVectorMut::U64(input) + } + + fn from_i8(input: Self::Input) -> Self { + PrimitiveVectorMut::I8(input) + } + + fn from_i16(input: Self::Input) -> Self { + PrimitiveVectorMut::I16(input) + } + + fn from_i32(input: Self::Input) -> Self { + PrimitiveVectorMut::I32(input) + } + + fn from_i64(input: Self::Input) -> Self { + PrimitiveVectorMut::I64(input) + } + + fn from_f16(input: Self::Input) -> Self { + PrimitiveVectorMut::F16(input) + } + + fn from_f32(input: Self::Input) -> Self { + PrimitiveVectorMut::F32(input) + } + + fn from_f64(input: Self::Input) -> Self { + PrimitiveVectorMut::F64(input) + } +} 
diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs new file mode 100644 index 00000000000..6948a1f3c3d --- /dev/null +++ b/vortex-vector/src/private.rs @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! This private module contains the [`Sealed`] implementations for different [`Vector`] types. This +//! allows us to seal our [`VectorOps`] and [`VectorMutOps`] traits. +//! +//! Sealing these traits prevents external crates from implementing them while still allowing public +//! usage, which gives us the freedom to add new trait methods in the future without breaking +//! backward compatibility. + +use crate::*; + +/// A private trait for sealing implementations of other traits. +pub trait Sealed {} + +impl Sealed for Vector {} +impl Sealed for VectorMut {} + +impl Sealed for NullVector {} +impl Sealed for NullVectorMut {} + +impl Sealed for BoolVector {} +impl Sealed for BoolVectorMut {} + +impl Sealed for PrimitiveVector {} +impl Sealed for PrimitiveVectorMut {} +impl Sealed for GenericPVector {} +impl Sealed for GenericPVectorMut {} diff --git a/vortex-vector/src/vector/macros.rs b/vortex-vector/src/vector/macros.rs new file mode 100644 index 00000000000..1d8f259cfcc --- /dev/null +++ b/vortex-vector/src/vector/macros.rs @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +// TODO(connor): Finish implementing the rest of the macros. + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + Vector::Null(v) => { + let $vec = v; + $body + } + Vector::Bool(v) => { + let $vec = v; + $body + } + Vector::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! 
match_each_vector_mut { + ($self:expr, | $vec:ident | $body:block) => {{ + match $self { + VectorMut::Null(v) => { + let $vec = v; + $body + } + VectorMut::Bool(v) => { + let $vec = v; + $body + } + VectorMut::Primitive(v) => { + let $vec = v; + $body + } + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector_mut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), VectorMut::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), VectorMut::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), VectorMut::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched vector types"), + } + }}; +} + +/// TODO(connor): Write docs. +#[macro_export] +macro_rules! match_each_vector_mut_immut_pair { + ($self:expr, $other:expr, | $vec:ident, $vec_other:ident | $body:block) => {{ + match ($self, $other) { + (VectorMut::Null(a), Vector::Null(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Bool(a), Vector::Bool(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + (VectorMut::Primitive(a), Vector::Primitive(b)) => { + let $vec = a; + let $vec_other = b; + $body + } + _ => ::vortex_error::vortex_panic!("Mismatched vector types"), + } + }}; +} diff --git a/vortex-vector/src/vector/mod.rs b/vortex-vector/src/vector/mod.rs new file mode 100644 index 00000000000..62d881c3a79 --- /dev/null +++ b/vortex-vector/src/vector/mod.rs @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Definition of the [`Vector`] and [`VectorMut`] types, which represent fully decompressed +//! (canonical) array data. 
+ +use vortex_dtype::DType; +use vortex_error::vortex_panic; + +use crate::{ + BoolVector, BoolVectorMut, NullVector, NullVectorMut, PrimitiveVector, PrimitiveVectorMut, +}; + +/// Helper macros for working with the different variants of [`Vector`] and [`VectorMut`]. +/// +/// All macros are exported at the crate level with `#[macro_export]`. +mod macros; + +/// Definition and implementation of [`VectorOps`](ops::VectorOps) and +/// [`VectorMutOps`](ops::VectorMutOps) for [`Vector`] and [`VectorMut`], respectively. +pub(super) mod ops; + +/// An enum over all kinds of immutable vectors, which represent fully decompressed (canonical) +/// array data. +/// +/// Most of the behavior of `Vector` is described by the [`VectorOps`] trait. +/// +/// The mutable equivalent of this type is [`VectorMut`], which implements `VectorMutOps`. +/// +/// [`VectorOps`]: crate::VectorOps +#[derive(Debug, Clone)] +pub enum Vector { + /// Null + Null(NullVector), + /// Bool + Bool(BoolVector), + /// Primitive + /// + /// TODO(connor): Document that this is an enum, not a struct (to represent all possible + /// primitive native generics). + Primitive(PrimitiveVector), + // Decimal + // Decimal(DecimalVector), + // String + // String(StringVector), + // Binary + // Binary(BinaryVector), + // List + // List(ListVector), + // FixedList + // FixedList(FixedListVector), + // Struct + // Struct(StructVector), + // Extension + // Extension(ExtensionVector), +} + +/// An enum over all kinds of mutable vectors, which represent fully decompressed (canonical) array +/// data. +/// +/// Most of the behavior of `VectorMut` is described by the [`VectorMutOps`] trait. +/// +/// The immutable equivalent of this type is [`Vector`].
+/// +/// [`VectorMutOps`]: crate::VectorMutOps +#[derive(Debug, Clone)] +pub enum VectorMut { + /// Null + Null(NullVectorMut), + /// Bool + Bool(BoolVectorMut), + /// Primitive + Primitive(PrimitiveVectorMut), +} + +impl VectorMut { + /// Create a new mutable vector with the given capacity and dtype. + /// + /// The `capacity` argument is ignored for [`DType::Null`]. + /// + /// # Panics + /// + /// Panics if `dtype` is anything other than [`DType::Null`], [`DType::Bool`], or + /// [`DType::Primitive`]. + pub fn with_capacity(capacity: usize, dtype: &DType) -> Self { + match dtype { + DType::Null => NullVectorMut::new(0).into(), // `NullVector` has `usize::MAX` capacity. + DType::Bool(n) => BoolVectorMut::with_capacity(capacity, *n).into(), + DType::Primitive(ptype, nullability) => { + PrimitiveVectorMut::with_capacity(capacity, *ptype, *nullability).into() + } + _ => vortex_panic!("Unsupported dtype for VectorMut"), + } + } +} diff --git a/vortex-vector/src/vector/ops.rs b/vortex-vector/src/vector/ops.rs new file mode 100644 index 00000000000..e995aa52409 --- /dev/null +++ b/vortex-vector/src/vector/ops.rs @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_dtype::{DType, Nullability}; + +use crate::{ + Vector, VectorMut, match_each_vector, match_each_vector_mut, match_each_vector_mut_immut_pair, + match_each_vector_mut_pair, private, +}; + +/// Common operations for immutable vectors (all the variants of [`Vector`]). +/// +/// Every implementor can be converted into the [`Vector`] enum. +pub trait VectorOps: private::Sealed + Into<Vector> { + /// The mutable equivalent of this immutable vector. + type Mutable: VectorMutOps; + + /// Returns the [`Nullability`] of the vector. + fn nullability(&self) -> Nullability; + + /// Returns the [`DType`] (or data type) of the vector. + fn dtype(&self) -> DType; + + /// Returns the number of elements in the vector, also referred to as its "length". + fn len(&self) -> usize; + + /// Returns `true` if the vector contains no elements. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Tries to convert `self` into a mutable vector (implementing [`VectorMutOps`]).
+ /// + /// This method will only succeed if `self` is the only strong reference (it effectively + /// "owns" the buffer). If this is true, this method will return a mutable vector with the + /// contents of `self` **without** any copying of data. + /// + /// # Errors + /// + /// If `self` is not unique, this will fail and return `self` back to the caller. + fn try_into_mut(self) -> Result<Self::Mutable, Self> + where + Self: Sized; +} + +impl VectorOps for Vector { + type Mutable = VectorMut; + + fn nullability(&self) -> Nullability { + match_each_vector!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_vector!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_vector!(self, |v| { v.len() }) + } + + fn try_into_mut(self) -> Result<Self::Mutable, Self> + where + Self: Sized, + { + // Convert the inner vector, then wrap either outcome back into the matching enum: the + // mutable vector on success, the original immutable vector on failure. + match_each_vector!(self, |v| { + v.try_into_mut().map(VectorMut::from).map_err(Vector::from) + }) + } +} + +/// Common operations for mutable vectors (all the variants of [`VectorMut`]). +/// +/// Every implementor can be converted into the [`VectorMut`] enum. +pub trait VectorMutOps: private::Sealed + Into<VectorMut> { + /// The immutable equivalent of this mutable vector. + type Immutable: VectorOps; + + /// Returns the [`Nullability`] of the vector. + fn nullability(&self) -> Nullability; + + /// Returns the [`DType`] (or data type) of the vector. + fn dtype(&self) -> DType; + + /// Returns the number of elements in the vector, also referred to as its "length". + fn len(&self) -> usize; + + /// Returns `true` if the vector contains no elements. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the total number of elements the vector can hold without reallocating. + fn capacity(&self) -> usize; + + /// Reserves capacity for at least `additional` more elements to be inserted in the given + /// vector. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. After + /// calling `reserve`, the capacity will be greater than or equal to `self.len() + additional`.
+ /// Does nothing if capacity is already sufficient. + /// + /// Please let us know if you need `reserve_exact` functionality! + fn reserve(&mut self, additional: usize); + + /// Extends the vector by appending elements from another vector. + /// + /// TODO(connor): Document semantics of what happens if `self` is non-nullable and `other` is + /// nullable (should panic?). + fn extend_from_vector(&mut self, other: &Self::Immutable); + + /// Converts `self` into an immutable vector. + fn freeze(self) -> Self::Immutable; + + /// Splits the vector into two at the given index. + /// + /// Afterward, `self` contains elements `[0, at)`, and the returned vector contains elements + /// `[at, capacity)`. It's guaranteed that the memory does not move, that is, the address of + /// `self` does not change, and the returned vector starts `at` elements after that. + /// + /// This is an `O(1)` operation that just increases the reference count and sets a few indices. + /// + /// NOTE(review): this wording mirrors byte-buffer `split_off` semantics. The bit-buffer + /// `split_off` in `vortex-buffer` documents that it copies data when the split position is not + /// on a byte boundary, so the no-move and `O(1)` guarantees may not hold for every implementor + /// (confirm). + /// + /// # Panics + /// + /// Panics if we try to split off more than the current capacity of the vector (if + /// `at > capacity`). + fn split_off(&mut self, at: usize) -> Self; + + /// Absorbs a mutable vector that was previously split off. + /// + /// If the two vectors were previously contiguous and not mutated in a way that causes + /// re-allocation (i.e., if `other` was created by calling [`split_off()`] on this vector), then + /// this is an `O(1)` operation (simply decreases a reference count and sets a few indices). + /// + /// Otherwise, this method falls back to `self.extend_from_vector(other)`.
+ /// + /// [`split_off()`]: Self::split_off + fn unsplit(&mut self, other: Self); +} + +// Every method below dispatches to the inner vector of whichever variant `self` holds. +impl VectorMutOps for VectorMut { + type Immutable = Vector; + + fn nullability(&self) -> Nullability { + match_each_vector_mut!(self, |v| { v.nullability() }) + } + + fn dtype(&self) -> DType { + match_each_vector_mut!(self, |v| { v.dtype() }) + } + + fn len(&self) -> usize { + match_each_vector_mut!(self, |v| { v.len() }) + } + + fn capacity(&self) -> usize { + match_each_vector_mut!(self, |v| { v.capacity() }) + } + + fn reserve(&mut self, additional: usize) { + match_each_vector_mut!(self, |v| { v.reserve(additional) }) + } + + fn extend_from_vector(&mut self, other: &Self::Immutable) { + // The pair macro panics with "Mismatched vector types" if `other` is a different variant. + match_each_vector_mut_immut_pair!(self, other, |a, b| { + a.extend_from_vector(b); + }); + } + + fn freeze(self) -> Self::Immutable { + // Freeze the inner vector, then convert the result into a `Vector`. + match_each_vector_mut!(self, |v| { v.freeze().into() }) + } + + fn split_off(&mut self, at: usize) -> Self { + // Split the inner vector, then convert the split-off part into a `VectorMut`. + match_each_vector_mut!(self, |v| { v.split_off(at).into() }) + } + + fn unsplit(&mut self, other: Self) { + // The pair macro panics with "Mismatched vector types" if `other` is a different variant. + match_each_vector_mut_pair!(self, other, |a, b| { + a.unsplit(b); + }); + } +}