diff --git a/vortex-vector/src/decimal/generic.rs b/vortex-vector/src/decimal/generic.rs index f00302bca08..3bb6472321e 100644 --- a/vortex-vector/src/decimal/generic.rs +++ b/vortex-vector/src/decimal/generic.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`DVector`]. + use vortex_buffer::Buffer; use vortex_dtype::{NativeDecimalType, PrecisionScale}; use vortex_error::{VortexExpect, VortexResult, vortex_bail}; @@ -8,22 +10,45 @@ use vortex_mask::Mask; use crate::{DVectorMut, VectorOps}; -/// A specifically typed decimal vector. +/// An immutable vector of generic decimal values. +/// +/// `DVector` can be considered a borrowed / frozen version of [`DVectorMut`], which is +/// created via the [`freeze`](crate::VectorMutOps::freeze) method. +/// +/// See the documentation for [`DVectorMut`] for more information. #[derive(Debug, Clone)] pub struct DVector { + /// The precision and scale of each decimal in the decimal vector. pub(super) ps: PrecisionScale, + /// The buffer representing the vector decimal elements. pub(super) elements: Buffer, + /// The validity mask (where `true` represents an element is **not** null). pub(super) validity: Mask, } impl DVector { - /// Try to create a new decimal vector from the given elements and validity. + /// Creates a new [`DVector`] from the given [`PrecisionScale`], elements buffer, and + /// validity mask. + /// + /// # Panics + /// + /// Panics if: + /// + /// - The lengths of the `elements` and `validity` do not match. + /// - Any of the elements are out of bounds for the given [`PrecisionScale`]. + pub fn new(ps: PrecisionScale, elements: Buffer, validity: Mask) -> Self { + Self::try_new(ps, elements, validity).vortex_expect("Failed to create `DVector`") + } + + /// Tries to create a new [`DVector`] from the given [`PrecisionScale`], elements buffer, and + /// validity mask. /// /// # Errors /// - /// Returns an error if the precision/scale is invalid, the lengths of the elements - /// and validity do not match, or any of the elements are out of bounds for the given - /// precision/scale. + /// Returns an error if: + /// + /// - The lengths of the `elements` and `validity` do not match. + /// - Any of the elements are out of bounds for the given [`PrecisionScale`]. pub fn try_new( ps: PrecisionScale, elements: Buffer, @@ -53,12 +78,15 @@ impl DVector { }) } - /// Create a new decimal vector from the given elements and validity without validation. + /// Creates a new [`DVector`] from the given [`PrecisionScale`], elements buffer, and + /// validity mask, _without_ validation. /// /// # Safety /// - /// The caller must ensure that the precision/scale is valid, the lengths of the elements - /// and validity match, and all the elements are within bounds for the given precision/scale. + /// The caller must ensure: + /// + /// - The lengths of the elements and validity are equal. + /// - All elements are in bounds for the given [`PrecisionScale`]. pub unsafe fn new_unchecked( ps: PrecisionScale, elements: Buffer, @@ -75,14 +103,42 @@ impl DVector { } } + /// Decomposes the decimal vector into its constituent parts ([`PrecisionScale`], decimal + /// buffer, and validity). + pub fn into_parts(self) -> (PrecisionScale, Buffer, Mask) { + (self.ps, self.elements, self.validity) + } + /// Get the precision/scale of the decimal vector. pub fn precision_scale(&self) -> PrecisionScale { self.ps } - /// Decomposes the decimal vector into its constituent parts (precision/scale, buffer and validity). - pub fn into_parts(self) -> (PrecisionScale, Buffer, Mask) { - (self.ps, self.elements, self.validity) + /// Returns a reference to the underlying elements buffer containing the decimal data. + pub fn elements(&self) -> &Buffer { + &self.elements + } + + /// Gets a nullable element at the given index, panicking on out-of-bounds. + /// + /// If the element at the given index is null, returns `None`. Otherwise, returns `Some(x)`, + /// where `x: D`. + /// + /// Note that this `get` method is different from the standard library [`slice::get`], which + /// returns `None` if the index is out of bounds. This method will panic if the index is out of + /// bounds, and return `None` if the elements is null. + /// + /// # Panics + /// + /// Panics if the index is out of bounds. + pub fn get(&self, index: usize) -> Option<&D> { + self.validity.value(index).then(|| &self.elements[index]) + } +} + +impl AsRef<[D]> for DVector { + fn as_ref(&self) -> &[D] { + &self.elements } } diff --git a/vortex-vector/src/decimal/generic_mut.rs b/vortex-vector/src/decimal/generic_mut.rs index b550f0f2b86..5c71b08e260 100644 --- a/vortex-vector/src/decimal/generic_mut.rs +++ b/vortex-vector/src/decimal/generic_mut.rs @@ -1,8 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`DVectorMut`]. + use vortex_buffer::BufferMut; -use vortex_dtype::{NativeDecimalType, PrecisionScale}; +use vortex_dtype::{DecimalDType, NativeDecimalType, PrecisionScale}; +use vortex_error::{VortexExpect, VortexResult, vortex_bail}; use vortex_mask::MaskMut; use crate::{DVector, VectorMutOps, VectorOps}; @@ -10,11 +13,165 @@ use crate::{DVector, VectorMutOps, VectorOps}; /// A specifically typed mutable decimal vector. #[derive(Debug, Clone)] pub struct DVectorMut { + /// The precision and scale of each decimal in the decimal vector. pub(super) ps: PrecisionScale, + /// The mutable buffer representing the vector decimal elements. pub(super) elements: BufferMut, + /// The validity mask (where `true` represents an element is **not** null). pub(super) validity: MaskMut, } +impl DVectorMut { + /// Creates a new [`DVectorMut`] from the given [`PrecisionScale`], elements buffer, and + /// validity mask. + /// + /// # Panics + /// + /// Panics if: + /// + /// - The lengths of the `elements` and `validity` do not match. + /// - Any of the elements are out of bounds for the given [`PrecisionScale`]. + pub fn new(ps: PrecisionScale, elements: BufferMut, validity: MaskMut) -> Self { + Self::try_new(ps, elements, validity).vortex_expect("Failed to create `DVector`") + } + + /// Tries to create a new [`DVectorMut`] from the given [`PrecisionScale`], elements buffer, + /// and validity mask. + /// + /// # Errors + /// + /// Returns an error if: + /// + /// - The lengths of the `elements` and `validity` do not match. + /// - Any of the elements are out of bounds for the given [`PrecisionScale`]. + pub fn try_new( + ps: PrecisionScale, + elements: BufferMut, + validity: MaskMut, + ) -> VortexResult { + if elements.len() != validity.len() { + vortex_bail!( + "Elements length {} does not match validity length {}", + elements.len(), + validity.len() + ); + } + + // We assert that each element is within bounds for the given precision/scale. + if !elements.iter().all(|e| ps.is_valid(*e)) { + vortex_bail!( + "One or more elements are out of bounds for precision {} and scale {}", + ps.precision(), + ps.scale() + ); + } + + Ok(Self { + ps, + elements, + validity, + }) + } + + /// Creates a new [`DVectorMut`] from the given [`PrecisionScale`], elements buffer, and + /// validity mask, _without_ validation. + /// + /// # Safety + /// + /// The caller must ensure: + /// + /// - The lengths of the elements and validity are equal. + /// - All elements are in bounds for the given [`PrecisionScale`]. + pub unsafe fn new_unchecked( + ps: PrecisionScale, + elements: BufferMut, + validity: MaskMut, + ) -> Self { + if cfg!(debug_assertions) { + Self::try_new(ps, elements, validity).vortex_expect("Failed to create `DVectorMut`") + } else { + Self { + ps, + elements, + validity, + } + } + } + + /// Create a new mutable primitive vector with the given capacity. + pub fn with_capacity(decimal_dtype: &DecimalDType, capacity: usize) -> Self { + Self { + ps: PrecisionScale::try_from(decimal_dtype) + .vortex_expect("TODO(someone): This definitely should not be fallible"), + elements: BufferMut::with_capacity(capacity), + validity: MaskMut::with_capacity(capacity), + } + } + + /// Decomposes the decimal vector into its constituent parts ([`PrecisionScale`], decimal + /// buffer, and validity). + pub fn into_parts(self) -> (PrecisionScale, BufferMut, MaskMut) { + (self.ps, self.elements, self.validity) + } + + /// Get the precision/scale of the decimal vector. + pub fn precision_scale(&self) -> PrecisionScale { + self.ps + } + + /// Returns a reference to the underlying elements buffer containing the decimal data. + pub fn elements(&self) -> &BufferMut { + &self.elements + } + + /// Returns a mutable reference to the underlying elements buffer containing the decimal data. + /// + /// # Safety + /// + /// Modifying the elements buffer directly may violate the precision/scale constraints. + /// The caller must ensure that any modifications maintain these invariants. + pub unsafe fn elements_mut(&mut self) -> &mut BufferMut { + &mut self.elements + } + + /// Gets a nullable element at the given index, panicking on out-of-bounds. + /// + /// If the element at the given index is null, returns `None`. Otherwise, returns `Some(x)`, + /// where `x: D`. + /// + /// Note that this `get` method is different from the standard library [`slice::get`], which + /// returns `None` if the index is out of bounds. This method will panic if the index is out of + /// bounds, and return `None` if the elements is null. + /// + /// # Panics + /// + /// Panics if the index is out of bounds. + pub fn get(&self, index: usize) -> Option<&D> { + self.validity.value(index).then(|| &self.elements[index]) + } + + /// Appends a new element to the end of the vector. + /// + /// # Errors + /// + /// Returns an error if the value is out of bounds for the vector's precision/scale. + pub fn try_push(&mut self, value: D) -> VortexResult<()> { + if !self.ps.is_valid(value) { + vortex_bail!("Value {:?} is out of bounds for {}", value, self.ps,); + } + + self.elements.push(value); + self.validity.append_n(true, 1); + Ok(()) + } +} + +impl AsRef<[D]> for DVectorMut { + fn as_ref(&self) -> &[D] { + &self.elements + } +} + impl VectorMutOps for DVectorMut { type Immutable = DVector; diff --git a/vortex-vector/src/decimal/generic_mut_impl.rs b/vortex-vector/src/decimal/generic_mut_impl.rs deleted file mode 100644 index ff787c892c8..00000000000 --- a/vortex-vector/src/decimal/generic_mut_impl.rs +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use vortex_dtype::{NativeDecimalType, PrecisionScale}; -use vortex_error::{VortexResult, vortex_bail}; - -use crate::DVectorMut; - -impl DVectorMut { - /// Get the precision/scale of the decimal vector. - pub fn precision_scale(&self) -> PrecisionScale { - self.ps - } - - /// Get a nullable element at the given index. - pub fn get(&self, index: usize) -> Option<&D> { - self.validity.value(index).then(|| &self.elements[index]) - } - - /// Appends a new element to the end of the vector. - /// - /// # Errors - /// - /// Returns an error if the value is out of bounds for the vector's precision/scale. - pub fn try_push(&mut self, value: D) -> VortexResult<()> { - if !self.ps.is_valid(value) { - vortex_bail!("Value {:?} is out of bounds for {}", value, self.ps,); - } - self.elements.push(value); - self.validity.append_n(true, 1); - Ok(()) - } - - /// Returns a mutable reference to the underlying elements buffer. - /// - /// # SAFETY - /// - /// Modifying the elements buffer directly may violate the precision/scale constraints. - /// The caller must ensure that any modifications maintain these invariants. - pub unsafe fn elements_mut(&mut self) -> &mut [D] { - &mut self.elements - } -} - -impl AsRef<[D]> for DVectorMut { - fn as_ref(&self) -> &[D] { - &self.elements - } -} diff --git a/vortex-vector/src/decimal/mod.rs b/vortex-vector/src/decimal/mod.rs index c5ea20920c2..53c1ee6e7f2 100644 --- a/vortex-vector/src/decimal/mod.rs +++ b/vortex-vector/src/decimal/mod.rs @@ -1,17 +1,29 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definitions and implementations of decimal vector types. +//! +//! The types that hold data are [`DVector`] and [`DVectorMut`], which are generic over types `D` +//! that implement [`NativeDecimalType`]. +//! +//! [`DecimalVector`] and [`DecimalVectorMut`] are enums that wrap all of the different possible +//! [`DVector`]s. There are several macros defined in this crate to make working with these +//! primitive vector types easier. + mod generic; +pub use generic::DVector; + mod generic_mut; -mod generic_mut_impl; -mod macros; +pub use generic_mut::DVectorMut; + mod vector; +pub use vector::DecimalVector; + mod vector_mut; +pub use vector_mut::DecimalVectorMut; + +mod macros; -pub use generic::*; -pub use generic_mut::*; -pub use vector::*; -pub use vector_mut::*; use vortex_dtype::NativeDecimalType; use crate::{Vector, VectorMut}; @@ -22,18 +34,18 @@ impl From for Vector { } } -impl From> for Vector { - fn from(v: DVector) -> Self { - Self::Decimal(DecimalVector::from(v)) - } -} - impl From> for DecimalVector { fn from(value: DVector) -> Self { D::upcast(value) } } +impl From> for Vector { + fn from(v: DVector) -> Self { + Self::Decimal(DecimalVector::from(v)) + } +} + impl From for VectorMut { fn from(v: DecimalVectorMut) -> Self { Self::Decimal(v) diff --git a/vortex-vector/src/decimal/vector.rs b/vortex-vector/src/decimal/vector.rs index e95a4785e97..2298e3b1eec 100644 --- a/vortex-vector/src/decimal/vector.rs +++ b/vortex-vector/src/decimal/vector.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +//! Definition and implementation of [`DecimalVector`]. + use vortex_dtype::{DecimalTypeDowncast, DecimalTypeUpcast, NativeDecimalType, i256}; use vortex_error::vortex_panic; use vortex_mask::Mask; diff --git a/vortex-vector/src/decimal/vector_mut.rs b/vortex-vector/src/decimal/vector_mut.rs index 4c283012312..c75669be8e8 100644 --- a/vortex-vector/src/decimal/vector_mut.rs +++ b/vortex-vector/src/decimal/vector_mut.rs @@ -1,7 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_dtype::{DecimalTypeDowncast, DecimalTypeUpcast, NativeDecimalType, i256}; +//! Definition and implementation of [`DecimalVectorMut`]. + +use vortex_dtype::{ + DecimalDType, DecimalType, DecimalTypeDowncast, DecimalTypeUpcast, NativeDecimalType, i256, +}; use vortex_error::vortex_panic; use crate::decimal::DVectorMut; @@ -24,6 +28,34 @@ pub enum DecimalVectorMut { D256(DVectorMut), } +impl DecimalVectorMut { + /// Create a new mutable decimal vector with the given primitive type and capacity. + pub fn with_capacity(decimal_dtype: &DecimalDType, capacity: usize) -> Self { + let decimal_kind = DecimalType::smallest_decimal_value_type(decimal_dtype); + + match decimal_kind { + DecimalType::I8 => { + DecimalVectorMut::D8(DVectorMut::::with_capacity(decimal_dtype, capacity)) + } + DecimalType::I16 => { + DecimalVectorMut::D16(DVectorMut::::with_capacity(decimal_dtype, capacity)) + } + DecimalType::I32 => { + DecimalVectorMut::D32(DVectorMut::::with_capacity(decimal_dtype, capacity)) + } + DecimalType::I64 => { + DecimalVectorMut::D64(DVectorMut::::with_capacity(decimal_dtype, capacity)) + } + DecimalType::I128 => { + DecimalVectorMut::D128(DVectorMut::::with_capacity(decimal_dtype, capacity)) + } + DecimalType::I256 => { + DecimalVectorMut::D256(DVectorMut::::with_capacity(decimal_dtype, capacity)) + } + } + } +} + impl VectorMutOps for DecimalVectorMut { type Immutable = DecimalVector; diff --git a/vortex-vector/src/primitive/generic.rs b/vortex-vector/src/primitive/generic.rs index ee7577845f4..3eb0011d429 100644 --- a/vortex-vector/src/primitive/generic.rs +++ b/vortex-vector/src/primitive/generic.rs @@ -71,11 +71,15 @@ impl PVector { (self.elements, self.validity) } - /// Gets a nullable element at the given index. + /// Gets a nullable element at the given index, panicking on out-of-bounds. /// /// If the element at the given index is null, returns `None`. Otherwise, returns `Some(x)`, /// where `x: T`. /// + /// Note that this `get` method is different from the standard library [`slice::get`], which + /// returns `None` if the index is out of bounds. This method will panic if the index is out of + /// bounds, and return `None` if the elements is null. + /// /// # Panics /// /// Panics if the index is out of bounds. diff --git a/vortex-vector/src/primitive/generic_mut_impl.rs b/vortex-vector/src/primitive/generic_mut_impl.rs index 03a9c1950b7..fdb09d1dd32 100644 --- a/vortex-vector/src/primitive/generic_mut_impl.rs +++ b/vortex-vector/src/primitive/generic_mut_impl.rs @@ -11,7 +11,7 @@ use crate::{PVectorMut, VectorMutOps}; /// Point operations for [`PVectorMut`]. impl PVectorMut { - /// Gets a nullable element at the given index, **WITHOUT** bounds checking. + /// Gets a nullable element at the given index, panicking on out-of-bounds. /// /// If the element at the given index is null, returns `None`. Otherwise, returns `Some(x)`, /// where `x: T`. diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs index ef26a2147b0..c836b290b6b 100644 --- a/vortex-vector/src/primitive/mod.rs +++ b/vortex-vector/src/primitive/mod.rs @@ -11,20 +11,19 @@ //! [`PVector`]s. There are several macros defined in this crate to make working with these //! primitive vector types easier. //! -//! [`NativePType`]: vortex_dtype::NativePType //! [`f16`]: vortex_dtype::half::f16 mod generic; pub use generic::PVector; mod generic_mut; +mod generic_mut_impl; +mod iter; pub use generic_mut::PVectorMut; mod vector; pub use vector::PrimitiveVector; -mod generic_mut_impl; -mod iter; mod vector_mut; pub use vector_mut::PrimitiveVectorMut; diff --git a/vortex-vector/src/primitive/vector_mut.rs b/vortex-vector/src/primitive/vector_mut.rs index 2590bc8cde5..c7ee23df443 100644 --- a/vortex-vector/src/primitive/vector_mut.rs +++ b/vortex-vector/src/primitive/vector_mut.rs @@ -60,9 +60,7 @@ impl PrimitiveVectorMut { PrimitiveVectorMut::F64(_) => PType::F64, } } -} -impl PrimitiveVectorMut { /// Create a new mutable primitive vector with the given primitive type and capacity. pub fn with_capacity(ptype: PType, capacity: usize) -> Self { match ptype { diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index 4be682dc9b7..661d55beb55 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -57,8 +57,10 @@ impl VectorMut { DType::Struct(struct_fields, _) => { StructVectorMut::with_capacity(struct_fields, capacity).into() } - DType::Decimal(..) - | DType::Utf8(_) + DType::Decimal(decimal_dtype, _) => { + DecimalVectorMut::with_capacity(decimal_dtype, capacity).into() + } + DType::Utf8(_) | DType::Binary(_) | DType::List(..) | DType::FixedSizeList(..) @@ -207,9 +209,10 @@ impl VectorMut { #[cfg(test)] mod tests { - use vortex_dtype::{Nullability, PType}; + use vortex_dtype::{DecimalDType, Nullability, PType}; use super::*; + use crate::decimal::DecimalVectorMut; use crate::{PVectorMut, VectorOps}; #[test] @@ -226,6 +229,85 @@ mod tests { assert!(prim_vec.capacity() >= 50); } + #[test] + fn test_with_capacity_decimal() { + // Test decimal vectors with different precisions that map to different internal types. + // Precision 1-2 uses i8, 3-4 uses i16, 5-9 uses i32, 10-18 uses i64, + // 19-38 uses i128, 39-76 uses i256. + + // Test precision 4 (uses i16 internally). + let decimal_dtype = DType::Decimal(DecimalDType::new(4, 2), Nullability::Nullable); + let decimal_vec = VectorMut::with_capacity(&decimal_dtype, 50); + + match decimal_vec { + VectorMut::Decimal(dec_vec) => { + assert_eq!(dec_vec.len(), 0, "New vector should be empty"); + assert!(dec_vec.capacity() >= 50, "Capacity should be at least 50"); + + // Verify it's using D16 variant internally. + assert!( + matches!(dec_vec, DecimalVectorMut::D16(_)), + "Precision 4 should use D16 variant" + ); + } + _ => panic!("Expected decimal vector for decimal dtype"), + } + + // Test precision 9 (uses i32 internally). + let decimal_dtype = DType::Decimal(DecimalDType::new(9, 0), Nullability::NonNullable); + let decimal_vec = VectorMut::with_capacity(&decimal_dtype, 100); + + match decimal_vec { + VectorMut::Decimal(dec_vec) => { + assert_eq!(dec_vec.len(), 0, "New vector should be empty"); + assert!(dec_vec.capacity() >= 100, "Capacity should be at least 100"); + + // Verify it's using D32 variant internally. + assert!( + matches!(dec_vec, DecimalVectorMut::D32(_)), + "Precision 9 should use D32 variant" + ); + } + _ => panic!("Expected decimal vector for decimal dtype"), + } + + // Test precision 18 (uses i64 internally). + let decimal_dtype = DType::Decimal(DecimalDType::new(18, -2), Nullability::Nullable); + let decimal_vec = VectorMut::with_capacity(&decimal_dtype, 75); + + match decimal_vec { + VectorMut::Decimal(dec_vec) => { + assert_eq!(dec_vec.len(), 0, "New vector should be empty"); + assert!(dec_vec.capacity() >= 75, "Capacity should be at least 75"); + + // Verify it's using D64 variant internally. + assert!( + matches!(dec_vec, DecimalVectorMut::D64(_)), + "Precision 18 should use D64 variant" + ); + } + _ => panic!("Expected decimal vector for decimal dtype"), + } + + // Test precision 38 (uses i128 internally). + let decimal_dtype = DType::Decimal(DecimalDType::new(38, 10), Nullability::NonNullable); + let decimal_vec = VectorMut::with_capacity(&decimal_dtype, 25); + + match decimal_vec { + VectorMut::Decimal(dec_vec) => { + assert_eq!(dec_vec.len(), 0, "New vector should be empty"); + assert!(dec_vec.capacity() >= 25, "Capacity should be at least 25"); + + // Verify it's using D128 variant internally. + assert!( + matches!(dec_vec, DecimalVectorMut::D128(_)), + "Precision 38 should use D128 variant" + ); + } + _ => panic!("Expected decimal vector for decimal dtype"), + } + } + #[test] #[should_panic(expected = "Mismatched vector types")] fn test_type_mismatch_panics() {