From 8f50b0500ece8a965da6b0eeb0905b69db85e16a Mon Sep 17 00:00:00 2001
From: Connor Tsui
Date: Tue, 4 Nov 2025 09:54:03 -0500
Subject: [PATCH] add docs and tests for decimal vectors

Signed-off-by: Connor Tsui
---
 vortex-vector/src/binaryview/types.rs    |   3 +-
 vortex-vector/src/decimal/generic_mut.rs | 483 ++++++++++++++++++++++-
 vortex-vector/src/decimal/mod.rs         |   2 +
 vortex-vector/src/primitive/mod.rs       |   2 +
 4 files changed, 488 insertions(+), 2 deletions(-)

diff --git a/vortex-vector/src/binaryview/types.rs b/vortex-vector/src/binaryview/types.rs
index e50048f2ce4..32b5539b73d 100644
--- a/vortex-vector/src/binaryview/types.rs
+++ b/vortex-vector/src/binaryview/types.rs
@@ -120,7 +120,8 @@ pub trait BinaryViewTypeUpcast {
 
 /// Private module to seal the `BinaryViewType` trait.
 mod private {
-    /// Sealed trait to prevent external implementations of [`VarBinType`].
+    /// Sealed trait to prevent external implementations of
+    /// [`BinaryViewType`](super::BinaryViewType).
     pub trait Sealed {}
 
     impl Sealed for super::StringType {}
diff --git a/vortex-vector/src/decimal/generic_mut.rs b/vortex-vector/src/decimal/generic_mut.rs
index bd174143530..d3841676c86 100644
--- a/vortex-vector/src/decimal/generic_mut.rs
+++ b/vortex-vector/src/decimal/generic_mut.rs
@@ -10,7 +10,151 @@ use vortex_mask::MaskMut;
 
 use crate::{DVector, VectorMutOps, VectorOps};
 
-/// A specifically typed mutable decimal vector.
+/// A mutable vector of decimal values with fixed precision and scale.
+///
+/// `D` is bound by [`NativeDecimalType`], which can be one of the native integer types (`i8`,
+/// `i16`, `i32`, `i64`, `i128`) or `i256`. `D` is used to store the decimal values.
+///
+/// The decimal vector maintains a [`PrecisionScale`] that defines the precision (total number of
+/// digits) and scale (digits after the decimal point) for all values in the vector.
+///
+/// Unlike primitive vectors, decimal vectors require validation during construction and
+/// modification to ensure values stay within the bounds defined by their precision and scale.
+/// This makes operations like "push" fallible, thus we have a [`try_push()`] method instead.
+///
+/// [`DVectorMut`] is the primary way to construct decimal vectors. It provides methods for
+/// building vectors incrementally before converting them to an immutable [`DVector`] using
+/// the [`freeze()`] method.
+///
+/// [`try_push()`]: Self::try_push
+/// [`freeze()`]: crate::VectorMutOps::freeze
+///
+/// # Examples
+///
+/// ## Creating and building decimal vectors
+///
+/// ```
+/// use vortex_dtype::{DecimalDType, PrecisionScale};
+/// use vortex_vector::{DVectorMut, VectorMutOps};
+///
+/// // Create a decimal vector with precision=9, scale=2 (e.g., up to 9999999.99).
+/// let decimal_dtype = DecimalDType::new(9, 2);
+/// let mut vec = DVectorMut::<i32>::with_capacity(&decimal_dtype, 5);
+/// assert_eq!(vec.len(), 0);
+/// assert!(vec.capacity() >= 5);
+///
+/// // Values are stored as integers scaled by 10^scale.
+/// // For scale=2: 123.45 is stored as 12345.
+/// vec.try_push(12345).unwrap(); // Represents 123.45.
+/// vec.try_push(9999).unwrap(); // Represents 99.99.
+/// assert_eq!(vec.len(), 2);
+///
+/// // Values that exceed precision will fail.
+/// let too_large = 10_i32.pow(9); // Would represent 10000000.00.
+/// assert!(vec.try_push(too_large).is_err());
+///
+/// // Create from buffers with validation.
+/// use vortex_buffer::BufferMut;
+/// use vortex_mask::MaskMut;
+/// let elements = BufferMut::from_iter([100_i32, 200, 300]); // 1.00, 2.00, 3.00.
+/// let validity = MaskMut::new_true(3);
+/// let ps = PrecisionScale::<i32>::try_from(&decimal_dtype).unwrap();
+/// let decimal_vec = DVectorMut::new(ps, elements, validity);
+/// assert_eq!(decimal_vec.len(), 3);
+/// ```
+///
+/// ## Working with nulls and validity
+///
+/// ```
+/// use vortex_buffer::BufferMut;
+/// use vortex_dtype::{DecimalDType, PrecisionScale};
+/// use vortex_mask::MaskMut;
+/// use vortex_vector::{DVectorMut, VectorMutOps};
+///
+/// // Create a decimal vector with nulls.
+/// let decimal_dtype = DecimalDType::new(5, 2); // Up to 999.99.
+/// let ps = PrecisionScale::<i32>::try_from(&decimal_dtype).unwrap();
+///
+/// // Create with some null values (validity mask: true = not null, false = null).
+/// let elements = BufferMut::from_iter([1000_i32, 0, 2500, 0]); // 10.00, null, 25.00, null.
+/// let mut validity = MaskMut::with_capacity(4);
+/// validity.append_n(true, 1); // index 0: valid
+/// validity.append_n(false, 1); // index 1: null
+/// validity.append_n(true, 1); // index 2: valid
+/// validity.append_n(false, 1); // index 3: null
+/// let mut vec = DVectorMut::new(ps, elements, validity);
+///
+/// // Check element access with nulls.
+/// assert_eq!(vec.get(0), Some(&1000)); // 10.00.
+/// assert_eq!(vec.get(1), None); // Null.
+/// assert_eq!(vec.get(2), Some(&2500)); // 25.00.
+///
+/// // Append null values.
+/// vec.append_nulls(3);
+/// assert_eq!(vec.len(), 7);
+/// ```
+///
+/// ## Extending and manipulating vectors
+///
+/// ```
+/// use vortex_dtype::DecimalDType;
+/// use vortex_vector::{DVectorMut, VectorMutOps};
+///
+/// // Create two decimal vectors with scale=3 (3 decimal places).
+/// let decimal_dtype = DecimalDType::new(10, 3);
+/// let mut vec1 = DVectorMut::<i64>::with_capacity(&decimal_dtype, 10);
+/// vec1.try_push(1234567).unwrap(); // 1234.567.
+/// vec1.try_push(2345678).unwrap(); // 2345.678.
+///
+/// let mut vec2 = DVectorMut::<i64>::with_capacity(&decimal_dtype, 10);
+/// vec2.try_push(3456789).unwrap(); // 3456.789.
+/// vec2.try_push(4567890).unwrap(); // 4567.890.
+///
+/// // Extend from an immutable vector.
+/// let immutable = vec2.freeze();
+/// vec1.extend_from_vector(&immutable);
+/// assert_eq!(vec1.len(), 4);
+///
+/// // Split vector at index 3.
+/// let mut split = vec1.split_off(3);
+/// assert_eq!(vec1.len(), 3);
+/// assert_eq!(split.len(), 1);
+///
+/// // Reserve capacity for future operations.
+/// vec1.reserve(10);
+/// assert!(vec1.capacity() >= 13);
+///
+/// // Rejoin the vectors.
+/// vec1.unsplit(split);
+/// assert_eq!(vec1.len(), 4);
+/// ```
+///
+/// ## Converting between mutable and immutable
+///
+/// ```
+/// use vortex_dtype::DecimalDType;
+/// use vortex_vector::{DVectorMut, VectorMutOps, VectorOps};
+///
+/// // Create a mutable decimal vector.
+/// let decimal_dtype = DecimalDType::new(18, 6); // High precision with 6 decimal places.
+/// let mut vec_mut = DVectorMut::<i64>::with_capacity(&decimal_dtype, 3);
+/// vec_mut.try_push(1000000).unwrap(); // 1.000000.
+/// vec_mut.try_push(2500000).unwrap(); // 2.500000.
+/// vec_mut.try_push(3333333).unwrap(); // 3.333333.
+///
+/// // Freeze into an immutable vector.
+/// let vec_immutable = vec_mut.freeze();
+/// assert_eq!(vec_immutable.len(), 3);
+///
+/// // Access elements from the immutable vector.
+/// assert_eq!(vec_immutable.get(0), Some(&1000000));
+/// assert_eq!(vec_immutable.get(1), Some(&2500000));
+///
+/// // Can also convert immutable back to mutable using try_into_mut.
+/// // Note: This may fail if the buffer is shared.
+/// // let vec_mut_again = vec_immutable.try_into_mut().unwrap();
+/// // assert_eq!(vec_mut_again.len(), 3);
+/// ```
 #[derive(Debug, Clone)]
 pub struct DVectorMut<D> {
     /// The precision and scale of each decimal in the decimal vector.
@@ -219,3 +363,340 @@ impl<D: NativeDecimalType> VectorMutOps for DVectorMut<D> {
         self.validity.unsplit(other.validity);
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use vortex_dtype::DecimalDType;
+
+    use super::*;
+
+    #[test]
+    fn test_construction_and_validation() {
+        // Test with_capacity for different decimal types.
+        let decimal_dtype_i32 = DecimalDType::new(9, 2);
+        let vec_i32 = DVectorMut::<i32>::with_capacity(&decimal_dtype_i32, 10);
+        assert_eq!(vec_i32.len(), 0);
+        assert!(vec_i32.capacity() >= 10);
+
+        let decimal_dtype_i64 = DecimalDType::new(18, 4);
+        let vec_i64 = DVectorMut::<i64>::with_capacity(&decimal_dtype_i64, 5);
+        assert_eq!(vec_i64.len(), 0);
+        assert!(vec_i64.capacity() >= 5);
+
+        let decimal_dtype_i128 = DecimalDType::new(38, 10);
+        let vec_i128 = DVectorMut::<i128>::with_capacity(&decimal_dtype_i128, 3);
+        assert_eq!(vec_i128.len(), 0);
+        assert!(vec_i128.capacity() >= 3);
+
+        // Test try_new with valid data.
+        let ps = PrecisionScale::<i32>::try_from(&decimal_dtype_i32).unwrap();
+        let elements = BufferMut::from_iter([100_i32, 200, 300]);
+        let validity = MaskMut::new_true(3);
+        let vec = DVectorMut::try_new(ps, elements, validity).unwrap();
+        assert_eq!(vec.len(), 3);
+        assert_eq!(vec.precision_scale().precision(), 9);
+        assert_eq!(vec.precision_scale().scale(), 2);
+
+        // Test try_new error handling - length mismatch.
+        let elements_bad = BufferMut::from_iter([100_i32, 200]);
+        let validity_bad = MaskMut::new_true(3);
+        let result = DVectorMut::try_new(ps, elements_bad, validity_bad);
+        assert!(result.is_err());
+
+        // Test try_new error handling - out of bounds values.
+        let too_large = 10_i32.pow(9); // 10^9 exceeds precision 9.
+        let elements_oob = BufferMut::from_iter([100_i32, too_large, 300]);
+        let validity_oob = MaskMut::new_true(3);
+        let result = DVectorMut::try_new(ps, elements_oob, validity_oob);
+        assert!(result.is_err());
+
+        // Test new_unchecked. SAFETY: lengths match and both values fit precision 9.
+        let elements_unchecked = BufferMut::from_iter([100_i32, 200]);
+        let validity_unchecked = MaskMut::new_true(2);
+        let vec_unchecked =
+            unsafe { DVectorMut::new_unchecked(ps, elements_unchecked, validity_unchecked) };
+        assert_eq!(vec_unchecked.len(), 2);
+    }
+
+    #[test]
+    fn test_push_append_and_access() {
+        let decimal_dtype = DecimalDType::new(9, 2);
+        let mut vec = DVectorMut::<i32>::with_capacity(&decimal_dtype, 10);
+
+        // Test try_push with valid values.
+        vec.try_push(12345).unwrap(); // 123.45.
+        vec.try_push(9999).unwrap(); // 99.99.
+        vec.try_push(-5000).unwrap(); // -50.00.
+        assert_eq!(vec.len(), 3);
+
+        // Test try_push with out-of-bounds values.
+        let too_large = 10_i32.pow(9);
+        assert!(vec.try_push(too_large).is_err());
+        assert_eq!(vec.len(), 3); // Length unchanged after failed push.
+
+        // Test get without nulls.
+        assert_eq!(vec.get(0), Some(&12345));
+        assert_eq!(vec.get(1), Some(&9999));
+        assert_eq!(vec.get(2), Some(&-5000));
+
+        // Test append_nulls.
+        vec.append_nulls(2);
+        assert_eq!(vec.len(), 5);
+        assert_eq!(vec.get(3), None);
+        assert_eq!(vec.get(4), None);
+
+        // Test AsRef<[D]> slice access.
+        let slice = vec.as_ref();
+        assert_eq!(slice.len(), 5);
+        assert_eq!(slice[0], 12345);
+        assert_eq!(slice[1], 9999);
+        assert_eq!(slice[2], -5000);
+        // Note: slice[3] and slice[4] are default values (0) but marked as null in validity.
+    }
+
+    #[test]
+    fn test_vector_mut_ops_comprehensive() {
+        let decimal_dtype = DecimalDType::new(10, 3);
+        let mut vec1 = DVectorMut::<i64>::with_capacity(&decimal_dtype, 10);
+        vec1.try_push(1000000).unwrap(); // 1000.000.
+        vec1.try_push(2000000).unwrap(); // 2000.000.
+        vec1.try_push(3000000).unwrap(); // 3000.000.
+        vec1.try_push(4000000).unwrap(); // 4000.000.
+
+        // Test extend_from_vector.
+        let mut vec2 = DVectorMut::<i64>::with_capacity(&decimal_dtype, 10);
+        vec2.try_push(5000000).unwrap(); // 5000.000.
+        vec2.try_push(6000000).unwrap(); // 6000.000.
+        let frozen_vec2 = vec2.freeze();
+
+        let original_len = vec1.len();
+        vec1.extend_from_vector(&frozen_vec2);
+        assert_eq!(vec1.len(), original_len + frozen_vec2.len());
+        assert_eq!(vec1.get(4), Some(&5000000));
+        assert_eq!(vec1.get(5), Some(&6000000));
+
+        // Test split_off and validity preservation.
+        vec1.append_nulls(2); // Add nulls at positions 6 and 7.
+        assert_eq!(vec1.len(), 8);
+
+        let split = vec1.split_off(5);
+        assert_eq!(vec1.len(), 5);
+        assert_eq!(split.len(), 3);
+
+        // Check that split preserved validity.
+        assert_eq!(split.get(0), Some(&6000000)); // Was at index 5.
+        assert_eq!(split.get(1), None); // Was null at index 6.
+        assert_eq!(split.get(2), None); // Was null at index 7.
+
+        // Test reserve: capacity must cover len + 20 and must not drop below its prior value.
+        let initial_capacity = vec1.capacity();
+        vec1.reserve(20);
+        assert!(vec1.capacity() >= initial_capacity.max(vec1.len() + 20));
+
+        // Test len() and capacity() tracking.
+        assert_eq!(vec1.len(), 5);
+        assert!(vec1.capacity() >= 25);
+
+        // Test unsplit - rejoin the vectors.
+        vec1.unsplit(split);
+        assert_eq!(vec1.len(), 8);
+        assert_eq!(vec1.get(6), None); // Verify null is still null after unsplit.
+        assert_eq!(vec1.get(7), None); // Verify null is still null after unsplit.
+    }
+
+    #[test]
+    fn test_freeze_and_immutable_vector() {
+        let decimal_dtype = DecimalDType::new(15, 5);
+        let mut vec_mut = DVectorMut::<i64>::with_capacity(&decimal_dtype, 5);
+
+        // Add some values and nulls.
+        vec_mut.try_push(1234567890).unwrap(); // 12345.67890.
+        vec_mut.try_push(9876543210).unwrap(); // 98765.43210.
+        vec_mut.append_nulls(1);
+        vec_mut.try_push(5555555555).unwrap(); // 55555.55555.
+        vec_mut.append_nulls(1);
+
+        // Test freeze() to convert DVectorMut to DVector.
+        let vec_immutable = vec_mut.freeze();
+        assert_eq!(vec_immutable.len(), 5);
+
+        // Test DVector::get() with nulls.
+        assert_eq!(vec_immutable.get(0), Some(&1234567890));
+        assert_eq!(vec_immutable.get(1), Some(&9876543210));
+        assert_eq!(vec_immutable.get(2), None); // Null.
+        assert_eq!(vec_immutable.get(3), Some(&5555555555));
+        assert_eq!(vec_immutable.get(4), None); // Null.
+
+        // Test DVector::as_slice() through AsRef.
+        let slice = vec_immutable.as_ref();
+        assert_eq!(slice.len(), 5);
+        assert_eq!(slice[0], 1234567890);
+        assert_eq!(slice[3], 5555555555);
+
+        // Test precision_scale() getter on immutable vector.
+        assert_eq!(vec_immutable.precision_scale().precision(), 15);
+        assert_eq!(vec_immutable.precision_scale().scale(), 5);
+
+        // Test round-trip: DVector → DVectorMut (using try_into_mut).
+        let mut vec_mut_again = match vec_immutable.try_into_mut() {
+            Ok(v) => v,
+            Err(_) => {
+                // If conversion fails (buffer is shared), create a new mutable vector.
+                // This is expected in some cases when the buffer cannot be made mutable.
+                let decimal_dtype = DecimalDType::new(15, 5);
+                let mut new_vec = DVectorMut::<i64>::with_capacity(&decimal_dtype, 6);
+                new_vec.try_push(1234567890).unwrap();
+                new_vec.try_push(9876543210).unwrap();
+                new_vec.append_nulls(1);
+                new_vec.try_push(5555555555).unwrap();
+                new_vec.append_nulls(1);
+                new_vec
+            }
+        };
+
+        assert_eq!(vec_mut_again.len(), 5);
+        vec_mut_again.try_push(7777777777).unwrap(); // 77777.77777.
+        assert_eq!(vec_mut_again.len(), 6);
+
+        // Freeze again and verify.
+        let vec_final = vec_mut_again.freeze();
+        assert_eq!(vec_final.len(), 6);
+        assert_eq!(vec_final.get(5), Some(&7777777777));
+    }
+
+    #[test]
+    fn test_precision_scale_combinations() {
+        // Test Decimal(9, 2) - common currency format.
+        let decimal_9_2 = DecimalDType::new(9, 2);
+        let mut vec_9_2 = DVectorMut::<i32>::with_capacity(&decimal_9_2, 5);
+        vec_9_2.try_push(999999999).unwrap(); // Max: 9999999.99 stored as 999999999.
+        assert!(vec_9_2.try_push(1000000000).is_err()); // 10000000.00 stored as 1000000000 exceeds precision.
+        assert!(vec_9_2.try_push(-999999999).is_ok()); // Negative within bounds.
+        assert_eq!(vec_9_2.len(), 2);
+
+        // Test Decimal(38, 10) - high precision scientific.
+        let decimal_38_10 = DecimalDType::new(38, 10);
+        let mut vec_38_10 = DVectorMut::<i128>::with_capacity(&decimal_38_10, 3);
+        let large_value = 10_i128.pow(28) - 1; // 10^28 - 1, well within 38 digits.
+        vec_38_10.try_push(large_value).unwrap();
+        assert_eq!(vec_38_10.len(), 1);
+
+        // Test Decimal(4, 0) - integer-only decimals that fit in i16.
+        let decimal_4_0 = DecimalDType::new(4, 0);
+        let mut vec_4_0 = DVectorMut::<i16>::with_capacity(&decimal_4_0, 5);
+        vec_4_0.try_push(9999).unwrap(); // Max: 9999.
+        assert!(vec_4_0.try_push(10000).is_err()); // Exceeds 4 digits.
+        vec_4_0.try_push(-9999).unwrap(); // Negative within bounds.
+        assert_eq!(vec_4_0.len(), 2);
+
+        // Test with different underlying types.
+        // i8 with small precision/scale (max precision for i8 is 2).
+        let decimal_2_1 = DecimalDType::new(2, 1);
+        let mut vec_i8 = DVectorMut::<i8>::with_capacity(&decimal_2_1, 3);
+        vec_i8.try_push(99).unwrap(); // 9.9.
+        assert!(vec_i8.try_push(100).is_err()); // 10.0 exceeds precision.
+
+        // i16 with moderate precision/scale (max precision for i16 is 4).
+        let decimal_4_2 = DecimalDType::new(4, 2);
+        let mut vec_i16 = DVectorMut::<i16>::with_capacity(&decimal_4_2, 3);
+        vec_i16.try_push(999).unwrap(); // 9.99.
+        vec_i16.try_push(9999).unwrap(); // 99.99.
+        assert_eq!(vec_i16.len(), 2);
+    }
+
+    #[test]
+    fn test_empty_and_edge_cases() {
+        let decimal_dtype = DecimalDType::new(9, 2);
+
+        // Test empty vector creation and operations.
+        let empty_vec = DVectorMut::<i32>::with_capacity(&decimal_dtype, 0);
+        assert_eq!(empty_vec.len(), 0);
+        // Capacity might be rounded up from the requested value.
+        let _ = empty_vec.capacity(); // Just verify it doesn't panic.
+
+        // Freeze empty vector.
+        let frozen_empty = empty_vec.freeze();
+        assert_eq!(frozen_empty.len(), 0);
+
+        // Test single element vector.
+        let mut single = DVectorMut::<i32>::with_capacity(&decimal_dtype, 1);
+        single.try_push(42).unwrap();
+        assert_eq!(single.len(), 1);
+        assert_eq!(single.get(0), Some(&42));
+
+        // Split single element vector at index 1.
+        // Original keeps [0, 1) = the element, split gets [1, len) = nothing.
+        let split_single = single.split_off(1);
+        assert_eq!(single.len(), 1); // Original keeps the element.
+        assert_eq!(split_single.len(), 0); // Split gets nothing.
+
+        // Test all-null vector.
+        let mut all_nulls = DVectorMut::<i32>::with_capacity(&decimal_dtype, 5);
+        all_nulls.append_nulls(5);
+        assert_eq!(all_nulls.len(), 5);
+        for i in 0..5 {
+            assert_eq!(all_nulls.get(i), None);
+        }
+
+        // Freeze all-null vector and check immutable.
+        let frozen_nulls = all_nulls.freeze();
+        assert_eq!(frozen_nulls.len(), 5);
+        for i in 0..5 {
+            assert_eq!(frozen_nulls.get(i), None);
+        }
+
+        // Test maximum capacity scenario - create large vector.
+        let mut large = DVectorMut::<i32>::with_capacity(&decimal_dtype, 1000);
+        for _ in 0..1000 {
+            large.try_push(999).unwrap();
+        }
+        assert_eq!(large.len(), 1000);
+        assert!(large.capacity() >= 1000);
+    }
+
+    #[test]
+    fn test_nulls_with_validity_mask() {
+        let decimal_dtype = DecimalDType::new(8, 3);
+        let ps = PrecisionScale::<i32>::try_from(&decimal_dtype).unwrap();
+
+        // Create vector with specific null pattern using validity mask.
+        let elements = BufferMut::from_iter([1000_i32, 0, 2000, 0, 3000]); // 0s will be null.
+        let mut validity = MaskMut::with_capacity(5);
+        validity.append_n(true, 1); // index 0: valid
+        validity.append_n(false, 1); // index 1: null
+        validity.append_n(true, 1); // index 2: valid
+        validity.append_n(false, 1); // index 3: null
+        validity.append_n(true, 1); // index 4: valid
+        let mut vec = DVectorMut::new(ps, elements, validity);
+
+        assert_eq!(vec.len(), 5);
+        assert_eq!(vec.get(0), Some(&1000)); // 1.000.
+        assert_eq!(vec.get(1), None); // Null.
+        assert_eq!(vec.get(2), Some(&2000)); // 2.000.
+        assert_eq!(vec.get(3), None); // Null.
+        assert_eq!(vec.get(4), Some(&3000)); // 3.000.
+
+        // Extend with more values and nulls.
+        vec.try_push(4000).unwrap();
+        vec.append_nulls(2);
+        assert_eq!(vec.len(), 8);
+        assert_eq!(vec.get(5), Some(&4000));
+        assert_eq!(vec.get(6), None);
+        assert_eq!(vec.get(7), None);
+
+        // Split and verify nulls are preserved.
+        let split = vec.split_off(4);
+        assert_eq!(vec.len(), 4);
+        assert_eq!(split.len(), 4);
+
+        // Original vec should have: valid, null, valid, null.
+        assert_eq!(vec.get(1), None);
+        assert_eq!(vec.get(3), None);
+
+        // Split should have: valid, valid, null, null.
+        assert_eq!(split.get(0), Some(&3000));
+        assert_eq!(split.get(1), Some(&4000));
+        assert_eq!(split.get(2), None);
+        assert_eq!(split.get(3), None);
+    }
+}
diff --git a/vortex-vector/src/decimal/mod.rs b/vortex-vector/src/decimal/mod.rs
index 53c1ee6e7f2..350c1bcd172 100644
--- a/vortex-vector/src/decimal/mod.rs
+++ b/vortex-vector/src/decimal/mod.rs
@@ -9,6 +9,8 @@
 //! [`DecimalVector`] and [`DecimalVectorMut`] are enums that wrap all of the different possible
 //! [`DVector`]s. There are several macros defined in this crate to make working with these
 //! primitive vector types easier.
+//!
+//! See the documentation for [`DVectorMut`] for more information.
 
 mod generic;
 pub use generic::DVector;
diff --git a/vortex-vector/src/primitive/mod.rs b/vortex-vector/src/primitive/mod.rs
index c836b290b6b..315b1ae1aa2 100644
--- a/vortex-vector/src/primitive/mod.rs
+++ b/vortex-vector/src/primitive/mod.rs
@@ -11,6 +11,8 @@
 //! [`PVector`]s. There are several macros defined in this crate to make working with these
 //! primitive vector types easier.
 //!
+//! See the documentation for [`PVectorMut`] for more information.
+//!
 //! [`f16`]: vortex_dtype::half::f16
 
 mod generic;