Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow/src/array/array_decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ impl From<ArrayData> for Decimal256Array {
assert_eq!(
data.buffers().len(),
1,
"Decimal128Array data should contain 1 buffer only (values)"
"Decimal256Array data should contain 1 buffer only (values)"

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fix typo

);
let values = data.buffers()[0].as_ptr();
let (precision, scale) = match data.data_type() {
Expand Down
67 changes: 65 additions & 2 deletions arrow/src/array/builder/decimal_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use num::BigInt;
use std::any::Any;
use std::sync::Arc;

Expand All @@ -25,7 +26,7 @@ use crate::array::{ArrayBuilder, FixedSizeBinaryBuilder};

use crate::error::{ArrowError, Result};

use crate::datatypes::validate_decimal_precision;
use crate::datatypes::{validate_decimal256_precision, validate_decimal_precision};
use crate::util::decimal::{BasicDecimal, Decimal256};

/// Array Builder for [`Decimal128Array`]
Expand All @@ -51,6 +52,10 @@ pub struct Decimal256Builder {
builder: FixedSizeBinaryBuilder,
precision: usize,
scale: usize,

/// Should decimal values be validated for compatibility with scale and precision?
/// defaults to true
value_validation: bool,
}

impl Decimal128Builder {
Expand Down Expand Up @@ -163,14 +168,35 @@ impl Decimal256Builder {
builder: FixedSizeBinaryBuilder::new(capacity, Self::BYTE_LENGTH),
precision,
scale,
value_validation: true,
}
}

/// Disable validation
///
/// # Safety
///
/// After disabling validation, caller must ensure that appended values are compatible
/// for the specified precision and scale.
pub unsafe fn disable_value_validation(&mut self) {
self.value_validation = false;
}

/// Appends a [`Decimal256`] number into the builder.
///
/// Returns an error if `value` has different precision, scale or length in bytes than this builder
#[inline]
pub fn append_value(&mut self, value: &Decimal256) -> Result<()> {
let value = if self.value_validation {
let raw_bytes = value.raw_value();
let integer = BigInt::from_signed_bytes_le(raw_bytes);
let value_str = integer.to_string();
validate_decimal256_precision(&value_str, self.precision)?;
value
} else {
value
};

if self.precision != value.precision() || self.scale != value.scale() {
return Err(ArrowError::InvalidArgumentError(
"Decimal value does not have the same precision or scale as Decimal256Builder".to_string()
Expand Down Expand Up @@ -206,9 +232,10 @@ impl Decimal256Builder {
#[cfg(test)]
mod tests {
use super::*;
use num::Num;

use crate::array::array_decimal::{BasicDecimalArray, Decimal128Array};
use crate::array::Array;
use crate::array::{array_decimal, Array};
use crate::datatypes::DataType;
use crate::util::decimal::Decimal128;

Expand Down Expand Up @@ -298,4 +325,40 @@ mod tests {
let value = Decimal256::try_new_from_bytes(40, 6, bytes.as_slice()).unwrap();
builder.append_value(&value).unwrap();
}

#[test]
#[should_panic(
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 76. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_builder_out_of_range_precision_scale() {
let mut builder = Decimal256Builder::new(30, 76, 6);

let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
let bytes = big_value.to_signed_bytes_le();
let value = Decimal256::try_new_from_bytes(76, 6, &bytes).unwrap();
builder.append_value(&value).unwrap();
}

#[test]
#[should_panic(
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 76. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_data_validation() {
let mut builder = Decimal256Builder::new(30, 76, 6);
// Disable validation at builder
unsafe {
builder.disable_value_validation();
}

let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
let bytes = big_value.to_signed_bytes_le();
let value = Decimal256::try_new_from_bytes(76, 6, &bytes).unwrap();
builder
.append_value(&value)
.expect("should not validate invalid value at builder");

let array = builder.finish();
let array_data = array_decimal::BasicDecimalArray::data(&array);
array_data.validate_values().unwrap();
}
}
17 changes: 16 additions & 1 deletion arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,18 @@
//! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates
//! common attributes and operations for Arrow array.

use crate::datatypes::{validate_decimal_precision, DataType, IntervalUnit, UnionMode};
use crate::datatypes::{
validate_decimal256_precision, validate_decimal_precision, DataType, IntervalUnit,
UnionMode,
};
use crate::error::{ArrowError, Result};
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
use crate::{
buffer::{Buffer, MutableBuffer},
util::bit_util,
};
use half::f16;
use num::BigInt;
use std::convert::TryInto;
use std::mem;
use std::ops::Range;
Expand Down Expand Up @@ -1018,6 +1022,17 @@ impl ArrayData {
}
Ok(())
}
DataType::Decimal256(p, _) => {
let values = self.buffers()[0].as_slice();
for pos in 0..self.len() {
let offset = pos * 32;
let raw_bytes = &values[offset..offset + 32];
let integer = BigInt::from_signed_bytes_le(raw_bytes);
let value_str = integer.to_string();
validate_decimal256_precision(&value_str, *p)?;
}
Ok(())
}
DataType::Utf8 => self.validate_utf8::<i32>(),
DataType::LargeUtf8 => self.validate_utf8::<i64>(),
DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
Expand Down
144 changes: 143 additions & 1 deletion arrow/src/datatypes/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use num::{BigInt, Num, ToPrimitive};
use std::fmt;

use serde_derive::{Deserialize, Serialize};
Expand Down Expand Up @@ -300,6 +301,49 @@ pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
99999999999999999999999999999999999999,
];

/// `MAX_DECIMAL_FOR_LARGER_PRECISION[p]` holds the maximum integer value
/// that can be stored in [DataType::Decimal256] value of precision `p` > 38

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Depends on #2093 which adds DataType::Decimal256

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this causes doc failure in CI.

@alamb alamb Jul 21, 2022

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I took the liberty of merging the latest apache/master into this PR

pub const MAX_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
"99999999999999999999999999999999999999",
"999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999999999999999999999999999999999",
"99999999999999999999999999999999999999999999999999999999999999999999999999",
"999999999999999999999999999999999999999999999999999999999999999999999999999",
];

/// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value
/// that can be stored in a [DataType::Decimal] value of precision `p`
pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
Expand Down Expand Up @@ -343,13 +387,62 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
-99999999999999999999999999999999999999,
];

/// `MIN_DECIMAL_FOR_LARGER_PRECISION[p]` holds the minimum integer value
/// that can be stored in a [DataType::Decimal256] value of precision `p` > 38
pub const MIN_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
"-99999999999999999999999999999999999999",
"-999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999999999999",
"-9999999999999999999999999999999999999999999999999999999999999999999999999",
"-99999999999999999999999999999999999999999999999999999999999999999999999999",
"-999999999999999999999999999999999999999999999999999999999999999999999999999",
];

/// The maximum precision for [DataType::Decimal] values
pub const DECIMAL_MAX_PRECISION: usize = 38;

/// The maximum scale for [DataType::Decimal] values
pub const DECIMAL_MAX_SCALE: usize = 38;

/// The default scale for [DataType::Decimal] values
/// The maximum precision for [DataType::Decimal256] values
pub const DECIMAL256_MAX_PRECISION: usize = 76;

/// The maximum scale for [DataType::Decimal256] values
pub const DECIMAL256_MAX_SCALE: usize = 76;

/// The default scale for [DataType::Decimal] and [DataType::Decimal256] values
pub const DECIMAL_DEFAULT_SCALE: usize = 10;

/// Validates that the specified `i128` value can be properly
Expand Down Expand Up @@ -379,6 +472,55 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul
}
}

/// Validates that the specified string value can be properly
/// interpreted as a Decimal256 number with precision `precision`
#[inline]
pub(crate) fn validate_decimal256_precision(
value: &str,
precision: usize,
) -> Result<BigInt> {
if precision > 38 {
let max_str = MAX_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];
let min_str = MIN_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];

let max = BigInt::from_str_radix(max_str, 10).unwrap();
let min = BigInt::from_str_radix(min_str, 10).unwrap();

let value = BigInt::from_str_radix(value, 10).unwrap();
if value > max {
Err(ArrowError::InvalidArgumentError(format!(
"{} is too large to store in a Decimal256 of precision {}. Max is {}",
value, precision, max
)))
} else if value < min {
Err(ArrowError::InvalidArgumentError(format!(
"{} is too small to store in a Decimal256 of precision {}. Min is {}",
value, precision, min
)))
} else {
Ok(value)
}
} else {
let max = MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1];
let min = MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1];
let value = BigInt::from_str_radix(value, 10).unwrap();

if value.to_i128().unwrap() > max {
Err(ArrowError::InvalidArgumentError(format!(
"{} is too large to store in a Decimal256 of precision {}. Max is {}",
value, precision, max
)))
} else if value.to_i128().unwrap() < min {
Err(ArrowError::InvalidArgumentError(format!(
"{} is too small to store in a Decimal256 of precision {}. Min is {}",
value, precision, min
)))
} else {
Ok(value)
}
}
}

impl DataType {
/// Parse a data type from a JSON representation.
pub(crate) fn from(json: &Value) -> Result<DataType> {
Expand Down