From 5d9b573056d3b3f0f39279dec36acc0a5645f54e Mon Sep 17 00:00:00 2001 From: Nimalan Date: Sun, 30 Nov 2025 16:55:54 +0530 Subject: [PATCH 01/10] feat: Support log for Decimal32 --- datafusion/functions/src/math/log.rs | 45 +++++++++++++++++-- datafusion/functions/src/utils.rs | 40 ++++++++++++++++- .../sqllogictest/test_files/decimal.slt | 6 +++ 3 files changed, 86 insertions(+), 5 deletions(-) diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 72a9cf4555787..2618845095d2e 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -21,11 +21,12 @@ use std::any::Any; use super::power::PowerFunc; -use crate::utils::{calculate_binary_math, decimal128_to_i128}; +use crate::utils::{calculate_binary_math, decimal128_to_i128, decimal32_to_f64}; use arrow::array::{Array, ArrayRef}; use arrow::compute::kernels::cast; use arrow::datatypes::{ - DataType, Decimal128Type, Decimal256Type, Float16Type, Float32Type, Float64Type, + DataType, Decimal128Type, Decimal256Type, Decimal32Type, Float16Type, Float32Type, + Float64Type, }; use arrow::error::ArrowError; use arrow_buffer::i256; @@ -102,6 +103,29 @@ impl LogFunc { } } +/// Binary function to calculate logarithm of Decimal32 `value` using `base` base +/// Returns error if base is invalid +fn log_decimal32(value: i32, scale: i8, base: f64) -> Result { + if !base.is_finite() || base.trunc() != base { + return Err(ArrowError::ComputeError(format!( + "Log cannot use non-integer base: {base}" + ))); + } + if (base as u32) < 2 { + return Err(ArrowError::ComputeError(format!( + "Log base must be greater than 1: {base}" + ))); + } + + let unscaled_value = decimal32_to_f64(value, scale)?; + if unscaled_value > 0.0 { + Ok(unscaled_value.log(base)) + } else { + // Reflect f64::log behaviour + Ok(f64::NAN) + } +} + /// Binary function to calculate an integer logarithm of Decimal128 `value` using `base` base /// Returns error if base is invalid fn log_decimal128(value: i128, scale: i8, base: f64) -> Result { @@ -226,8 +250,14 @@ impl ScalarUDFImpl for LogFunc { // TODO: native log support for decimal 32 & 64; right now upcast // to decimal128 to calculate // https://github.com/apache/datafusion/issues/17555 - DataType::Decimal32(precision, scale) - | DataType::Decimal64(precision, scale) => { + DataType::Decimal32(_, scale) => { + calculate_binary_math::( + &value, + &base, + |value, base| log_decimal32(value, *scale, base), + )? + } + DataType::Decimal64(precision, scale) => { calculate_binary_math::( &cast(&value, &DataType::Decimal128(*precision, *scale))?, &base, @@ -369,6 +399,13 @@ mod tests { ); } + #[test] + fn test_log_decimal32_native() { + let value = 1234567; + assert_eq!(log_decimal32(value, 0, 2.0).unwrap(), 20.235573703046512); + assert_eq!(log_decimal32(value, 2, 2.0).unwrap(), 13.591717513271785); + } + #[test] fn test_log_invalid_base_type() { let arg_fields = vec![ diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index c4f15d0cca7f6..8163dd3477245 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray}; +use arrow::array::{ + Array, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, AsArray, PrimitiveArray, +}; use arrow::compute::try_binary; use arrow::datatypes::{DataType, DecimalType}; use arrow::error::ArrowError; @@ -219,6 +221,15 @@ pub fn decimal128_to_i128(value: i128, scale: i8) -> Result { } } +pub fn decimal32_to_f64(value: i32, scale: i8) -> Result { + if scale == 0 { + Ok(value as f64) + } else { + let divisor = f64::from(10).pow_checked(scale as u32)?; + Ok(value as f64 / divisor) + } +} + #[cfg(test)] pub mod test { /// $FUNC ScalarUDFImpl to test @@ -376,4 +387,31 @@ pub mod test { } } } + + #[test] + fn test_decimal32_to_f64() { + let cases = [ + (123, 0, Some(123.0)), + (1230, 1, Some(123.0)), + (123000, 3, Some(123.0)), + (1234567, 2, Some(12345.67)), + (1, 0, Some(1.0)), + (123, -3, Some(123000.0)), + (i32::MAX, 0, Some(i32::MAX as f64)), + (i32::MAX, 3, Some(i32::MAX as f64 / 1000.0)), + ]; + + for (value, scale, expected) in cases { + match decimal32_to_f64(value, scale) { + Ok(actual) => { + assert_eq!( + actual, + expected.expect("Got value but expected none"), + "{value} and {scale} vs {expected:?}" + ); + } + Err(_) => assert!(expected.is_none()), + } + } + } } diff --git a/datafusion/sqllogictest/test_files/decimal.slt b/datafusion/sqllogictest/test_files/decimal.slt index a6b6dd04889ac..97f5207152a4c 100644 --- a/datafusion/sqllogictest/test_files/decimal.slt +++ b/datafusion/sqllogictest/test_files/decimal.slt @@ -794,6 +794,12 @@ select log(arrow_cast(100, 'Decimal32(9, 2)')); ---- 2 +query R +select log(2.0, arrow_cast(12345.67, 'Decimal32(9, 2)')); +--- +13.591717513271785 + + # log for small decimal64 query R select log(arrow_cast(100, 'Decimal64(18, 0)')); From bd14ace82e3d1ba453eace4c31d0c0a901a4a95a Mon Sep 17 00:00:00 2001 From: Nimalan Date: Sun, 30 Nov 2025 22:17:05 +0530 Subject: [PATCH 02/10] feat: Support log for Decimal64, refactor Decimal32 log code --- datafusion/functions/src/math/log.rs | 66 +++++++++---- datafusion/functions/src/utils.rs | 98 ++++++++++++++++--- .../sqllogictest/test_files/decimal.slt | 9 +- 3 files changed, 136 insertions(+), 37 deletions(-) diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 2618845095d2e..1d8d510cc4c87 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -21,12 +21,13 @@ use std::any::Any; use super::power::PowerFunc; -use crate::utils::{calculate_binary_math, decimal128_to_i128, decimal32_to_f64}; +use crate::utils::{ + calculate_binary_math, decimal128_to_i128, decimal32_to_f64, decimal64_to_f64, +}; use arrow::array::{Array, ArrayRef}; -use arrow::compute::kernels::cast; use arrow::datatypes::{ - DataType, Decimal128Type, Decimal256Type, Decimal32Type, Float16Type, Float32Type, - Float64Type, + DataType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, Float16Type, + Float32Type, Float64Type, }; use arrow::error::ArrowError; use arrow_buffer::i256; @@ -105,7 +106,12 @@ impl LogFunc { /// Binary function to calculate logarithm of Decimal32 `value` using `base` base /// Returns error if base is invalid -fn log_decimal32(value: i32, scale: i8, base: f64) -> Result { +fn log_decimal32( + value: i32, + precision: u8, + scale: i8, + base: f64, +) -> Result { if !base.is_finite() || base.trunc() != base { return Err(ArrowError::ComputeError(format!( "Log cannot use non-integer base: {base}" @@ -117,7 +123,35 @@ fn log_decimal32(value: i32, scale: i8, base: f64) -> Result { ))); } - let unscaled_value = decimal32_to_f64(value, scale)?; + let unscaled_value = decimal32_to_f64(value, precision, scale)?; + if unscaled_value > 0.0 { + Ok(unscaled_value.log(base)) + } else { + // Reflect f64::log behaviour + Ok(f64::NAN) + } +} + +/// Binary function to calculate logarithm of Decimal64 `value` using `base` base +/// Returns error if base is invalid +fn log_decimal64( + value: i64, + precision: u8, + scale: i8, + base: f64, +) -> Result { + if !base.is_finite() || base.trunc() != base { + return Err(ArrowError::ComputeError(format!( + "Log cannot use non-integer base: {base}" + ))); + } + if (base as u32) < 2 { + return Err(ArrowError::ComputeError(format!( + "Log base must be greater than 1: {base}" + ))); + } + + let unscaled_value = decimal64_to_f64(value, precision, scale)?; if unscaled_value > 0.0 { Ok(unscaled_value.log(base)) } else { @@ -247,21 +281,18 @@ impl ScalarUDFImpl for LogFunc { |value, base| Ok(value.log(base)), )? } - // TODO: native log support for decimal 32 & 64; right now upcast - // to decimal128 to calculate - // https://github.com/apache/datafusion/issues/17555 - DataType::Decimal32(_, scale) => { + DataType::Decimal32(precision, scale) => { calculate_binary_math::( &value, &base, - |value, base| log_decimal32(value, *scale, base), + |value, base| log_decimal32(value, *precision, *scale, base), )? } DataType::Decimal64(precision, scale) => { - calculate_binary_math::( - &cast(&value, &DataType::Decimal128(*precision, *scale))?, + calculate_binary_math::( + &value, &base, - |value, base| log_decimal128(value, *scale, base), + |value, base| log_decimal64(value, *precision, *scale, base), )? } DataType::Decimal128(_, scale) => { @@ -399,13 +430,6 @@ mod tests { ); } - #[test] - fn test_log_decimal32_native() { - let value = 1234567; - assert_eq!(log_decimal32(value, 0, 2.0).unwrap(), 20.235573703046512); - assert_eq!(log_decimal32(value, 2, 2.0).unwrap(), 13.591717513271785); - } - #[test] fn test_log_invalid_base_type() { let arg_fields = vec![ diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 8163dd3477245..354bc2758180e 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -19,7 +19,9 @@ use arrow::array::{ Array, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, AsArray, PrimitiveArray, }; use arrow::compute::try_binary; -use arrow::datatypes::{DataType, DecimalType}; +use arrow::datatypes::{ + validate_decimal32_precision, validate_decimal64_precision, DataType, DecimalType, +}; use arrow::error::ArrowError; use datafusion_common::{DataFusionError, Result, ScalarValue, not_impl_err}; use datafusion_expr::ColumnarValue; @@ -221,10 +223,35 @@ pub fn decimal128_to_i128(value: i128, scale: i8) -> Result { } } -pub fn decimal32_to_f64(value: i32, scale: i8) -> Result { - if scale == 0 { - Ok(value as f64) +pub fn decimal32_to_f64(value: i32, precision: u8, scale: i8) -> Result { + if scale < 0 { + Err(ArrowError::ComputeError( + "Negative scale is not supported".into(), + )) + } else if scale as u8 > precision { + Err(ArrowError::ComputeError( + "scale {scale} is greater than precision {precision}".into(), + )) } else { + validate_decimal32_precision(value, precision, scale)?; + + let divisor = f64::from(10).pow_checked(scale as u32)?; + Ok(value as f64 / divisor) + } +} + +pub fn decimal64_to_f64(value: i64, precision: u8, scale: i8) -> Result { + if scale < 0 { + Err(ArrowError::ComputeError( + "Negative scale is not supported".into(), + )) + } else if scale as u8 > precision { + Err(ArrowError::ComputeError( + "scale {scale} is greater than precision {precision}".into(), + )) + } else { + validate_decimal64_precision(value, precision, scale)?; + let divisor = f64::from(10).pow_checked(scale as u32)?; Ok(value as f64 / divisor) } @@ -391,18 +418,61 @@ pub mod test { #[test] fn test_decimal32_to_f64() { let cases = [ - (123, 0, Some(123.0)), - (1230, 1, Some(123.0)), - (123000, 3, Some(123.0)), - (1234567, 2, Some(12345.67)), - (1, 0, Some(1.0)), - (123, -3, Some(123000.0)), - (i32::MAX, 0, Some(i32::MAX as f64)), - (i32::MAX, 3, Some(i32::MAX as f64 / 1000.0)), + (123, 7, 0, Some(123.0)), + (1230, 7, 1, Some(123.0)), + (123000, 7, 3, Some(123.0)), + (1234567, 7, 2, Some(12345.67)), + (1, 7, 0, Some(1.0)), + (123, 7, -3, None), + (123, 7, i8::MAX, None), + (999999999, 9, 0, Some(999999999.0)), + (999999999, 9, 3, Some(999999.999)), ]; - for (value, scale, expected) in cases { - match decimal32_to_f64(value, scale) { + for (value, precision, scale, expected) in cases { + match decimal32_to_f64(value, precision, scale) { + Ok(actual) => { + assert_eq!( + actual, + expected.expect("Got value but expected none"), + "{value} and {precision} {scale} vs {expected:?}" + ); + } + Err(_) => assert!(expected.is_none()), + } + } + } + + #[test] + fn test_decimal64_to_f64() { + let cases = [ + (123, 18, 0, Some(123.0)), + (1234567890, 14, 2, Some(12345678.9)), + (-1234567890, 10, 2, Some(-12345678.9)), + (123, 18, -3, None), + (123, 18, i8::MAX, None), + ( + 999999999999999999i64, + 18, + 0, + Some(999999999999999999i64 as f64), + ), + ( + 999999999999999999i64, + 18, + 3, + Some(999999999999999999i64 as f64 / 1000.0), + ), + ( + -999999999999999999i64, + 18, + 3, + Some(-999999999999999999i64 as f64 / 1000.0), + ), + ]; + + for (value, precision, scale, expected) in cases { + match decimal64_to_f64(value, precision, scale) { Ok(actual) => { assert_eq!( actual, diff --git a/datafusion/sqllogictest/test_files/decimal.slt b/datafusion/sqllogictest/test_files/decimal.slt index 97f5207152a4c..1311276275e73 100644 --- a/datafusion/sqllogictest/test_files/decimal.slt +++ b/datafusion/sqllogictest/test_files/decimal.slt @@ -796,10 +796,9 @@ select log(arrow_cast(100, 'Decimal32(9, 2)')); query R select log(2.0, arrow_cast(12345.67, 'Decimal32(9, 2)')); ---- +---- 13.591717513271785 - # log for small decimal64 query R select log(arrow_cast(100, 'Decimal64(18, 0)')); @@ -811,6 +810,12 @@ select log(arrow_cast(100, 'Decimal64(18, 2)')); ---- 2 +query R +select log(2.0, arrow_cast(12345.6789, 'Decimal64(15, 4)')); +---- +13.591718553311024 + + # log for small decimal128 query R select log(arrow_cast(100, 'Decimal128(38, 0)')); From fa5784f2f62c282825b835b8fc3d2dd0c1233cb8 Mon Sep 17 00:00:00 2001 From: Nimalan Date: Mon, 1 Dec 2025 07:52:11 +0530 Subject: [PATCH 03/10] chore: Refactor changes --- datafusion/functions/src/utils.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 354bc2758180e..079dc42d60790 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -229,9 +229,9 @@ pub fn decimal32_to_f64(value: i32, precision: u8, scale: i8) -> Result precision { - Err(ArrowError::ComputeError( - "scale {scale} is greater than precision {precision}".into(), - )) + Err(ArrowError::ComputeError(format!( + "scale {scale} is greater than precision {precision}" + ))) } else { validate_decimal32_precision(value, precision, scale)?; @@ -246,9 +246,9 @@ pub fn decimal64_to_f64(value: i64, precision: u8, scale: i8) -> Result precision { - Err(ArrowError::ComputeError( - "scale {scale} is greater than precision {precision}".into(), - )) + Err(ArrowError::ComputeError(format!( + "scale {scale} is greater than precision {precision}" + ))) } else { validate_decimal64_precision(value, precision, scale)?; @@ -422,6 +422,7 @@ pub mod test { (1230, 7, 1, Some(123.0)), (123000, 7, 3, Some(123.0)), (1234567, 7, 2, Some(12345.67)), + (-1234567, 7, 2, Some(-12345.67)), (1, 7, 0, Some(1.0)), (123, 7, -3, None), (123, 7, i8::MAX, None), From 65ec6cad586520b60efc915a0a1ee6465e66371a Mon Sep 17 00:00:00 2001 From: Nimalan Date: Mon, 1 Dec 2025 15:43:44 +0530 Subject: [PATCH 04/10] fix: Update tests to 12 decimal points --- datafusion/sqllogictest/test_files/decimal.slt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/decimal.slt b/datafusion/sqllogictest/test_files/decimal.slt index 1311276275e73..e6bdcdc83ce92 100644 --- a/datafusion/sqllogictest/test_files/decimal.slt +++ b/datafusion/sqllogictest/test_files/decimal.slt @@ -797,7 +797,7 @@ select log(arrow_cast(100, 'Decimal32(9, 2)')); query R select log(2.0, arrow_cast(12345.67, 'Decimal32(9, 2)')); ---- -13.591717513271785 +13.591717513272 # log for small decimal64 query R @@ -813,7 +813,7 @@ select log(arrow_cast(100, 'Decimal64(18, 2)')); query R select log(2.0, arrow_cast(12345.6789, 'Decimal64(15, 4)')); ---- -13.591718553311024 +13.591718553311 # log for small decimal128 From 52d86b3e58bf672144f7b410e0c12a0fa05404e9 Mon Sep 17 00:00:00 2001 From: Nimalan Date: Wed, 10 Dec 2025 18:41:02 +0530 Subject: [PATCH 05/10] fix: Perform integer log for decimal32 and decimal64 --- datafusion/functions/src/math/log.rs | 16 +++-- datafusion/functions/src/utils.rs | 65 ++++++++++--------- .../sqllogictest/test_files/decimal.slt | 4 +- 3 files changed, 47 insertions(+), 38 deletions(-) diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 1d8d510cc4c87..2f4483b9c9726 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -22,7 +22,7 @@ use std::any::Any; use super::power::PowerFunc; use crate::utils::{ - calculate_binary_math, decimal128_to_i128, decimal32_to_f64, decimal64_to_f64, + calculate_binary_math, decimal128_to_i128, decimal32_to_i32, decimal64_to_i64, }; use arrow::array::{Array, ArrayRef}; use arrow::datatypes::{ @@ -123,9 +123,10 @@ fn log_decimal32( ))); } - let unscaled_value = decimal32_to_f64(value, precision, scale)?; - if unscaled_value > 0.0 { - Ok(unscaled_value.log(base)) + let unscaled_value = decimal32_to_i32(value, precision, scale)?; + if unscaled_value > 0 { + let log_value: u32 = unscaled_value.ilog(base as i32); + Ok(log_value as f64) } else { // Reflect f64::log behaviour Ok(f64::NAN) @@ -151,9 +152,10 @@ fn log_decimal64( ))); } - let unscaled_value = decimal64_to_f64(value, precision, scale)?; - if unscaled_value > 0.0 { - Ok(unscaled_value.log(base)) + let unscaled_value = decimal64_to_i64(value, precision, scale)?; + if unscaled_value > 0 { + let log_value: u32 = unscaled_value.ilog(base as i64); + Ok(log_value as f64) } else { // Reflect f64::log behaviour Ok(f64::NAN) diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 079dc42d60790..58877d6b9df2b 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -223,7 +223,7 @@ pub fn decimal128_to_i128(value: i128, scale: i8) -> Result { } } -pub fn decimal32_to_f64(value: i32, precision: u8, scale: i8) -> Result { +pub fn decimal32_to_i32(value: i32, precision: u8, scale: i8) -> Result { if scale < 0 { Err(ArrowError::ComputeError( "Negative scale is not supported".into(), @@ -232,15 +232,21 @@ pub fn decimal32_to_f64(value: i32, precision: u8, scale: i8) -> Result Ok(value / divisor), + None => Err(ArrowError::ComputeError(format!( + "Cannot get a power of {scale}" + ))), + } } } -pub fn decimal64_to_f64(value: i64, precision: u8, scale: i8) -> Result { +pub fn decimal64_to_i64(value: i64, precision: u8, scale: i8) -> Result { if scale < 0 { Err(ArrowError::ComputeError( "Negative scale is not supported".into(), @@ -249,11 +255,17 @@ pub fn decimal64_to_f64(value: i64, precision: u8, scale: i8) -> Result Ok(value / divisor), + None => Err(ArrowError::ComputeError(format!( + "Cannot get a power of {scale}" + ))), + } } } @@ -416,22 +428,22 @@ pub mod test { } #[test] - fn test_decimal32_to_f64() { + fn test_decimal32_to_i32() { let cases = [ - (123, 7, 0, Some(123.0)), - (1230, 7, 1, Some(123.0)), - (123000, 7, 3, Some(123.0)), - (1234567, 7, 2, Some(12345.67)), - (-1234567, 7, 2, Some(-12345.67)), - (1, 7, 0, Some(1.0)), + (123, 7, 0, Some(123)), + (1230, 7, 1, Some(123)), + (123000, 7, 3, Some(123)), + (1234567, 7, 2, Some(12345)), + (-1234567, 7, 2, Some(-12345)), + (1, 7, 0, Some(1)), (123, 7, -3, None), (123, 7, i8::MAX, None), - (999999999, 9, 0, Some(999999999.0)), - (999999999, 9, 3, Some(999999.999)), + (999999999, 9, 0, Some(999999999)), + (999999999, 9, 3, Some(999999)), ]; for (value, precision, scale, expected) in cases { - match decimal32_to_f64(value, precision, scale) { + match decimal32_to_i32(value, precision, scale) { Ok(actual) => { assert_eq!( actual, @@ -445,35 +457,30 @@ pub mod test { } #[test] - fn test_decimal64_to_f64() { + fn test_decimal64_to_i64() { let cases = [ - (123, 18, 0, Some(123.0)), - (1234567890, 14, 2, Some(12345678.9)), - (-1234567890, 10, 2, Some(-12345678.9)), + (123, 18, 0, Some(123)), + (1234567890, 14, 2, Some(12345678)), + (-1234567890, 10, 2, Some(-12345678)), (123, 18, -3, None), (123, 18, i8::MAX, None), - ( - 999999999999999999i64, - 18, - 0, - Some(999999999999999999i64 as f64), - ), + (999999999999999999i64, 18, 0, Some(999999999999999999i64)), ( 999999999999999999i64, 18, 3, - Some(999999999999999999i64 as f64 / 1000.0), + Some(999999999999999999i64 / 1000), ), ( -999999999999999999i64, 18, 3, - Some(-999999999999999999i64 as f64 / 1000.0), + Some(-999999999999999999i64 / 1000), ), ]; for (value, precision, scale, expected) in cases { - match decimal64_to_f64(value, precision, scale) { + match decimal64_to_i64(value, precision, scale) { Ok(actual) => { assert_eq!( actual, diff --git a/datafusion/sqllogictest/test_files/decimal.slt b/datafusion/sqllogictest/test_files/decimal.slt index e6bdcdc83ce92..143cd786ab3c0 100644 --- a/datafusion/sqllogictest/test_files/decimal.slt +++ b/datafusion/sqllogictest/test_files/decimal.slt @@ -797,7 +797,7 @@ select log(arrow_cast(100, 'Decimal32(9, 2)')); query R select log(2.0, arrow_cast(12345.67, 'Decimal32(9, 2)')); ---- -13.591717513272 +13 # log for small decimal64 query R @@ -813,7 +813,7 @@ select log(arrow_cast(100, 'Decimal64(18, 2)')); query R select log(2.0, arrow_cast(12345.6789, 'Decimal64(15, 4)')); ---- -13.591718553311 +13 # log for small decimal128 From 8fb3a8df91d3efdc4e167cdaa6e43eec9545665d Mon Sep 17 00:00:00 2001 From: Nimalan Date: Wed, 10 Dec 2025 20:01:46 +0530 Subject: [PATCH 06/10] lint: Fix lint and fmt issue --- datafusion/functions/src/math/log.rs | 4 ++-- datafusion/functions/src/utils.rs | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 2f4483b9c9726..1ba381543d3c6 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -22,11 +22,11 @@ use std::any::Any; use super::power::PowerFunc; use crate::utils::{ - calculate_binary_math, decimal128_to_i128, decimal32_to_i32, decimal64_to_i64, + calculate_binary_math, decimal32_to_i32, decimal64_to_i64, decimal128_to_i128, }; use arrow::array::{Array, ArrayRef}; use arrow::datatypes::{ - DataType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, Float16Type, + DataType, Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, Float16Type, Float32Type, Float64Type, }; use arrow::error::ArrowError; diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 58877d6b9df2b..11c1e7126c393 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -15,12 +15,10 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{ - Array, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, AsArray, PrimitiveArray, -}; +use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray}; use arrow::compute::try_binary; use arrow::datatypes::{ - validate_decimal32_precision, validate_decimal64_precision, DataType, DecimalType, + DataType, DecimalType, validate_decimal32_precision, validate_decimal64_precision, }; use arrow::error::ArrowError; use datafusion_common::{DataFusionError, Result, ScalarValue, not_impl_err}; @@ -237,7 +235,7 @@ pub fn decimal32_to_i32(value: i32, precision: u8, scale: i8) -> Result Ok(value / divisor), None => Err(ArrowError::ComputeError(format!( "Cannot get a power of {scale}" From b2eed3ce9e0a148952806bd9acb0921194a5fdda Mon Sep 17 00:00:00 2001 From: Nimalan Date: Tue, 16 Dec 2025 20:02:05 +0530 Subject: [PATCH 07/10] test: Assert on error message --- datafusion/functions/src/utils.rs | 106 +++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 31 deletions(-) diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 11c1e7126c393..a19370682f540 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -18,12 +18,12 @@ use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray}; use arrow::compute::try_binary; use arrow::datatypes::{ - DataType, DecimalType, validate_decimal32_precision, validate_decimal64_precision, + validate_decimal32_precision, validate_decimal64_precision, DataType, DecimalType, }; use arrow::error::ArrowError; -use datafusion_common::{DataFusionError, Result, ScalarValue, not_impl_err}; -use datafusion_expr::ColumnarValue; +use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::function::Hint; +use datafusion_expr::ColumnarValue; use std::sync::Arc; /// Creates a function to identify the optimal return type of a string function given @@ -382,6 +382,7 @@ pub mod test { } use arrow::datatypes::DataType; + use itertools::Either; pub(crate) use test_function; use super::*; @@ -427,66 +428,109 @@ pub mod test { #[test] fn test_decimal32_to_i32() { - let cases = [ - (123, 7, 0, Some(123)), - (1230, 7, 1, Some(123)), - (123000, 7, 3, Some(123)), - (1234567, 7, 2, Some(12345)), - (-1234567, 7, 2, Some(-12345)), - (1, 7, 0, Some(1)), - (123, 7, -3, None), - (123, 7, i8::MAX, None), - (999999999, 9, 0, Some(999999999)), - (999999999, 9, 3, Some(999999)), + let cases: [(i32, u8, i8, Either); _] = [ + (123, 7, 0, Either::Left(123)), + (1230, 7, 1, Either::Left(123)), + (123000, 7, 3, Either::Left(123)), + (1234567, 7, 2, Either::Left(12345)), + (-1234567, 7, 2, Either::Left(-12345)), + (1, 7, 0, Either::Left(1)), + ( + 123, + 7, + -3, + Either::Right("Negative scale is not supported".into()), + ), + ( + 123, + 7, + i8::MAX, + Either::Right("scale 127 is greater than precision 7".into()), + ), + (999999999, 9, 0, Either::Left(999999999)), + (999999999, 9, 3, Either::Left(999999)), ]; for (value, precision, scale, expected) in cases { match decimal32_to_i32(value, precision, scale) { Ok(actual) => { + let expected_value = + expected.left().expect("Got value but expected none"); assert_eq!( - actual, - expected.expect("Got value but expected none"), - "{value} and {precision} {scale} vs {expected:?}" + actual, expected_value, + "{value} and {precision} {scale} vs {expected_value:?}" ); } - Err(_) => assert!(expected.is_none()), + Err(ArrowError::ComputeError(msg)) => { + assert_eq!( + msg, + expected.right().expect("Got error but expected value") + ); + } + Err(_) => { + assert!(expected.is_right()) + } } } } #[test] fn test_decimal64_to_i64() { - let cases = [ - (123, 18, 0, Some(123)), - (1234567890, 14, 2, Some(12345678)), - (-1234567890, 10, 2, Some(-12345678)), - (123, 18, -3, None), - (123, 18, i8::MAX, None), - (999999999999999999i64, 18, 0, Some(999999999999999999i64)), + let cases: [(i64, u8, i8, Either); _] = [ + (123, 18, 0, Either::Left(123)), + (1234567890, 14, 2, Either::Left(12345678)), + (-1234567890, 10, 2, Either::Left(-12345678)), + ( + 123, + 18, + -3, + Either::Right("Negative scale is not supported".into()), + ), + ( + 123, + 18, + i8::MAX, + Either::Right("scale 127 is greater than precision 18".into()), + ), + ( + 999999999999999999i64, + 18, + 0, + Either::Left(999999999999999999i64), + ), ( 999999999999999999i64, 18, 3, - Some(999999999999999999i64 / 1000), + Either::Left(999999999999999999i64 / 1000), ), ( -999999999999999999i64, 18, 3, - Some(-999999999999999999i64 / 1000), + Either::Left(-999999999999999999i64 / 1000), ), ]; for (value, precision, scale, expected) in cases { match decimal64_to_i64(value, precision, scale) { Ok(actual) => { + let expected_value = + expected.left().expect("Got value but expected none"); assert_eq!( - actual, - expected.expect("Got value but expected none"), - "{value} and {scale} vs {expected:?}" + actual, expected_value, + "{value} and {precision} {scale} vs {expected_value:?}" ); } - Err(_) => assert!(expected.is_none()), + Err(ArrowError::ComputeError(msg)) => { + assert_eq!( + msg, + expected.right().expect("Got error but expected value") + ); + } + Err(_) => { + assert!(expected.is_right()) + } } } } From 2b78589fe39c6f0dd78b936ffa294bd25fb11a92 Mon Sep 17 00:00:00 2001 From: Nimalan Date: Tue, 16 Dec 2025 20:26:33 +0530 Subject: [PATCH 08/10] refactor: Remove redundant checks --- datafusion/functions/src/math/log.rs | 26 ++++------- datafusion/functions/src/utils.rs | 67 ++++++++++------------------ 2 files changed, 31 insertions(+), 62 deletions(-) diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 1ba381543d3c6..18229fb076ad3 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -106,12 +106,7 @@ impl LogFunc { /// Binary function to calculate logarithm of Decimal32 `value` using `base` base /// Returns error if base is invalid -fn log_decimal32( - value: i32, - precision: u8, - scale: i8, - base: f64, -) -> Result { +fn log_decimal32(value: i32, scale: i8, base: f64) -> Result { if !base.is_finite() || base.trunc() != base { return Err(ArrowError::ComputeError(format!( "Log cannot use non-integer base: {base}" @@ -123,7 +118,7 @@ fn log_decimal32( ))); } - let unscaled_value = decimal32_to_i32(value, precision, scale)?; + let unscaled_value = decimal32_to_i32(value, scale)?; if unscaled_value > 0 { let log_value: u32 = unscaled_value.ilog(base as i32); Ok(log_value as f64) @@ -135,12 +130,7 @@ fn log_decimal32( /// Binary function to calculate logarithm of Decimal64 `value` using `base` base /// Returns error if base is invalid -fn log_decimal64( - value: i64, - precision: u8, - scale: i8, - base: f64, -) -> Result { +fn log_decimal64(value: i64, scale: i8, base: f64) -> Result { if !base.is_finite() || base.trunc() != base { return Err(ArrowError::ComputeError(format!( "Log cannot use non-integer base: {base}" @@ -152,7 +142,7 @@ fn log_decimal64( ))); } - let unscaled_value = decimal64_to_i64(value, precision, scale)?; + let unscaled_value = decimal64_to_i64(value, scale)?; if unscaled_value > 0 { let log_value: u32 = unscaled_value.ilog(base as i64); Ok(log_value as f64) @@ -283,18 +273,18 @@ impl ScalarUDFImpl for LogFunc { |value, base| Ok(value.log(base)), )? } - DataType::Decimal32(precision, scale) => { + DataType::Decimal32(_, scale) => { calculate_binary_math::( &value, &base, - |value, base| log_decimal32(value, *precision, *scale, base), + |value, base| log_decimal32(value, *scale, base), )? } - DataType::Decimal64(precision, scale) => { + DataType::Decimal64(_, scale) => { calculate_binary_math::( &value, &base, - |value, base| log_decimal64(value, *precision, *scale, base), + |value, base| log_decimal64(value, *scale, base), )? } DataType::Decimal128(_, scale) => { diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index a19370682f540..7c517afc68b47 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -17,9 +17,7 @@ use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray}; use arrow::compute::try_binary; -use arrow::datatypes::{ - validate_decimal32_precision, validate_decimal64_precision, DataType, DecimalType, -}; +use arrow::datatypes::{DataType, DecimalType}; use arrow::error::ArrowError; use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::function::Hint; @@ -221,20 +219,14 @@ pub fn decimal128_to_i128(value: i128, scale: i8) -> Result { } } -pub fn decimal32_to_i32(value: i32, precision: u8, scale: i8) -> Result { +pub fn decimal32_to_i32(value: i32, scale: i8) -> Result { if scale < 0 { Err(ArrowError::ComputeError( "Negative scale is not supported".into(), )) - } else if scale as u8 > precision { - Err(ArrowError::ComputeError(format!( - "scale {scale} is greater than precision {precision}" - ))) } else if scale == 0 { Ok(value) } else { - validate_decimal32_precision(value, precision, scale)?; - match 10_i32.checked_pow(scale as u32) { Some(divisor) => Ok(value / divisor), None => Err(ArrowError::ComputeError(format!( @@ -244,20 +236,14 @@ pub fn decimal32_to_i32(value: i32, precision: u8, scale: i8) -> Result Result { +pub fn decimal64_to_i64(value: i64, scale: i8) -> Result { if scale < 0 { Err(ArrowError::ComputeError( "Negative scale is not supported".into(), )) - } else if scale as u8 > precision { - Err(ArrowError::ComputeError(format!( - "scale {scale} is greater than precision {precision}" - ))) } else if scale == 0 { Ok(value) } else { - validate_decimal64_precision(value, precision, scale)?; - match i64::from(10).checked_pow(scale as u32) { Some(divisor) => Ok(value / divisor), None => Err(ArrowError::ComputeError(format!( @@ -428,37 +414,35 @@ pub mod test { #[test] fn test_decimal32_to_i32() { - let cases: [(i32, u8, i8, Either); _] = [ - (123, 7, 0, Either::Left(123)), - (1230, 7, 1, Either::Left(123)), - (123000, 7, 3, Either::Left(123)), - (1234567, 7, 2, Either::Left(12345)), - (-1234567, 7, 2, Either::Left(-12345)), - (1, 7, 0, Either::Left(1)), + let cases: [(i32, i8, Either); _] = [ + (123, 0, Either::Left(123)), + (1230, 1, Either::Left(123)), + (123000, 3, Either::Left(123)), + (1234567, 2, Either::Left(12345)), + (-1234567, 2, Either::Left(-12345)), + (1, 0, Either::Left(1)), ( 123, - 7, -3, Either::Right("Negative scale is not supported".into()), ), ( 123, - 7, i8::MAX, - Either::Right("scale 127 is greater than precision 7".into()), + Either::Right("Cannot get a power of 127".into()), ), - (999999999, 9, 0, Either::Left(999999999)), - (999999999, 9, 3, Either::Left(999999)), + (999999999, 0, Either::Left(999999999)), + (999999999, 3, Either::Left(999999)), ]; - for (value, precision, scale, expected) in cases { - match decimal32_to_i32(value, precision, scale) { + for (value, scale, expected) in cases { + match decimal32_to_i32(value, scale) { Ok(actual) => { let expected_value = expected.left().expect("Got value but expected none"); assert_eq!( actual, expected_value, - "{value} and {precision} {scale} vs {expected_value:?}" + "{value} and {scale} vs {expected_value:?}" ); } Err(ArrowError::ComputeError(msg)) => { @@ -476,50 +460,45 @@ pub mod test { #[test] fn test_decimal64_to_i64() { - let cases: [(i64, u8, i8, Either); _] = [ - (123, 18, 0, Either::Left(123)), - (1234567890, 14, 2, Either::Left(12345678)), - (-1234567890, 10, 2, Either::Left(-12345678)), + let cases: [(i64, i8, Either); _] = [ + (123, 0, Either::Left(123)), + (1234567890, 2, Either::Left(12345678)), + (-1234567890, 2, Either::Left(-12345678)), ( 123, - 18, -3, Either::Right("Negative scale is not supported".into()), ), ( 123, - 18, i8::MAX, Either::Right("scale 127 is greater than precision 18".into()), ), ( 999999999999999999i64, - 18, 0, Either::Left(999999999999999999i64), ), ( 999999999999999999i64, - 18, 3, Either::Left(999999999999999999i64 / 1000), ), ( -999999999999999999i64, - 18, 3, Either::Left(-999999999999999999i64 / 1000), ), ]; - for (value, precision, scale, expected) in cases { - match decimal64_to_i64(value, precision, scale) { + for (value, scale, expected) in cases { + match decimal64_to_i64(value, scale) { Ok(actual) => { let expected_value = expected.left().expect("Got value but expected none"); assert_eq!( actual, expected_value, - "{value} and {precision} {scale} vs {expected_value:?}" + "{value} and {scale} vs {expected_value:?}" ); } Err(ArrowError::ComputeError(msg)) => { From 332f0a31e547bbdc8b0d283c354e0e32d5fd6810 Mon Sep 17 00:00:00 2001 From: Nimalan Date: Tue, 16 Dec 2025 20:36:29 +0530 Subject: [PATCH 09/10] lint: Fix formatting issue --- datafusion/functions/src/utils.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 7c517afc68b47..61f2781530b47 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -19,9 +19,9 @@ use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray} use arrow::compute::try_binary; use arrow::datatypes::{DataType, DecimalType}; use arrow::error::ArrowError; -use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; -use datafusion_expr::function::Hint; +use datafusion_common::{DataFusionError, Result, ScalarValue, not_impl_err}; use datafusion_expr::ColumnarValue; +use datafusion_expr::function::Hint; use std::sync::Arc; /// Creates a function to identify the optimal return type of a string function given From 0074c03f2e99370ef96b1a56508ae2e05c8eca2e Mon Sep 17 00:00:00 2001 From: Nimalan Date: Tue, 16 Dec 2025 21:15:35 +0530 Subject: [PATCH 10/10] fix: Fix failing test --- datafusion/functions/src/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 61f2781530b47..e160eb68d55e2 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -472,7 +472,7 @@ pub mod test { ( 123, i8::MAX, - Either::Right("scale 127 is greater than precision 18".into()), + Either::Right("Cannot get a power of 127".into()), ), ( 999999999999999999i64,