From 314e68264a846935d72dcfaf599ba6a6a8a966c4 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Tue, 23 Dec 2025 12:25:40 +0530 Subject: [PATCH 1/3] Decimal128 support for statistical aggregations --- .../expr/src/type_coercion/functions.rs | 12 +- .../src/approx_percentile_cont.rs | 42 +++-- .../src/approx_percentile_cont_with_weight.rs | 45 +++-- .../functions-aggregate/src/correlation.rs | 30 +++- .../functions-aggregate/src/covariance.rs | 30 +++- .../src/percentile_cont.rs | 32 ++-- datafusion/functions-aggregate/src/regr.rs | 19 +- .../sqllogictest/test_files/aggregate.slt | 170 ++++++++++++++++++ 8 files changed, 314 insertions(+), 66 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 7be9713f53186..b8733695cb88b 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -673,12 +673,12 @@ fn get_valid_types( default_casted_type.default_cast_for(current_type)?; new_types.push(casted_type); } else { - return internal_err!( - "Expect {} but received NativeType::{}, DataType: {}", - param.desired_type(), - current_native_type, - current_type - ); + // No valid coercion for this signature given the current type. + // Return an empty set so the higher-level signature matching + // logic can continue checking other variants (e.g. `OneOf`) + // and/or produce a consistent "Failed to coerce arguments" + // error message. + return Ok(vec![]); } } diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index b1e649ec029ff..bc84b1aa0f5a0 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -30,17 +30,17 @@ use arrow::{ }, datatypes::{DataType, Field}, }; +use datafusion_common::types::{NativeType, logical_float64}; use datafusion_common::{ DataFusionError, Result, ScalarValue, downcast_value, internal_err, not_impl_err, plan_err, }; use datafusion_expr::expr::{AggregateFunction, Sort}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; -use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, - Volatility, + Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, Signature, + TypeSignature, TypeSignatureClass, Volatility, }; use datafusion_functions_aggregate_common::tdigest::{DEFAULT_MAX_SIZE, TDigest}; use datafusion_macros::user_doc; @@ -133,18 +133,30 @@ impl Default for ApproxPercentileCont { impl ApproxPercentileCont { /// Create a new [`ApproxPercentileCont`] aggregate function. pub fn new() -> Self { - let mut variants = Vec::with_capacity(NUMERICS.len() * (INTEGERS.len() + 1)); - // Accept any numeric value paired with a float64 percentile - for num in NUMERICS { - variants.push(TypeSignature::Exact(vec![num.clone(), DataType::Float64])); - // Additionally accept an integer number of centroids for T-Digest - for int in INTEGERS { - variants.push(TypeSignature::Exact(vec![ - num.clone(), - DataType::Float64, - int.clone(), - ])) - } + let percentile_coercion = Coercion::new_implicit( + TypeSignatureClass::Native(logical_float64()), + vec![ + TypeSignatureClass::Integer, + TypeSignatureClass::Float, + TypeSignatureClass::Decimal, + ], + NativeType::Float64, + ); + + // Value must be numeric (excluding Decimal) as the accumulator currently only + // supports integers and floats. Percentile may be any numeric literal that can + // be coerced to Float64 (to support parse_float_as_decimal). + let mut variants = Vec::with_capacity(4); + for value_class in [TypeSignatureClass::Integer, TypeSignatureClass::Float] { + variants.push(TypeSignature::Coercible(vec![ + Coercion::new_exact(value_class.clone()), + percentile_coercion.clone(), + ])); + variants.push(TypeSignature::Coercible(vec![ + Coercion::new_exact(value_class), + percentile_coercion.clone(), + Coercion::new_exact(TypeSignatureClass::Integer), + ])); } Self { signature: Signature::one_of(variants, Volatility::Immutable), diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs index ff7762e816ad6..90a453534e50c 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs @@ -25,13 +25,14 @@ use arrow::compute::{and, filter, is_not_null}; use arrow::datatypes::FieldRef; use arrow::{array::ArrayRef, datatypes::DataType}; use datafusion_common::ScalarValue; +use datafusion_common::types::{NativeType, logical_float64}; use datafusion_common::{Result, not_impl_err, plan_err}; use datafusion_expr::Volatility::Immutable; use datafusion_expr::expr::{AggregateFunction, Sort}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; -use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, + Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, Signature, + TypeSignature, TypeSignatureClass, }; use datafusion_functions_aggregate_common::tdigest::{Centroid, TDigest}; use datafusion_macros::user_doc; @@ -134,21 +135,31 @@ impl Default for ApproxPercentileContWithWeight { impl ApproxPercentileContWithWeight { /// Create a new [`ApproxPercentileContWithWeight`] aggregate function. pub fn new() -> Self { - let mut variants = Vec::with_capacity(NUMERICS.len() * (INTEGERS.len() + 1)); - // Accept any numeric value paired with weight and float64 percentile - for num in NUMERICS { - variants.push(TypeSignature::Exact(vec![ - num.clone(), - num.clone(), - DataType::Float64, - ])); - // Additionally accept an integer number of centroids for T-Digest - for int in INTEGERS { - variants.push(TypeSignature::Exact(vec![ - num.clone(), - num.clone(), - DataType::Float64, - int.clone(), + let percentile_coercion = Coercion::new_implicit( + TypeSignatureClass::Native(logical_float64()), + vec![ + TypeSignatureClass::Integer, + TypeSignatureClass::Float, + TypeSignatureClass::Decimal, + ], + NativeType::Float64, + ); + + let value_classes = [TypeSignatureClass::Integer, TypeSignatureClass::Float]; + let mut variants = + Vec::with_capacity(value_classes.len() * value_classes.len() * 2); + for value_class in value_classes.iter() { + for weight_class in value_classes.iter() { + variants.push(TypeSignature::Coercible(vec![ + Coercion::new_exact(value_class.clone()), + Coercion::new_exact(weight_class.clone()), + percentile_coercion.clone(), + ])); + variants.push(TypeSignature::Coercible(vec![ + Coercion::new_exact(value_class.clone()), + Coercion::new_exact(weight_class.clone()), + percentile_coercion.clone(), + Coercion::new_exact(TypeSignatureClass::Integer), ])); } } diff --git a/datafusion/functions-aggregate/src/correlation.rs b/datafusion/functions-aggregate/src/correlation.rs index 538311dfa2637..459073b8bd574 100644 --- a/datafusion/functions-aggregate/src/correlation.rs +++ b/datafusion/functions-aggregate/src/correlation.rs @@ -26,7 +26,7 @@ use arrow::array::{ Array, AsArray, BooleanArray, Float64Array, NullBufferBuilder, UInt64Array, downcast_array, }; -use arrow::compute::{and, filter, is_not_null}; +use arrow::compute::{and, cast, filter, is_not_null}; use arrow::datatypes::{FieldRef, Float64Type, UInt64Type}; use arrow::{ array::ArrayRef, @@ -40,7 +40,8 @@ use crate::covariance::CovarianceAccumulator; use crate::stddev::StddevAccumulator; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility, + Accumulator, AggregateUDFImpl, Coercion, Documentation, Signature, + TypeSignatureClass, Volatility, function::{AccumulatorArgs, StateFieldsArgs}, utils::format_state_name, }; @@ -85,8 +86,11 @@ impl Correlation { /// Create a new CORR aggregate function pub fn new() -> Self { Self { - signature: Signature::exact( - vec![DataType::Float64, DataType::Float64], + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Numeric), + Coercion::new_exact(TypeSignatureClass::Numeric), + ], Volatility::Immutable, ) .with_parameter_names(vec!["y".to_string(), "x".to_string()]) @@ -388,8 +392,22 @@ impl GroupsAccumulator for CorrelationGroupsAccumulator { self.sum_xx.resize(total_num_groups, 0.0); self.sum_yy.resize(total_num_groups, 0.0); - let array_x = downcast_array::(&values[0]); - let array_y = downcast_array::(&values[1]); + // Correlation computations use Float64 internally. Cast input arrays so this + // GroupsAccumulator can operate on any numeric input types (including Float32 + // and Decimal) without relying on signature-level coercion. + let array_x = if values[0].data_type() == &DataType::Float64 { + Arc::clone(&values[0]) + } else { + cast(&values[0], &DataType::Float64)? + }; + let array_y = if values[1].data_type() == &DataType::Float64 { + Arc::clone(&values[1]) + } else { + cast(&values[1], &DataType::Float64)? + }; + + let array_x = downcast_array::(&array_x); + let array_y = downcast_array::(&array_y); accumulate_multiple( group_indices, diff --git a/datafusion/functions-aggregate/src/covariance.rs b/datafusion/functions-aggregate/src/covariance.rs index e86d742db3d45..1d34c27267989 100644 --- a/datafusion/functions-aggregate/src/covariance.rs +++ b/datafusion/functions-aggregate/src/covariance.rs @@ -27,9 +27,9 @@ use datafusion_common::{ Result, ScalarValue, downcast_value, plan_err, unwrap_or_internal_err, }; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility, + Accumulator, AggregateUDFImpl, Coercion, Documentation, Signature, + TypeSignatureClass, Volatility, function::{AccumulatorArgs, StateFieldsArgs}, - type_coercion::aggregates::NUMERICS, utils::format_state_name, }; use datafusion_functions_aggregate_common::stats::StatsType; @@ -94,7 +94,13 @@ impl CovarianceSample { pub fn new() -> Self { Self { aliases: vec![String::from("covar")], - signature: Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable), + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Numeric), + Coercion::new_exact(TypeSignatureClass::Numeric), + ], + Volatility::Immutable, + ), } } } @@ -113,7 +119,10 @@ impl AggregateUDFImpl for CovarianceSample { } fn return_type(&self, arg_types: &[DataType]) -> Result { - if !arg_types[0].is_numeric() { + let is_numeric_or_null = + |dt: &DataType| dt.is_numeric() || matches!(dt, DataType::Null); + + if !is_numeric_or_null(&arg_types[0]) || !is_numeric_or_null(&arg_types[1]) { return plan_err!("Covariance requires numeric input types"); } @@ -188,7 +197,13 @@ impl Default for CovariancePopulation { impl CovariancePopulation { pub fn new() -> Self { Self { - signature: Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable), + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Numeric), + Coercion::new_exact(TypeSignatureClass::Numeric), + ], + Volatility::Immutable, + ), } } } @@ -207,7 +222,10 @@ impl AggregateUDFImpl for CovariancePopulation { } fn return_type(&self, arg_types: &[DataType]) -> Result { - if !arg_types[0].is_numeric() { + let is_numeric_or_null = + |dt: &DataType| dt.is_numeric() || matches!(dt, DataType::Null); + + if !is_numeric_or_null(&arg_types[0]) || !is_numeric_or_null(&arg_types[1]) { return plan_err!("Covariance requires numeric input types"); } diff --git a/datafusion/functions-aggregate/src/percentile_cont.rs b/datafusion/functions-aggregate/src/percentile_cont.rs index d6c8eabb459e6..6bc6ff2ac1a00 100644 --- a/datafusion/functions-aggregate/src/percentile_cont.rs +++ b/datafusion/functions-aggregate/src/percentile_cont.rs @@ -34,15 +34,15 @@ use arrow::{ use arrow::array::ArrowNativeTypeOp; use crate::min_max::{max_udaf, min_udaf}; +use datafusion_common::types::{NativeType, logical_float64}; use datafusion_common::{ DataFusionError, Result, ScalarValue, assert_eq_or_internal_err, internal_datafusion_err, plan_err, utils::take_function_args, }; -use datafusion_expr::type_coercion::aggregates::NUMERICS; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, - Volatility, + Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, Signature, + TypeSignatureClass, Volatility, }; use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_expr::{ @@ -144,15 +144,25 @@ impl Default for PercentileCont { impl PercentileCont { pub fn new() -> Self { - let mut variants = Vec::with_capacity(NUMERICS.len()); - // Accept any numeric value paired with a float64 percentile - for num in NUMERICS { - variants.push(TypeSignature::Exact(vec![num.clone(), DataType::Float64])); - } + let percentile_coercion = Coercion::new_implicit( + TypeSignatureClass::Native(logical_float64()), + vec![ + TypeSignatureClass::Integer, + TypeSignatureClass::Float, + TypeSignatureClass::Decimal, + ], + NativeType::Float64, + ); Self { - signature: Signature::one_of(variants, Volatility::Immutable) - .with_parameter_names(vec!["expr".to_string(), "percentile".to_string()]) - .expect("valid parameter names for percentile_cont"), + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Numeric), + percentile_coercion, + ], + Volatility::Immutable, + ) + .with_parameter_names(vec!["expr".to_string(), "percentile".to_string()]) + .expect("valid parameter names for percentile_cont"), aliases: vec![String::from("quantile_cont")], } } diff --git a/datafusion/functions-aggregate/src/regr.rs b/datafusion/functions-aggregate/src/regr.rs index bbc5567dab9d6..b343b194aafdf 100644 --- a/datafusion/functions-aggregate/src/regr.rs +++ b/datafusion/functions-aggregate/src/regr.rs @@ -30,10 +30,10 @@ use datafusion_common::{ }; use datafusion_doc::aggregate_doc_sections::DOC_SECTION_STATISTICAL; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; -use datafusion_expr::type_coercion::aggregates::NUMERICS; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility, + Accumulator, AggregateUDFImpl, Coercion, Documentation, Signature, + TypeSignatureClass, Volatility, }; use std::any::Any; use std::fmt::Debug; @@ -77,7 +77,13 @@ impl Debug for Regr { impl Regr { pub fn new(regr_type: RegrType, func_name: &'static str) -> Self { Self { - signature: Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable), + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Numeric), + Coercion::new_exact(TypeSignatureClass::Numeric), + ], + Volatility::Immutable, + ), regr_type, func_name, } @@ -469,8 +475,11 @@ impl AggregateUDFImpl for Regr { } fn return_type(&self, arg_types: &[DataType]) -> Result { - if !arg_types[0].is_numeric() { - return plan_err!("Covariance requires numeric input types"); + let is_numeric_or_null = + |dt: &DataType| dt.is_numeric() || matches!(dt, DataType::Null); + + if !is_numeric_or_null(&arg_types[0]) || !is_numeric_or_null(&arg_types[1]) { + return plan_err!("{} requires numeric input types", self.func_name); } if matches!(self.regr_type, RegrType::Count) { diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index a3fab065dc097..42b011e17445f 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -754,6 +754,72 @@ select stddev(sq.column1) from (values (1.1), (2.0), (3.0)) as sq ---- 0.950438495292 +# csv_query_stddev_6_decimal +query R +select stddev(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq +---- +0.950438495292 + +# csv_query_variance_6_decimal +query R +select var(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq +---- +0.903333333333 + +# csv_query_variance_pop_6_decimal +query R +select var_pop(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq +---- +0.602222222222 + +# csv_query_stddev_6_decimal_with_nulls +query R +select stddev(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(null as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq +---- +0.950438495292 + +# csv_query_variance_6_decimal_with_nulls +query R +select var(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(null as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq +---- +0.903333333333 + +# csv_query_variance_pop_6_decimal_with_nulls +query R +select var_pop(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(null as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq +---- +0.602222222222 + +# variance_decimal_max_precision +statement ok +create table t_decimal_max_precision (c decimal(38, 0)) as values + (cast('99999999999999999999999999999999999999' as decimal(38, 0))), + (cast('99999999999999999999999999999999999999' as decimal(38, 0))), + (cast('99999999999999999999999999999999999999' as decimal(38, 0))); + +query RT +select var_pop(c), arrow_typeof(var_pop(c)) from t_decimal_max_precision; +---- +0 Float64 + +query RT +select stddev_pop(c), arrow_typeof(stddev_pop(c)) from t_decimal_max_precision; +---- +0 Float64 + +query RT +select var(c), arrow_typeof(var(c)) from t_decimal_max_precision; +---- +0 Float64 + +query RT +select stddev(c), arrow_typeof(stddev(c)) from t_decimal_max_precision; +---- +0 Float64 + +statement ok +drop table t_decimal_max_precision; + # csv_query_stddev_7 query IR SELECT c2, stddev_samp(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 @@ -2084,6 +2150,16 @@ c 122 d 124 e 115 +# csv_query_approx_percentile_cont_with_weight_decimal_percentile +query TI +SELECT c1, approx_percentile_cont_with_weight(1, cast(0.95 as decimal(10,2))) WITHIN GROUP (ORDER BY c3) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 +---- +a 73 +b 68 +c 122 +d 124 +e 115 + # csv_query_approx_percentile_cont_with_weight alternate syntax query TI SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 @@ -2900,6 +2976,78 @@ select corr(c1, c2), arrow_typeof(corr(c1, c2)) from t; ---- 1 Float64 +# covariance_decimal_1 +statement ok +create table t_decimal_stats (c1 decimal(10,2), c2 decimal(10,2)) as values (1, 4), (2, 5), (3, 6); + +query RT +select covar_pop(c1, c2), arrow_typeof(covar_pop(c1, c2)) from t_decimal_stats; +---- +0.666666666667 Float64 + +query RT +select covar_samp(c1, c2), arrow_typeof(covar_samp(c1, c2)) from t_decimal_stats; +---- +1 Float64 + +query RT rowsort +select corr(c1, c2), arrow_typeof(corr(c1, c2)) from t_decimal_stats; +---- +1 Float64 + +statement ok +drop table t_decimal_stats; + +# covariance_decimal_with_nulls +statement ok +create table t_decimal_stats_nulls (f decimal(10,2), b decimal(10,2)) as values + (1, 4), + (null, 99), + (2, 5), + (98, null), + (3, 6), + (null, null); + +query RR +select covar_samp(f, b), covar_pop(f, b) from t_decimal_stats_nulls; +---- +1 0.666666666667 + +query R +select corr(f, b) from t_decimal_stats_nulls; +---- +1 + +statement ok +drop table t_decimal_stats_nulls; + +# covariance_mixed_decimal_float +statement ok +create table t_decimal_float_mix (x decimal(10,2), y double) as values (1, 4), (2, 5), (3, 6); + +query RT +select covar_pop(x, y), arrow_typeof(covar_pop(x, y)) from t_decimal_float_mix; +---- +0.666666666667 Float64 + +query RT +select covar_samp(x, y), arrow_typeof(covar_samp(x, y)) from t_decimal_float_mix; +---- +1 Float64 + +query RT rowsort +select corr(x, y), arrow_typeof(corr(x, y)) from t_decimal_float_mix; +---- +1 Float64 + +query RR +select regr_slope(y, x), regr_intercept(y, x) from t_decimal_float_mix; +---- +1 3 + +statement ok +drop table t_decimal_float_mix; + # correlation with different numeric types (create test data) statement ok CREATE OR REPLACE TABLE corr_test( @@ -3569,6 +3717,12 @@ SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 ---- 3 +# percentile_cont_decimal_percentile +query R +SELECT percentile_cont(cast(0.5 as decimal(10,2))) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 +---- +3 + query R SELECT percentile_cont(0.0) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 ---- @@ -6468,6 +6622,22 @@ from (values (1,2), (2,4), (3,6)); ---- 2 0 3 1 2 4 2 8 4 +# regr_*() decimal input +query RRIRRRRRR +select + regr_slope(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), + regr_intercept(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), + regr_count(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), + regr_r2(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), + regr_avgx(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), + regr_avgy(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), + regr_sxx(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), + regr_syy(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), + regr_sxy(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))) +from (values (1,2), (2,4), (3,6)); +---- +2 0 3 1 2 4 2 8 4 + query RRIRRRRRR select regr_slope(c12, c11), From f8be080585645cbc0e6d6b2854a96d9d524cef58 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Tue, 23 Dec 2025 16:47:41 +0530 Subject: [PATCH 2/3] revert changes to functions expr --- datafusion/expr/src/type_coercion/functions.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index b8733695cb88b..7be9713f53186 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -673,12 +673,12 @@ fn get_valid_types( default_casted_type.default_cast_for(current_type)?; new_types.push(casted_type); } else { - // No valid coercion for this signature given the current type. - // Return an empty set so the higher-level signature matching - // logic can continue checking other variants (e.g. `OneOf`) - // and/or produce a consistent "Failed to coerce arguments" - // error message. - return Ok(vec![]); + return internal_err!( + "Expect {} but received NativeType::{}, DataType: {}", + param.desired_type(), + current_native_type, + current_type + ); } } From fb87732a713fdf79c73967e52d41a638dd09b572 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Tue, 23 Dec 2025 23:25:01 +0530 Subject: [PATCH 3/3] refactor and revert for covariance --- .../src/approx_percentile_cont.rs | 42 ++-- .../src/approx_percentile_cont_with_weight.rs | 45 ++-- .../functions-aggregate/src/correlation.rs | 30 +-- .../functions-aggregate/src/covariance.rs | 29 +-- .../src/percentile_cont.rs | 32 +-- datafusion/functions-aggregate/src/regr.rs | 19 +- .../sqllogictest/test_files/aggregate.slt | 222 ++++-------------- 7 files changed, 111 insertions(+), 308 deletions(-) diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index bc84b1aa0f5a0..b1e649ec029ff 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -30,17 +30,17 @@ use arrow::{ }, datatypes::{DataType, Field}, }; -use datafusion_common::types::{NativeType, logical_float64}; use datafusion_common::{ DataFusionError, Result, ScalarValue, downcast_value, internal_err, not_impl_err, plan_err, }; use datafusion_expr::expr::{AggregateFunction, Sort}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, Signature, - TypeSignature, TypeSignatureClass, Volatility, + Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, + Volatility, }; use datafusion_functions_aggregate_common::tdigest::{DEFAULT_MAX_SIZE, TDigest}; use datafusion_macros::user_doc; @@ -133,30 +133,18 @@ impl Default for ApproxPercentileCont { impl ApproxPercentileCont { /// Create a new [`ApproxPercentileCont`] aggregate function. pub fn new() -> Self { - let percentile_coercion = Coercion::new_implicit( - TypeSignatureClass::Native(logical_float64()), - vec![ - TypeSignatureClass::Integer, - TypeSignatureClass::Float, - TypeSignatureClass::Decimal, - ], - NativeType::Float64, - ); - - // Value must be numeric (excluding Decimal) as the accumulator currently only - // supports integers and floats. Percentile may be any numeric literal that can - // be coerced to Float64 (to support parse_float_as_decimal). - let mut variants = Vec::with_capacity(4); - for value_class in [TypeSignatureClass::Integer, TypeSignatureClass::Float] { - variants.push(TypeSignature::Coercible(vec![ - Coercion::new_exact(value_class.clone()), - percentile_coercion.clone(), - ])); - variants.push(TypeSignature::Coercible(vec![ - Coercion::new_exact(value_class), - percentile_coercion.clone(), - Coercion::new_exact(TypeSignatureClass::Integer), - ])); + let mut variants = Vec::with_capacity(NUMERICS.len() * (INTEGERS.len() + 1)); + // Accept any numeric value paired with a float64 percentile + for num in NUMERICS { + variants.push(TypeSignature::Exact(vec![num.clone(), DataType::Float64])); + // Additionally accept an integer number of centroids for T-Digest + for int in INTEGERS { + variants.push(TypeSignature::Exact(vec![ + num.clone(), + DataType::Float64, + int.clone(), + ])) + } } Self { signature: Signature::one_of(variants, Volatility::Immutable), diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs index 90a453534e50c..ff7762e816ad6 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs @@ -25,14 +25,13 @@ use arrow::compute::{and, filter, is_not_null}; use arrow::datatypes::FieldRef; use arrow::{array::ArrayRef, datatypes::DataType}; use datafusion_common::ScalarValue; -use datafusion_common::types::{NativeType, logical_float64}; use datafusion_common::{Result, not_impl_err, plan_err}; use datafusion_expr::Volatility::Immutable; use datafusion_expr::expr::{AggregateFunction, Sort}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, Signature, - TypeSignature, TypeSignatureClass, + Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, }; use datafusion_functions_aggregate_common::tdigest::{Centroid, TDigest}; use datafusion_macros::user_doc; @@ -135,31 +134,21 @@ impl Default for ApproxPercentileContWithWeight { impl ApproxPercentileContWithWeight { /// Create a new [`ApproxPercentileContWithWeight`] aggregate function. pub fn new() -> Self { - let percentile_coercion = Coercion::new_implicit( - TypeSignatureClass::Native(logical_float64()), - vec![ - TypeSignatureClass::Integer, - TypeSignatureClass::Float, - TypeSignatureClass::Decimal, - ], - NativeType::Float64, - ); - - let value_classes = [TypeSignatureClass::Integer, TypeSignatureClass::Float]; - let mut variants = - Vec::with_capacity(value_classes.len() * value_classes.len() * 2); - for value_class in value_classes.iter() { - for weight_class in value_classes.iter() { - variants.push(TypeSignature::Coercible(vec![ - Coercion::new_exact(value_class.clone()), - Coercion::new_exact(weight_class.clone()), - percentile_coercion.clone(), - ])); - variants.push(TypeSignature::Coercible(vec![ - Coercion::new_exact(value_class.clone()), - Coercion::new_exact(weight_class.clone()), - percentile_coercion.clone(), - Coercion::new_exact(TypeSignatureClass::Integer), + let mut variants = Vec::with_capacity(NUMERICS.len() * (INTEGERS.len() + 1)); + // Accept any numeric value paired with weight and float64 percentile + for num in NUMERICS { + variants.push(TypeSignature::Exact(vec![ + num.clone(), + num.clone(), + DataType::Float64, + ])); + // Additionally accept an integer number of centroids for T-Digest + for int in INTEGERS { + variants.push(TypeSignature::Exact(vec![ + num.clone(), + num.clone(), + DataType::Float64, + int.clone(), ])); } } diff --git a/datafusion/functions-aggregate/src/correlation.rs b/datafusion/functions-aggregate/src/correlation.rs index 459073b8bd574..538311dfa2637 100644 --- a/datafusion/functions-aggregate/src/correlation.rs +++ b/datafusion/functions-aggregate/src/correlation.rs @@ -26,7 +26,7 @@ use arrow::array::{ Array, AsArray, BooleanArray, Float64Array, NullBufferBuilder, UInt64Array, downcast_array, }; -use arrow::compute::{and, cast, filter, is_not_null}; +use arrow::compute::{and, filter, is_not_null}; use arrow::datatypes::{FieldRef, Float64Type, UInt64Type}; use arrow::{ array::ArrayRef, @@ -40,8 +40,7 @@ use crate::covariance::CovarianceAccumulator; use crate::stddev::StddevAccumulator; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Coercion, Documentation, Signature, - TypeSignatureClass, Volatility, + Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility, function::{AccumulatorArgs, StateFieldsArgs}, utils::format_state_name, }; @@ -86,11 +85,8 @@ impl Correlation { /// Create a new CORR aggregate function pub fn new() -> Self { Self { - signature: Signature::coercible( - vec![ - Coercion::new_exact(TypeSignatureClass::Numeric), - Coercion::new_exact(TypeSignatureClass::Numeric), - ], + signature: Signature::exact( + vec![DataType::Float64, DataType::Float64], Volatility::Immutable, ) .with_parameter_names(vec!["y".to_string(), "x".to_string()]) @@ -392,22 +388,8 @@ impl GroupsAccumulator for CorrelationGroupsAccumulator { self.sum_xx.resize(total_num_groups, 0.0); self.sum_yy.resize(total_num_groups, 0.0); - // Correlation computations use Float64 internally. Cast input arrays so this - // GroupsAccumulator can operate on any numeric input types (including Float32 - // and Decimal) without relying on signature-level coercion. - let array_x = if values[0].data_type() == &DataType::Float64 { - Arc::clone(&values[0]) - } else { - cast(&values[0], &DataType::Float64)? - }; - let array_y = if values[1].data_type() == &DataType::Float64 { - Arc::clone(&values[1]) - } else { - cast(&values[1], &DataType::Float64)? - }; - - let array_x = downcast_array::(&array_x); - let array_y = downcast_array::(&array_y); + let array_x = downcast_array::(&values[0]); + let array_y = downcast_array::(&values[1]); accumulate_multiple( group_indices, diff --git a/datafusion/functions-aggregate/src/covariance.rs b/datafusion/functions-aggregate/src/covariance.rs index 1d34c27267989..ddaca918ad1cc 100644 --- a/datafusion/functions-aggregate/src/covariance.rs +++ b/datafusion/functions-aggregate/src/covariance.rs @@ -27,8 +27,7 @@ use datafusion_common::{ Result, ScalarValue, downcast_value, plan_err, unwrap_or_internal_err, }; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Coercion, Documentation, Signature, - TypeSignatureClass, Volatility, + Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility, function::{AccumulatorArgs, StateFieldsArgs}, utils::format_state_name, }; @@ -94,13 +93,7 @@ impl CovarianceSample { pub fn new() -> Self { Self { aliases: vec![String::from("covar")], - signature: Signature::coercible( - vec![ - Coercion::new_exact(TypeSignatureClass::Numeric), - Coercion::new_exact(TypeSignatureClass::Numeric), - ], - Volatility::Immutable, - ), + signature: Signature::numeric(2, Volatility::Immutable), } } } @@ -119,10 +112,7 @@ impl AggregateUDFImpl for CovarianceSample { } fn return_type(&self, arg_types: &[DataType]) -> Result { - let is_numeric_or_null = - |dt: &DataType| dt.is_numeric() || matches!(dt, DataType::Null); - - if !is_numeric_or_null(&arg_types[0]) || !is_numeric_or_null(&arg_types[1]) { + if !arg_types[0].is_numeric() { return plan_err!("Covariance requires numeric input types"); } @@ -197,13 +187,7 @@ impl Default for CovariancePopulation { impl CovariancePopulation { pub fn new() -> Self { Self { - signature: Signature::coercible( - vec![ - Coercion::new_exact(TypeSignatureClass::Numeric), - Coercion::new_exact(TypeSignatureClass::Numeric), - ], - Volatility::Immutable, - ), + signature: Signature::numeric(2, Volatility::Immutable), } } } @@ -222,10 +206,7 @@ impl AggregateUDFImpl for CovariancePopulation { } fn return_type(&self, arg_types: &[DataType]) -> Result { - let is_numeric_or_null = - |dt: &DataType| dt.is_numeric() || matches!(dt, DataType::Null); - - if !is_numeric_or_null(&arg_types[0]) || !is_numeric_or_null(&arg_types[1]) { + if !arg_types[0].is_numeric() { return plan_err!("Covariance requires numeric input types"); } diff --git a/datafusion/functions-aggregate/src/percentile_cont.rs b/datafusion/functions-aggregate/src/percentile_cont.rs index 6bc6ff2ac1a00..d6c8eabb459e6 100644 --- a/datafusion/functions-aggregate/src/percentile_cont.rs +++ b/datafusion/functions-aggregate/src/percentile_cont.rs @@ -34,15 +34,15 @@ use arrow::{ use arrow::array::ArrowNativeTypeOp; use crate::min_max::{max_udaf, min_udaf}; -use datafusion_common::types::{NativeType, logical_float64}; use datafusion_common::{ DataFusionError, Result, ScalarValue, assert_eq_or_internal_err, internal_datafusion_err, plan_err, utils::take_function_args, }; +use datafusion_expr::type_coercion::aggregates::NUMERICS; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, Signature, - TypeSignatureClass, Volatility, + Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, + Volatility, }; use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_expr::{ @@ -144,25 +144,15 @@ impl Default for PercentileCont { impl PercentileCont { pub fn new() -> Self { - let percentile_coercion = Coercion::new_implicit( - TypeSignatureClass::Native(logical_float64()), - vec![ - TypeSignatureClass::Integer, - TypeSignatureClass::Float, - TypeSignatureClass::Decimal, - ], - NativeType::Float64, - ); + let mut variants = Vec::with_capacity(NUMERICS.len()); + // Accept any numeric value paired with a float64 percentile + for num in NUMERICS { + variants.push(TypeSignature::Exact(vec![num.clone(), DataType::Float64])); + } Self { - signature: Signature::coercible( - vec![ - Coercion::new_exact(TypeSignatureClass::Numeric), - percentile_coercion, - ], - Volatility::Immutable, - ) - .with_parameter_names(vec!["expr".to_string(), "percentile".to_string()]) - .expect("valid parameter names for percentile_cont"), + signature: Signature::one_of(variants, Volatility::Immutable) + .with_parameter_names(vec!["expr".to_string(), "percentile".to_string()]) + .expect("valid parameter names for percentile_cont"), aliases: vec![String::from("quantile_cont")], } } diff --git a/datafusion/functions-aggregate/src/regr.rs b/datafusion/functions-aggregate/src/regr.rs index b343b194aafdf..bbc5567dab9d6 100644 --- a/datafusion/functions-aggregate/src/regr.rs +++ b/datafusion/functions-aggregate/src/regr.rs @@ -30,10 +30,10 @@ use datafusion_common::{ }; use datafusion_doc::aggregate_doc_sections::DOC_SECTION_STATISTICAL; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::type_coercion::aggregates::NUMERICS; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Coercion, Documentation, Signature, - TypeSignatureClass, Volatility, + Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility, }; use std::any::Any; use std::fmt::Debug; @@ -77,13 +77,7 @@ impl Debug for Regr { impl Regr { pub fn new(regr_type: RegrType, func_name: &'static str) -> Self { Self { - signature: Signature::coercible( - vec![ - Coercion::new_exact(TypeSignatureClass::Numeric), - Coercion::new_exact(TypeSignatureClass::Numeric), - ], - Volatility::Immutable, - ), + signature: Signature::uniform(2, NUMERICS.to_vec(), Volatility::Immutable), regr_type, func_name, } @@ -475,11 +469,8 @@ impl AggregateUDFImpl for Regr { } fn return_type(&self, arg_types: &[DataType]) -> Result { - let is_numeric_or_null = - |dt: &DataType| dt.is_numeric() || matches!(dt, DataType::Null); - - if !is_numeric_or_null(&arg_types[0]) || !is_numeric_or_null(&arg_types[1]) { - return plan_err!("{} requires numeric input types", self.func_name); + if !arg_types[0].is_numeric() { + return plan_err!("Covariance requires numeric input types"); } if matches!(self.regr_type, RegrType::Count) { diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 42b011e17445f..8b754febd8e15 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -562,6 +562,58 @@ from data ---- 1 0.666666666667 +# covariance_decimal_1 +statement ok +create table t_covar_decimal (c1 decimal(10,2), c2 decimal(10,2)) as values (1, 4), (2, 5), (3, 6); + +query RT +select covar_pop(c1, c2), arrow_typeof(covar_pop(c1, c2)) from t_covar_decimal; +---- +0.666666666667 Float64 + +query RT +select covar_samp(c1, c2), arrow_typeof(covar_samp(c1, c2)) from t_covar_decimal; +---- +1 Float64 + +statement ok +drop table t_covar_decimal; + +# covariance_decimal_with_nulls +statement ok +create table t_covar_decimal_nulls (f decimal(10,2), b decimal(10,2)) as values + (1, 4), + (null, 99), + (2, 5), + (98, null), + (3, 6), + (null, null); + +query RR +select covar_samp(f, b), covar_pop(f, b) from t_covar_decimal_nulls; +---- +1 0.666666666667 + +statement ok +drop table t_covar_decimal_nulls; + +# covariance_mixed_decimal_float +statement ok +create table t_covar_mixed (x decimal(10,2), y double) as values (1, 4), (2, 5), (3, 6); + +query RT +select covar_pop(x, y), arrow_typeof(covar_pop(x, y)) from t_covar_mixed; +---- +0.666666666667 Float64 + +query RT +select covar_samp(x, y), arrow_typeof(covar_samp(x, y)) from t_covar_mixed; +---- +1 Float64 + +statement ok +drop table t_covar_mixed; + # csv_query_correlation query R SELECT corr(c2, c12) FROM aggregate_test_100 @@ -754,72 +806,6 @@ select stddev(sq.column1) from (values (1.1), (2.0), (3.0)) as sq ---- 0.950438495292 -# csv_query_stddev_6_decimal -query R -select stddev(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq ----- -0.950438495292 - -# csv_query_variance_6_decimal -query R -select var(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq ----- -0.903333333333 - -# csv_query_variance_pop_6_decimal -query R -select var_pop(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq ----- -0.602222222222 - -# csv_query_stddev_6_decimal_with_nulls -query R -select stddev(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(null as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq ----- -0.950438495292 - -# csv_query_variance_6_decimal_with_nulls -query R -select var(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(null as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq ----- -0.903333333333 - -# csv_query_variance_pop_6_decimal_with_nulls -query R -select var_pop(sq.column1) from (values (cast(1.1 as decimal(10,1))), (cast(null as decimal(10,1))), (cast(2.0 as decimal(10,1))), (cast(3.0 as decimal(10,1)))) as sq ----- -0.602222222222 - -# variance_decimal_max_precision -statement ok -create table t_decimal_max_precision (c decimal(38, 0)) as values - (cast('99999999999999999999999999999999999999' as decimal(38, 0))), - (cast('99999999999999999999999999999999999999' as decimal(38, 0))), - (cast('99999999999999999999999999999999999999' as decimal(38, 0))); - -query RT -select var_pop(c), arrow_typeof(var_pop(c)) from t_decimal_max_precision; ----- -0 Float64 - -query RT -select stddev_pop(c), arrow_typeof(stddev_pop(c)) from t_decimal_max_precision; ----- -0 Float64 - -query RT -select var(c), arrow_typeof(var(c)) from t_decimal_max_precision; ----- -0 Float64 - -query RT -select stddev(c), arrow_typeof(stddev(c)) from t_decimal_max_precision; ----- -0 Float64 - -statement ok -drop table t_decimal_max_precision; - # csv_query_stddev_7 query IR SELECT c2, stddev_samp(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 @@ -2150,16 +2136,6 @@ c 122 d 124 e 115 -# csv_query_approx_percentile_cont_with_weight_decimal_percentile -query TI -SELECT c1, approx_percentile_cont_with_weight(1, cast(0.95 as decimal(10,2))) WITHIN GROUP (ORDER BY c3) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 ----- -a 73 -b 68 -c 122 -d 124 -e 115 - # csv_query_approx_percentile_cont_with_weight alternate syntax query TI SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 @@ -2976,78 +2952,6 @@ select corr(c1, c2), arrow_typeof(corr(c1, c2)) from t; ---- 1 Float64 -# covariance_decimal_1 -statement ok -create table t_decimal_stats (c1 decimal(10,2), c2 decimal(10,2)) as values (1, 4), (2, 5), (3, 6); - -query RT -select covar_pop(c1, c2), arrow_typeof(covar_pop(c1, c2)) from t_decimal_stats; ----- -0.666666666667 Float64 - -query RT -select covar_samp(c1, c2), arrow_typeof(covar_samp(c1, c2)) from t_decimal_stats; ----- -1 Float64 - -query RT rowsort -select corr(c1, c2), arrow_typeof(corr(c1, c2)) from t_decimal_stats; ----- -1 Float64 - -statement ok -drop table t_decimal_stats; - -# covariance_decimal_with_nulls -statement ok -create table t_decimal_stats_nulls (f decimal(10,2), b decimal(10,2)) as values - (1, 4), - (null, 99), - (2, 5), - (98, null), - (3, 6), - (null, null); - -query RR -select covar_samp(f, b), covar_pop(f, b) from t_decimal_stats_nulls; ----- -1 0.666666666667 - -query R -select corr(f, b) from t_decimal_stats_nulls; ----- -1 - -statement ok -drop table t_decimal_stats_nulls; - -# covariance_mixed_decimal_float -statement ok -create table t_decimal_float_mix (x decimal(10,2), y double) as values (1, 4), (2, 5), (3, 6); - -query RT -select covar_pop(x, y), arrow_typeof(covar_pop(x, y)) from t_decimal_float_mix; ----- -0.666666666667 Float64 - -query RT -select covar_samp(x, y), arrow_typeof(covar_samp(x, y)) from t_decimal_float_mix; ----- -1 Float64 - -query RT rowsort -select corr(x, y), arrow_typeof(corr(x, y)) from t_decimal_float_mix; ----- -1 Float64 - -query RR -select regr_slope(y, x), regr_intercept(y, x) from t_decimal_float_mix; ----- -1 3 - -statement ok -drop table t_decimal_float_mix; - # correlation with different numeric types (create test data) statement ok CREATE OR REPLACE TABLE corr_test( @@ -3717,12 +3621,6 @@ SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 ---- 3 -# percentile_cont_decimal_percentile -query R -SELECT percentile_cont(cast(0.5 as decimal(10,2))) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 ----- -3 - query R SELECT percentile_cont(0.0) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 ---- @@ -6622,22 +6520,6 @@ from (values (1,2), (2,4), (3,6)); ---- 2 0 3 1 2 4 2 8 4 -# regr_*() decimal input -query RRIRRRRRR -select - regr_slope(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), - regr_intercept(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), - regr_count(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), - regr_r2(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), - regr_avgx(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), - regr_avgy(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), - regr_sxx(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), - regr_syy(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))), - regr_sxy(cast(column2 as decimal(10,2)), cast(column1 as decimal(10,2))) -from (values (1,2), (2,4), (3,6)); ----- -2 0 3 1 2 4 2 8 4 - query RRIRRRRRR select regr_slope(c12, c11),