From 42ec5109bb4c6249d8404e862c457ebd86ee0623 Mon Sep 17 00:00:00 2001 From: svranesevic Date: Wed, 19 Mar 2025 14:20:53 +0100 Subject: [PATCH 1/2] Support min/max agg functions on values of duration type --- datafusion/functions-aggregate/src/min_max.rs | 36 ++++++++++++++++--- .../sqllogictest/test_files/aggregate.slt | 20 +++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index 83356e2f9fb42..17ac8c43019c8 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -22,8 +22,9 @@ mod min_max_bytes; use arrow::array::{ ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, - Decimal128Array, Decimal256Array, Float16Array, Float32Array, Float64Array, - Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray, + Decimal128Array, Decimal256Array, DurationMicrosecondArray, DurationMillisecondArray, + DurationNanosecondArray, DurationSecondArray, Float16Array, Float32Array, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeStringArray, StringArray, StringViewArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, @@ -518,6 +519,33 @@ macro_rules! min_max_batch { $OP ) } + DataType::Duration(TimeUnit::Second) => { + typed_min_max_batch!($VALUES, DurationSecondArray, DurationSecond, $OP) + } + DataType::Duration(TimeUnit::Millisecond) => { + typed_min_max_batch!( + $VALUES, + DurationMillisecondArray, + DurationMillisecond, + $OP + ) + } + DataType::Duration(TimeUnit::Microsecond) => { + typed_min_max_batch!( + $VALUES, + DurationMicrosecondArray, + DurationMicrosecond, + $OP + ) + } + DataType::Duration(TimeUnit::Nanosecond) => { + typed_min_max_batch!( + $VALUES, + DurationNanosecondArray, + DurationNanosecond, + $OP + ) + } other => { // This should have been handled before return internal_err!( @@ -1597,7 +1625,7 @@ mod tests { assert_eq!( min_res, ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - -2, 4 + -2, 4, ))) ); @@ -1609,7 +1637,7 @@ mod tests { assert_eq!( max_res, ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 5, 34 + 5, 34, ))) ); diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index bc43f6bc8e61a..c5b3ca7942455 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -3807,6 +3807,26 @@ SELECT MIN(value), MAX(value) FROM timestampmicrosecond statement ok DROP TABLE timestampmicrosecond; +# min_duration, max_duration +statement ok +create table d +as values + (arrow_cast(1, 'Duration(Second)'), arrow_cast(2, 'Duration(Millisecond)'), arrow_cast(3, 'Duration(Microsecond)'), arrow_cast(4, 'Duration(Nanosecond)')), + (arrow_cast(11, 'Duration(Second)'),arrow_cast(22, 'Duration(Millisecond)'), arrow_cast(33, 'Duration(Microsecond)'), arrow_cast(44, 'Duration(Nanosecond)')); + +query ???? +SELECT min(column1), min(column2), min(column3), min(column4) FROM d; +---- +0 days 0 hours 0 mins 1 secs 0 days 0 hours 0 mins 0.002 secs 0 days 0 hours 0 mins 0.000003 secs 0 days 0 hours 0 mins 0.000000004 secs + +query ???? +SELECT max(column1), max(column2), max(column3), max(column4) FROM d; +---- +0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 0.022 secs 0 days 0 hours 0 mins 0.000033 secs 0 days 0 hours 0 mins 0.000000044 secs + +statement ok +drop table d; + # max_bool statement ok CREATE TABLE max_bool (value BOOLEAN); From 6155986e98518900009462417587c2c62c8560d6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 19 Mar 2025 15:32:55 -0400 Subject: [PATCH 2/2] Add tests for GROUP BY as well --- datafusion/sqllogictest/test_files/aggregate.slt | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index c5b3ca7942455..9598913f57f79 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -3811,8 +3811,8 @@ DROP TABLE timestampmicrosecond; statement ok create table d as values - (arrow_cast(1, 'Duration(Second)'), arrow_cast(2, 'Duration(Millisecond)'), arrow_cast(3, 'Duration(Microsecond)'), arrow_cast(4, 'Duration(Nanosecond)')), - (arrow_cast(11, 'Duration(Second)'),arrow_cast(22, 'Duration(Millisecond)'), arrow_cast(33, 'Duration(Microsecond)'), arrow_cast(44, 'Duration(Nanosecond)')); + (arrow_cast(1, 'Duration(Second)'), arrow_cast(2, 'Duration(Millisecond)'), arrow_cast(3, 'Duration(Microsecond)'), arrow_cast(4, 'Duration(Nanosecond)'), 1), + (arrow_cast(11, 'Duration(Second)'),arrow_cast(22, 'Duration(Millisecond)'), arrow_cast(33, 'Duration(Microsecond)'), arrow_cast(44, 'Duration(Nanosecond)'), 1); query ???? SELECT min(column1), min(column2), min(column3), min(column4) FROM d; @@ -3824,6 +3824,18 @@ SELECT max(column1), max(column2), max(column3), max(column4) FROM d; ---- 0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 0.022 secs 0 days 0 hours 0 mins 0.000033 secs 0 days 0 hours 0 mins 0.000000044 secs +# GROUP BY follows a different code path +query ????I +SELECT min(column1), min(column2), min(column3), min(column4), column5 FROM d GROUP BY column5; +---- +0 days 0 hours 0 mins 1 secs 0 days 0 hours 0 mins 0.002 secs 0 days 0 hours 0 mins 0.000003 secs 0 days 0 hours 0 mins 0.000000004 secs 1 + +query ????I +SELECT max(column1), max(column2), max(column3), max(column4), column5 FROM d GROUP BY column5; +---- +0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 0.022 secs 0 days 0 hours 0 mins 0.000033 secs 0 days 0 hours 0 mins 0.000000044 secs 1 + + statement ok drop table d;