From d70ee2ea21e01ffa5bdac0e61f263e5ebd96f92f Mon Sep 17 00:00:00 2001 From: Chunchun <14298407+appletreeisyellow@users.noreply.github.com> Date: Mon, 29 Jan 2024 16:25:40 -0600 Subject: [PATCH 1/3] chore(date_trunc): add not_impl_err for ambiguous time --- datafusion/physical-expr/src/datetime_expressions.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index d21d89c19d2eb..2205b41d9bee8 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -600,6 +600,14 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option) -> Result Date: Mon, 29 Jan 2024 18:20:14 -0600 Subject: [PATCH 2/3] test(date_trunc): add tests --- .../physical-expr/src/datetime_expressions.rs | 123 +++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index 2205b41d9bee8..6ccc85edd0942 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -581,7 +581,7 @@ fn _date_trunc_coarse_without_tz( /// account that some granularities are not uniform durations of time /// (e.g. months are not always the same lengths, leap seconds, etc) fn date_trunc_coarse(granularity: &str, value: i64, tz: Option) -> Result { - let value = match tz { + let value: Option = match tz { Some(tz) => { // Use chrono DateTime to clear the various fields because need to clear per timezone, // and NaiveDateTime (ISO 8601) has no concept of timezones @@ -1693,6 +1693,21 @@ mod tests { "2020-09-08T00:00:00+08", ], ), + ( + vec![ + "2023-10-28T23:00:00+00:00", + "2023-10-29T02:00:00+00:00", + "2024-10-26T23:00:00+00:00", + "2024-10-27T02:00:00+00:00", + ], + Some("Europe/Berlin".into()), + vec![ + "2023-10-29T00:00:00+02:00", + "2023-10-29T00:00:00+02:00", + "2024-10-27T00:00:00+02:00", + "2024-10-27T00:00:00+02:00", + ], + ), ]; cases.iter().for_each(|(original, tz_opt, expected)| { @@ -1724,6 +1739,112 @@ mod tests { }); } + #[test] + fn test_date_trunc_timezones_with_error() { + let cases: Vec<(Vec<&str>, Option>, &str)> = vec![ + // daylight saving time ends at 2023-10-29T01:00:00+00:00 in Europe/Berlin + ( + vec!["2023-10-29T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "second", + ), + ( + vec!["2023-10-29T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "minute", + ), + ( + vec!["2023-10-29T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "hour", + ), + ( + vec!["2023-10-29T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "day", + ), + ( + vec!["2023-10-29T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "week", + ), + ( + vec!["2023-10-29T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "month", + ), + ( + vec!["2023-10-29T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "quarter", + ), + ( + vec!["2023-10-29T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "year", + ), + // daylight saving time ends at 2024-10-27T01:00:00+00:00 in Europe/Berlin + ( + vec!["2024-10-27T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "second", + ), + ( + vec!["2024-10-27T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "minute", + ), + ( + vec!["2024-10-27T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "hour", + ), + ( + vec!["2024-10-27T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "day", + ), + ( + vec!["2024-10-27T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "week", + ), + ( + vec!["2024-10-27T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "month", + ), + ( + vec!["2024-10-27T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "quarter", + ), + ( + vec!["2024-10-27T00:00:00+00:00"], + Some("Europe/Berlin".into()), + "year", + ), + ]; + + cases.iter().for_each(|(original, tz_opt, granularity)| { + let input = original + .iter() + .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) + .collect::() + .with_timezone_opt(tz_opt.clone()); + let result = date_trunc(&[ + ColumnarValue::Scalar(ScalarValue::from(*granularity)), + ColumnarValue::Array(Arc::new(input)), + ]); + assert!(result.is_err()); + + assert!(matches!( + result.unwrap_err(), + DataFusionError::NotImplemented(_) + )); + }); + } + #[test] fn test_date_bin_single() { use chrono::Duration; From 381d43289bed46309a9be2a70d09d51c659bdde3 Mon Sep 17 00:00:00 2001 From: Chunchun <14298407+appletreeisyellow@users.noreply.github.com> Date: Mon, 29 Jan 2024 22:02:53 -0600 Subject: [PATCH 3/3] chore(date_trunc): refactor tests --- .../physical-expr/src/datetime_expressions.rs | 102 ++++-------------- 1 file changed, 19 insertions(+), 83 deletions(-) diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index 6ccc85edd0942..544b43e17eed2 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -1741,97 +1741,33 @@ mod tests { #[test] fn test_date_trunc_timezones_with_error() { - let cases: Vec<(Vec<&str>, Option>, &str)> = vec![ + let cases = vec![ // daylight saving time ends at 2023-10-29T01:00:00+00:00 in Europe/Berlin - ( - vec!["2023-10-29T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "second", - ), - ( - vec!["2023-10-29T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "minute", - ), - ( - vec!["2023-10-29T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "hour", - ), - ( - vec!["2023-10-29T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "day", - ), - ( - vec!["2023-10-29T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "week", - ), - ( - vec!["2023-10-29T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "month", - ), - ( - vec!["2023-10-29T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "quarter", - ), - ( - vec!["2023-10-29T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "year", - ), + (vec!["2023-10-29T00:00:00+00:00"], "second"), + (vec!["2023-10-29T00:00:00+00:00"], "minute"), + (vec!["2023-10-29T00:00:00+00:00"], "hour"), + (vec!["2023-10-29T00:00:00+00:00"], "day"), + (vec!["2023-10-29T00:00:00+00:00"], "week"), + (vec!["2023-10-29T00:00:00+00:00"], "month"), + (vec!["2023-10-29T00:00:00+00:00"], "quarter"), + (vec!["2023-10-29T00:00:00+00:00"], "year"), // daylight saving time ends at 2024-10-27T01:00:00+00:00 in Europe/Berlin - ( - vec!["2024-10-27T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "second", - ), - ( - vec!["2024-10-27T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "minute", - ), - ( - vec!["2024-10-27T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "hour", - ), - ( - vec!["2024-10-27T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "day", - ), - ( - vec!["2024-10-27T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "week", - ), - ( - vec!["2024-10-27T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "month", - ), - ( - vec!["2024-10-27T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "quarter", - ), - ( - vec!["2024-10-27T00:00:00+00:00"], - Some("Europe/Berlin".into()), - "year", - ), + (vec!["2024-10-27T00:00:00+00:00"], "second"), + (vec!["2024-10-27T00:00:00+00:00"], "minute"), + (vec!["2024-10-27T00:00:00+00:00"], "hour"), + (vec!["2024-10-27T00:00:00+00:00"], "day"), + (vec!["2024-10-27T00:00:00+00:00"], "week"), + (vec!["2024-10-27T00:00:00+00:00"], "month"), + (vec!["2024-10-27T00:00:00+00:00"], "quarter"), + (vec!["2024-10-27T00:00:00+00:00"], "year"), ]; - cases.iter().for_each(|(original, tz_opt, granularity)| { + cases.iter().for_each(|(original, granularity)| { let input = original .iter() .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) .collect::() - .with_timezone_opt(tz_opt.clone()); + .with_timezone_opt("Europe/Berlin".into()); let result = date_trunc(&[ ColumnarValue::Scalar(ScalarValue::from(*granularity)), ColumnarValue::Array(Arc::new(input)),