From bbdef1e40f86a4046eead90b7fab3321a6960d46 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Nov 2022 08:11:54 +0000 Subject: [PATCH 1/6] Update sqlparser requirement from 0.26 to 0.27 Updates the requirements on [sqlparser](https://github.com/sqlparser-rs/sqlparser-rs) to permit the latest version. - [Release notes](https://github.com/sqlparser-rs/sqlparser-rs/releases) - [Changelog](https://github.com/sqlparser-rs/sqlparser-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/sqlparser-rs/sqlparser-rs/compare/v0.26.0...v0.27.0) --- updated-dependencies: - dependency-name: sqlparser dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- datafusion/common/Cargo.toml | 2 +- datafusion/core/Cargo.toml | 2 +- datafusion/expr/Cargo.toml | 2 +- datafusion/sql/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index cbbaf4337cd9c..588629859258c 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -46,4 +46,4 @@ cranelift-module = { version = "0.89.0", optional = true } object_store = { version = "0.5.0", default-features = false, optional = true } parquet = { version = "26.0.0", default-features = false, optional = true } pyo3 = { version = "0.17.1", optional = true } -sqlparser = "0.26" +sqlparser = "0.27" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 039d0ebe68662..71e0634bebab1 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -89,7 +89,7 @@ pyo3 = { version = "0.17.1", optional = true } rand = "0.8" rayon = { version = "1.5", optional = true } smallvec = { version = "1.6", features = ["union"] } -sqlparser = "0.26" +sqlparser = "0.27" tempfile = "3" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } tokio-stream = "0.1" diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index db4afdc2cfd12..e4bd1a3ef7bbd 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -39,4 +39,4 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] arrow = { version = "26.0.0", default-features = false } datafusion-common = { path = "../common", version = "14.0.0" } log = "^0.4" -sqlparser = "0.26" +sqlparser = "0.27" diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 8939dc8e8ad53..058785fdb7920 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -40,4 +40,4 @@ unicode_expressions = [] arrow = { version = "26.0.0", default-features = false } datafusion-common = { path = "../common", version = "14.0.0" } datafusion-expr = { path = "../expr", version = "14.0.0" } -sqlparser = "0.26" +sqlparser = "0.27" From 532a38e0f840233263dfde1527c641c6bd3adf6d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 15 Nov 2022 15:11:02 -0500 Subject: [PATCH 2/6] Update to sqlparser 0.27 --- datafusion-cli/src/object_storage.rs | 2 +- datafusion/sql/src/planner.rs | 33 +++++++++++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index 0982f3ff9ac0e..64c48840eefb5 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -145,7 +145,7 @@ mod tests { let res = provider.get_by_url(&url); let msg = match res { Err(e) => format!("{}", e), - Ok(_) => "".to_string() + Ok(_) => "".to_string(), }; assert_eq!("".to_string(), msg); // Fail with error message env::remove_var("AWS_REGION"); diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index bd698b505ac7e..e22516f1c0171 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -55,7 +55,7 @@ use datafusion_expr::logical_plan::builder::project_with_alias; use datafusion_expr::logical_plan::{Filter, Subquery}; use datafusion_expr::Expr::Alias; -use sqlparser::ast::TimezoneInfo; +use sqlparser::ast::{TimezoneInfo, SetQuantifier}; use sqlparser::ast::{ BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg, FunctionArgExpr, Ident, Join, JoinConstraint, JoinOperator, ObjectName, @@ -419,8 +419,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { op, left, right, - all, + set_quantifier, } => { + let all = match set_quantifier { + SetQuantifier::All => true, + SetQuantifier::Distinct | SetQuantifier::None => false, + }; + let left_plan = self.set_expr_to_plan(*left, None, ctes, outer_query_schema)?; let right_plan = @@ -2491,6 +2496,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Value::SingleQuotedString(s) => s.to_string(), Value::Number(_, _) | Value::Boolean(_) => v.to_string(), Value::DoubleQuotedString(_) + | Value::UnQuotedString(_) | Value::EscapedStringLiteral(_) | Value::NationalStringLiteral(_) | Value::HexStringLiteral(_) @@ -2670,13 +2676,18 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { fn convert_data_type(&self, sql_type: &SQLDataType) -> Result { match sql_type { - SQLDataType::Array(inner_sql_type) => { + SQLDataType::Array(Some(inner_sql_type)) => { let data_type = self.convert_simple_data_type(inner_sql_type)?; Ok(DataType::List(Box::new(Field::new( "field", data_type, true, )))) } + SQLDataType::Array(None) => { + Err(DataFusionError::NotImplemented( + "Arrays with unspecified type is not supported".to_string() + )) + } other => self.convert_simple_data_type(other), } } @@ -2700,7 +2711,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Varchar(_) | SQLDataType::Text | SQLDataType::String => Ok(DataType::Utf8), - SQLDataType::Timestamp(tz_info) => { + SQLDataType::Timestamp(None, tz_info) => { let tz = if matches!(tz_info, TimezoneInfo::Tz) || matches!(tz_info, TimezoneInfo::WithTimeZone) { @@ -2730,7 +2741,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz)) } SQLDataType::Date => Ok(DataType::Date32), - SQLDataType::Time(tz_info) => { + SQLDataType::Time(None, tz_info) => { if matches!(tz_info, TimezoneInfo::None) || matches!(tz_info, TimezoneInfo::WithoutTimeZone) { @@ -2762,10 +2773,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Binary(_) | SQLDataType::Varbinary(_) | SQLDataType::Blob(_) - | SQLDataType::Datetime + | SQLDataType::Datetime(_) | SQLDataType::Interval | SQLDataType::Regclass - | SQLDataType::Custom(_) + | SQLDataType::Custom(_, _) | SQLDataType::Array(_) | SQLDataType::Enum(_) | SQLDataType::Set(_) @@ -2775,7 +2786,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::CharacterVarying(_) | SQLDataType::CharVarying(_) | SQLDataType::CharacterLargeObject(_) - | SQLDataType::CharLargeObject(_) + | SQLDataType::CharLargeObject(_) + // precision is not supported + | SQLDataType::Timestamp(Some(_), _) + // precision is not supported + | SQLDataType::Time(Some(_), _) + | SQLDataType::Numeric(_) + | SQLDataType::Dec(_) | SQLDataType::Clob(_) => Err(DataFusionError::NotImplemented(format!( "Unsupported SQL type {:?}", sql_type From c4db2350a5dd632b7347e0da23169d35908b9891 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 15 Nov 2022 15:13:04 -0500 Subject: [PATCH 3/6] Update datafusion-cli lock --- datafusion-cli/Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 9bc0aef84c682..d96559bcaab9f 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -2132,9 +2132,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86be66ea0b2b22749cfa157d16e2e84bf793e626a3375f4d378dc289fa03affb" +checksum = "aba319938d4bfe250a769ac88278b629701024fe16f34257f9563bc628081970" dependencies = [ "log", ] From 94aa98d49ba4a0cbc56f35bc576f8740ef841d9e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 15 Nov 2022 17:30:47 -0500 Subject: [PATCH 4/6] fix up some tests --- datafusion/core/tests/sql/timestamp.rs | 4 ++-- datafusion/sql/src/planner.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index dec04f6532d28..cb70ab2d0adc1 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -1579,7 +1579,7 @@ async fn test_cast_to_time_with_time_zone_should_not_work() -> Result<()> { assert_eq!( results.to_string(), - "This feature is not implemented: Unsupported SQL type Time(WithTimeZone)" + "This feature is not implemented: Unsupported SQL type Time(None, WithTimeZone)" ); Ok(()) @@ -1612,7 +1612,7 @@ async fn test_cast_to_timetz_should_not_work() -> Result<()> { assert_eq!( results.to_string(), - "This feature is not implemented: Unsupported SQL type Time(Tz)" + "This feature is not implemented: Unsupported SQL type Time(None, Tz)" ); Ok(()) } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index e22516f1c0171..3d340008d6170 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -2754,7 +2754,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ))) } } - SQLDataType::Decimal(exact_number_info) => { + SQLDataType::Numeric(exact_number_info) + |SQLDataType::Decimal(exact_number_info) => { let (precision, scale) = match *exact_number_info { ExactNumberInfo::None => (None, None), ExactNumberInfo::Precision(precision) => (Some(precision), None), @@ -2791,7 +2792,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Timestamp(Some(_), _) // precision is not supported | SQLDataType::Time(Some(_), _) - | SQLDataType::Numeric(_) | SQLDataType::Dec(_) | SQLDataType::Clob(_) => Err(DataFusionError::NotImplemented(format!( "Unsupported SQL type {:?}", From f39a05fcf568b50ec1fb40f418503faeb2ad2ebd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 16 Nov 2022 20:43:28 -0500 Subject: [PATCH 5/6] re-implement handling for array_agg --- datafusion/sql/src/planner.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 3d340008d6170..2c61a588c5924 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -55,7 +55,7 @@ use datafusion_expr::logical_plan::builder::project_with_alias; use datafusion_expr::logical_plan::{Filter, Subquery}; use datafusion_expr::Expr::Alias; -use sqlparser::ast::{TimezoneInfo, SetQuantifier}; +use sqlparser::ast::{TimezoneInfo, SetQuantifier, ArrayAgg}; use sqlparser::ast::{ BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg, FunctionArgExpr, Ident, Join, JoinConstraint, JoinOperator, ObjectName, @@ -2284,6 +2284,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Subquery(subquery) => self.parse_scalar_subquery(&subquery, schema, ctes), + SQLExpr::ArrayAgg(array_agg) => self.parse_array_agg(array_agg, schema, ctes), + _ => Err(DataFusionError::NotImplemented(format!( "Unsupported ast node in sqltorel: {:?}", sql @@ -2346,6 +2348,30 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { })) } + + fn parse_array_agg( + &self, + array_agg: ArrayAgg, + input_schema: &DFSchema, + ctes: &mut HashMap, + ) -> Result { + // Some dialects have special syntax for array_agg. DataFusion only supports it like a function. + let ArrayAgg { distinct, expr, order_by, limit, within_group } = array_agg; + + // TODO: order_by, limit or within_group throw not supported + + let args = vec![self.sql_expr_to_logical_expr(*expr, input_schema, ctes)?]; + // next, aggregate built-ins + let fun = AggregateFunction::ArrayAgg; + + Ok(Expr::AggregateFunction { + fun, + distinct, + args, + filter: None, + }) + } + fn function_args_to_expr( &self, args: Vec, From 5aee2909796a3c129c8298613ab5b4cca1a7ea28 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 16 Nov 2022 20:52:57 -0500 Subject: [PATCH 6/6] add test --- datafusion/core/tests/sql/aggregates.rs | 32 ++++++++++++++++++ datafusion/sql/src/planner.rs | 43 ++++++++++++++++++------- 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 4b8a158fb4ff7..66c3e6b50780e 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -1317,6 +1317,38 @@ async fn csv_query_array_agg_with_overflow() -> Result<()> { Ok(()) } +#[tokio::test] +async fn csv_query_array_agg_unsupported() -> Result<()> { + let ctx = SessionContext::new(); + register_aggregate_csv(&ctx).await?; + + let results = plan_and_collect( + &ctx, + "SELECT array_agg(c13 ORDER BY c1) FROM aggregate_test_100", + ) + .await + .unwrap_err(); + + assert_eq!( + results.to_string(), + "This feature is not implemented: ORDER BY not supported in ARRAY_AGG: c1" + ); + + let results = plan_and_collect( + &ctx, + "SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100", + ) + .await + .unwrap_err(); + + assert_eq!( + results.to_string(), + "This feature is not implemented: LIMIT not supported in ARRAY_AGG: 1" + ); + + Ok(()) +} + #[tokio::test] async fn csv_query_array_cube_agg_with_overflow() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 2c61a588c5924..ff4b3dafba3e6 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -55,7 +55,7 @@ use datafusion_expr::logical_plan::builder::project_with_alias; use datafusion_expr::logical_plan::{Filter, Subquery}; use datafusion_expr::Expr::Alias; -use sqlparser::ast::{TimezoneInfo, SetQuantifier, ArrayAgg}; +use sqlparser::ast::{ArrayAgg, SetQuantifier, TimezoneInfo}; use sqlparser::ast::{ BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg, FunctionArgExpr, Ident, Join, JoinConstraint, JoinOperator, ObjectName, @@ -2348,7 +2348,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { })) } - fn parse_array_agg( &self, array_agg: ArrayAgg, @@ -2356,13 +2355,37 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ctes: &mut HashMap, ) -> Result { // Some dialects have special syntax for array_agg. DataFusion only supports it like a function. - let ArrayAgg { distinct, expr, order_by, limit, within_group } = array_agg; + let ArrayAgg { + distinct, + expr, + order_by, + limit, + within_group, + } = array_agg; + + if let Some(order_by) = order_by { + return Err(DataFusionError::NotImplemented(format!( + "ORDER BY not supported in ARRAY_AGG: {}", + order_by + ))); + } + + if let Some(limit) = limit { + return Err(DataFusionError::NotImplemented(format!( + "LIMIT not supported in ARRAY_AGG: {}", + limit + ))); + } - // TODO: order_by, limit or within_group throw not supported + if within_group { + return Err(DataFusionError::NotImplemented( + "WITHIN GROUP not supported in ARRAY_AGG".to_string(), + )); + } let args = vec![self.sql_expr_to_logical_expr(*expr, input_schema, ctes)?]; // next, aggregate built-ins - let fun = AggregateFunction::ArrayAgg; + let fun = AggregateFunction::ArrayAgg; Ok(Expr::AggregateFunction { fun, @@ -2522,7 +2545,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Value::SingleQuotedString(s) => s.to_string(), Value::Number(_, _) | Value::Boolean(_) => v.to_string(), Value::DoubleQuotedString(_) - | Value::UnQuotedString(_) + | Value::UnQuotedString(_) | Value::EscapedStringLiteral(_) | Value::NationalStringLiteral(_) | Value::HexStringLiteral(_) @@ -2709,11 +2732,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { "field", data_type, true, )))) } - SQLDataType::Array(None) => { - Err(DataFusionError::NotImplemented( - "Arrays with unspecified type is not supported".to_string() - )) - } + SQLDataType::Array(None) => Err(DataFusionError::NotImplemented( + "Arrays with unspecified type is not supported".to_string(), + )), other => self.convert_simple_data_type(other), } }