diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index b42c2039054c2..efbbf762b996d 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -2207,9 +2207,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "sqlparser" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86be66ea0b2b22749cfa157d16e2e84bf793e626a3375f4d378dc289fa03affb" +checksum = "aba319938d4bfe250a769ac88278b629701024fe16f34257f9563bc628081970" dependencies = [ "log", ] diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index 0982f3ff9ac0e..64c48840eefb5 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -145,7 +145,7 @@ mod tests { let res = provider.get_by_url(&url); let msg = match res { Err(e) => format!("{}", e), - Ok(_) => "".to_string() + Ok(_) => "".to_string(), }; assert_eq!("".to_string(), msg); // Fail with error message env::remove_var("AWS_REGION"); diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 69d01b3018a64..7284f3f370d44 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -46,4 +46,4 @@ cranelift-module = { version = "0.89.0", optional = true } object_store = { version = "0.5.0", default-features = false, optional = true } parquet = { version = "27.0.0", default-features = false, optional = true } pyo3 = { version = "0.17.1", optional = true } -sqlparser = "0.26" +sqlparser = "0.27" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index efd28ee9c5268..ae073c7c0e83b 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -89,7 +89,7 @@ pyo3 = { version = "0.17.1", optional = true } rand = "0.8" rayon = { version = "1.5", optional = true } smallvec = { version = "1.6", features = ["union"] } -sqlparser = "0.26" +sqlparser = "0.27" tempfile = "3" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } tokio-stream = "0.1" diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index b4c25fd9e3d77..91f75c0eb32c0 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -1318,6 +1318,38 @@ async fn csv_query_array_agg_with_overflow() -> Result<()> { Ok(()) } +#[tokio::test] +async fn csv_query_array_agg_unsupported() -> Result<()> { + let ctx = SessionContext::new(); + register_aggregate_csv(&ctx).await?; + + let results = plan_and_collect( + &ctx, + "SELECT array_agg(c13 ORDER BY c1) FROM aggregate_test_100", + ) + .await + .unwrap_err(); + + assert_eq!( + results.to_string(), + "This feature is not implemented: ORDER BY not supported in ARRAY_AGG: c1" + ); + + let results = plan_and_collect( + &ctx, + "SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100", + ) + .await + .unwrap_err(); + + assert_eq!( + results.to_string(), + "This feature is not implemented: LIMIT not supported in ARRAY_AGG: 1" + ); + + Ok(()) +} + #[tokio::test] async fn csv_query_array_cube_agg_with_overflow() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index dec04f6532d28..cb70ab2d0adc1 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -1579,7 +1579,7 @@ async fn test_cast_to_time_with_time_zone_should_not_work() -> Result<()> { assert_eq!( results.to_string(), - "This feature is not implemented: Unsupported SQL type Time(WithTimeZone)" + "This feature is not implemented: Unsupported SQL type Time(None, WithTimeZone)" ); Ok(()) @@ -1612,7 +1612,7 @@ async fn test_cast_to_timetz_should_not_work() -> Result<()> { assert_eq!( results.to_string(), - "This feature is not implemented: Unsupported SQL type Time(Tz)" + "This feature is not implemented: Unsupported SQL type Time(None, Tz)" ); Ok(()) } diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 6e6fd13d56052..4527b2bd3f29a 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -39,4 +39,4 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] arrow = { version = "27.0.0", default-features = false } datafusion-common = { path = "../common", version = "14.0.0" } log = "^0.4" -sqlparser = "0.26" +sqlparser = "0.27" diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 2c712ba86abc6..ef421702168c8 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -40,4 +40,4 @@ unicode_expressions = [] arrow = { version = "27.0.0", default-features = false } datafusion-common = { path = "../common", version = "14.0.0" } datafusion-expr = { path = "../expr", version = "14.0.0" } -sqlparser = "0.26" +sqlparser = "0.27" diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index d2125274430cb..f3cd1674db7d5 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -23,8 +23,8 @@ use std::sync::Arc; use std::{convert::TryInto, vec}; use arrow::datatypes::*; -use sqlparser::ast::ExactNumberInfo; use sqlparser::ast::TimezoneInfo; +use sqlparser::ast::{ArrayAgg, ExactNumberInfo, SetQuantifier}; use sqlparser::ast::{ BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg, FunctionArgExpr, Ident, Join, JoinConstraint, JoinOperator, ObjectName, @@ -460,8 +460,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { op, left, right, - all, + set_quantifier, } => { + let all = match set_quantifier { + SetQuantifier::All => true, + SetQuantifier::Distinct | SetQuantifier::None => false, + }; + let left_plan = self.set_expr_to_plan(*left, None, ctes, outer_query_schema)?; let right_plan = @@ -2320,6 +2325,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Subquery(subquery) => self.parse_scalar_subquery(&subquery, schema, ctes), + SQLExpr::ArrayAgg(array_agg) => self.parse_array_agg(array_agg, schema, ctes), + _ => Err(DataFusionError::NotImplemented(format!( "Unsupported ast node in sqltorel: {:?}", sql @@ -2382,6 +2389,53 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { })) } + fn parse_array_agg( + &self, + array_agg: ArrayAgg, + input_schema: &DFSchema, + ctes: &mut HashMap, + ) -> Result { + // Some dialects have special syntax for array_agg. DataFusion only supports it like a function. + let ArrayAgg { + distinct, + expr, + order_by, + limit, + within_group, + } = array_agg; + + if let Some(order_by) = order_by { + return Err(DataFusionError::NotImplemented(format!( + "ORDER BY not supported in ARRAY_AGG: {}", + order_by + ))); + } + + if let Some(limit) = limit { + return Err(DataFusionError::NotImplemented(format!( + "LIMIT not supported in ARRAY_AGG: {}", + limit + ))); + } + + if within_group { + return Err(DataFusionError::NotImplemented( + "WITHIN GROUP not supported in ARRAY_AGG".to_string(), + )); + } + + let args = vec![self.sql_expr_to_logical_expr(*expr, input_schema, ctes)?]; + // next, aggregate built-ins + let fun = AggregateFunction::ArrayAgg; + + Ok(Expr::AggregateFunction { + fun, + distinct, + args, + filter: None, + }) + } + fn function_args_to_expr( &self, args: Vec, @@ -2532,6 +2586,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Value::SingleQuotedString(s) => s.to_string(), Value::Number(_, _) | Value::Boolean(_) => v.to_string(), Value::DoubleQuotedString(_) + | Value::UnQuotedString(_) | Value::EscapedStringLiteral(_) | Value::NationalStringLiteral(_) | Value::HexStringLiteral(_) @@ -2756,13 +2811,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { fn convert_data_type(&self, sql_type: &SQLDataType) -> Result { match sql_type { - SQLDataType::Array(inner_sql_type) => { + SQLDataType::Array(Some(inner_sql_type)) => { let data_type = self.convert_simple_data_type(inner_sql_type)?; Ok(DataType::List(Box::new(Field::new( "field", data_type, true, )))) } + SQLDataType::Array(None) => Err(DataFusionError::NotImplemented( + "Arrays with unspecified type is not supported".to_string(), + )), other => self.convert_simple_data_type(other), } } @@ -2786,7 +2844,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Varchar(_) | SQLDataType::Text | SQLDataType::String => Ok(DataType::Utf8), - SQLDataType::Timestamp(tz_info) => { + SQLDataType::Timestamp(None, tz_info) => { let tz = if matches!(tz_info, TimezoneInfo::Tz) || matches!(tz_info, TimezoneInfo::WithTimeZone) { @@ -2816,7 +2874,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz)) } SQLDataType::Date => Ok(DataType::Date32), - SQLDataType::Time(tz_info) => { + SQLDataType::Time(None, tz_info) => { if matches!(tz_info, TimezoneInfo::None) || matches!(tz_info, TimezoneInfo::WithoutTimeZone) { @@ -2829,7 +2887,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ))) } } - SQLDataType::Decimal(exact_number_info) => { + SQLDataType::Numeric(exact_number_info) + |SQLDataType::Decimal(exact_number_info) => { let (precision, scale) = match *exact_number_info { ExactNumberInfo::None => (None, None), ExactNumberInfo::Precision(precision) => (Some(precision), None), @@ -2848,10 +2907,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Binary(_) | SQLDataType::Varbinary(_) | SQLDataType::Blob(_) - | SQLDataType::Datetime + | SQLDataType::Datetime(_) | SQLDataType::Interval | SQLDataType::Regclass - | SQLDataType::Custom(_) + | SQLDataType::Custom(_, _) | SQLDataType::Array(_) | SQLDataType::Enum(_) | SQLDataType::Set(_) @@ -2861,7 +2920,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::CharacterVarying(_) | SQLDataType::CharVarying(_) | SQLDataType::CharacterLargeObject(_) - | SQLDataType::CharLargeObject(_) + | SQLDataType::CharLargeObject(_) + // precision is not supported + | SQLDataType::Timestamp(Some(_), _) + // precision is not supported + | SQLDataType::Time(Some(_), _) + | SQLDataType::Dec(_) | SQLDataType::Clob(_) => Err(DataFusionError::NotImplemented(format!( "Unsupported SQL type {:?}", sql_type