Skip to content
4 changes: 2 additions & 2 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion datafusion-cli/src/object_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ mod tests {
let res = provider.get_by_url(&url);
let msg = match res {
Err(e) => format!("{}", e),
Ok(_) => "".to_string()
Ok(_) => "".to_string(),
};
assert_eq!("".to_string(), msg); // Fail with error message
env::remove_var("AWS_REGION");
Expand Down
2 changes: 1 addition & 1 deletion datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ cranelift-module = { version = "0.89.0", optional = true }
object_store = { version = "0.5.0", default-features = false, optional = true }
parquet = { version = "27.0.0", default-features = false, optional = true }
pyo3 = { version = "0.17.1", optional = true }
sqlparser = "0.26"
sqlparser = "0.27"
2 changes: 1 addition & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ pyo3 = { version = "0.17.1", optional = true }
rand = "0.8"
rayon = { version = "1.5", optional = true }
smallvec = { version = "1.6", features = ["union"] }
sqlparser = "0.26"
sqlparser = "0.27"
tempfile = "3"
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
tokio-stream = "0.1"
Expand Down
32 changes: 32 additions & 0 deletions datafusion/core/tests/sql/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,38 @@ async fn csv_query_array_agg_with_overflow() -> Result<()> {
Ok(())
}

#[tokio::test]
async fn csv_query_array_agg_unsupported() -> Result<()> {
let ctx = SessionContext::new();
register_aggregate_csv(&ctx).await?;

let results = plan_and_collect(
&ctx,
"SELECT array_agg(c13 ORDER BY c1) FROM aggregate_test_100",

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is newly added syntax support in apache/datafusion-sqlparser-rs#662

)
.await
.unwrap_err();

assert_eq!(
results.to_string(),
"This feature is not implemented: ORDER BY not supported in ARRAY_AGG: c1"
);

let results = plan_and_collect(
&ctx,
"SELECT array_agg(c13 LIMIT 1) FROM aggregate_test_100",
)
.await
.unwrap_err();

assert_eq!(
results.to_string(),
"This feature is not implemented: LIMIT not supported in ARRAY_AGG: 1"
);

Ok(())
}

#[tokio::test]
async fn csv_query_array_cube_agg_with_overflow() -> Result<()> {
let ctx = SessionContext::new();
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/sql/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1579,7 +1579,7 @@ async fn test_cast_to_time_with_time_zone_should_not_work() -> Result<()> {

assert_eq!(
results.to_string(),
"This feature is not implemented: Unsupported SQL type Time(WithTimeZone)"
"This feature is not implemented: Unsupported SQL type Time(None, WithTimeZone)"
);

Ok(())
Expand Down Expand Up @@ -1612,7 +1612,7 @@ async fn test_cast_to_timetz_should_not_work() -> Result<()> {

assert_eq!(
results.to_string(),
"This feature is not implemented: Unsupported SQL type Time(Tz)"
"This feature is not implemented: Unsupported SQL type Time(None, Tz)"
);
Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"]
arrow = { version = "27.0.0", default-features = false }
datafusion-common = { path = "../common", version = "14.0.0" }
log = "^0.4"
sqlparser = "0.26"
sqlparser = "0.27"
2 changes: 1 addition & 1 deletion datafusion/sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ unicode_expressions = []
arrow = { version = "27.0.0", default-features = false }
datafusion-common = { path = "../common", version = "14.0.0" }
datafusion-expr = { path = "../expr", version = "14.0.0" }
sqlparser = "0.26"
sqlparser = "0.27"
82 changes: 73 additions & 9 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use std::sync::Arc;
use std::{convert::TryInto, vec};

use arrow::datatypes::*;
use sqlparser::ast::ExactNumberInfo;
use sqlparser::ast::TimezoneInfo;
use sqlparser::ast::{ArrayAgg, ExactNumberInfo, SetQuantifier};
use sqlparser::ast::{
BinaryOperator, DataType as SQLDataType, DateTimeField, Expr as SQLExpr, FunctionArg,
FunctionArgExpr, Ident, Join, JoinConstraint, JoinOperator, ObjectName,
Expand Down Expand Up @@ -460,8 +460,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
op,
left,
right,
all,
set_quantifier,
} => {
let all = match set_quantifier {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sqlparser now distinguishes between ALL , DISTINCT and not specified (previously, not specified left all=false)

SetQuantifier::All => true,
SetQuantifier::Distinct | SetQuantifier::None => false,
};

let left_plan =
self.set_expr_to_plan(*left, None, ctes, outer_query_schema)?;
let right_plan =
Expand Down Expand Up @@ -2320,6 +2325,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

SQLExpr::Subquery(subquery) => self.parse_scalar_subquery(&subquery, schema, ctes),

SQLExpr::ArrayAgg(array_agg) => self.parse_array_agg(array_agg, schema, ctes),

_ => Err(DataFusionError::NotImplemented(format!(
"Unsupported ast node in sqltorel: {:?}",
sql
Expand Down Expand Up @@ -2382,6 +2389,53 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}))
}

fn parse_array_agg(
&self,
array_agg: ArrayAgg,
input_schema: &DFSchema,
ctes: &mut HashMap<String, LogicalPlan>,
) -> Result<Expr> {
// Some dialects have special syntax for array_agg. DataFusion only supports it like a function.
let ArrayAgg {
distinct,
expr,
order_by,
limit,
within_group,
} = array_agg;

if let Some(order_by) = order_by {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return Err(DataFusionError::NotImplemented(format!(
"ORDER BY not supported in ARRAY_AGG: {}",
order_by
)));
}

if let Some(limit) = limit {
return Err(DataFusionError::NotImplemented(format!(
"LIMIT not supported in ARRAY_AGG: {}",
limit
)));
}

if within_group {
return Err(DataFusionError::NotImplemented(
"WITHIN GROUP not supported in ARRAY_AGG".to_string(),
));
}

let args = vec![self.sql_expr_to_logical_expr(*expr, input_schema, ctes)?];
// next, aggregate built-ins
let fun = AggregateFunction::ArrayAgg;

Ok(Expr::AggregateFunction {
fun,
distinct,
args,
filter: None,
})
}

fn function_args_to_expr(
&self,
args: Vec<FunctionArg>,
Expand Down Expand Up @@ -2532,6 +2586,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
Value::SingleQuotedString(s) => s.to_string(),
Value::Number(_, _) | Value::Boolean(_) => v.to_string(),
Value::DoubleQuotedString(_)
| Value::UnQuotedString(_)
| Value::EscapedStringLiteral(_)
| Value::NationalStringLiteral(_)
| Value::HexStringLiteral(_)
Expand Down Expand Up @@ -2756,13 +2811,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

fn convert_data_type(&self, sql_type: &SQLDataType) -> Result<DataType> {
match sql_type {
SQLDataType::Array(inner_sql_type) => {
SQLDataType::Array(Some(inner_sql_type)) => {
let data_type = self.convert_simple_data_type(inner_sql_type)?;

Ok(DataType::List(Box::new(Field::new(
"field", data_type, true,
))))
}
SQLDataType::Array(None) => Err(DataFusionError::NotImplemented(
"Arrays with unspecified type is not supported".to_string(),
)),
other => self.convert_simple_data_type(other),
}
}
Expand All @@ -2786,7 +2844,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
| SQLDataType::Varchar(_)
| SQLDataType::Text
| SQLDataType::String => Ok(DataType::Utf8),
SQLDataType::Timestamp(tz_info) => {
SQLDataType::Timestamp(None, tz_info) => {
let tz = if matches!(tz_info, TimezoneInfo::Tz)
|| matches!(tz_info, TimezoneInfo::WithTimeZone)
{
Expand Down Expand Up @@ -2816,7 +2874,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
Ok(DataType::Timestamp(TimeUnit::Nanosecond, tz))
}
SQLDataType::Date => Ok(DataType::Date32),
SQLDataType::Time(tz_info) => {
SQLDataType::Time(None, tz_info) => {
if matches!(tz_info, TimezoneInfo::None)
|| matches!(tz_info, TimezoneInfo::WithoutTimeZone)
{
Expand All @@ -2829,7 +2887,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
)))
}
}
SQLDataType::Decimal(exact_number_info) => {
SQLDataType::Numeric(exact_number_info)
|SQLDataType::Decimal(exact_number_info) => {
let (precision, scale) = match *exact_number_info {
ExactNumberInfo::None => (None, None),
ExactNumberInfo::Precision(precision) => (Some(precision), None),
Expand All @@ -2848,10 +2907,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
| SQLDataType::Binary(_)
| SQLDataType::Varbinary(_)
| SQLDataType::Blob(_)
| SQLDataType::Datetime
| SQLDataType::Datetime(_)
| SQLDataType::Interval
| SQLDataType::Regclass
| SQLDataType::Custom(_)
| SQLDataType::Custom(_, _)
| SQLDataType::Array(_)
| SQLDataType::Enum(_)
| SQLDataType::Set(_)
Expand All @@ -2861,7 +2920,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
| SQLDataType::CharacterVarying(_)
| SQLDataType::CharVarying(_)
| SQLDataType::CharacterLargeObject(_)
| SQLDataType::CharLargeObject(_)
| SQLDataType::CharLargeObject(_)
// precision is not supported
| SQLDataType::Timestamp(Some(_), _)
// precision is not supported
| SQLDataType::Time(Some(_), _)
| SQLDataType::Dec(_)
| SQLDataType::Clob(_) => Err(DataFusionError::NotImplemented(format!(
"Unsupported SQL type {:?}",
sql_type
Expand Down