From 7bbd7702d42c7c818d558db5c0c5d77e7a10e56a Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 30 Jul 2024 21:50:56 +0800 Subject: [PATCH 1/6] use quoted_flat_name to format column --- datafusion/common/src/column.rs | 10 +++++- datafusion/expr/src/logical_plan/plan.rs | 41 +++++++++++++++++++++++- 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index 2e2bfff403403..e521f00ba5dcc 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -372,7 +372,7 @@ impl FromStr for Column { impl fmt::Display for Column { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.flat_name()) + write!(f, "{}", self.quoted_flat_name()) } } @@ -455,4 +455,12 @@ mod tests { Ok(()) } + + #[test] + fn test_display() { + let col = Column::new(Some("t1"), "a"); + assert_eq!(col.to_string(), "t1.a"); + let col = Column::new(TableReference::none(), "t1.a"); + assert_eq!(col.to_string(), r#""t1.a""#); + } } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 54c857a2b7013..505b00277de8b 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2906,7 +2906,7 @@ mod tests { use super::*; use crate::builder::LogicalTableSource; use crate::logical_plan::table_scan; - use crate::{col, exists, in_subquery, lit, placeholder, GroupingSet}; + use crate::{col, exists, ident, in_subquery, lit, placeholder, GroupingSet}; use datafusion_common::tree_node::{TransformedResult, TreeNodeVisitor}; use datafusion_common::{not_impl_err, Constraint, ScalarValue}; @@ -3512,4 +3512,43 @@ digraph { let actual = format!("{}", plan.display_indent()); assert_eq!(expected.to_string(), actual) } + + #[test] + fn test_display_unqualifed_ident() { + let schema = Schema::new(vec![ + Field::new("max(id)", DataType::Int32, false), + Field::new("state", DataType::Utf8, false), + ]); + + let plan = table_scan(Some("t"), &schema, None) + .unwrap() + .filter(col("state").eq(lit("CO"))) + .unwrap() + .project(vec![col("max(id)")]) + .unwrap() + .build() + .unwrap(); + + let expected = "Projection: t.\"max(id)\"\n Filter: t.state = Utf8(\"CO\")\n TableScan: t"; + let actual = format!("{}", plan.display_indent()); + assert_eq!(expected.to_string(), actual); + + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("t.id", DataType::Int32, false), + ]); + + let plan = table_scan(Some("t"), &schema, None) + .unwrap() + .build() + .unwrap(); + let projection = LogicalPlan::Projection( + Projection::try_new(vec![col("t.id"), ident("t.id")], Arc::new(plan)) + .unwrap(), + ); + + let expected = "Projection: t.id, \"t.id\"\n TableScan: t"; + let actual = format!("{}", projection.display_indent()); + assert_eq!(expected.to_string(), actual); + } } From b191f3c314f0d02c0fbcf0ed775bc58d16f7bc30 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Thu, 1 Aug 2024 00:12:10 +0800 Subject: [PATCH 2/6] fix window row_number --- datafusion/expr/src/logical_plan/plan.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 505b00277de8b..7e5b54f7833a4 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2752,11 +2752,9 @@ fn calc_func_dependencies_for_project( .iter() .filter_map(|expr| { let expr_name = match expr { - Expr::Alias(alias) => { - format!("{}", alias.expr) - } - _ => format!("{}", expr), - }; + Expr::Alias(alias) => alias.expr.display_name(), + _ => expr.display_name(), + }.ok()?; input_fields.iter().position(|item| *item == expr_name) }) .collect::>(); @@ -2912,6 +2910,7 @@ mod tests { use datafusion_common::{not_impl_err, Constraint, ScalarValue}; use crate::test::function_stub::count; + use crate::window_function::row_number; fn employee_schema() -> Schema { Schema::new(vec![ From 9fbc372734346918ac991bd1abf647e26f845e1b Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Thu, 1 Aug 2024 00:13:25 +0800 Subject: [PATCH 3/6] cargo fmt clippy --- datafusion/expr/src/logical_plan/plan.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 7e5b54f7833a4..84b8ea54a2875 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2754,7 +2754,8 @@ fn calc_func_dependencies_for_project( let expr_name = match expr { Expr::Alias(alias) => alias.expr.display_name(), _ => expr.display_name(), - }.ok()?; + } + .ok()?; input_fields.iter().position(|item| *item == expr_name) }) .collect::>(); From f96fcc59da49ba0fca09a545ed71844358e618c2 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Thu, 1 Aug 2024 00:24:36 +0800 Subject: [PATCH 4/6] fix cargo check --- datafusion/expr/src/logical_plan/plan.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 84b8ea54a2875..ce1f8e7d77902 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2911,7 +2911,6 @@ mod tests { use datafusion_common::{not_impl_err, Constraint, ScalarValue}; use crate::test::function_stub::count; - use crate::window_function::row_number; fn employee_schema() -> Schema { Schema::new(vec![ From 94d55275ae2b22bb7b8430b516041c2c2d5d25fc Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 3 Aug 2024 18:24:34 +0800 Subject: [PATCH 5/6] quote the identifier if contains dot --- datafusion/common/src/column.rs | 49 +++++++++++++++++++++++- datafusion/expr/src/logical_plan/plan.rs | 3 +- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index e521f00ba5dcc..4f5c6b9dada89 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -18,6 +18,7 @@ //! Column use arrow_schema::{Field, FieldRef}; +use std::borrow::Cow; use crate::error::_schema_err; use crate::utils::{parse_identifiers_normalized, quote_identifier}; @@ -156,6 +157,17 @@ impl Column { } } + fn quoted_flat_name_if_contain_dot(&self) -> String { + match &self.relation { + Some(r) => format!( + "{}.{}", + table_reference_to_quoted_string(r), + quoted_if_contain_dot(&self.name) + ), + None => quoted_if_contain_dot(&self.name).to_string(), + } + } + /// Qualify column if not done yet. /// /// If this column already has a [relation](Self::relation), it will be returned as is and the given parameters are @@ -328,6 +340,37 @@ impl Column { } } +fn quoted_if_contain_dot(s: &str) -> Cow { + if s.contains(".") { + Cow::Owned(format!("\"{}\"", s.replace('"', "\"\""))) + } else { + Cow::Borrowed(s) + } +} + +fn table_reference_to_quoted_string(table_ref: &TableReference) -> String { + match table_ref { + TableReference::Bare { table } => quoted_if_contain_dot(table).to_string(), + TableReference::Partial { schema, table } => { + format!( + "{}.{}", + quoted_if_contain_dot(schema), + quoted_if_contain_dot(table) + ) + } + TableReference::Full { + catalog, + schema, + table, + } => format!( + "{}.{}.{}", + quoted_if_contain_dot(catalog), + quoted_if_contain_dot(schema), + quoted_if_contain_dot(table) + ), + } +} + impl From<&str> for Column { fn from(c: &str) -> Self { Self::from_qualified_name(c) @@ -372,7 +415,7 @@ impl FromStr for Column { impl fmt::Display for Column { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.quoted_flat_name()) + write!(f, "{}", self.quoted_flat_name_if_contain_dot()) } } @@ -462,5 +505,9 @@ mod tests { assert_eq!(col.to_string(), "t1.a"); let col = Column::new(TableReference::none(), "t1.a"); assert_eq!(col.to_string(), r#""t1.a""#); + let col = Column::new(Some(TableReference::full("a.b", "c.d", "e.f")), "g.h"); + assert_eq!(col.to_string(), r#""a.b"."c.d"."e.f"."g.h""#); + let col = Column::new(TableReference::none(), "max(a)"); + assert_eq!(col.to_string(), "max(a)") } } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index ce1f8e7d77902..bb934f26bc5c2 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -3528,7 +3528,8 @@ digraph { .build() .unwrap(); - let expected = "Projection: t.\"max(id)\"\n Filter: t.state = Utf8(\"CO\")\n TableScan: t"; + let expected = + "Projection: t.max(id)\n Filter: t.state = Utf8(\"CO\")\n TableScan: t"; let actual = format!("{}", plan.display_indent()); assert_eq!(expected.to_string(), actual); From f6d6a6f2eeeaba93a1b78df900f54694c3cd216a Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 3 Aug 2024 18:47:54 +0800 Subject: [PATCH 6/6] fix tests --- .../tests/user_defined/user_defined_scalar_functions.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 9164e89de8f9a..1af16c173eb63 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -388,9 +388,9 @@ async fn udaf_as_window_func() -> Result<()> { context.register_udaf(my_acc); let sql = "SELECT a, MY_ACC(b) OVER(PARTITION BY a) FROM my_table"; - let expected = r#"Projection: my_table.a, my_acc(my_table.b) PARTITION BY [my_table.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - WindowAggr: windowExpr=[[my_acc(my_table.b) PARTITION BY [my_table.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] - TableScan: my_table"#; + let expected = "Projection: my_table.a, \"my_acc(my_table.b) PARTITION BY [my_table.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\"\ + \n WindowAggr: windowExpr=[[my_acc(my_table.b) PARTITION BY [my_table.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\ + \n TableScan: my_table"; let dataframe = context.sql(sql).await.unwrap(); assert_eq!(format!("{:?}", dataframe.logical_plan()), expected);