From e0e3de5c19dd6de448a82e152d6c7246a7415df5 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Sun, 25 Sep 2022 18:08:30 +0800 Subject: [PATCH 1/4] simpl when seprartor is null Signed-off-by: remzi <13716567376yh@gmail.com> --- .../optimizer/src/simplify_expressions.rs | 56 ++++++++++++++++++- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index 6ab0eb87c0a6a..f03af878e03a2 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -26,12 +26,11 @@ use arrow::record_batch::RecordBatch; use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue}; use datafusion_expr::{ expr_fn::{and, or}, - expr_rewriter::RewriteRecursion, - expr_rewriter::{ExprRewritable, ExprRewriter}, + expr_rewriter::{ExprRewritable, ExprRewriter, RewriteRecursion}, lit, logical_plan::LogicalPlan, utils::from_plan, - ColumnarValue, Expr, ExprSchemable, Operator, Volatility, + BuiltinScalarFunction, ColumnarValue, Expr, ExprSchemable, Operator, Volatility, }; use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps}; use std::sync::Arc; @@ -816,6 +815,23 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { out_expr.rewrite(self)? } + // concat_ws + ScalarFunction { + fun: BuiltinScalarFunction::ConcatWithSeparator, + args, + } => { + match &args[..] { + [Expr::Literal(sp), ..] if sp.is_null() => { + Expr::Literal(ScalarValue::Utf8(None)) + } + // TODO https://github.com/apache/arrow-datafusion/issues/3599 + _ => ScalarFunction { + fun: BuiltinScalarFunction::ConcatWithSeparator, + args, + }, + } + } + // // Rules for Between // @@ -1132,6 +1148,40 @@ mod tests { assert_eq!(simplify(expr_plus), lit(2_i64)); } + #[test] + fn test_simplify_concat_ws_null_separator() { + fn build_concat_ws_expr(args: &[Expr]) -> Expr { + Expr::ScalarFunction { + fun: BuiltinScalarFunction::ConcatWithSeparator, + args: args.to_vec(), + } + } + + let null = Expr::Literal(ScalarValue::Utf8(None)); + // simple test + { + let expr = build_concat_ws_expr(&[null.clone(), col("c1"), col("c2")]); + assert_eq!(simplify(expr), null); + } + + // nested test + { + let sub_expr = build_concat_ws_expr(&[null.clone(), col("c1"), col("c2")]); + let expr = build_concat_ws_expr(&[lit("|"), sub_expr, col("c3")]); + assert_eq!( + simplify(expr), + build_concat_ws_expr(&[lit("|"), null.clone(), col("c3")]) + ); + } + + // nested test -- separator + { + let sub_expr = build_concat_ws_expr(&[null.clone(), col("c1"), col("c2")]); + let expr = build_concat_ws_expr(&[sub_expr, col("c3"), col("c4")]); + assert_eq!(simplify(expr), null.clone()); + } + } + // ------------------------------ // --- ConstEvaluator tests ----- // ------------------------------ From b055bf56d6680ed4fc22ad1dfe78bce3d2ed1015 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Sun, 25 Sep 2022 18:32:55 +0800 Subject: [PATCH 2/4] fix clippy Signed-off-by: remzi <13716567376yh@gmail.com> --- datafusion/optimizer/src/simplify_expressions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index f03af878e03a2..01b0058f72fcb 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -1178,7 +1178,7 @@ mod tests { { let sub_expr = build_concat_ws_expr(&[null.clone(), col("c1"), col("c2")]); let expr = build_concat_ws_expr(&[sub_expr, col("c3"), col("c4")]); - assert_eq!(simplify(expr), null.clone()); + assert_eq!(simplify(expr), null); } } From add7e10cd41e0466c1c56cc7a094dddccd046913 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Sun, 25 Sep 2022 20:56:30 +0800 Subject: [PATCH 3/4] check nulls in other positions are not simplified to scalar null Signed-off-by: remzi <13716567376yh@gmail.com> --- datafusion/optimizer/src/simplify_expressions.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index 01b0058f72fcb..2ecd7b09963b9 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -1164,6 +1164,13 @@ mod tests { assert_eq!(simplify(expr), null); } + // NULLs in other positions are not simplified. + { + let expr = build_concat_ws_expr(&[lit("|"), null.clone(), col("c2")]); + assert_eq!(simplify(expr.clone()), expr); + } + + // nested test { let sub_expr = build_concat_ws_expr(&[null.clone(), col("c1"), col("c2")]); From d14b01fb78f92d11067c908aefe134f2f657b7c0 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Sun, 25 Sep 2022 21:29:22 +0800 Subject: [PATCH 4/4] fmt Signed-off-by: remzi <13716567376yh@gmail.com> --- datafusion/optimizer/src/simplify_expressions.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index 2ecd7b09963b9..4aee2fd0378af 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -1166,11 +1166,10 @@ mod tests { // NULLs in other positions are not simplified. { - let expr = build_concat_ws_expr(&[lit("|"), null.clone(), col("c2")]); + let expr = build_concat_ws_expr(&[lit("|"), null.clone(), col("c2")]); assert_eq!(simplify(expr.clone()), expr); } - // nested test { let sub_expr = build_concat_ws_expr(&[null.clone(), col("c1"), col("c2")]);