From 6853bc27abff40d70c6ab8da1e9c9c49815ea78f Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 16 Sep 2025 10:38:45 -0700 Subject: [PATCH 1/5] Extend case simplify expr --- .../simplify_expressions/expr_simplifier.rs | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 97dfc09c4f2a6..6856d9c4570c6 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1400,13 +1400,36 @@ impl TreeNodeRewriter for Simplifier<'_, S> { // // CASE WHEN true THEN A ... END --> A + // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END Expr::Case(Case { expr: None, mut when_then_expr, - else_expr: _, - }) if !when_then_expr.is_empty() && is_true(when_then_expr[0].0.as_ref()) => { - let (_, then_) = when_then_expr.swap_remove(0); - Transformed::yes(*then_) + else_expr, + // if let guard is not stabilited so we can't use it yet: https://github.com/rust-lang/rust/issues/51114 + // Once it's supported we can avoid searching through when_then_expr twice in the below .any() and .position() calls + // }) if let Some(i) = when_then_expr.iter().position(|(when, _)| is_true(when.as_ref())) => { + }) if when_then_expr.iter().any(|(when, _)| is_true(when.as_ref())) => { + if let Some(i) = when_then_expr.iter().position(|(when, _)| is_true(when.as_ref())) { + let (_, then_) = when_then_expr.swap_remove(i); + // CASE WHEN true THEN A ... END --> A + if i == 0 { + return Ok(Transformed::yes(*then_)); + } + // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END + let truncated_when_then_expr = when_then_expr[..i].to_vec(); + return Ok(Transformed::yes( + Expr::Case(Case { + expr: None, + when_then_expr: truncated_when_then_expr, + else_expr: Some(then_), + }) + )) + } + Transformed::no(Expr::Case(Case { + expr: None, + when_then_expr, + else_expr, + })) } // CASE From 613aa45088a5e81066b88948111e0a767b5e11eb Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 16 Sep 2025 10:39:43 -0700 Subject: [PATCH 2/5] Add tests --- .../simplify_expressions/expr_simplifier.rs | 78 ++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 6856d9c4570c6..343d57e0ea320 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -3586,7 +3586,7 @@ mod tests { } #[test] - fn simplify_expr_case_when_true() { + fn simplify_expr_case_when_first_true() { // CASE WHEN true THEN 1 ELSE x END --> 1 assert_eq!( simplify(Expr::Case(Case::new( @@ -3655,6 +3655,82 @@ mod tests { assert_eq!(simplify(expr.clone()), expr); } + #[test] + fn simplify_expr_case_when_any_true() { + // CASE WHEN x > 0 THEN a WHEN true THEN b ELSE c END --> CASE WHEN x > 0 THEN a ELSE b END + assert_eq!( + simplify(Expr::Case(Case::new( + None, + vec![ + (Box::new(col("x").gt(lit(0))), Box::new(col("a"))), + (Box::new(lit(true)), Box::new(col("b"))), + ], + Some(Box::new(col("c"))), + ))), + Expr::Case(Case::new( + None, + vec![(Box::new(col("x").gt(lit(0))), Box::new(col("a")))], + Some(Box::new(col("b"))), + )) + ); + + // CASE WHEN x > 0 THEN a WHEN y < 0 THEN b WHEN true THEN c WHEN z = 0 THEN d ELSE e END + // --> CASE WHEN x > 0 THEN a WHEN y < 0 THEN b ELSE c END + assert_eq!( + simplify(Expr::Case(Case::new( + None, + vec![ + (Box::new(col("x").gt(lit(0))), Box::new(col("a"))), + (Box::new(col("y").lt(lit(0))), Box::new(col("b"))), + (Box::new(lit(true)), Box::new(col("c"))), + (Box::new(col("z").eq(lit(0))), Box::new(col("d"))), + ], + Some(Box::new(col("e"))), + ))), + Expr::Case(Case::new( + None, + vec![ + (Box::new(col("x").gt(lit(0))), Box::new(col("a"))), + (Box::new(col("y").lt(lit(0))), Box::new(col("b"))), + ], + Some(Box::new(col("c"))), + )) + ); + + // CASE WHEN x > 0 THEN a WHEN y < 0 THEN b WHEN true THEN c END (no else) + // --> CASE WHEN x > 0 THEN a WHEN y < 0 THEN b ELSE c END + assert_eq!( + simplify(Expr::Case(Case::new( + None, + vec![ + (Box::new(col("x").gt(lit(0))), Box::new(col("a"))), + (Box::new(col("y").lt(lit(0))), Box::new(col("b"))), + (Box::new(lit(true)), Box::new(col("c"))), + ], + None, + ))), + Expr::Case(Case::new( + None, + vec![ + (Box::new(col("x").gt(lit(0))), Box::new(col("a"))), + (Box::new(col("y").lt(lit(0))), Box::new(col("b"))), + ], + Some(Box::new(col("c"))), + )) + ); + + // Negative test: CASE WHEN x > 0 THEN a WHEN y < 0 THEN b ELSE c END should not be simplified + let expr = Expr::Case(Case::new( + None, + vec![ + (Box::new(col("x").gt(lit(0))), Box::new(col("a"))), + (Box::new(col("y").lt(lit(0))), Box::new(col("b"))), + ], + Some(Box::new(col("c"))), + )); + assert_eq!(simplify(expr.clone()), expr); + } + fn distinct_from(left: impl Into, right: impl Into) -> Expr { Expr::BinaryExpr(BinaryExpr { left: Box::new(left.into()), From 293a0d4b85cbf032e432c3cf39bbd8a32e277b62 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 16 Sep 2025 10:49:42 -0700 Subject: [PATCH 3/5] cargo fmt --- .../simplify_expressions/expr_simplifier.rs | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 343d57e0ea320..3d9d17f0a0f03 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1405,11 +1405,17 @@ impl TreeNodeRewriter for Simplifier<'_, S> { expr: None, mut when_then_expr, else_expr, - // if let guard is not stabilited so we can't use it yet: https://github.com/rust-lang/rust/issues/51114 - // Once it's supported we can avoid searching through when_then_expr twice in the below .any() and .position() calls - // }) if let Some(i) = when_then_expr.iter().position(|(when, _)| is_true(when.as_ref())) => { - }) if when_then_expr.iter().any(|(when, _)| is_true(when.as_ref())) => { - if let Some(i) = when_then_expr.iter().position(|(when, _)| is_true(when.as_ref())) { + // if let guard is not stabilized so we can't use it yet: https://github.com/rust-lang/rust/issues/51114 + // Once it's supported we can avoid searching through when_then_expr twice in the below .any() and .position() calls + // }) if let Some(i) = when_then_expr.iter().position(|(when, _)| is_true(when.as_ref())) => { + }) if when_then_expr + .iter() + .any(|(when, _)| is_true(when.as_ref())) => + { + if let Some(i) = when_then_expr + .iter() + .position(|(when, _)| is_true(when.as_ref())) + { let (_, then_) = when_then_expr.swap_remove(i); // CASE WHEN true THEN A ... END --> A if i == 0 { @@ -1417,13 +1423,11 @@ impl TreeNodeRewriter for Simplifier<'_, S> { } // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END let truncated_when_then_expr = when_then_expr[..i].to_vec(); - return Ok(Transformed::yes( - Expr::Case(Case { - expr: None, - when_then_expr: truncated_when_then_expr, - else_expr: Some(then_), - }) - )) + return Ok(Transformed::yes(Expr::Case(Case { + expr: None, + when_then_expr: truncated_when_then_expr, + else_expr: Some(then_), + }))); } Transformed::no(Expr::Case(Case { expr: None, From 23c1a317f146e9c8adb6984632c4e2955a4b0dfe Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 16 Sep 2025 17:22:50 -0700 Subject: [PATCH 4/5] Remove copying vector based on PR feedback --- .../optimizer/src/simplify_expressions/expr_simplifier.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 3d9d17f0a0f03..38b9f374e90bd 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1422,10 +1422,10 @@ impl TreeNodeRewriter for Simplifier<'_, S> { return Ok(Transformed::yes(*then_)); } // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END - let truncated_when_then_expr = when_then_expr[..i].to_vec(); + when_then_expr.truncate(i); return Ok(Transformed::yes(Expr::Case(Case { expr: None, - when_then_expr: truncated_when_then_expr, + when_then_expr, else_expr: Some(then_), }))); } From a2e60468315ff7f66dc0188a2d63723e1058e0db Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 16 Sep 2025 23:22:33 -0700 Subject: [PATCH 5/5] Remove unnecessary if conditional (pr feedback) --- .../simplify_expressions/expr_simplifier.rs | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 38b9f374e90bd..3c96f953f0000 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1404,7 +1404,7 @@ impl TreeNodeRewriter for Simplifier<'_, S> { Expr::Case(Case { expr: None, mut when_then_expr, - else_expr, + else_expr: _, // if let guard is not stabilized so we can't use it yet: https://github.com/rust-lang/rust/issues/51114 // Once it's supported we can avoid searching through when_then_expr twice in the below .any() and .position() calls // }) if let Some(i) = when_then_expr.iter().position(|(when, _)| is_true(when.as_ref())) => { @@ -1412,27 +1412,22 @@ impl TreeNodeRewriter for Simplifier<'_, S> { .iter() .any(|(when, _)| is_true(when.as_ref())) => { - if let Some(i) = when_then_expr + let i = when_then_expr .iter() .position(|(when, _)| is_true(when.as_ref())) - { - let (_, then_) = when_then_expr.swap_remove(i); - // CASE WHEN true THEN A ... END --> A - if i == 0 { - return Ok(Transformed::yes(*then_)); - } - // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END - when_then_expr.truncate(i); - return Ok(Transformed::yes(Expr::Case(Case { - expr: None, - when_then_expr, - else_expr: Some(then_), - }))); + .unwrap(); + let (_, then_) = when_then_expr.swap_remove(i); + // CASE WHEN true THEN A ... END --> A + if i == 0 { + return Ok(Transformed::yes(*then_)); } - Transformed::no(Expr::Case(Case { + + // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END + when_then_expr.truncate(i); + Transformed::yes(Expr::Case(Case { expr: None, when_then_expr, - else_expr, + else_expr: Some(then_), })) }