From d6d33bf4831920e5a03c2e743ef5a6a7498f8c8b Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 3 Feb 2026 16:49:47 +0800 Subject: [PATCH] [SPARK-54881][SQL][FOLLOWUP] Extract simplifyNot method in BooleanSimplification This is a followup to #53658. Instead of recursively calling the entire `actualExprTransformer` to optimize new Not expressions created by De Morgan's Laws, this PR extracts a `simplifyNot` method that only handles Not simplification and calls itself recursively. This is more efficient because: - We only apply the Not-specific simplifications (not the And/Or factor elimination logic) - We avoid the overhead of tree pattern matching and pruning checks - The recursion is more direct and focused Co-authored-by: Cursor --- .../sql/catalyst/optimizer/expressions.scala | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 98379241c366a..95969a947582a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -497,6 +497,15 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { } } + case not: Not => simplifyNot(not) + } + + /** + * Simplify Not expressions. This method is extracted to allow recursive calls for + * Not(a Or b) and Not(a And b) cases, which avoids calling the entire actualExprTransformer + * and only focuses on Not simplification. + */ + private def simplifyNot(not: Not): Expression = not match { case Not(TrueLiteral) => FalseLiteral case Not(FalseLiteral) => TrueLiteral @@ -506,22 +515,18 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { case Not(a LessThan b) => GreaterThanOrEqual(a, b) case Not(a LessThanOrEqual b) => GreaterThan(a, b) - // SPARK-54881: push down the NOT operators on children, before attaching the junction Node - // to the main tree. This ensures idempotency in an optimal way and avoids an extra rule - // iteration. + // De Morgan's Laws can create new Not expressions that need further simplification. case Not(a Or b) => - And(Not(a), Not(b)).transformDownWithPruning(_.containsPattern(NOT), ruleId) { - actualExprTransformer - } + And(simplifyNot(Not(a)), simplifyNot(Not(b))) case Not(a And b) => - Or(Not(a), Not(b)).transformDownWithPruning(_.containsPattern(NOT), ruleId) { - actualExprTransformer - } + Or(simplifyNot(Not(a)), simplifyNot(Not(b))) case Not(Not(e)) => e case Not(IsNull(e)) => IsNotNull(e) case Not(IsNotNull(e)) => IsNull(e) + + case _ => not } }