From bcd5854916d9fc7421ef1fc6e3ef342a9968441a Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 10 Mar 2022 11:11:57 +0800 Subject: [PATCH 1/2] SPARK-38489: Aggregate.groupOnly support foldable expressions --- .../plans/logical/basicLogicalOperators.scala | 2 +- .../optimizer/AggregateOptimizeSuite.scala | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 02d6a1d3cce76..5b601fbd5eed6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1003,7 +1003,7 @@ case class Aggregate( groupingExpressions.nonEmpty && aggregateExpressions.map { case Alias(child, _) => child case e => e - }.forall(a => groupingExpressions.exists(g => a.semanticEquals(g))) + }.forall(a => a.foldable || groupingExpressions.exists(g => a.semanticEquals(g))) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala index 7981dda495de4..f48ca5d69dd2e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Distinct, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -148,4 +149,17 @@ class AggregateOptimizeSuite extends AnalysisTest { x.select("x.b".attr.as("newAlias1"), "x.b".attr.as("newAlias2")) .groupBy("newAlias1".attr, "newAlias2".attr)("newAlias1".attr, "newAlias2".attr).analyze) } + + test("SPARK-38489: Aggregate.groupOnly support foldable expressions") { + val x = testRelation.subquery('x) + val y = testRelation.subquery('y) + comparePlans( + Optimize.execute( + Distinct(x.join(y, LeftOuter, Some("x.a".attr === "y.a".attr)) + .select("x.b".attr, TrueLiteral, FalseLiteral.as("newAlias"))) + .analyze), + x.select("x.b".attr, TrueLiteral, FalseLiteral.as("newAlias")) + .groupBy("x.b".attr)("x.b".attr, TrueLiteral, FalseLiteral.as("newAlias")) + .analyze) + } } From eb2f241add3c1ef6ce1a0561049be545e6882ad7 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 10 Mar 2022 15:51:20 +0800 Subject: [PATCH 2/2] Fix test error --- .../spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala index f48ca5d69dd2e..1db04d2f5a7ce 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala @@ -123,8 +123,7 @@ class AggregateOptimizeSuite extends AnalysisTest { Optimize.execute( x.join(y, LeftOuter, Some("x.a".attr === "y.a".attr)) .groupBy("x.a".attr)("x.a".attr, Literal(1)).analyze), - x.join(y, LeftOuter, Some("x.a".attr === "y.a".attr)) - .groupBy("x.a".attr)("x.a".attr, Literal(1)).analyze) + x.groupBy("x.a".attr)("x.a".attr, Literal(1)).analyze) } test("SPARK-37292: Removes outer join if it only has DISTINCT on streamed side with alias") {