From bcaaaaf0a14d8f00c003791fe2c67ff15353330e Mon Sep 17 00:00:00 2001 From: Mihailo Timotic Date: Fri, 11 Apr 2025 10:03:21 +0200 Subject: [PATCH 1/2] Revert "[SPARK-47895][SQL] group by alias should be idempotent" This reverts commit 70e0e20b985f7d497541826ea69ebd5c8c8c5c39. --- .../ResolveReferencesInAggregate.scala | 14 +------------- .../SubstituteUnresolvedOrdinalsSuite.scala | 18 ------------------ 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala index d1aee4ca2a9d1..7ea90854932e5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala @@ -116,19 +116,7 @@ class ResolveReferencesInAggregate(val catalogManager: CatalogManager) extends S groupExprs.map { g => g.transformWithPruning(_.containsPattern(UNRESOLVED_ATTRIBUTE)) { case u: UnresolvedAttribute => - val (result, index) = - selectList.zipWithIndex.find(ne => conf.resolver(ne._1.name, u.name)) - .getOrElse((u, -1)) - - trimAliases(result) match { - // HACK ALERT: If the expanded grouping expression is an integer literal, don't use it - // but use an integer literal of the index. The reason is we may - // repeatedly analyze the plan, and the original integer literal may cause - // failures with a later GROUP BY ordinal resolution. GROUP BY constant is - // meaningless so whatever value does not matter here. - case IntegerLiteral(_) => Literal(index + 1) - case _ => result - } + selectList.find(ne => conf.resolver(ne.name, u.name)).getOrElse(u) } } } else { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala index 106a607ba6df5..39cf298aec434 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala @@ -104,22 +104,4 @@ class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest { testRelationWithData.groupBy(Literal(1))(Literal(100).as("a")) ) } - - test("SPARK-47895: group by alias repeated analysis") { - val plan = testRelation.groupBy($"b")(Literal(100).as("b")).analyze - comparePlans( - plan, - testRelation.groupBy(Literal(1))(Literal(100).as("b")) - ) - - val testRelationWithData = testRelation.copy(data = Seq(new GenericInternalRow(Array(1: Any)))) - // Copy the plan to reset its `analyzed` flag, so that analyzer rules will re-apply. - val copiedPlan = plan.transform { - case _: LocalRelation => testRelationWithData - } - comparePlans( - copiedPlan.analyze, // repeated analysis - testRelationWithData.groupBy(Literal(1))(Literal(100).as("b")) - ) - } } From 0ba5d24ec4ebadfbbe183086a60e90adab29bf3d Mon Sep 17 00:00:00 2001 From: Mihailo Timotic Date: Fri, 11 Apr 2025 15:08:37 +0200 Subject: [PATCH 2/2] add test --- .../sql-tests/analyzer-results/group-by-alias.sql.out | 7 +++++++ .../test/resources/sql-tests/inputs/group-by-alias.sql | 3 +++ .../resources/sql-tests/results/group-by-alias.sql.out | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out index 00ec4319ad0fc..8a57acd50d48a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out @@ -331,6 +331,13 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +SELECT MAX(col1), 3 as abc FROM VALUES(1),(2),(3),(4) GROUP BY col1 % abc +-- !query analysis +Aggregate [(col1#x % 3)], [max(col1#x) AS max(col1)#x, 3 AS abc#x] ++- LocalRelation [col1#x] + + -- !query set spark.sql.groupByAliases=false -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql index 80a94b50bdb77..75afc82f998d3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql @@ -43,6 +43,9 @@ SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k; -- Aggregate functions cannot be used in GROUP BY SELECT COUNT(b) AS k FROM testData GROUP BY k; +-- Ordinal is replaced correctly when grouping by alias of a literal +SELECT MAX(col1), 3 as abc FROM VALUES(1),(2),(3),(4) GROUP BY col1 % abc; + -- turn off group by aliases set spark.sql.groupByAliases=false; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out index 12cb5b3056c7f..fd740c88bc558 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out @@ -277,6 +277,16 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +SELECT MAX(col1), 3 as abc FROM VALUES(1),(2),(3),(4) GROUP BY col1 % abc +-- !query schema +struct +-- !query output +2 3 +3 3 +4 3 + + -- !query set spark.sql.groupByAliases=false -- !query schema