From 0fa21acd2614fbdf128561b34c72088008d62a05 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 8 Mar 2016 20:06:39 -0800 Subject: [PATCH 1/3] remove Project with an empty projectList --- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 3 +++ .../spark/sql/catalyst/optimizer/ColumnPruningSuite.scala | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index deea7238f564c..9bf46c2a1b202 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -380,6 +380,9 @@ object ColumnPruning extends Rule[LogicalPlan] { p } + // Eliminate the Projects with empty projectList + case p @ Project(projectList, child) if projectList.isEmpty => child + // Can't prune the columns on LeafNode case p @ Project(_, l: LeafNode) => p diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala index d09601e0343d7..0346a3ff48152 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala @@ -157,6 +157,14 @@ class ColumnPruningSuite extends PlanTest { comparePlans(Optimize.execute(query), expected) } + test("Eliminate the Project with an empty projectList") { + val input = OneRowRelation + val query = + Project(Literal(1).as("1") :: Nil, Project(Literal(1).as("1") :: Nil, input)).analyze + val expected = Project(Literal(1).as("1") :: Nil, input).analyze + comparePlans(Optimize.execute(query), expected) + } + test("column pruning for group") { val testRelation = LocalRelation('a.int, 'b.int, 'c.int) val originalQuery = From a31b1b588949f2f92981f7d1a7d04d6e1806ccd1 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 8 Mar 2016 21:02:49 -0800 Subject: [PATCH 2/3] added two cases. --- .../catalyst/optimizer/ColumnPruningSuite.scala | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala index 0346a3ff48152..409e92238e29f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala @@ -159,10 +159,18 @@ class ColumnPruningSuite extends PlanTest { test("Eliminate the Project with an empty projectList") { val input = OneRowRelation - val query = - Project(Literal(1).as("1") :: Nil, Project(Literal(1).as("1") :: Nil, input)).analyze val expected = Project(Literal(1).as("1") :: Nil, input).analyze - comparePlans(Optimize.execute(query), expected) + + val query1 = + Project(Literal(1).as("1") :: Nil, Project(Literal(1).as("1") :: Nil, input)).analyze + comparePlans(Optimize.execute(query1), expected) + + val query2 = + Project(Literal(1).as("1") :: Nil, Project(Nil, input)).analyze + comparePlans(Optimize.execute(query2), expected) + + // to make sure the top Project will not be removed. + comparePlans(Optimize.execute(expected), expected) } test("column pruning for group") { From 68decd1729eb7023dc1c24efa2e8fbef7011f698 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 9 Mar 2016 00:56:15 -0800 Subject: [PATCH 3/3] reorder it. --- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 9bf46c2a1b202..6eecbd716db66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -380,15 +380,12 @@ object ColumnPruning extends Rule[LogicalPlan] { p } - // Eliminate the Projects with empty projectList - case p @ Project(projectList, child) if projectList.isEmpty => child + // Eliminate no-op Projects + case p @ Project(projectList, child) if sameOutput(child.output, p.output) => child // Can't prune the columns on LeafNode case p @ Project(_, l: LeafNode) => p - // Eliminate no-op Projects - case p @ Project(projectList, child) if sameOutput(child.output, p.output) => child - // for all other logical plans that inherits the output from it's children case p @ Project(_, child) => val required = child.references ++ p.references