From 1d5dd768dbb56a6e84bd0494c55423668895a0ff Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 12 Dec 2017 16:54:35 -0800 Subject: [PATCH 1/2] [SPARK-19809][SQL][TEST][FOLLOWUP] Move the test case to HiveOrcQuerySuite --- .../sql/hive/execution/SQLQuerySuite.scala | 17 ------------- .../sql/hive/orc/HiveOrcQuerySuite.scala | 24 +++++++++++++++++++ 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 93c91d3fcb727..f2562c33e2a6e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2172,21 +2172,4 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } } - - test("SPARK-19809 NullPointerException on zero-size ORC file") { - Seq("native", "hive").foreach { orcImpl => - withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { - withTempPath { dir => - withTable("spark_19809") { - sql(s"CREATE TABLE spark_19809(a int) STORED AS ORC LOCATION '$dir'") - Files.touch(new File(s"${dir.getCanonicalPath}", "zero.orc")) - - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 - checkAnswer(sql("SELECT * FROM spark_19809"), Seq.empty) - } - } - } - } - } - } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index 7244c369bd3f4..cc0b75e834639 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -17,6 +17,10 @@ package org.apache.spark.sql.hive.orc +import java.io.File + +import com.google.common.io.Files + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} @@ -162,4 +166,24 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } } + + // Since Hive 1.2.1 library code path still has this problem, users may hit this + // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, + // Apache Spark with the default configuration doesn't hit this bug. + test("SPARK-19809 NullPointerException on zero-size ORC file") { + Seq("native", "hive").foreach { orcImpl => + withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { + withTempPath { dir => + withTable("spark_19809") { + sql(s"CREATE TABLE spark_19809(a int) STORED AS ORC LOCATION '$dir'") + Files.touch(new File(s"${dir.getCanonicalPath}", "zero.orc")) + + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 + checkAnswer(spark.table("spark_19809"), Seq.empty) + } + } + } + } + } + } } From a32da5fdffd0c8d19d9d777864b48f810c0b149e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 12 Dec 2017 18:53:32 -0800 Subject: [PATCH 2/2] Move the test case of SPARK-22267, too. --- .../sql/hive/execution/SQLQuerySuite.scala | 19 --------------- .../sql/hive/orc/HiveOrcQuerySuite.scala | 24 ++++++++++++++++++- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index f2562c33e2a6e..c11e37a516646 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2153,23 +2153,4 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } } - - test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") { - Seq("native", "hive").foreach { orcImpl => - withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { - withTempPath { f => - val path = f.getCanonicalPath - Seq(1 -> 2).toDF("c1", "c2").write.orc(path) - checkAnswer(spark.read.orc(path), Row(1, 2)) - - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 - withTable("t") { - sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'") - checkAnswer(spark.table("t"), Row(2, 1)) - } - } - } - } - } - } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index cc0b75e834639..92b2f069cacd6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -21,7 +21,7 @@ import java.io.File import com.google.common.io.Files -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.orc.OrcQueryTest @@ -167,6 +167,28 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } + // Since Hive 1.2.1 library code path still has this problem, users may hit this + // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, + // Apache Spark with the default configuration doesn't hit this bug. + test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") { + Seq("native", "hive").foreach { orcImpl => + withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { + withTempPath { f => + val path = f.getCanonicalPath + Seq(1 -> 2).toDF("c1", "c2").write.orc(path) + checkAnswer(spark.read.orc(path), Row(1, 2)) + + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 + withTable("t") { + sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'") + checkAnswer(spark.table("t"), Row(2, 1)) + } + } + } + } + } + } + // Since Hive 1.2.1 library code path still has this problem, users may hit this // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, // Apache Spark with the default configuration doesn't hit this bug.