From 8f8c5da5b2ff80af78fdf9b7c942e3c5c5d09fd2 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 24 Nov 2016 21:14:47 +0800 Subject: [PATCH 1/4] make sure Spark can access the table metadata created by older version of spark --- ...iveExternalCatalogCompatibilitySuite.scala | 212 ++++++++++++++++++ .../sql/hive/MetastoreDataSourcesSuite.scala | 43 ---- 2 files changed, 212 insertions(+), 43 deletions(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogCompatibilitySuite.scala diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogCompatibilitySuite.scala new file mode 100644 index 0000000000000..dbddedf0c1542 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogCompatibilitySuite.scala @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive + +import java.net.URI + +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.hive.client.HiveClient +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Utils + + +class HiveExternalCatalogCompatibilitySuite extends QueryTest with TestHiveSingleton { + + // To test `HiveExternalCatalog`, we need to read/write the raw table meta from/to hive client. + val hiveClient: HiveClient = + spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client + + val tempDir = Utils.createTempDir().getCanonicalFile + + override def beforeAll(): Unit = { + for ((tbl, _, _) <- rawTablesAndExpectations) { + hiveClient.createTable(tbl, ignoreIfExists = false) + } + } + + override def afterAll(): Unit = { + Utils.deleteRecursively(tempDir) + for (i <- 1 to rawTablesAndExpectations.length) { + hiveClient.dropTable("default", s"tbl$i", ignoreIfNotExists = true, purge = false) + } + } + + + // Raw table metadata that are dumped from tables created by Spark 2.0 + val simpleSchema = new StructType().add("i", "int") + val partitionedSchema = new StructType().add("i", "int").add("j", "int") + + val hiveTable = CatalogTable( + identifier = TableIdentifier("tbl1", Some("default")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty.copy( + inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"), + outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")), + schema = simpleSchema) + + val externalHiveTable = CatalogTable( + identifier = TableIdentifier("tbl2", Some("default")), + tableType = CatalogTableType.EXTERNAL, + storage = CatalogStorageFormat.empty.copy( + locationUri = 
Some(tempDir.getCanonicalPath), + inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"), + outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")), + schema = simpleSchema) + + val partitionedHiveTable = CatalogTable( + identifier = TableIdentifier("tbl3", Some("default")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty.copy( + inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"), + outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")), + schema = partitionedSchema, + partitionColumnNames = Seq("j")) + + + val simpleSchemaJson = + """ + |{ + | "type": "struct", + | "fields": [{ + | "name": "i", + | "type": "integer", + | "nullable": true, + | "metadata": {} + | }] + |} + """.stripMargin + + val partitionedSchemaJson = + """ + |{ + | "type": "struct", + | "fields": [{ + | "name": "i", + | "type": "integer", + | "nullable": true, + | "metadata": {} + | }, + | { + | "name": "j", + | "type": "integer", + | "nullable": true, + | "metadata": {} + | }] + |} + """.stripMargin + + def defaultTablePath(tableName: String): String = { + spark.sessionState.catalog.defaultTablePath(TableIdentifier(tableName)) + } + + val dataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl4", Some("default")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty.copy(properties = Map("path" -> defaultTablePath("tbl4"))), + schema = new StructType(), + properties = Map( + "spark.sql.sources.provider" -> "json", + "spark.sql.sources.schema.numParts" -> "1", + "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) + + val hiveCompatibleDataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl5", Some("default")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty.copy(properties = Map("path" -> defaultTablePath("tbl5"))), + schema = simpleSchema, + properties = Map( + "spark.sql.sources.provider" -> "parquet", + 
"spark.sql.sources.schema.numParts" -> "1", + "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) + + val partitionedDataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl6", Some("default")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty.copy(properties = Map("path" -> defaultTablePath("tbl6"))), + schema = new StructType(), + properties = Map( + "spark.sql.sources.provider" -> "json", + "spark.sql.sources.schema.numParts" -> "1", + "spark.sql.sources.schema.part.0" -> partitionedSchemaJson, + "spark.sql.sources.schema.numPartCols" -> "1", + "spark.sql.sources.schema.partCol.0" -> "j")) + + val externalDataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl7", Some("default")), + tableType = CatalogTableType.EXTERNAL, + storage = CatalogStorageFormat.empty.copy( + locationUri = Some(new Path(defaultTablePath("tbl7"), "-__PLACEHOLDER__").toString), + properties = Map("path" -> tempDir.getAbsolutePath)), + schema = new StructType(), + properties = Map( + "spark.sql.sources.provider" -> "json", + "spark.sql.sources.schema.numParts" -> "1", + "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) + + val hiveCompatibleExternalDataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl8", Some("default")), + tableType = CatalogTableType.EXTERNAL, + storage = CatalogStorageFormat.empty.copy( + locationUri = Some(tempDir.getAbsolutePath), + properties = Map("path" -> tempDir.getAbsolutePath)), + schema = simpleSchema, + properties = Map( + "spark.sql.sources.provider" -> "parquet", + "spark.sql.sources.schema.numParts" -> "1", + "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) + + val dataSourceTableWithoutSchema = CatalogTable( + identifier = TableIdentifier("tbl9", Some("default")), + tableType = CatalogTableType.EXTERNAL, + storage = CatalogStorageFormat.empty.copy( + locationUri = Some(new Path(defaultTablePath("tbl9"), "-__PLACEHOLDER__").toString), + properties = Map("path" -> 
tempDir.getAbsolutePath)), + schema = new StructType(), + properties = Map("spark.sql.sources.provider" -> "json")) + + // A list of all raw tables we want to test, with their expected schema and table location. + val rawTablesAndExpectations = Seq( + (hiveTable, simpleSchema, None), + (externalHiveTable, simpleSchema, Some(tempDir.getCanonicalPath)), + (partitionedHiveTable, partitionedSchema, None), + (dataSourceTable, simpleSchema, None), + (hiveCompatibleDataSourceTable, simpleSchema, None), + (partitionedDataSourceTable, partitionedSchema, None), + (externalDataSourceTable, simpleSchema, Some(tempDir.getCanonicalPath)), + (hiveCompatibleExternalDataSourceTable, simpleSchema, Some(tempDir.getCanonicalPath)), + (dataSourceTableWithoutSchema, new StructType(), None)) + + test("make sure we can read table created by old version of Spark") { + for ((tbl, expectedSchema, expectedLocation) <- rawTablesAndExpectations) { + val readBack = spark.sharedState.externalCatalog.getTable( + tbl.identifier.database.get, tbl.identifier.table) + + assert(readBack.schema == expectedSchema) + expectedLocation.foreach { loc => + // trim the URI prefix + val tableLocation = new URI(readBack.storage.locationUri.get).getPath + assert(tableLocation == loc) + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index c7cc75fbc8a07..a45f4b5d6376c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -1370,47 +1370,4 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv sparkSession.sparkContext.conf.set(DEBUG_MODE, previousValue) } } - - test("SPARK-17470: support old table that stores table location in storage properties") { - withTable("old") { - withTempPath { path => - Seq(1 -> 
"a").toDF("i", "j").write.parquet(path.getAbsolutePath) - val tableDesc = CatalogTable( - identifier = TableIdentifier("old", Some("default")), - tableType = CatalogTableType.EXTERNAL, - storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> path.getAbsolutePath) - ), - schema = new StructType(), - properties = Map( - HiveExternalCatalog.DATASOURCE_PROVIDER -> "parquet", - HiveExternalCatalog.DATASOURCE_SCHEMA -> - new StructType().add("i", "int").add("j", "string").json)) - hiveClient.createTable(tableDesc, ignoreIfExists = false) - checkAnswer(spark.table("old"), Row(1, "a")) - } - } - } - - test("SPARK-18464: support old table which doesn't store schema in table properties") { - withTable("old") { - withTempPath { path => - Seq(1 -> "a").toDF("i", "j").write.parquet(path.getAbsolutePath) - val tableDesc = CatalogTable( - identifier = TableIdentifier("old", Some("default")), - tableType = CatalogTableType.EXTERNAL, - storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> path.getAbsolutePath) - ), - schema = new StructType(), - properties = Map( - HiveExternalCatalog.DATASOURCE_PROVIDER -> "parquet")) - hiveClient.createTable(tableDesc, ignoreIfExists = false) - - checkAnswer(spark.table("old"), Row(1, "a")) - - checkAnswer(sql("DESC old"), Row("i", "int", null) :: Row("j", "string", null) :: Nil) - } - } - } } From 6f785e3685ea80784ed7c38dbd29f5976d257f20 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 25 Nov 2016 13:19:38 +0800 Subject: [PATCH 2/4] address comment --- ...cala => HiveExternalCatalogBackwardCompatibilitySuite.scala} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename sql/hive/src/test/scala/org/apache/spark/sql/hive/{HiveExternalCatalogCompatibilitySuite.scala => HiveExternalCatalogBackwardCompatibilitySuite.scala} (98%) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogCompatibilitySuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala similarity index 98% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogCompatibilitySuite.scala rename to sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala index dbddedf0c1542..c22d6a9a67c7c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils -class HiveExternalCatalogCompatibilitySuite extends QueryTest with TestHiveSingleton { +class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with TestHiveSingleton { // To test `HiveExternalCatalog`, we need to read/write the raw table meta from/to hive client. val hiveClient: HiveClient = From a42b8b950d49e634b29c0f018dad321a059997b5 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 27 Nov 2016 15:08:38 +0800 Subject: [PATCH 3/4] address comments --- ...nalCatalogBackwardCompatibilitySuite.scala | 141 +++++++++++------- 1 file changed, 90 insertions(+), 51 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala index c22d6a9a67c7c..60f9f51e5c17c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala @@ -19,18 +19,20 @@ package org.apache.spark.sql.hive import java.net.URI -import org.apache.hadoop.fs.Path +import org.scalatest.BeforeAndAfterEach import org.apache.spark.sql.QueryTest import 
org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils -class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with TestHiveSingleton { +class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest + with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach { // To test `HiveExternalCatalog`, we need to read/write the raw table meta from/to hive client. val hiveClient: HiveClient = @@ -38,43 +40,51 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with TestH val tempDir = Utils.createTempDir().getCanonicalFile - override def beforeAll(): Unit = { - for ((tbl, _, _) <- rawTablesAndExpectations) { + override def beforeEach(): Unit = { + sql("CREATE DATABASE test_db") + for ((tbl, _) <- rawTablesAndExpectations) { hiveClient.createTable(tbl, ignoreIfExists = false) } } - override def afterAll(): Unit = { + override def afterEach(): Unit = { Utils.deleteRecursively(tempDir) - for (i <- 1 to rawTablesAndExpectations.length) { - hiveClient.dropTable("default", s"tbl$i", ignoreIfNotExists = true, purge = false) - } + hiveClient.dropDatabase("test_db", ignoreIfNotExists = false, cascade = true) + } + + private def getTableMetadata(tableName: String): CatalogTable = { + spark.sharedState.externalCatalog.getTable("test_db", tableName) } + private def defaultTablePath(tableName: String): String = { + spark.sessionState.catalog.defaultTablePath(TableIdentifier(tableName, Some("test_db"))) + } - // Raw table metadata that are dumped from tables created by Spark 2.0 + + // Raw table metadata that are dumped from tables created by Spark 2.0. 
Note that, all spark + // versions prior to 2.1 would generate same raw table metadata for a specific table. val simpleSchema = new StructType().add("i", "int") val partitionedSchema = new StructType().add("i", "int").add("j", "int") - val hiveTable = CatalogTable( - identifier = TableIdentifier("tbl1", Some("default")), + lazy val hiveTable = CatalogTable( + identifier = TableIdentifier("tbl1", Some("test_db")), tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat.empty.copy( inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"), outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")), schema = simpleSchema) - val externalHiveTable = CatalogTable( - identifier = TableIdentifier("tbl2", Some("default")), + lazy val externalHiveTable = CatalogTable( + identifier = TableIdentifier("tbl2", Some("test_db")), tableType = CatalogTableType.EXTERNAL, storage = CatalogStorageFormat.empty.copy( - locationUri = Some(tempDir.getCanonicalPath), + locationUri = Some(tempDir.getAbsolutePath), inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"), outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")), schema = simpleSchema) - val partitionedHiveTable = CatalogTable( - identifier = TableIdentifier("tbl3", Some("default")), + lazy val partitionedHiveTable = CatalogTable( + identifier = TableIdentifier("tbl3", Some("test_db")), tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat.empty.copy( inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"), @@ -115,12 +125,8 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with TestH |} """.stripMargin - def defaultTablePath(tableName: String): String = { - spark.sessionState.catalog.defaultTablePath(TableIdentifier(tableName)) - } - - val dataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl4", Some("default")), + lazy val dataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl4", 
Some("test_db")), tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat.empty.copy(properties = Map("path" -> defaultTablePath("tbl4"))), schema = new StructType(), @@ -129,8 +135,8 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with TestH "spark.sql.sources.schema.numParts" -> "1", "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) - val hiveCompatibleDataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl5", Some("default")), + lazy val hiveCompatibleDataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl5", Some("test_db")), tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat.empty.copy(properties = Map("path" -> defaultTablePath("tbl5"))), schema = simpleSchema, @@ -139,8 +145,8 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with TestH "spark.sql.sources.schema.numParts" -> "1", "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) - val partitionedDataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl6", Some("default")), + lazy val partitionedDataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl6", Some("test_db")), tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat.empty.copy(properties = Map("path" -> defaultTablePath("tbl6"))), schema = new StructType(), @@ -151,11 +157,11 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with TestH "spark.sql.sources.schema.numPartCols" -> "1", "spark.sql.sources.schema.partCol.0" -> "j")) - val externalDataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl7", Some("default")), + lazy val externalDataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl7", Some("test_db")), tableType = CatalogTableType.EXTERNAL, storage = CatalogStorageFormat.empty.copy( - locationUri = Some(new Path(defaultTablePath("tbl7"), "-__PLACEHOLDER__").toString), + locationUri = Some(defaultTablePath("tbl7") + "-__PLACEHOLDER__"), properties = 
Map("path" -> tempDir.getAbsolutePath)), schema = new StructType(), properties = Map( @@ -163,8 +169,8 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with TestH "spark.sql.sources.schema.numParts" -> "1", "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) - val hiveCompatibleExternalDataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl8", Some("default")), + lazy val hiveCompatibleExternalDataSourceTable = CatalogTable( + identifier = TableIdentifier("tbl8", Some("test_db")), tableType = CatalogTableType.EXTERNAL, storage = CatalogStorageFormat.empty.copy( locationUri = Some(tempDir.getAbsolutePath), @@ -175,38 +181,71 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with TestH "spark.sql.sources.schema.numParts" -> "1", "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) - val dataSourceTableWithoutSchema = CatalogTable( - identifier = TableIdentifier("tbl9", Some("default")), + lazy val dataSourceTableWithoutSchema = CatalogTable( + identifier = TableIdentifier("tbl9", Some("test_db")), tableType = CatalogTableType.EXTERNAL, storage = CatalogStorageFormat.empty.copy( - locationUri = Some(new Path(defaultTablePath("tbl9"), "-__PLACEHOLDER__").toString), + locationUri = Some(defaultTablePath("tbl9") + "-__PLACEHOLDER__"), properties = Map("path" -> tempDir.getAbsolutePath)), schema = new StructType(), properties = Map("spark.sql.sources.provider" -> "json")) - // A list of all raw tables we want to test, with their expected schema and table location. 
- val rawTablesAndExpectations = Seq( - (hiveTable, simpleSchema, None), - (externalHiveTable, simpleSchema, Some(tempDir.getCanonicalPath)), - (partitionedHiveTable, partitionedSchema, None), - (dataSourceTable, simpleSchema, None), - (hiveCompatibleDataSourceTable, simpleSchema, None), - (partitionedDataSourceTable, partitionedSchema, None), - (externalDataSourceTable, simpleSchema, Some(tempDir.getCanonicalPath)), - (hiveCompatibleExternalDataSourceTable, simpleSchema, Some(tempDir.getCanonicalPath)), - (dataSourceTableWithoutSchema, new StructType(), None)) + // A list of all raw tables we want to test, with their expected schema. + lazy val rawTablesAndExpectations = Seq( + hiveTable -> simpleSchema, + externalHiveTable -> simpleSchema, + partitionedHiveTable -> partitionedSchema, + dataSourceTable -> simpleSchema, + hiveCompatibleDataSourceTable -> simpleSchema, + partitionedDataSourceTable -> partitionedSchema, + externalDataSourceTable -> simpleSchema, + hiveCompatibleExternalDataSourceTable -> simpleSchema, + dataSourceTableWithoutSchema -> new StructType()) test("make sure we can read table created by old version of Spark") { - for ((tbl, expectedSchema, expectedLocation) <- rawTablesAndExpectations) { - val readBack = spark.sharedState.externalCatalog.getTable( - tbl.identifier.database.get, tbl.identifier.table) - + for ((tbl, expectedSchema) <- rawTablesAndExpectations) { + val readBack = getTableMetadata(tbl.identifier.table) assert(readBack.schema == expectedSchema) - expectedLocation.foreach { loc => + + if (tbl.tableType == CatalogTableType.EXTERNAL) { // trim the URI prefix val tableLocation = new URI(readBack.storage.locationUri.get).getPath - assert(tableLocation == loc) + assert(tableLocation == tempDir.getAbsolutePath) + } + } + } + + test("make sure we can alter table location created by old version of Spark") { + withTempDir { dir => + for ((tbl, _) <- rawTablesAndExpectations if tbl.tableType == CatalogTableType.EXTERNAL) { + sql(s"ALTER 
TABLE ${tbl.identifier} SET LOCATION '${dir.getAbsolutePath}'") + + val readBack = getTableMetadata(tbl.identifier.table) + + // trim the URI prefix + val actualTableLocation = new URI(readBack.storage.locationUri.get).getPath + assert(actualTableLocation == dir.getAbsolutePath) + } + } + } + + test("make sure we can rename table created by old version of Spark") { + for ((tbl, expectedSchema) <- rawTablesAndExpectations) { + val newName = tbl.identifier.table + "_renamed" + sql(s"ALTER TABLE ${tbl.identifier} RENAME TO $newName") + + val readBack = getTableMetadata(newName) + assert(readBack.schema == expectedSchema) + + // trim the URI prefix + val actualTableLocation = new URI(readBack.storage.locationUri.get).getPath + val expectedLocation = if (tbl.tableType == CatalogTableType.EXTERNAL) { + tempDir.getAbsolutePath + } else { + // trim the URI prefix + new URI(defaultTablePath(newName)).getPath } + assert(actualTableLocation == expectedLocation) } } } From 117f5321cac62f01a5726c308efaf7369a9cdc9d Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 28 Nov 2016 11:53:14 +0800 Subject: [PATCH 4/4] improve comments --- .../hive/HiveExternalCatalogBackwardCompatibilitySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala index 60f9f51e5c17c..cca4480c44150 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala @@ -62,7 +62,7 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest // Raw table metadata that are dumped from tables created by Spark 2.0. Note that, all spark - // versions prior to 2.1 would generate same raw table metadata for a specific table. 
+ // versions prior to 2.1 would generate almost the same raw table metadata for a specific table. val simpleSchema = new StructType().add("i", "int") val partitionedSchema = new StructType().add("i", "int").add("j", "int")