diff --git a/core/pom.xml b/core/pom.xml index 1cd1ad9725d7c..223ff5cbd8dbf 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -391,9 +391,15 @@ placed in the "provided" scope, rather than the "compile" scope, and NoClassDefFoundError exceptions are handled when the user has not explicitly compiled with the Hive module. --> + + ${hive.group} + hive-common + provided + ${hive.group} hive-exec + core provided @@ -401,6 +407,26 @@ hive-metastore provided + + ${hive.group} + hive-serde + provided + + + org.apache.hive + hive-storage-api + provided + + + org.apache.hive.shims + hive-shims-common + provided + + + org.apache.hive.shims + hive-shims-0.23 + provided + org.apache.thrift libthrift diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index 1af29fcaff2aa..927bfa20185e2 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -1,14 +1,11 @@ -JavaEWAH-0.3.2.jar RoaringBitmap-0.5.11.jar ST4-4.0.4.jar activation-1.1.1.jar aircompressor-0.10.jar -antlr-2.7.7.jar -antlr-runtime-3.4.jar +antlr-runtime-3.5.2.jar antlr4-runtime-4.7.1.jar aopalliance-1.0.jar aopalliance-repackaged-2.4.0-b34.jar -apache-log4j-extras-1.2.17.jar apacheds-i18n-2.0.0-M15.jar apacheds-kerberos-codec-2.0.0-M15.jar api-asn1-api-1.0.0-M20.jar @@ -18,15 +15,16 @@ arrow-format-0.10.0.jar arrow-memory-0.10.0.jar arrow-vector-0.10.0.jar automaton-1.11-8.jar +avatica-core-1.13.0.jar avro-1.8.2.jar avro-ipc-1.8.2.jar avro-mapred-1.8.2-hadoop2.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.12-0.13.2.jar breeze_2.12-0.13.2.jar -calcite-avatica-1.2.0-incubating.jar -calcite-core-1.2.0-incubating.jar -calcite-linq4j-1.2.0-incubating.jar +calcite-core-1.10.0.jar +calcite-druid-1.10.0.jar +calcite-linq4j-1.10.0.jar chill-java-0.9.3.jar chill_2.12-0.9.3.jar commons-beanutils-1.7.0.jar @@ -44,7 +42,7 @@ commons-httpclient-3.1.jar commons-io-2.4.jar commons-lang-2.6.jar commons-lang3-3.8.1.jar -commons-logging-1.1.3.jar +commons-logging-1.2.jar commons-math3-3.4.1.jar commons-net-3.1.jar commons-pool-1.5.4.jar @@ -53,9 +51,9 @@ core-1.1.2.jar curator-client-2.7.1.jar curator-framework-2.7.1.jar curator-recipes-2.7.1.jar -datanucleus-api-jdo-3.2.6.jar -datanucleus-core-3.2.10.jar -datanucleus-rdbms-3.2.9.jar +datanucleus-api-jdo-4.2.4.jar +datanucleus-core-4.1.17.jar +datanucleus-rdbms-4.1.19.jar derby-10.12.1.1.jar eigenbase-properties-1.1.5.jar flatbuffers-1.2.0-3f79e055.jar @@ -79,6 +77,19 @@ hadoop-yarn-client-2.7.4.jar hadoop-yarn-common-2.7.4.jar hadoop-yarn-server-common-2.7.4.jar hadoop-yarn-server-web-proxy-2.7.4.jar +hive-beeline-2.3.4.jar +hive-cli-2.3.4.jar +hive-common-2.3.4.jar +hive-exec-2.3.4-core.jar +hive-jdbc-2.3.4.jar +hive-llap-client-2.3.4.jar +hive-llap-common-2.3.4.jar +hive-metastore-2.3.4.jar +hive-serde-2.3.4.jar +hive-service-rpc-2.3.4.jar +hive-shims-0.23-2.3.4.jar +hive-shims-common-2.3.4.jar +hive-storage-api-2.7.0.jar hk2-api-2.4.0-b34.jar hk2-locator-2.4.0-b34.jar hk2-utils-2.4.0-b34.jar @@ -103,6 +114,7 @@ javassist-3.18.1-GA.jar javax.annotation-api-1.2.jar javax.inject-1.jar javax.inject-2.4.0-b34.jar +javax.jdo-3.2.0-m3.jar javax.servlet-api-3.1.0.jar javax.ws.rs-api-2.0.1.jar javolution-5.5.1.jar @@ -123,6 +135,7 @@ jline-2.14.6.jar joda-time-2.9.3.jar jodd-core-3.5.2.jar jpam-1.1.jar +json-1.8.jar json4s-ast_2.12-3.5.3.jar json4s-core_2.12-3.5.3.jar json4s-jackson_2.12-3.5.3.jar @@ -155,8 +168,8 @@ objenesis-2.5.1.jar okhttp-3.8.1.jar okio-1.13.0.jar opencsv-2.3.jar -orc-core-1.5.4-nohive.jar -orc-mapreduce-1.5.4-nohive.jar +orc-core-1.5.4.jar +orc-mapreduce-1.5.4.jar orc-shims-1.5.4.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar @@ -166,7 +179,7 @@ parquet-common-1.10.0.jar parquet-encoding-1.10.0.jar parquet-format-2.4.0.jar parquet-hadoop-1.10.0.jar -parquet-hadoop-bundle-1.6.0.jar +parquet-hadoop-bundle-1.10.0.jar parquet-jackson-1.10.0.jar protobuf-java-2.5.0.jar py4j-0.10.8.1.jar @@ -180,15 +193,14 @@ shapeless_2.12-2.3.2.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar snakeyaml-1.18.jar -snappy-0.2.jar snappy-java-1.1.7.1.jar spire-macros_2.12-0.13.0.jar spire_2.12-0.13.0.jar stax-api-1.0-2.jar stax-api-1.0.1.jar stream-2.7.0.jar -stringtemplate-3.2.1.jar super-csv-2.2.0.jar +transaction-api-1.1.jar univocity-parsers-2.7.3.jar validation-api-1.1.0.Final.jar xbean-asm7-shaded-4.12.jar diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1 index 05f180b17a588..66bbb5a8e0142 100644 --- a/dev/deps/spark-deps-hadoop-3.1 +++ b/dev/deps/spark-deps-hadoop-3.1 @@ -1,30 +1,28 @@ HikariCP-java7-2.4.12.jar -JavaEWAH-0.3.2.jar RoaringBitmap-0.5.11.jar ST4-4.0.4.jar accessors-smart-1.2.jar activation-1.1.1.jar aircompressor-0.10.jar -antlr-2.7.7.jar -antlr-runtime-3.4.jar +antlr-runtime-3.5.2.jar antlr4-runtime-4.7.1.jar aopalliance-1.0.jar aopalliance-repackaged-2.4.0-b34.jar -apache-log4j-extras-1.2.17.jar arpack_combined_all-0.1.jar arrow-format-0.10.0.jar arrow-memory-0.10.0.jar arrow-vector-0.10.0.jar automaton-1.11-8.jar +avatica-core-1.13.0.jar avro-1.8.2.jar avro-ipc-1.8.2.jar avro-mapred-1.8.2-hadoop2.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.12-0.13.2.jar breeze_2.12-0.13.2.jar -calcite-avatica-1.2.0-incubating.jar -calcite-core-1.2.0-incubating.jar -calcite-linq4j-1.2.0-incubating.jar +calcite-core-1.10.0.jar +calcite-druid-1.10.0.jar +calcite-linq4j-1.10.0.jar chill-java-0.9.3.jar chill_2.12-0.9.3.jar commons-beanutils-1.9.3.jar @@ -41,7 +39,7 @@ commons-httpclient-3.1.jar commons-io-2.4.jar commons-lang-2.6.jar commons-lang3-3.8.1.jar -commons-logging-1.1.3.jar +commons-logging-1.2.jar commons-math3-3.4.1.jar commons-net-3.1.jar commons-pool-1.5.4.jar @@ -50,9 +48,9 @@ core-1.1.2.jar curator-client-2.12.0.jar curator-framework-2.12.0.jar curator-recipes-2.12.0.jar -datanucleus-api-jdo-3.2.6.jar -datanucleus-core-3.2.10.jar -datanucleus-rdbms-3.2.9.jar +datanucleus-api-jdo-4.2.4.jar +datanucleus-core-4.1.17.jar +datanucleus-rdbms-4.1.19.jar derby-10.12.1.1.jar dnsjava-2.1.7.jar ehcache-3.3.1.jar @@ -78,6 +76,19 @@ hadoop-yarn-common-3.1.0.jar hadoop-yarn-registry-3.1.0.jar hadoop-yarn-server-common-3.1.0.jar hadoop-yarn-server-web-proxy-3.1.0.jar +hive-beeline-2.3.4.jar +hive-cli-2.3.4.jar +hive-common-2.3.4.jar +hive-exec-2.3.4-core.jar +hive-jdbc-2.3.4.jar +hive-llap-client-2.3.4.jar +hive-llap-common-2.3.4.jar +hive-metastore-2.3.4.jar +hive-serde-2.3.4.jar +hive-service-rpc-2.3.4.jar +hive-shims-0.23-2.3.4.jar +hive-shims-common-2.3.4.jar +hive-storage-api-2.7.0.jar hk2-api-2.4.0-b34.jar hk2-locator-2.4.0-b34.jar hk2-utils-2.4.0-b34.jar @@ -102,6 +113,7 @@ javassist-3.18.1-GA.jar javax.annotation-api-1.2.jar javax.inject-1.jar javax.inject-2.4.0-b34.jar +javax.jdo-3.2.0-m3.jar javax.servlet-api-3.1.0.jar javax.ws.rs-api-2.0.1.jar javolution-5.5.1.jar @@ -122,6 +134,7 @@ jline-2.14.6.jar joda-time-2.9.3.jar jodd-core-3.5.2.jar jpam-1.1.jar +json-1.8.jar json-smart-2.3.jar json4s-ast_2.12-3.5.3.jar json4s-core_2.12-3.5.3.jar @@ -172,8 +185,8 @@ okhttp-2.7.5.jar okhttp-3.8.1.jar okio-1.13.0.jar opencsv-2.3.jar -orc-core-1.5.4-nohive.jar -orc-mapreduce-1.5.4-nohive.jar +orc-core-1.5.4.jar +orc-mapreduce-1.5.4.jar orc-shims-1.5.4.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar @@ -183,7 +196,7 @@ parquet-common-1.10.0.jar parquet-encoding-1.10.0.jar parquet-format-2.4.0.jar parquet-hadoop-1.10.0.jar -parquet-hadoop-bundle-1.6.0.jar +parquet-hadoop-bundle-1.10.0.jar parquet-jackson-1.10.0.jar protobuf-java-2.5.0.jar py4j-0.10.8.1.jar @@ -198,16 +211,15 @@ shapeless_2.12-2.3.2.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar snakeyaml-1.18.jar -snappy-0.2.jar snappy-java-1.1.7.1.jar spire-macros_2.12-0.13.0.jar spire_2.12-0.13.0.jar stax-api-1.0.1.jar stax2-api-3.1.4.jar stream-2.7.0.jar -stringtemplate-3.2.1.jar super-csv-2.2.0.jar token-provider-1.0.1.jar +transaction-api-1.1.jar univocity-parsers-2.7.3.jar validation-api-1.1.0.Final.jar woodstox-core-5.0.3.jar diff --git a/examples/pom.xml b/examples/pom.xml index 0636406595f6e..2cc85ab35e20c 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -110,7 +110,7 @@ 3.7.0 - com.twitter + org.apache.parquet parquet-hadoop-bundle provided diff --git a/pom.xml b/pom.xml index de14d6add84ea..dc98b81f5cd66 100644 --- a/pom.xml +++ b/pom.xml @@ -123,18 +123,18 @@ ${hadoop.version} 3.4.6 2.7.1 - org.spark-project.hive + org.apache.hive - 1.2.1.spark2 + 2.3.4 - 1.2.1 + ${hive.version} 2.1.0 10.12.1.1 1.10.0 1.5.4 - nohive - 1.6.0 + 2.7.0 + 1.10.0 9.4.12.v20180830 3.1.0 0.9.3 @@ -165,14 +165,15 @@ 2.9.6 1.1.7.1 1.1.2 - 1.2.0-incubating + 1.10.0 + 1.13.0 1.10 2.4 2.6 3.8.1 - 3.2.10 + 4.1.17 3.0.11 2.22.2 2.9.3 @@ -456,6 +457,12 @@ commons-httpclient commons-httpclient ${httpclient.classic.version} + + + commons-logging + commons-logging + + org.apache.httpcomponents @@ -1361,6 +1368,22 @@ ${hive.group} hive-shims + + org.apache.hive + hive-storage-api + + + org.eclipse.jetty.aggregate + jetty-all + + + org.eclipse.jetty.orbit + javax.servlet + + + com.github.joshelser + dropwizard-metrics-hadoop-metrics2-reporter + org.apache.ant ant @@ -1374,12 +1397,16 @@ slf4j-api - org.slf4j - slf4j-log4j12 + org.apache.logging.log4j + log4j-web - log4j - log4j + org.apache.logging.log4j + log4j-1.2-api + + + org.apache.logging.log4j + log4j-slf4j-impl commons-logging @@ -1391,9 +1418,7 @@ ${hive.group} hive-exec - ${hive.version} ${hive.deps.scope} @@ -1403,6 +1428,14 @@ ${hive.group} hive-metastore + + ${hive.group} + hive-vector-code-gen + + + ${hive.group} + hive-llap-tez + ${hive.group} hive-shims @@ -1411,11 +1444,23 @@ ${hive.group} hive-ant + + org.apache.parquet + parquet-hadoop-bundle + + + org.apache.orc + orc-tools + ${hive.group} spark-client + + calcite-druid + org.apache.calcite + @@ -1443,6 +1488,10 @@ org.apache.avro avro-mapred + + org.apache.calcite.avatica + avatica + org.apache.calcite @@ -1473,16 +1522,24 @@ zookeeper - org.slf4j - slf4j-api + org.apache.logging.log4j + log4j-api - org.slf4j - slf4j-log4j12 + org.apache.logging.log4j + log4j-core - log4j - log4j + org.apache.logging.log4j + log4j-1.2-api + + + org.apache.logging.log4j + log4j-slf4j-impl + + + org.slf4j + slf4j-api commons-logging @@ -1585,6 +1642,14 @@ ${hive.group} hive-shims + + org.apache.hbase + hbase-client + + + HikariCP + com.zaxxer + org.apache.thrift libfb303 @@ -1601,6 +1666,18 @@ com.google.guava guava + + co.cask.tephra + tephra-api + + + co.cask.tephra + tephra-core + + + co.cask.tephra + tephra-hbase-compat-1.0 + org.slf4j slf4j-api @@ -1626,6 +1703,18 @@ ${hive.group} hive-shims + + org.apache.parquet + parquet-hadoop-bundle + + + tomcat + jasper-compiler + + + tomcat + jasper-runtime + commons-codec commons-codec @@ -1669,6 +1758,50 @@ + + ${hive.group} + hive-llap-common + ${hive.version} + ${hive.deps.scope} + + + org.slf4j + slf4j-api + + + + + + ${hive.group} + hive-llap-client + ${hive.version} + ${hive.deps.scope} + + + org.apache.zookeeper + zookeeper + + + org.apache.curator + curator-framework + + + org.apache.curator + apache-curator + + + org.slf4j + slf4j-api + + + + + + org.apache.hive + hive-storage-api + ${hive.storage.api.version} + + net.sf.jpam jpam @@ -1685,6 +1818,56 @@ + + org.apache.hive.shims + hive-shims-common + ${hive.version} + ${hive.deps.scope} + + + org.slf4j + slf4j-api + + + org.apache.logging.log4j + log4j-slf4j-impl + + + org.apache.thrift + libthrift + + + org.apache.curator + curator-framework + + + org.apache.zookeeper + zookeeper + + + + + + org.apache.hive.shims + hive-shims-0.23 + ${hive.version} + ${hive.deps.scope} + + + org.apache.hive.shims + hive-shims-common + + + org.slf4j + slf4j-api + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + + + + ${hive.group} hive-shims @@ -1737,7 +1920,6 @@ org.apache.orc orc-core ${orc.version} - ${orc.classifier} ${orc.deps.scope} @@ -1762,7 +1944,6 @@ org.apache.orc orc-mapreduce ${orc.version} - ${orc.classifier} ${orc.deps.scope} @@ -1812,7 +1993,7 @@ ${parquet.test.deps.scope} - com.twitter + org.apache.parquet parquet-hadoop-bundle ${hive.parquet.version} compile @@ -1834,6 +2015,10 @@ com.fasterxml.jackson.core jackson-databind + + org.apache.calcite.avatica + avatica + com.google.guava guava @@ -1865,20 +2050,36 @@ org.apache.calcite - calcite-avatica + calcite-druid ${calcite.version} - com.fasterxml.jackson.core - jackson-annotations + org.apache.calcite + calcite-core - com.fasterxml.jackson.core - jackson-core + org.apache.calcite + calcite-linq4j - com.fasterxml.jackson.core - jackson-databind + org.apache.calcite.avatica + avatica + + + com.google.guava + guava + + + + + + org.apache.calcite.avatica + avatica-core + ${avatica.version} + + + org.apache.calcite.avatica + avatica-metrics diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 7b3aad4d6ce35..213ca64522724 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -77,6 +77,7 @@ ${hive.group} hive-exec + core provided diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index d18df9955bb1f..91cc46e02af24 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -172,6 +172,7 @@ ${hive.group} hive-exec + core provided diff --git a/sql/core/pom.xml b/sql/core/pom.xml index ac5f1fc923e7d..dfec03832fafa 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -86,15 +86,17 @@ test + + org.apache.hive + hive-storage-api + org.apache.orc orc-core - ${orc.classifier} org.apache.orc orc-mapreduce - ${orc.classifier} org.apache.parquet diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java index 9bfad1e83ee7b..2f1925e69a337 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java @@ -19,7 +19,7 @@ import java.math.BigDecimal; -import org.apache.orc.storage.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.Decimal; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java index efca96e9ce580..e0a0da1664723 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java @@ -21,6 +21,7 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; @@ -30,7 +31,6 @@ import org.apache.orc.Reader; import org.apache.orc.TypeDescription; import org.apache.orc.mapred.OrcInputFormat; -import org.apache.orc.storage.ql.exec.vector.*; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.execution.vectorized.ColumnVectorUtils; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala index ee16b3ab07f5a..8e3a330ba44b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.execution.datasources.orc +import org.apache.hadoop.hive.serde2.io.{DateWritable, HiveDecimalWritable} import org.apache.hadoop.io._ import org.apache.orc.mapred.{OrcList, OrcMap, OrcStruct, OrcTimestamp} -import org.apache.orc.storage.serde2.io.{DateWritable, HiveDecimalWritable} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala index 0a64981b421c6..45c0d08fc8138 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -17,10 +17,10 @@ package org.apache.spark.sql.execution.datasources.orc -import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} -import org.apache.orc.storage.ql.io.sarg.SearchArgument.Builder -import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder -import org.apache.orc.storage.serde2.io.HiveDecimalWritable +import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable import org.apache.spark.sql.sources.{And, Filter} import org.apache.spark.sql.types._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala index 90d1268028096..ff578202a5029 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.execution.datasources.orc +import org.apache.hadoop.hive.common.`type`.HiveDecimal +import org.apache.hadoop.hive.serde2.io.{DateWritable, HiveDecimalWritable} import org.apache.hadoop.io._ import org.apache.orc.TypeDescription import org.apache.orc.mapred.{OrcList, OrcMap, OrcStruct, OrcTimestamp} -import org.apache.orc.storage.common.`type`.HiveDecimal -import org.apache.orc.storage.serde2.io.{DateWritable, HiveDecimalWritable} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.SpecializedGetters diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 052a5e757c445..42d538c756fd1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1371,7 +1371,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { // if (isUsingHiveMetastore) { // assert(storageFormat.properties.get("path") === expected) // } - assert(storageFormat.locationUri === Some(expected)) + assert(Some(storageFormat.locationUri.get.getPath) === Some(expected.getPath)) } // set table location sql("ALTER TABLE dbx.tab1 SET LOCATION '/path/to/your/lovely/heart'") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index ee12f30892436..662648f4193ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -22,7 +22,7 @@ import java.sql.{Date, Timestamp} import scala.collection.JavaConverters._ -import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} +import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -36,7 +36,7 @@ import org.apache.spark.sql.types._ * A test suite that tests Apache ORC filter API based filter pushdown optimization. * OrcFilterSuite and HiveOrcFilterSuite is logically duplicated to provide the same test coverage. * The difference are the packages containing 'Predicate' and 'SearchArgument' classes. - * - OrcFilterSuite uses 'org.apache.orc.storage.ql.io.sarg' package. + * - OrcFilterSuite uses 'org.apache.hadoop.hive.ql.io.sarg' package. * - HiveOrcFilterSuite uses 'org.apache.hadoop.hive.ql.io.sarg' package. */ class OrcFilterSuite extends OrcTest with SharedSQLContext { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index 48910103e702a..36ba2c13ccdb8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -171,11 +171,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { // Hive 0.11 and RLE v2 is introduced in Hive 0.12 ORC with more improvements. // For more details, see https://orc.apache.org/specification/ assert(stripe.getColumns(1).getKind === DICTIONARY_V2) - if (isSelective) { - assert(stripe.getColumns(2).getKind === DIRECT_V2) - } else { - assert(stripe.getColumns(2).getKind === DICTIONARY_V2) - } + assert(stripe.getColumns(2).getKind === DIRECT_V2) // Floating point types are stored with DIRECT encoding in IEEE 754 floating // point bit layout. assert(stripe.getColumns(3).getKind === DIRECT) diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 4a4629fae2706..f7deeff3a0bfd 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -55,6 +55,11 @@ ${hive.group} hive-cli + + ${hive.group} + hive-exec + core + ${hive.group} hive-jdbc @@ -63,6 +68,10 @@ ${hive.group} hive-beeline + + org.apache.hive.shims + hive-shims-common + org.eclipse.jetty jetty-server diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java index 10000f12ab329..de57048496436 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.shims.HadoopShims.KerberosNameShim; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.thrift.DBTokenStore; +import org.apache.hadoop.hive.thrift.HiveDelegationTokenManager; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; import org.apache.hadoop.security.SecurityUtil; @@ -91,6 +92,7 @@ public String getAuthName() { private String authTypeStr; private final String transportMode; private final HiveConf conf; + private HiveDelegationTokenManager delegationTokenManager = null; public static final String HS2_PROXY_USER = "hive.server2.proxy.user"; public static final String HS2_CLIENT_TOKEN = "hiveserver2ClientToken"; @@ -143,6 +145,9 @@ public HiveAuthFactory(HiveConf conf) throws TTransportException, IOException { saslServer = new HadoopThriftAuthBridge.Server(); } + // Start delegation token manager + delegationTokenManager = new HiveDelegationTokenManager(); + // start delegation token manager try { // rawStore is only necessary for DBTokenStore @@ -155,7 +160,8 @@ public HiveAuthFactory(HiveConf conf) throws TTransportException, IOException { rawStore = baseHandler.getMS(); } - saslServer.startDelegationTokenSecretManager(conf, rawStore, ServerMode.HIVESERVER2); + delegationTokenManager.startDelegationTokenSecretManager(conf, rawStore, ServerMode.HIVESERVER2); + saslServer.setSecretManager(delegationTokenManager.getSecretManager()); } catch (MetaException|IOException e) { throw new TTransportException("Failed to start token manager", e); @@ -312,14 +318,16 @@ public static TServerSocket getServerSSLSocket(String hiveHost, int portNum, Str } // retrieve delegation token for the given user - public String getDelegationToken(String owner, String renewer) throws HiveSQLException { - if (saslServer == null) { + public String getDelegationToken(String owner, String renewer, String remoteAddr) + throws HiveSQLException { + if (delegationTokenManager == null) { throw new HiveSQLException( "Delegation token only supported over kerberos authentication", "08S01"); } try { - String tokenStr = saslServer.getDelegationTokenWithService(owner, renewer, HS2_CLIENT_TOKEN); + String tokenStr = delegationTokenManager.getDelegationTokenWithService(owner, renewer, + HS2_CLIENT_TOKEN, remoteAddr); if (tokenStr == null || tokenStr.isEmpty()) { throw new HiveSQLException( "Received empty retrieving delegation token for user " + owner, "08S01"); @@ -335,12 +343,12 @@ public String getDelegationToken(String owner, String renewer) throws HiveSQLExc // cancel given delegation token public void cancelDelegationToken(String delegationToken) throws HiveSQLException { - if (saslServer == null) { + if (delegationTokenManager == null) { throw new HiveSQLException( "Delegation token only supported over kerberos authentication", "08S01"); } try { - saslServer.cancelDelegationToken(delegationToken); + delegationTokenManager.cancelDelegationToken(delegationToken); } catch (IOException e) { throw new HiveSQLException( "Error canceling delegation token " + delegationToken, "08S01", e); @@ -348,12 +356,12 @@ public void cancelDelegationToken(String delegationToken) throws HiveSQLExceptio } public void renewDelegationToken(String delegationToken) throws HiveSQLException { - if (saslServer == null) { + if (delegationTokenManager == null) { throw new HiveSQLException( "Delegation token only supported over kerberos authentication", "08S01"); } try { - saslServer.renewDelegationToken(delegationToken); + delegationTokenManager.renewDelegationToken(delegationToken); } catch (IOException e) { throw new HiveSQLException( "Error renewing delegation token " + delegationToken, "08S01", e); @@ -361,12 +369,12 @@ public void renewDelegationToken(String delegationToken) throws HiveSQLException } public String getUserFromToken(String delegationToken) throws HiveSQLException { - if (saslServer == null) { + if (delegationTokenManager == null) { throw new HiveSQLException( "Delegation token only supported over kerberos authentication", "08S01"); } try { - return saslServer.getUserFromToken(delegationToken); + return delegationTokenManager.getUserFromToken(delegationToken); } catch (IOException e) { throw new HiveSQLException( "Error extracting user from delegation token " + delegationToken, "08S01", e); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java index 70c27948de61b..cccffa0045bd6 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -32,6 +32,7 @@ import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.VariableSubstitution; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Schema; import org.apache.hadoop.hive.ql.CommandNeedRetryException; @@ -40,11 +41,10 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.VariableSubstitution; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; @@ -73,7 +73,7 @@ public class SQLOperation extends ExecuteStatementOperation { private CommandProcessorResponse response; private TableSchema resultSchema = null; private Schema mResultSchema = null; - private SerDe serde = null; + private AbstractSerDe serde = null; private boolean fetchStarted = false; public SQLOperation(HiveSession parentSession, String statement, Map + SessionState.get().getHiveVariables()).substitute(sqlOperationConf, statement); response = driver.compileAndRespond(subStatement); if (0 != response.getResponseCode()) { throw toSQLException("Error while compiling statement", response); @@ -389,7 +390,7 @@ private RowSet decodeFromString(List rows, RowSet rowSet) return rowSet; } - private SerDe getSerDe() throws SQLException { + private AbstractSerDe getSerDe() throws SQLException { if (serde != null) { return serde; } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java index 745f385e87f78..98c22ede44349 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java @@ -36,16 +36,16 @@ import org.apache.hadoop.hive.common.cli.IHiveFileProcessor; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.conf.VariableSubstitution; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.exec.FetchFormatter; -import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.history.HiveHistory; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.VariableSubstitution; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.thrift.ThriftFormatter; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hive.common.util.HiveVersionInfo; import org.apache.hive.service.auth.HiveAuthFactory; @@ -121,9 +121,8 @@ public HiveSessionImpl(TProtocolVersion protocol, String username, String passwo hiveConf.set(ConfVars.HIVESESSIONID.varname, sessionHandle.getHandleIdentifier().toString()); // Use thrift transportable formatter - hiveConf.set(ListSinkOperator.OUTPUT_FORMATTER, - FetchFormatter.ThriftFormatter.class.getName()); - hiveConf.setInt(ListSinkOperator.OUTPUT_PROTOCOL, protocol.getValue()); + hiveConf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName()); + hiveConf.setInt(SerDeUtils.LIST_SINK_OUTPUT_PROTOCOL, protocol.getValue()); } @Override @@ -142,7 +141,8 @@ public void open(Map sessionConfMap) throws HiveSQLException { sessionState.setIsHiveServerQuery(true); SessionState.start(sessionState); try { - sessionState.reloadAuxJars(); + sessionState.loadAuxJars(); + sessionState.loadReloadableAuxJars(); } catch (IOException e) { String msg = "Failed to load reloadable jar file path: " + e; LOG.error(msg, e); @@ -231,6 +231,7 @@ private void configureSession(Map sessionConfMap) throws HiveSQL // setConf(varname, propName, varvalue, true) when varname.startsWith(HIVECONF_PREFIX) public static int setVariable(String varname, String varvalue) throws Exception { SessionState ss = SessionState.get(); + VariableSubstitution substitution =new VariableSubstitution(() -> ss.getHiveVariables()); if (varvalue.contains("\n")){ ss.err.println("Warning: Value had a \\n character in it."); } @@ -240,19 +241,17 @@ public static int setVariable(String varname, String varvalue) throws Exception return 1; } else if (varname.startsWith(SYSTEM_PREFIX)){ String propName = varname.substring(SYSTEM_PREFIX.length()); - System.getProperties().setProperty(propName, - new VariableSubstitution().substitute(ss.getConf(),varvalue)); + System.getProperties().setProperty(propName, substitution.substitute(ss.getConf(),varvalue)); } else if (varname.startsWith(HIVECONF_PREFIX)){ String propName = varname.substring(HIVECONF_PREFIX.length()); setConf(varname, propName, varvalue, true); } else if (varname.startsWith(HIVEVAR_PREFIX)) { String propName = varname.substring(HIVEVAR_PREFIX.length()); - ss.getHiveVariables().put(propName, - new VariableSubstitution().substitute(ss.getConf(),varvalue)); + ss.getHiveVariables().put(propName, substitution.substitute(ss.getConf(),varvalue)); } else if (varname.startsWith(METACONF_PREFIX)) { String propName = varname.substring(METACONF_PREFIX.length()); Hive hive = Hive.get(ss.getConf()); - hive.setMetaConf(propName, new VariableSubstitution().substitute(ss.getConf(), varvalue)); + hive.setMetaConf(propName, substitution.substitute(ss.getConf(), varvalue)); } else { setConf(varname, varname, varvalue, true); } @@ -262,8 +261,10 @@ public static int setVariable(String varname, String varvalue) throws Exception // returns non-null string for validation fail private static void setConf(String varname, String key, String varvalue, boolean register) throws IllegalArgumentException { + VariableSubstitution substitution = + new VariableSubstitution(() -> SessionState.get().getHiveVariables()); HiveConf conf = SessionState.get().getConf(); - String value = new VariableSubstitution().substitute(conf, varvalue); + String value = substitution.substitute(conf, varvalue); if (conf.getBoolVar(HiveConf.ConfVars.HIVECONFVALIDATION)) { HiveConf.ConfVars confVars = HiveConf.getConfVars(key); if (confVars != null) { @@ -808,7 +809,7 @@ public void setIpAddress(String ipAddress) { public String getDelegationToken(HiveAuthFactory authFactory, String owner, String renewer) throws HiveSQLException { HiveAuthFactory.verifyProxyAccess(getUsername(), owner, getIpAddress(), getHiveConf()); - return authFactory.getDelegationToken(owner, renewer); + return authFactory.getDelegationToken(owner, renewer, getIpAddress()); } @Override diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java index a30be2bc06b9e..a58288b00840b 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java @@ -29,18 +29,18 @@ import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.LogUtils; import org.apache.hadoop.hive.common.LogUtils.LogInitializationException; +import org.apache.hadoop.hive.common.JvmPauseMonitor; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.service.CompositeService; import org.apache.hive.service.cli.CLIService; import org.apache.hive.service.cli.thrift.ThriftBinaryCLIService; import org.apache.hive.service.cli.thrift.ThriftCLIService; import org.apache.hive.service.cli.thrift.ThriftHttpCLIService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.spark.util.ShutdownHookManager; @@ -49,7 +49,7 @@ * */ public class HiveServer2 extends CompositeService { - private static final Log LOG = LogFactory.getLog(HiveServer2.class); + private static final Logger LOG = LoggerFactory.getLogger(HiveServer2.class); private CLIService cliService; private ThriftCLIService thriftCLIService; @@ -124,7 +124,15 @@ private static void startHiveServer2() throws Throwable { server = new HiveServer2(); server.init(hiveConf); server.start(); - ShimLoader.getHadoopShims().startPauseMonitor(hiveConf); + + try { + JvmPauseMonitor pauseMonitor = new JvmPauseMonitor(hiveConf); + pauseMonitor.start(); + } catch (Throwable t) { + LOG.warn("Could not initiate the JvmPauseMonitor thread." + " GCs and Pauses may not be " + + "warned upon.", t); + } + break; } catch (Throwable throwable) { if (server != null) { @@ -283,7 +291,7 @@ public void execute() { try { startHiveServer2(); } catch (Throwable t) { - LOG.fatal("Error starting HiveServer2", t); + LOG.error("Error starting HiveServer2", t); System.exit(-1); } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index bb96cea2b0ae1..2dfd5a48d833c 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -25,7 +25,6 @@ import scala.collection.JavaConverters._ import jline.console.ConsoleReader import jline.console.history.FileHistory import org.apache.commons.lang3.StringUtils -import org.apache.commons.logging.LogFactory import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.cli.{CliDriver, CliSessionState, OptionsProcessor} import org.apache.hadoop.hive.common.{HiveInterruptCallback, HiveInterruptUtils} @@ -37,6 +36,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.log4j.Level import org.apache.thrift.transport.TSocket +import org.slf4j.LoggerFactory import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil @@ -298,7 +298,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { private val sessionState = SessionState.get().asInstanceOf[CliSessionState] - private val LOG = LogFactory.getLog(classOf[SparkSQLCLIDriver]) + private val LOG = LoggerFactory.getLogger(classOf[SparkSQLCLIDriver]) private val console = new SessionState.LogHelper(LOG) diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index fe144c76af7d0..a8f53bc0f5b6e 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -37,7 +37,7 @@ - com.twitter + org.apache.parquet parquet-hadoop-bundle @@ -77,44 +77,35 @@ test-jar test - - ${hive.group} - hive-exec - + hive-llap-common ${hive.group} - hive-metastore + hive-llap-client + + + org.apache.hive.shims + hive-shims-common + + + org.apache.hive.shims + hive-shims-0.23 - org.apache.avro @@ -133,11 +124,15 @@ org.apache.calcite - calcite-avatica + calcite-core org.apache.calcite - calcite-core + calcite-druid + + + org.apache.calcite.avatica + avatica-core org.apache.httpcomponents diff --git a/sql/hive/src/main/java/org/apache/hadoop/hive/ql/io/orc/SparkOrcNewRecordReader.java b/sql/hive/src/main/java/org/apache/hadoop/hive/ql/io/orc/SparkOrcNewRecordReader.java index f093637d412f9..eea388b700b44 100644 --- a/sql/hive/src/main/java/org/apache/hadoop/hive/ql/io/orc/SparkOrcNewRecordReader.java +++ b/sql/hive/src/main/java/org/apache/hadoop/hive/ql/io/orc/SparkOrcNewRecordReader.java @@ -22,6 +22,7 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.orc.OrcProto; import java.io.IOException; import java.util.List; diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index c9fc3d4a02c4b..b3f068a8090ac 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -24,19 +24,19 @@ import scala.collection.JavaConverters._ import scala.language.implicitConversions import scala.reflect.ClassTag +import com.esotericsoftware.kryo.Kryo +import com.esotericsoftware.kryo.io.{Input, Output} import com.google.common.base.Objects import org.apache.avro.Schema import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.hadoop.hive.ql.exec.{UDF, Utilities} +import org.apache.hadoop.hive.ql.exec.{SerializationUtilities, UDF} import org.apache.hadoop.hive.ql.plan.{FileSinkDesc, TableDesc} import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMacro import org.apache.hadoop.hive.serde2.ColumnProjectionUtils import org.apache.hadoop.hive.serde2.avro.{AvroGenericRecordWritable, AvroSerdeUtils} import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector import org.apache.hadoop.io.Writable -import org.apache.hive.com.esotericsoftware.kryo.Kryo -import org.apache.hive.com.esotericsoftware.kryo.io.{Input, Output} import org.apache.spark.internal.Logging import org.apache.spark.sql.types.Decimal @@ -168,12 +168,12 @@ private[hive] object HiveShim { } def deserializePlan[UDFType](is: java.io.InputStream, clazz: Class[_]): UDFType = { - deserializeObjectByKryo(Utilities.runtimeSerializationKryo.get(), is, clazz) + deserializeObjectByKryo(SerializationUtilities.borrowKryo(), is, clazz) .asInstanceOf[UDFType] } def serializePlan(function: AnyRef, out: java.io.OutputStream): Unit = { - serializeObjectByKryo(Utilities.runtimeSerializationKryo.get(), function, out) + serializeObjectByKryo(SerializationUtilities.borrowKryo(), function, out) } def writeExternal(out: java.io.ObjectOutput) { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 597eef129f63e..0305936ff8d80 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -58,7 +58,7 @@ private[spark] object HiveUtils extends Logging { } /** The version of hive used internally by Spark SQL. */ - val builtinHiveVersion: String = "1.2.1" + val builtinHiveVersion: String = "2.3.4" val HIVE_METASTORE_VERSION = buildConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + @@ -209,8 +209,6 @@ private[spark] object HiveUtils extends Logging { ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT -> TimeUnit.MILLISECONDS, ConfVars.HIVES_AUTO_PROGRESS_TIMEOUT -> TimeUnit.SECONDS, ConfVars.HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL -> TimeUnit.MILLISECONDS, - ConfVars.HIVE_STATS_JDBC_TIMEOUT -> TimeUnit.SECONDS, - ConfVars.HIVE_STATS_RETRIES_WAIT -> TimeUnit.MILLISECONDS, ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES -> TimeUnit.SECONDS, ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT -> TimeUnit.MILLISECONDS, ConfVars.HIVE_ZOOKEEPER_CONNECTION_BASESLEEPTIME -> TimeUnit.MILLISECONDS, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 5e9b324a168e0..a197f7417c5b7 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -51,6 +51,7 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX} +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.client.HiveClientImpl._ import org.apache.spark.sql.types._ import org.apache.spark.util.{CircularBuffer, Utils} @@ -187,7 +188,15 @@ private[hive] class HiveClientImpl( } /** Returns the configuration for the current session. */ - def conf: HiveConf = state.getConf + def conf: HiveConf = { + val hiveConf = state.getConf + // Hive changed the default of datanucleus.schema.autoCreateAll from true to false and + // hive.metastore.schema.verification from false to true since 2.0 + // For details, see the JIRA HIVE-6113, HIVE-12463 and HIVE-1841 + hiveConf.setBoolean("hive.metastore.schema.verification", false) + hiveConf.setBoolean("datanucleus.schema.autoCreateAll", true) + hiveConf + } private val userName = conf.getUser @@ -433,6 +442,8 @@ private[hive] class HiveClientImpl( case HiveTableType.EXTERNAL_TABLE => CatalogTableType.EXTERNAL case HiveTableType.MANAGED_TABLE => CatalogTableType.MANAGED case HiveTableType.VIRTUAL_VIEW => CatalogTableType.VIEW + case HiveTableType.MATERIALIZED_VIEW => + throw new AnalysisException("Hive materialized view is not supported.") case HiveTableType.INDEX_TABLE => throw new AnalysisException("Hive index table is not supported.") }, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index ca98c30add168..b27dddd7f665f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -192,6 +192,7 @@ private[hive] class IsolatedClientLoader( (name.startsWith("com.google") && !name.startsWith("com.google.cloud")) || name.startsWith("java.lang.") || name.startsWith("java.net") || + name.startsWith("org.apache.derby.") || sharedPrefixes.exists(name.startsWith) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala index 078968ed0145f..e83d3877f9552 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala @@ -227,7 +227,7 @@ private[hive] trait SaveAsHiveFile extends DataWritingCommand { // SPARK-20594: This is a walk-around fix to resolve a Hive bug. Hive requires that the // staging directory needs to avoid being deleted when users set hive.exec.stagingdir // under the table directory. - if (FileUtils.isSubDir(new Path(stagingPathName), inputPath, fs) && + if (isSubDir(new Path(stagingPathName), inputPath, fs) && !stagingPathName.stripPrefix(inputPathName).stripPrefix(File.separator).startsWith(".")) { logDebug(s"The staging dir '$stagingPathName' should be a child directory starts " + "with '.' to avoid being deleted if we set hive.exec.stagingdir under the table " + @@ -253,6 +253,12 @@ private[hive] trait SaveAsHiveFile extends DataWritingCommand { dir } + private def isSubDir(p1: Path, p2: Path, fs: FileSystem): Boolean = { + val path1 = fs.makeQualified(p1).toString + val path2 = fs.makeQualified(p2).toString + if (path1.startsWith(path2)) true else false + } + private def executionId: String = { val rand: Random = new Random val format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS", Locale.US) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 4a8450901e3a7..21d87b0701bbf 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -336,7 +336,7 @@ private[hive] case class HiveUDAFFunction( funcWrapper.createFunction[AbstractGenericUDAFResolver]() } - val parameterInfo = new SimpleGenericUDAFParameterInfo(inputInspectors, false, false) + val parameterInfo = new SimpleGenericUDAFParameterInfo(inputInspectors, false, false, false) resolver.getEvaluator(parameterInfo) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index 4e641e34c18d9..6efa2fb5771be 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -24,10 +24,14 @@ import java.util.Properties import scala.collection.JavaConverters._ import scala.util.control.NonFatal +import com.esotericsoftware.kryo.Kryo +import com.esotericsoftware.kryo.io.Output +import org.apache.commons.codec.binary.Base64 import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.ql.io.orc._ +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument import org.apache.hadoop.hive.serde2.objectinspector.{SettableStructObjectInspector, StructObjectInspector} import org.apache.hadoop.hive.serde2.typeinfo.{StructTypeInfo, TypeInfoUtils} import org.apache.hadoop.io.{NullWritable, Writable} @@ -130,7 +134,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable if (sparkSession.sessionState.conf.orcFilterPushDown) { // Sets pushed predicates OrcFilters.createFilter(requiredSchema, filters.toArray).foreach { f => - hadoopConf.set(OrcFileFormat.SARG_PUSHDOWN, f.toKryo) + hadoopConf.set(OrcFileFormat.SARG_PUSHDOWN, toKryo(f)) hadoopConf.setBoolean(ConfVars.HIVEOPTINDEXFILTER.varname, true) } } @@ -197,6 +201,13 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable case _ => false } + + def toKryo(sarg: SearchArgument): String = { + val out = new Output(4 * 1024, 10 * 1024 * 1024) + new Kryo().writeObject(out, sarg) + out.close() + Base64.encodeBase64String(out.toBytes) + } } private[orc] class OrcSerializer(dataSchema: StructType, conf: Configuration) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala index a82576a233acd..81c82a6c126a8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala @@ -17,9 +17,10 @@ package org.apache.spark.sql.hive.orc -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument +import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.datasources.orc.OrcFilters.buildTree @@ -82,6 +83,39 @@ private[orc] object OrcFilters extends Logging { createBuilder(dataTypeMap, expression, builder, canPartialPushDownConjuncts = true) } + /** + * Get PredicateLeafType which is corresponding to the given DataType. + */ + private def getPredicateLeafType(dataType: DataType) = dataType match { + case BooleanType => PredicateLeaf.Type.BOOLEAN + case ByteType | ShortType | IntegerType | LongType => PredicateLeaf.Type.LONG + case FloatType | DoubleType => PredicateLeaf.Type.FLOAT + case StringType => PredicateLeaf.Type.STRING + case DateType => PredicateLeaf.Type.DATE + case TimestampType => PredicateLeaf.Type.TIMESTAMP + case _: DecimalType => PredicateLeaf.Type.DECIMAL + case _ => throw new UnsupportedOperationException(s"DataType: ${dataType.catalogString}") + } + + /** + * Cast literal values for filters. + * + * We need to cast to long because ORC raises exceptions + * at 'checkLiteralType' of SearchArgumentImpl.java. + */ + private def castLiteralValue(value: Any, dataType: DataType): Any = dataType match { + case ByteType | ShortType | IntegerType | LongType => + value.asInstanceOf[Number].longValue + case FloatType | DoubleType => + value.asInstanceOf[Number].doubleValue() + case _: DecimalType => + val decimal = value.asInstanceOf[java.math.BigDecimal] + val decimalWritable = new HiveDecimalWritable(decimal.longValue) + decimalWritable.mutateEnforcePrecisionScale(decimal.precision, decimal.scale) + decimalWritable + case _ => value + } + /** * @param dataTypeMap a map from the attribute name to its data type. * @param expression the input filter predicates. @@ -96,6 +130,9 @@ private[orc] object OrcFilters extends Logging { expression: Filter, builder: Builder, canPartialPushDownConjuncts: Boolean): Option[Builder] = { + def getType(attribute: String): PredicateLeaf.Type = + getPredicateLeafType(dataTypeMap(attribute)) + def isSearchableType(dataType: DataType): Boolean = dataType match { // Only the values in the Spark types below can be recognized by // the `SearchArgumentImpl.BuilderImpl.boxLiteral()` method. @@ -160,31 +197,39 @@ private[orc] object OrcFilters extends Logging { // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - Some(builder.startAnd().equals(attribute, value).end()) + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startAnd().equals(attribute, getType(attribute), castedValue).end()) case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - Some(builder.startAnd().nullSafeEquals(attribute, value).end()) + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startAnd().nullSafeEquals(attribute, getType(attribute), castedValue).end()) case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - Some(builder.startAnd().lessThan(attribute, value).end()) + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startAnd().lessThan(attribute, getType(attribute), castedValue).end()) case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - Some(builder.startAnd().lessThanEquals(attribute, value).end()) + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startAnd().lessThanEquals(attribute, getType(attribute), castedValue).end()) case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - Some(builder.startNot().lessThanEquals(attribute, value).end()) + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startNot().lessThanEquals(attribute, getType(attribute), castedValue).end()) case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - Some(builder.startNot().lessThan(attribute, value).end()) + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startNot().lessThan(attribute, getType(attribute), castedValue).end()) case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - Some(builder.startAnd().isNull(attribute).end()) + Some(builder.startAnd().isNull(attribute, getType(attribute)).end()) case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - Some(builder.startNot().isNull(attribute).end()) + Some(builder.startNot().isNull(attribute, getType(attribute)).end()) case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) => - Some(builder.startAnd().in(attribute, values.map(_.asInstanceOf[AnyRef]): _*).end()) + val castedValues = values.map(castLiteralValue(_, dataTypeMap(attribute))) + Some(builder.startAnd().in( + attribute, getType(attribute), castedValues.map(_.asInstanceOf[AnyRef]): _*).end()) case _ => None } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 23dd350d4b2ce..c228ea9b766af 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -62,6 +62,10 @@ object TestHive // SPARK-8910 .set(UI_ENABLED, false) .set("spark.unsafe.exceptionOnMemoryLeak", "true") + // HIVE-12320 + .set("spark.hadoop.hive.metastore.disallow.incompatible.col.type.changes", "false") + // To compatible with avatica-core:1.13.0 + .set("spark.hadoop.hive.cbo.enable", "false") // Disable ConvertToLocalRelation for better test coverage. Test cases built on // LocalRelation will exercise the optimization rules better by disabling it as // this rule may potentially block testing of other optimization rules such as diff --git a/sql/hive/src/test/resources/hive-contrib-0.13.1.jar b/sql/hive/src/test/resources/hive-contrib-0.13.1.jar deleted file mode 100644 index ce0740d9245a7..0000000000000 Binary files a/sql/hive/src/test/resources/hive-contrib-0.13.1.jar and /dev/null differ diff --git a/sql/hive/src/test/resources/hive-contrib-2.3.4.jar b/sql/hive/src/test/resources/hive-contrib-2.3.4.jar new file mode 100644 index 0000000000000..01c5a249af07e Binary files /dev/null and b/sql/hive/src/test/resources/hive-contrib-2.3.4.jar differ diff --git a/sql/hive/src/test/resources/hive-hcatalog-core-0.13.1.jar b/sql/hive/src/test/resources/hive-hcatalog-core-0.13.1.jar deleted file mode 100644 index 37af9aafad8a4..0000000000000 Binary files a/sql/hive/src/test/resources/hive-hcatalog-core-0.13.1.jar and /dev/null differ diff --git a/sql/hive/src/test/resources/hive-hcatalog-core-2.3.4.jar b/sql/hive/src/test/resources/hive-hcatalog-core-2.3.4.jar new file mode 100644 index 0000000000000..d67afda89b087 Binary files /dev/null and b/sql/hive/src/test/resources/hive-hcatalog-core-2.3.4.jar differ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala index f262ef62be036..48e57137c87ff 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala @@ -58,11 +58,11 @@ class ClasspathDependenciesSuite extends SparkFunSuite { } test("shaded Protobuf") { - assertLoads("org.apache.hive.com.google.protobuf.ServiceException") + assertLoads("com.google.protobuf.ServiceException") } test("shaded Kryo") { - assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo") + assertLoads("com.esotericsoftware.kryo.Kryo") } test("hive-common") { @@ -81,7 +81,7 @@ class ClasspathDependenciesSuite extends SparkFunSuite { } test("parquet-hadoop-bundle") { - assertLoads("parquet.hadoop.ParquetOutputFormat") - assertLoads("parquet.hadoop.ParquetInputFormat") + assertLoads("org.apache.parquet.hadoop.ParquetOutputFormat") + assertLoads("org.apache.parquet.hadoop.ParquetInputFormat") } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index dd0e1bd0fe303..efc86c80bf5a8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -179,6 +179,8 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--name", "prepare testing tables", "--master", "local[2]", "--conf", "spark.ui.enabled=false", + "--conf", "spark.sql.hive.metastore.version=1.2.1", + "--conf", "spark.sql.hive.metastore.jars=maven", "--conf", "spark.master.rest.enabled=false", "--conf", s"spark.sql.warehouse.dir=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", @@ -196,6 +198,8 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--name", "HiveExternalCatalog backward compatibility test", "--master", "local[2]", "--conf", "spark.ui.enabled=false", + "--conf", "spark.sql.hive.metastore.version=1.2.1", + "--conf", "spark.sql.hive.metastore.jars=maven", "--conf", "spark.master.rest.enabled=false", "--conf", s"spark.sql.warehouse.dir=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 5c9261c206ea7..018e0111d52ee 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -206,7 +206,8 @@ class DataSourceWithHiveMetastoreCatalogSuite assert(columns.map(_.dataType) === Seq(DecimalType(10, 3), StringType)) checkAnswer(table("t"), testDF) - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === Seq("1.1\t1", "2.1\t2")) + assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === + Seq("1.100\t1", "2.100\t2")) } } @@ -239,7 +240,7 @@ class DataSourceWithHiveMetastoreCatalogSuite checkAnswer(table("t"), testDF) assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.1\t1", "2.1\t2")) + Seq("1.100\t1", "2.100\t2")) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala index 470c6a342b4dd..7d743bb259864 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala @@ -17,8 +17,14 @@ package org.apache.spark.sql.hive +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.parquet.HadoopReadOptions +import org.apache.parquet.hadoop.ParquetFileReader +import org.apache.parquet.hadoop.util.HadoopInputFile + import org.apache.spark.sql.{QueryTest, Row} -import org.apache.spark.sql.execution.datasources.parquet.ParquetTest +import org.apache.spark.sql.execution.datasources.parquet.{ParquetTest, SpecificParquetRecordReaderBase} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf @@ -106,4 +112,20 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton } } } + + test("SPARK-24766 decimal type should generate stats after upgrade Hive's parquet to 1.10.0") { + withTempDir { dir => + val path = dir.getCanonicalPath + sql(s"INSERT OVERWRITE LOCAL DIRECTORY '$path' STORED AS PARQUET " + + "SELECT CAST(1 AS decimal) AS decimal1") + + val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0) + val conf = new Configuration() + val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), conf) + val parquetReadOptions = HadoopReadOptions.builder(conf).build() + val m = ParquetFileReader.open(hadoopInputFile, parquetReadOptions) + m.close() + assert(!m.getFooter.getBlocks.get(0).getColumns.get(0).getStatistics.isEmpty) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala index a716f739b5c20..e17a7ddcbf8df 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala @@ -16,7 +16,6 @@ */ package org.apache.spark.sql.hive -import scala.collection.JavaConverters._ import scala.language.implicitConversions import org.apache.hadoop.conf.Configuration @@ -35,10 +34,10 @@ class HiveShimSuite extends SparkFunSuite { // test when READ_COLUMN_NAMES_CONF_STR is empty HiveShim.appendReadColumns(conf, ids, names) - assert(names.asJava === ColumnProjectionUtils.getReadColumnNames(conf)) + assert(names === ColumnProjectionUtils.getReadColumnNames(conf)) // test when READ_COLUMN_NAMES_CONF_STR is non-empty HiveShim.appendReadColumns(conf, moreIds, moreNames) - assert((names ++ moreNames).asJava === ColumnProjectionUtils.getReadColumnNames(conf)) + assert((names ++ moreNames) === ColumnProjectionUtils.getReadColumnNames(conf)) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index d3640086e74af..39e99ecd1eed7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -108,8 +108,8 @@ class HiveSparkSubmitSuite val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) val jar2 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassB")) - val jar3 = TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath - val jar4 = TestHive.getHiveFile("hive-hcatalog-core-0.13.1.jar").getCanonicalPath + val jar3 = TestHive.getHiveFile("hive-contrib-2.3.4.jar").getCanonicalPath + val jar4 = TestHive.getHiveFile("hive-hcatalog-core-2.3.4.jar").getCanonicalPath val jarsString = Seq(jar1, jar2, jar3, jar4).map(j => j.toString).mkString(",") val args = Seq( "--class", SparkSubmitClassLoaderTest.getClass.getName.stripSuffix("$"), @@ -314,7 +314,7 @@ class HiveSparkSubmitSuite "--master", "local-cluster[2,1,1024]", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", - "--jars", TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath, + "--jars", TestHive.getHiveFile("hive-contrib-2.3.4.jar").getCanonicalPath, unusedJar.toString) runSparkSubmit(argsForCreateTable) @@ -456,7 +456,7 @@ object TemporaryHiveUDFTest extends Logging { // Load a Hive UDF from the jar. logInfo("Registering a temporary Hive UDF provided in a jar.") - val jar = hiveContext.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath + val jar = hiveContext.getHiveFile("hive-contrib-2.3.4.jar").getCanonicalPath hiveContext.sql( s""" |CREATE TEMPORARY FUNCTION example_max @@ -494,7 +494,7 @@ object PermanentHiveUDFTest1 extends Logging { // Load a Hive UDF from the jar. logInfo("Registering a permanent Hive UDF provided in a jar.") - val jar = hiveContext.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath + val jar = hiveContext.getHiveFile("hive-contrib-2.3.4.jar").getCanonicalPath hiveContext.sql( s""" |CREATE FUNCTION example_max @@ -531,7 +531,7 @@ object PermanentHiveUDFTest2 extends Logging { val hiveContext = new TestHiveContext(sc) // Load a Hive UDF from the jar. logInfo("Write the metadata of a permanent Hive UDF into metastore.") - val jar = hiveContext.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath + val jar = hiveContext.getHiveFile("hive-contrib-2.3.4.jar").getCanonicalPath val function = CatalogFunction( FunctionIdentifier("example_max"), "org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index db2024e8b5d16..b6630d1f93c77 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -100,8 +100,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto .asInstanceOf[HiveTableRelation] val properties = relation.tableMeta.ignoredProperties - assert(properties("totalSize").toLong <= 0, "external table totalSize must be <= 0") - assert(properties("rawDataSize").toLong <= 0, "external table rawDataSize must be <= 0") + assert(properties("totalSize").toLong > 0, "external table totalSize must be > 0") + assert(properties.get("rawDataSize").isEmpty) val sizeInBytes = relation.stats.sizeInBytes assert(sizeInBytes === BigInt(file1.length() + file2.length())) @@ -833,15 +833,10 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto // ALTER TABLE SET/UNSET TBLPROPERTIES invalidates some Hive specific statistics, but not // Spark specific statistics. This is triggered by the Hive alterTable API. val numRows = extractStatsPropValues(describeResult, "numRows") - assert(numRows.isDefined && numRows.get == -1, "numRows is lost") + assert(numRows.isDefined && numRows.get == 500) val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") - assert(rawDataSize.isDefined && rawDataSize.get == -1, "rawDataSize is lost") - - if (analyzedBySpark) { - checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) - } else { - checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = None) - } + assert(rawDataSize.isDefined && rawDataSize.get == 5312) + checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index a5cff35abf37e..4b871a2f30caa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -816,7 +816,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd test("ADD JAR command 2") { // this is a test case from mapjoin_addjar.q - val testJar = TestHive.getHiveFile("hive-hcatalog-core-0.13.1.jar").toURI + val testJar = TestHive.getHiveFile("hive-hcatalog-core-2.3.4.jar").toURI val testData = TestHive.getHiveFile("data/files/sample.json").toURI sql(s"ADD JAR $testJar") sql( @@ -826,9 +826,9 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd sql("select * from src join t1 on src.key = t1.a") sql("DROP TABLE t1") assert(sql("list jars"). - filter(_.getString(0).contains("hive-hcatalog-core-0.13.1.jar")).count() > 0) + filter(_.getString(0).contains("hive-hcatalog-core-2.3.4.jar")).count() > 0) assert(sql("list jar"). - filter(_.getString(0).contains("hive-hcatalog-core-0.13.1.jar")).count() > 0) + filter(_.getString(0).contains("hive-hcatalog-core-2.3.4.jar")).count() > 0) val testJar2 = TestHive.getHiveFile("TestUDTF.jar").getCanonicalPath sql(s"ADD JAR $testJar2") assert(sql(s"list jar $testJar").count() == 1) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 70efad103d13e..f53c7353a42d1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1105,7 +1105,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { override def run() { // To make sure this test works, this jar should not be loaded in another place. sql( - s"ADD JAR ${hiveContext.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath()}") + s"ADD JAR ${hiveContext.getHiveFile("hive-contrib-2.3.4.jar").getCanonicalPath()}") try { sql( """ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala index 5094763b0cd2a..e39b99795e832 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala @@ -297,33 +297,25 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { // This might have to be changed after Hive version is upgraded. checkFilterPredicate( '_1.isNotNull, - """leaf-0 = (IS_NULL _1) - |expr = (not leaf-0)""".stripMargin.trim + "leaf-0 = (IS_NULL _1), expr = (not leaf-0)" ) checkFilterPredicate( '_1 =!= 1, - """leaf-0 = (IS_NULL _1) - |leaf-1 = (EQUALS _1 1) - |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim + "leaf-0 = (IS_NULL _1), leaf-1 = (EQUALS _1 1), expr = (and (not leaf-0) (not leaf-1))" ) checkFilterPredicate( !('_1 < 4), - """leaf-0 = (IS_NULL _1) - |leaf-1 = (LESS_THAN _1 4) - |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim + "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 4), expr = (and (not leaf-0) (not leaf-1))" ) checkFilterPredicate( '_1 < 2 || '_1 > 3, - """leaf-0 = (LESS_THAN _1 2) - |leaf-1 = (LESS_THAN_EQUALS _1 3) - |expr = (or leaf-0 (not leaf-1))""".stripMargin.trim + "leaf-0 = (LESS_THAN _1 2), leaf-1 = (LESS_THAN_EQUALS _1 3), " + + "expr = (or leaf-0 (not leaf-1))" ) checkFilterPredicate( '_1 < 2 && '_1 > 3, - """leaf-0 = (IS_NULL _1) - |leaf-1 = (LESS_THAN _1 2) - |leaf-2 = (LESS_THAN_EQUALS _1 3) - |expr = (and (not leaf-0) leaf-1 (not leaf-2))""".stripMargin.trim + "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 2), " + + "leaf-2 = (LESS_THAN_EQUALS _1 3), expr = (and (not leaf-0) leaf-1 (not leaf-2))" ) } } @@ -359,9 +351,7 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { StructField("a", IntegerType, nullable = true), StructField("b", StringType, nullable = true))) assertResult( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim + "leaf-0 = (LESS_THAN a 10), expr = leaf-0" ) { OrcFilters.createFilter(schema, Array( LessThan("a", 10), @@ -371,9 +361,7 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { // The `LessThan` should be converted while the whole inner `And` shouldn't assertResult( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim + "leaf-0 = (LESS_THAN a 10), expr = leaf-0" ) { OrcFilters.createFilter(schema, Array( LessThan("a", 10), @@ -397,9 +385,7 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { // Safely remove unsupported `StringContains` predicate and push down `LessThan` assertResult( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim + "leaf-0 = (LESS_THAN a 10), expr = leaf-0" ) { OrcFilters.createFilter(schema, Array( And( @@ -411,10 +397,7 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { // Safely remove unsupported `StringContains` predicate, push down `LessThan` and `GreaterThan`. assertResult( - """leaf-0 = (LESS_THAN a 10) - |leaf-1 = (LESS_THAN_EQUALS a 1) - |expr = (and leaf-0 (not leaf-1)) - """.stripMargin.trim + "leaf-0 = (LESS_THAN a 10), leaf-1 = (LESS_THAN_EQUALS a 1), expr = (and leaf-0 (not leaf-1))" ) { OrcFilters.createFilter(schema, Array( And( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index 597b0f56a55e4..ee7e1be3ea8fa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -218,4 +218,13 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } } + + test("SPARK-26437 Can not query decimal type when value is 0") { + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") { + withTable("spark_26437") { + sql("CREATE TABLE spark_26437 STORED AS ORCFILE AS SELECT 0.00 AS c1") + checkAnswer(spark.table("spark_26437"), Seq(Row(0.00))) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index c46512b6f5852..831b06b92c381 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -149,7 +149,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("Check BloomFilter creation") { Seq(true, false).foreach { convertMetastore => withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { - testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER) // Before ORC-101 + testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER_UTF8) } } }