From c5a0b06b84696a46e63dc385b6ba20cbb84588c4 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 23 Nov 2020 16:01:17 -0800 Subject: [PATCH 1/2] [SPARK-33524][SQL] --- .../scala/org/apache/spark/sql/connector/InMemoryTable.scala | 4 +++- .../org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index c93053abc550a..d8b3fd69efcf4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -156,7 +156,9 @@ class InMemoryTable( throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case BucketTransform(numBuckets, ref) => - (extractor(ref.fieldNames, schema, row).hashCode() & Integer.MAX_VALUE) % numBuckets + val (value, dataType) = extractor(ref.fieldNames, schema, row) + val valueHashCode = if (value == null) 0 else value.hashCode + ((valueHashCode + dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index da53936239de8..dc4abf3eb19cf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2511,7 +2511,7 @@ class DataSourceV2SQLSuite checkAnswer( spark.sql(s"SELECT id, data, _partition FROM $t1"), - Seq(Row(1, "a", "3/1"), Row(2, "b", "2/2"), Row(3, "c", "2/3"))) + Seq(Row(1, "a", "3/1"), Row(2, "b", "0/2"), Row(3, "c", "1/3"))) } } @@ -2524,7 +2524,7 @@ class DataSourceV2SQLSuite checkAnswer( spark.sql(s"SELECT index, data, _partition FROM $t1"), - Seq(Row(3, "c", "2/3"), Row(2, "b", "2/2"), Row(1, "a", "3/1"))) + Seq(Row(3, "c", "1/3"), Row(2, "b", "0/2"), Row(1, "a", "3/1"))) } } From 74a50f52374dfccefdbfa348a07a4f75ecd9fda3 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 23 Nov 2020 19:25:43 -0800 Subject: [PATCH 2/2] Address comments --- .../scala/org/apache/spark/sql/connector/InMemoryTable.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index d8b3fd69efcf4..ffff00b54f1b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -158,7 +158,7 @@ class InMemoryTable( case BucketTransform(numBuckets, ref) => val (value, dataType) = extractor(ref.fieldNames, schema, row) val valueHashCode = if (value == null) 0 else value.hashCode - ((valueHashCode + dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets + ((valueHashCode + 31 * dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets } }