From 66b42886904a5befbc36d6f780fa9dccd4cb335c Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 21 Oct 2019 11:36:14 +0300 Subject: [PATCH 1/6] Add IntervalBenchmark --- .../benchmark/IntervalBenchmark.scala | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala new file mode 100644 index 0000000000000..43d8462395bce --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import scala.collection.mutable.ListBuffer + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.SaveMode.Overwrite +import org.apache.spark.sql.internal.SQLConf + +/** + * Synthetic benchmark for interval functions. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class <this class> --jars <spark core test jar> <spark sql test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. 
generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/IntervalBenchmark-results.txt". + * }}} + */ +object IntervalBenchmark extends SqlBasedBenchmark { + + private def doBenchmark(cardinality: Long, exprs: String*): Unit = { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + spark + .range(0, cardinality, 1, 1) + .selectExpr(exprs: _*) + .write + .format("noop") + .mode(Overwrite) + .save() + } + } + + private def addCase( + benchmark: Benchmark, + cardinality: Long, + name: String, + exprs: String*): Unit = { + benchmark.addCase(name, numIters = 3) { _ => + doBenchmark(cardinality, exprs: _*) + } + } + + private def buildString(withPrefix: Boolean, units: Seq[String] = Seq.empty): String = { + val sep = if (units.length > 0) ", " else "" + val otherUnits = sep + s"'${units.mkString(" ")}'" + val prefix = if (withPrefix) "'interval'" else "''" + s"concat_ws(' ', ${prefix}, cast(id % 10000 AS string), 'years'${otherUnits})" + } + + private def addCase(benchmark: Benchmark, cardinality: Long, units: Seq[String]): Unit = { + Seq(true, false).foreach { withPrefix => + val expr = s"CAST(${buildString(withPrefix, units)} AS interval)" + val note = if (withPrefix) "w/ interval" else "w/o interval" + benchmark.addCase(s"${units.length + 1} units $note", numIters = 3) { _ => + doBenchmark(cardinality, expr) + } + } + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val N = 1000000 + val timeUnits = Seq( + "13 months", "100 weeks", "9 days", "12 hours", + "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds") + val intervalToTest = ListBuffer[String]() + + val benchmark = new Benchmark(s"cast strings to intervals", N, output = output) + addCase(benchmark, N, s"string w/ interval", buildString(true, timeUnits)) + addCase(benchmark, N, s"string w/o interval", buildString(false, timeUnits)) + addCase(benchmark, N, intervalToTest) // Only years + + for 
(unit <- timeUnits) { + intervalToTest.append(unit) + addCase(benchmark, N, intervalToTest) + } + + benchmark.run() + } +} From 4eaae97704dde0e091715b5a5662b9d2f8cda984 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 21 Oct 2019 11:36:54 +0300 Subject: [PATCH 2/6] Generate results --- .../benchmarks/IntervalBenchmark-results.txt | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 sql/core/benchmarks/IntervalBenchmark-results.txt diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt new file mode 100644 index 0000000000000..26a5aa931a9f5 --- /dev/null +++ b/sql/core/benchmarks/IntervalBenchmark-results.txt @@ -0,0 +1,25 @@ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.15 +Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +string w/ interval 386 428 48 2.6 386.4 1.0X +string w/o interval 312 336 33 3.2 312.3 1.2X +1 units w/ interval 933 957 38 1.1 933.0 0.4X +1 units w/o interval 919 948 35 1.1 918.8 0.4X +2 units w/ interval 1080 1103 23 0.9 1080.5 0.4X +2 units w/o interval 1111 1119 8 0.9 1111.5 0.3X +3 units w/ interval 1226 1231 5 0.8 1225.7 0.3X +3 units w/o interval 1280 1288 9 0.8 1280.3 0.3X +4 units w/ interval 1418 1433 13 0.7 1417.7 0.3X +4 units w/o interval 1479 1484 8 0.7 1478.7 0.3X +5 units w/ interval 1709 1730 18 0.6 1709.3 0.2X +5 units w/o interval 1729 1739 10 0.6 1729.1 0.2X +6 units w/ interval 1820 1831 10 0.5 1819.9 0.2X +6 units w/o interval 1936 1945 9 0.5 1936.2 0.2X +7 units w/ interval 2048 2061 11 0.5 2048.2 0.2X +7 units w/o interval 2050 2086 31 0.5 2049.8 0.2X +8 units w/ interval 2306 2341 30 0.4 2306.4 0.2X +8 units w/o interval 2393 2436 55 0.4 2393.3 0.2X +9 units w/ interval 2480 2515 39 0.4 2480.1 0.2X +9 units 
w/o interval 2518 2521 5 0.4 2517.8 0.2X + From b35439a8e0e5cd26f441936a677c4e155dd9d5fe Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 21 Oct 2019 17:38:36 +0300 Subject: [PATCH 3/6] Use string interpolation --- .../spark/sql/execution/benchmark/IntervalBenchmark.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala index 43d8462395bce..a4e2f44eef957 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala @@ -61,7 +61,7 @@ object IntervalBenchmark extends SqlBasedBenchmark { private def buildString(withPrefix: Boolean, units: Seq[String] = Seq.empty): String = { val sep = if (units.length > 0) ", " else "" - val otherUnits = sep + s"'${units.mkString(" ")}'" + val otherUnits = s"$sep'${units.mkString(" ")}'" val prefix = if (withPrefix) "'interval'" else "''" s"concat_ws(' ', ${prefix}, cast(id % 10000 AS string), 'years'${otherUnits})" } From 836eb9f92e7083eb3d4238b2ec2675daafaf9b7e Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 21 Oct 2019 23:26:06 +0300 Subject: [PATCH 4/6] Use Scala API --- .../benchmarks/IntervalBenchmark-results.txt | 40 +++++++++---------- .../benchmark/IntervalBenchmark.scala | 22 +++++----- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt index 26a5aa931a9f5..9010b980c07b5 100644 --- a/sql/core/benchmarks/IntervalBenchmark-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-results.txt @@ -2,24 +2,24 @@ Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.15 Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -string w/ interval 386 428 48 2.6 386.4 1.0X -string w/o interval 312 336 33 3.2 312.3 1.2X -1 units w/ interval 933 957 38 1.1 933.0 0.4X -1 units w/o interval 919 948 35 1.1 918.8 0.4X -2 units w/ interval 1080 1103 23 0.9 1080.5 0.4X -2 units w/o interval 1111 1119 8 0.9 1111.5 0.3X -3 units w/ interval 1226 1231 5 0.8 1225.7 0.3X -3 units w/o interval 1280 1288 9 0.8 1280.3 0.3X -4 units w/ interval 1418 1433 13 0.7 1417.7 0.3X -4 units w/o interval 1479 1484 8 0.7 1478.7 0.3X -5 units w/ interval 1709 1730 18 0.6 1709.3 0.2X -5 units w/o interval 1729 1739 10 0.6 1729.1 0.2X -6 units w/ interval 1820 1831 10 0.5 1819.9 0.2X -6 units w/o interval 1936 1945 9 0.5 1936.2 0.2X -7 units w/ interval 2048 2061 11 0.5 2048.2 0.2X -7 units w/o interval 2050 2086 31 0.5 2049.8 0.2X -8 units w/ interval 2306 2341 30 0.4 2306.4 0.2X -8 units w/o interval 2393 2436 55 0.4 2393.3 0.2X -9 units w/ interval 2480 2515 39 0.4 2480.1 0.2X -9 units w/o interval 2518 2521 5 0.4 2517.8 0.2X +string w/ interval 420 435 18 2.4 419.8 1.0X +string w/o interval 359 365 10 2.8 358.7 1.2X +1 units w/ interval 752 759 8 1.3 752.0 0.6X +1 units w/o interval 762 766 4 1.3 762.0 0.6X +2 units w/ interval 961 970 8 1.0 960.7 0.4X +2 units w/o interval 970 976 9 1.0 970.2 0.4X +3 units w/ interval 1130 1136 7 0.9 1130.4 0.4X +3 units w/o interval 1150 1158 9 0.9 1150.3 0.4X +4 units w/ interval 1333 1336 3 0.7 1333.5 0.3X +4 units w/o interval 1354 1359 4 0.7 1354.5 0.3X +5 units w/ interval 1523 1525 2 0.7 1523.3 0.3X +5 units w/o interval 1549 1551 3 0.6 1549.4 0.3X +6 units w/ interval 1661 1663 2 0.6 1660.8 0.3X +6 units w/o interval 1691 1704 13 0.6 1691.2 0.2X +7 units w/ interval 1811 1817 8 0.6 1810.6 0.2X +7 units w/o interval 1853 1854 1 0.5 1853.2 0.2X +8 units w/ interval 2029 2037 8 0.5 2028.7 0.2X +8 units w/o interval 2075 2075 1 
0.5 2074.5 0.2X +9 units w/ interval 2170 2175 5 0.5 2170.0 0.2X +9 units w/o interval 2204 2212 8 0.5 2203.6 0.2X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala index a4e2f44eef957..b1a1a5c5a62ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala @@ -20,7 +20,9 @@ package org.apache.spark.sql.execution.benchmark import scala.collection.mutable.ListBuffer import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.Column import org.apache.spark.sql.SaveMode.Overwrite +import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf /** @@ -36,12 +38,13 @@ import org.apache.spark.sql.internal.SQLConf * }}} */ object IntervalBenchmark extends SqlBasedBenchmark { + import spark.implicits._ - private def doBenchmark(cardinality: Long, exprs: String*): Unit = { + private def doBenchmark(cardinality: Long, exprs: Column*): Unit = { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { spark .range(0, cardinality, 1, 1) - .selectExpr(exprs: _*) + .select(exprs: _*) .write .format("noop") .mode(Overwrite) @@ -53,22 +56,23 @@ object IntervalBenchmark extends SqlBasedBenchmark { benchmark: Benchmark, cardinality: Long, name: String, - exprs: String*): Unit = { + exprs: Column*): Unit = { benchmark.addCase(name, numIters = 3) { _ => doBenchmark(cardinality, exprs: _*) } } - private def buildString(withPrefix: Boolean, units: Seq[String] = Seq.empty): String = { - val sep = if (units.length > 0) ", " else "" - val otherUnits = s"$sep'${units.mkString(" ")}'" - val prefix = if (withPrefix) "'interval'" else "''" - s"concat_ws(' ', ${prefix}, cast(id % 10000 AS string), 'years'${otherUnits})" + private def buildString(withPrefix: Boolean, units: 
Seq[String] = Seq.empty): Column = { + val init = lit(if (withPrefix) "interval" else "") :: + ($"id" % 10000).cast("string") :: + lit("years") :: Nil + + concat_ws(" ", (init ++ units.map(lit)): _*) } private def addCase(benchmark: Benchmark, cardinality: Long, units: Seq[String]): Unit = { Seq(true, false).foreach { withPrefix => - val expr = s"CAST(${buildString(withPrefix, units)} AS interval)" + val expr = buildString(withPrefix, units).cast("interval") val note = if (withPrefix) "w/ interval" else "w/o interval" benchmark.addCase(s"${units.length + 1} units $note", numIters = 3) { _ => doBenchmark(cardinality, expr) From 165ee36dab5d4d949f526479229d8806f9b5b4a8 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 21 Oct 2019 23:53:26 +0300 Subject: [PATCH 5/6] Generate results for jdk11 --- .../IntervalBenchmark-jdk11-results.txt | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt diff --git a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..2a3903200a8ac --- /dev/null +++ b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt @@ -0,0 +1,25 @@ +OpenJDK 64-Bit Server VM 11.0.2+9 on Mac OS X 10.15 +Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +string w/ interval 471 513 57 2.1 470.7 1.0X +string w/o interval 437 444 8 2.3 436.9 1.1X +1 units w/ interval 726 758 45 1.4 726.3 0.6X +1 units w/o interval 712 717 5 1.4 711.7 0.7X +2 units w/ interval 926 935 12 1.1 925.9 0.5X +2 units w/o interval 943 947 3 1.1 943.4 0.5X +3 units w/ interval 1089 1116 31 0.9 1089.0 0.4X +3 units w/o interval 1105 1108 3 0.9 1105.1 0.4X +4 units w/ interval 1260 1261 1 0.8 1260.4 0.4X 
+4 units w/o interval 1276 1277 1 0.8 1275.9 0.4X +5 units w/ interval 1436 1445 11 0.7 1435.6 0.3X +5 units w/o interval 1455 1463 6 0.7 1455.5 0.3X +6 units w/ interval 1634 1639 4 0.6 1634.4 0.3X +6 units w/o interval 1642 1644 3 0.6 1641.7 0.3X +7 units w/ interval 1829 1838 8 0.5 1828.6 0.3X +7 units w/o interval 1850 1853 4 0.5 1849.5 0.3X +8 units w/ interval 2065 2070 5 0.5 2065.4 0.2X +8 units w/o interval 2070 2090 21 0.5 2070.0 0.2X +9 units w/ interval 2279 2290 10 0.4 2278.7 0.2X +9 units w/o interval 2276 2285 8 0.4 2275.7 0.2X + From 1772543c12909500c9f98558beffc2f978ffb72a Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 22 Oct 2019 00:08:58 +0300 Subject: [PATCH 6/6] remove s" --- .../spark/sql/execution/benchmark/IntervalBenchmark.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala index b1a1a5c5a62ea..4c1c75b815a02 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala @@ -87,9 +87,9 @@ object IntervalBenchmark extends SqlBasedBenchmark { "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds") val intervalToTest = ListBuffer[String]() - val benchmark = new Benchmark(s"cast strings to intervals", N, output = output) - addCase(benchmark, N, s"string w/ interval", buildString(true, timeUnits)) - addCase(benchmark, N, s"string w/o interval", buildString(false, timeUnits)) + val benchmark = new Benchmark("cast strings to intervals", N, output = output) + addCase(benchmark, N, "string w/ interval", buildString(true, timeUnits)) + addCase(benchmark, N, "string w/o interval", buildString(false, timeUnits)) addCase(benchmark, N, intervalToTest) // Only years for (unit <- timeUnits) {