From 96cdc394a23412fad29a2fc086873d531ab3618b Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 17 Sep 2020 15:48:06 +0000 Subject: [PATCH 01/14] add --language option --- dev/archery/archery/cli.py | 52 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index bcaddf1c795..6e1b6fe0e09 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -120,6 +120,15 @@ def cpp_toolchain_options(cmd): return _apply_options(cmd, options) +def java_toolchain_options(cmd): + options = [ + click.option("--java-home", metavar="", + help="Path to Java Developers Kit."), + click.option("--java-flags", help="java compiler flags."), + ] + return _apply_options(cmd, options) + + def _apply_options(cmd, options): for option in options: cmd = option(cmd) @@ -357,6 +366,11 @@ def benchmark(ctx): def benchmark_common_options(cmd): + def check_language(ctx, param, value): + if value != "cpp" and value != "java": + raise click.BadParameter("cpp or java is supported now") + return value + options = [ click.option("--src", metavar="", show_default=True, default=None, callback=validate_arrow_sources, @@ -367,11 +381,18 @@ def benchmark_common_options(cmd): click.option("--output", metavar="", type=click.File("w", encoding="utf8"), default="-", help="Capture output result into file."), + click.option("--language", metavar="", type=str, default="cpp", + show_default=True, callback=check_language, + help="Specify target language for the benchmark"), + click.option("--mvn-extras", type=str, multiple=True, + help="Extra flags/options to pass to mvn. " + "Can be stacked"), click.option("--cmake-extras", type=str, multiple=True, help="Extra flags/options to pass to cmake invocation. " "Can be stacked"), ] + cmd = java_toolchain_options(cmd) cmd = cpp_toolchain_options(cmd) return _apply_options(cmd, options) @@ -483,8 +504,8 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, @click.argument("baseline", metavar="[]]", default="origin/master", required=False) @click.pass_context -def benchmark_diff(ctx, src, preserve, output, cmake_extras, - suite_filter, benchmark_filter, repetitions, no_counters, +def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, + suite_filter, benchmark_filter, repitations, no_counters, threshold, contender, baseline, **kwargs): """Compare (diff) benchmark runs. @@ -556,6 +577,20 @@ def benchmark_diff(ctx, src, preserve, output, cmake_extras, # This should not recompute the benchmark from run.json archery --quiet benchmark diff WORKSPACE run.json > result.json """ + if language == "cpp": + ctx.forward(benchmark_diff_cpp, **kwargs) + elif language == "java": + ctx.forward(benchmark_diff_jav, **kwargs) + +@benchmark.command(name="diff_cpp") +@click.pass_context +def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras, + suite_filter, benchmark_filter, + repetitions, threshold, contender, baseline, + mvn_extras, java_home, java_flags, + **kwargs): + """Compare (diff) cpp benchmark runs.""" + with tmpdir(preserve=preserve) as root: logger.debug("Comparing {} (contender) with {} (baseline)" .format(contender, baseline)) @@ -584,6 +619,19 @@ def benchmark_diff(ctx, src, preserve, output, cmake_extras, output.write('\n') +@benchmark.command(name="diff_java") +@click.pass_context +def benchmark_diff_java(ctx, src, preserve, output, language, + suite_filter, benchmark_filter, + repetitions, threshold, contender, baseline, + cmake_extras, cc, cxx, cxx_flags, cxx_package_prefix, + **kwargs): + """Compare (diff) cpp benchmark runs.""" + + print("Not supported yet") + exit(1) + + def _get_comparisons_as_json(comparisons): buf = StringIO() for comparator in comparisons: From a44677878e7dcea39d4f00fe8847bc1ca79fe6d7 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 17 Sep 2020 15:50:40 +0000 Subject: [PATCH 02/14] refactor from_rev_or_path method --- dev/archery/archery/benchmark/runner.py | 72 +++++++++++++------------ dev/archery/archery/cli.py | 8 +-- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index 5718bcaf108..a213ee8cc58 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -50,40 +50,7 @@ def suites(self): @staticmethod def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs): - """ Returns a BenchmarkRunner from a path or a git revision. - - First, it checks if `rev_or_path` is a valid path (or string) of a json - object that can deserialize to a BenchmarkRunner. If so, it initialize - a StaticBenchmarkRunner from it. This allows memoizing the result of a - run in a file or a string. - - Second, it checks if `rev_or_path` points to a valid CMake build - directory. If so, it creates a CppBenchmarkRunner with this existing - CMakeBuild. - - Otherwise, it assumes `rev_or_path` is a revision and clone/checkout - the given revision and create a fresh CMakeBuild. - """ - build = None - if StaticBenchmarkRunner.is_json_result(rev_or_path): - return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs) - elif CMakeBuild.is_build_dir(rev_or_path): - build = CMakeBuild.from_path(rev_or_path) - return CppBenchmarkRunner(build, **kwargs) - else: - # Revisions can references remote via the `/` character, ensure - # that the revision is path friendly - path_rev = rev_or_path.replace("/", "_") - root_rev = os.path.join(root, path_rev) - os.mkdir(root_rev) - - clone_dir = os.path.join(root_rev, "arrow") - # Possibly checkout the sources at given revision, no need to - # perform cleanup on cloned repository as root_rev is reclaimed. - src_rev, _ = src.at_revision(rev_or_path, clone_dir) - cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf) - build_dir = os.path.join(root_rev, "build") - return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs) + raise NotImplementedError("BenchmarkRunner must implement from_rev_or_path") class StaticBenchmarkRunner(BenchmarkRunner): @@ -210,3 +177,40 @@ def suites(self): continue yield suite + + @staticmethod + def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs): + """ Returns a BenchmarkRunner from a path or a git revision. + + First, it checks if `rev_or_path` is a valid path (or string) of a json + object that can deserialize to a BenchmarkRunner. If so, it initialize + a StaticBenchmarkRunner from it. This allows memoizing the result of a + run in a file or a string. + + Second, it checks if `rev_or_path` points to a valid CMake build + directory. If so, it creates a CppBenchmarkRunner with this existing + CMakeBuild. + + Otherwise, it assumes `rev_or_path` is a revision and clone/checkout + the given revision and create a fresh CMakeBuild. + """ + build = None + if StaticBenchmarkRunner.is_json_result(rev_or_path): + return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs) + elif CMakeBuild.is_build_dir(rev_or_path): + build = CMakeBuild.from_path(rev_or_path) + return CppBenchmarkRunner(build, **kwargs) + else: + # Revisions can references remote via the `/` character, ensure + # that the revision is path friendly + path_rev = rev_or_path.replace("/", "_") + root_rev = os.path.join(root, path_rev) + os.mkdir(root_rev) + + clone_dir = os.path.join(root_rev, "arrow") + # Possibly checkout the sources at given revision, no need to + # perform cleanup on cloned repository as root_rev is reclaimed. + src_rev, _ = src.at_revision(rev_or_path, clone_dir) + cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf) + build_dir = os.path.join(root_rev, "build") + return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 6e1b6fe0e09..079754eb76e 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -424,7 +424,7 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, conf = CppBenchmarkRunner.default_configuration( cmake_extras=cmake_extras, **kwargs) - runner_base = BenchmarkRunner.from_rev_or_path( + runner_base = CppBenchmarkRunner.from_rev_or_path( src, root, rev_or_path, conf) for b in runner_base.list_benchmarks: @@ -480,7 +480,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, conf = CppBenchmarkRunner.default_configuration( cmake_extras=cmake_extras, **kwargs) - runner_base = BenchmarkRunner.from_rev_or_path( + runner_base = CppBenchmarkRunner.from_rev_or_path( src, root, rev_or_path, conf, repetitions=repetitions, suite_filter=suite_filter, benchmark_filter=benchmark_filter) @@ -598,12 +598,12 @@ def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras, conf = CppBenchmarkRunner.default_configuration( cmake_extras=cmake_extras, **kwargs) - runner_cont = BenchmarkRunner.from_rev_or_path( + runner_cont = CppBenchmarkRunner.from_rev_or_path( src, root, contender, conf, repetitions=repetitions, suite_filter=suite_filter, benchmark_filter=benchmark_filter) - runner_base = BenchmarkRunner.from_rev_or_path( + runner_base = CppBenchmarkRunner.from_rev_or_path( src, root, baseline, conf, repetitions=repetitions, suite_filter=suite_filter, From 63fae86d77adf042cc27ceda37f0e58624db61f1 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 18 Sep 2020 01:15:55 +0000 Subject: [PATCH 03/14] fix lint errors --- dev/archery/archery/benchmark/runner.py | 3 ++- dev/archery/archery/cli.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index a213ee8cc58..83eeb8a789f 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -50,7 +50,8 @@ def suites(self): @staticmethod def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs): - raise NotImplementedError("BenchmarkRunner must implement from_rev_or_path") + raise NotImplementedError( + "BenchmarkRunner must implement from_rev_or_path") class StaticBenchmarkRunner(BenchmarkRunner): diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 079754eb76e..a79ed1a8ace 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -27,7 +27,7 @@ from .benchmark.codec import JsonEncoder from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD -from .benchmark.runner import BenchmarkRunner, CppBenchmarkRunner +from .benchmark.runner import CppBenchmarkRunner from .lang.cpp import CppCMakeDefinition, CppConfiguration from .utils.lint import linter, python_numpydoc, LintValidationException from .utils.logger import logger, ctx as log_ctx @@ -580,7 +580,8 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, if language == "cpp": ctx.forward(benchmark_diff_cpp, **kwargs) elif language == "java": - ctx.forward(benchmark_diff_jav, **kwargs) + ctx.forward(benchmark_diff_java, **kwargs) + @benchmark.command(name="diff_cpp") @click.pass_context @@ -626,7 +627,7 @@ def benchmark_diff_java(ctx, src, preserve, output, language, repetitions, threshold, contender, baseline, cmake_extras, cc, cxx, cxx_flags, cxx_package_prefix, **kwargs): - """Compare (diff) cpp benchmark runs.""" + """Compare (diff) java benchmark runs.""" print("Not supported yet") exit(1) From 23bc0b34ca58fb568e6272e298d32a0a6ce5d7c0 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 13 Nov 2020 03:43:10 +0900 Subject: [PATCH 04/14] support benchmark list, run, and diff with --language java --- dev/archery/archery/benchmark/compare.py | 8 + dev/archery/archery/benchmark/jmh.py | 166 +++++++++++++++++++ dev/archery/archery/benchmark/runner.py | 96 +++++++++++ dev/archery/archery/cli.py | 126 ++++++++++++-- dev/archery/archery/lang/java.py | 48 ++++++ dev/archery/archery/utils/maven.py | 200 +++++++++++++++++++++++ dev/archery/archery/utils/source.py | 5 + java/performance/pom.xml | 11 ++ 8 files changed, 648 insertions(+), 12 deletions(-) create mode 100644 dev/archery/archery/benchmark/jmh.py create mode 100644 dev/archery/archery/utils/maven.py diff --git a/dev/archery/archery/benchmark/compare.py b/dev/archery/archery/benchmark/compare.py index 622b8017917..46633525490 100644 --- a/dev/archery/archery/benchmark/compare.py +++ b/dev/archery/archery/benchmark/compare.py @@ -54,6 +54,14 @@ def formatter_for_unit(unit): return bytes_per_seconds_fmt elif unit == "items_per_second": return items_per_seconds_fmt + elif unit.startswith("ops/"): + def ops_per_time_fmt(value): + return "{:.3f} {}".format(value, unit) + return ops_per_time_fmt + elif unit.endswith("/op"): + def time_per_op_fmt(value): + return "{:.3f} {}".format(value, unit) + return time_per_op_fmt else: return lambda x: x diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py new file mode 100644 index 00000000000..4a4a6369bf5 --- /dev/null +++ b/dev/archery/archery/benchmark/jmh.py @@ -0,0 +1,166 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from itertools import filterfalse, groupby, tee +import json +import subprocess +from tempfile import NamedTemporaryFile + +from .core import Benchmark +from ..utils.maven import Maven + + +def partition(pred, iterable): + # adapted from python's examples + t1, t2 = tee(iterable) + return list(filter(pred, t1)), list(filterfalse(pred, t2)) + + +class JavaMicrobenchmarkHarnessCommand(Maven): + """ Run a Java Micro Benchmark Harness + + This assumes the binary supports the standard command line options, + notably `--benchmark_filter`, `--benchmark_format`, etc... + """ + + def __init__(self, build, benchmark_filter=None): + #self.performance_dir = build.binaries_dir + "/performance" + self.benchmark_filter = benchmark_filter + self.build = build + self.maven = Maven() + + def list_benchmarks(self): + argv = [] + if self.benchmark_filter: + argv.append("-Dbenchmark.filter={}".format(self.benchmark_filter)) + #result = self.maven.run(*argv, cwd=self.performance_dir, + # stdout=subprocess.PIPE, stderr=subprocess.PIPE) + result = self.build.list(*argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + """ Extract benchmark names from output. Assume the following output + ... + Benchmarks: + org.apache.arrow.vector.IntBenchmarks.setIntDirectly + ... + org.apache.arrow.vector.IntBenchmarks.setWithValueHolder + org.apache.arrow.vector.IntBenchmarks.setWithWriter + ... + [INFO] + """ + + lists = [] + benchmarks = False + for line in str.splitlines(result.stdout.decode("utf-8")): + if not benchmarks: + if line.startswith("Benchmarks:"): + benchmarks = True + else: + if line.startswith("org.apache.arrow"): + lists.append(line) + if line.startswith("[INFO]"): + benchmarks = False + break + return lists + + def results(self, repetitions=1): + with NamedTemporaryFile(suffix=".json") as out: + argv = ["-Dbenchmark.runs={}".format(repetitions), + "-Dbenchmark.resultfile={}".format(out.name), + "-Dbenchmark.resultformat=json"] + if self.benchmark_filter: + argv.append( + "-Dbenchmark.filter={}".format(self.benchmark_filter) + ) + + #self.maven.run(*argv, cwd=self.performance_dir, check=True) + self.build.benchmark(*argv, check=True) + return json.load(out) + + +class JavaMicrobenchmarkHarnessObservation: + """ Represents one run of a single Java Microbenchmark Harness + """ + + def __init__(self, benchmark, primaryMetric, + forks, warmupIterations, measurementIterations, **counters): + self.name = benchmark + self.primaryMetric = primaryMetric + self.score = primaryMetric["score"] + self.scoreUnit = primaryMetric["scoreUnit"] + self.forks = forks + self.warmups = warmupIterations + self.runs = measurementIterations + self.counters = { + "mode" : counters["mode"], + "threads" : counters["threads"], + "warmups" : warmupIterations, + "warmupTime" : counters["warmupTime"], + "measurements" : measurementIterations, + "measurementTime" : counters["measurementTime"], + "jvmArgs" : counters["jvmArgs"] + } + + @property + def value(self): + """ Return the benchmark value.""" + return self.score + + @property + def unit(self): + return self.scoreUnit + + def __repr__(self): + return str(self.value) + + +class JavaMicrobenchmarkHarness(Benchmark): + """ A set of JavaMicrobenchmarkHarnessObservations. """ + + def __init__(self, name, runs): + """ Initialize a JavaMicrobenchmarkHarness. + + Parameters + ---------- + name: str + Name of the benchmark + forks: int + warmups: int + runs: int + runs: list(JavaMicrobenchmarkHarnessObservation) + Repetitions of JavaMicrobenchmarkHarnessObservation run. + + """ + self.name = name + self.runs = sorted(runs, key=lambda b: b.value) + unit = self.runs[0].unit + less_is_better = unit.endswith("/op") + values = [b.value for b in self.runs] + # Slight kludge to extract the UserCounters for each benchmark + self.counters = self.runs[0].counters + super().__init__(name, unit, less_is_better, values) + + def __repr__(self): + return "JavaMicrobenchmarkHarness[name={},runs={}]".format(self.name, self.runs) + + @classmethod + def from_json(cls, payload): + def group_key(x): + return x.name + + benchmarks = map(lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload) + groups = groupby(sorted(benchmarks, key=group_key), group_key) + return [cls(k, list(bs)) for k, bs in groups] diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index 83eeb8a789f..a0880537280 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -22,8 +22,11 @@ from .core import BenchmarkSuite from .google import GoogleBenchmarkCommand, GoogleBenchmark +from .jmh import JavaMicrobenchmarkHarnessCommand, JavaMicrobenchmarkHarness from ..lang.cpp import CppCMakeDefinition, CppConfiguration +from ..lang.java import JavaMavenDefinition, JavaConfiguration from ..utils.cmake import CMakeBuild +from ..utils.maven import MavenBuild from ..utils.logger import logger @@ -215,3 +218,96 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs): cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf) build_dir = os.path.join(root_rev, "build") return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs) + + +class JavaBenchmarkRunner(BenchmarkRunner): + """ Run suites for Java. """ + + def __init__(self, build, **kwargs): + """ Initialize a JavaBenchmarkRunner. """ + self.build = build + self.repetitions = 5 # default repetitions for Java + super().__init__(**kwargs) + + @staticmethod + def default_configuration(**kwargs): + """ Returns the default benchmark configuration. """ + return JavaConfiguration(**kwargs) + + def suite(self, name): + """ Returns the resulting benchmarks for a given suite. """ + # update .m2 directory, which installs target jars + self.build.build() + + suite_cmd = JavaMicrobenchmarkHarnessCommand(self.build, self.benchmark_filter) + + # Ensure there will be data + benchmark_names = suite_cmd.list_benchmarks() + if not benchmark_names: + return None + + results = suite_cmd.results(repetitions=self.repetitions) + benchmarks = JavaMicrobenchmarkHarness.from_json(results) + return BenchmarkSuite(name, benchmarks) + + @property + def list_benchmarks(self): + """ Returns all suite names """ + # Ensure build is up-to-date to run benchmarks + self.build.build() + + suite_cmd = JavaMicrobenchmarkHarnessCommand(self.build) + benchmark_names = suite_cmd.list_benchmarks() + for benchmark_name in benchmark_names: + yield "{}".format(benchmark_name) + + @property + def suites(self): + """ Returns all suite for a runner. """ + suite_name = "JavaBenchmark" + suite = self.suite(suite_name) + + # Filter may exclude all benchmarks + if not suite: + logger.debug("Suite {} executed but no results" + .format(suite_name)) + return + + yield suite + + @staticmethod + def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs): + """ Returns a BenchmarkRunner from a path or a git revision. + + First, it checks if `rev_or_path` is a valid path (or string) of a json + object that can deserialize to a BenchmarkRunner. If so, it initialize + a StaticBenchmarkRunner from it. This allows memoizing the result of a + run in a file or a string. + + Second, it checks if `rev_or_path` points to a valid Maven build + directory. If so, it creates a JavaenchmarkRunner with this existing + MavenBuild. + + Otherwise, it assumes `rev_or_path` is a revision and clone/checkout + the given revision and create a fresh MavenBuild. + """ + build = None + if StaticBenchmarkRunner.is_json_result(rev_or_path): + return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs) + elif MavenBuild.is_build_dir(rev_or_path): + maven_def = JavaMavenDefinition(rev_or_path, maven_conf) + return JavaBenchmarkRunner(maven_def.build(rev_or_path), **kwargs) + else: + # Revisions can references remote via the `/` character, ensure + # that the revision is path friendly + path_rev = rev_or_path.replace("/", "_") + root_rev = os.path.join(root, path_rev) + os.mkdir(root_rev) + + clone_dir = os.path.join(root_rev, "arrow") + # Possibly checkout the sources at given revision, no need to + # perform cleanup on cloned repository as root_rev is reclaimed. + src_rev, _ = src.at_revision(rev_or_path, clone_dir) + maven_def = JavaMavenDefinition(src_rev.java, maven_conf) + build_dir = os.path.join(root_rev, "arrow/java") + return JavaBenchmarkRunner(maven_def.build(build_dir), **kwargs) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index a79ed1a8ace..cd7ec595e44 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -27,7 +27,7 @@ from .benchmark.codec import JsonEncoder from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD -from .benchmark.runner import CppBenchmarkRunner +from .benchmark.runner import CppBenchmarkRunner, JavaBenchmarkRunner from .lang.cpp import CppCMakeDefinition, CppConfiguration from .utils.lint import linter, python_numpydoc, LintValidationException from .utils.logger import logger, ctx as log_ctx @@ -124,7 +124,7 @@ def java_toolchain_options(cmd): options = [ click.option("--java-home", metavar="", help="Path to Java Developers Kit."), - click.option("--java-flags", help="java compiler flags."), + click.option("--java-options", help="java compiler options."), ] return _apply_options(cmd, options) @@ -384,12 +384,15 @@ def check_language(ctx, param, value): click.option("--language", metavar="", type=str, default="cpp", show_default=True, callback=check_language, help="Specify target language for the benchmark"), - click.option("--mvn-extras", type=str, multiple=True, - help="Extra flags/options to pass to mvn. " - "Can be stacked"), + click.option("--build-extras", type=str, multiple=True, + help="Extra flags/options to pass to mvn build. " + "Can be stackd. For language=java"), + click.option("--benchmark-extras", type=str, multiple=True, + help="Extra flags/options to pass to mvn benchmark. " + "Can be stackd. For language=java"), click.option("--cmake-extras", type=str, multiple=True, help="Extra flags/options to pass to cmake invocation. " - "Can be stacked"), + "Can be stacked. For language=cpp"), ] cmd = java_toolchain_options(cmd) @@ -415,9 +418,23 @@ def benchmark_filter_options(cmd): @benchmark_common_options @click.pass_context def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, - **kwargs): + language, build_extras, benchmark_extras, java_home, + java_options, **kwargs): """ List benchmark suite. """ + if language == "cpp": + ctx.forward(benchmark_list_cpp, **kwargs) + elif language == "java": + ctx.forward(benchmark_list_java, **kwargs) + + +@benchmark.command(name="list_cpp") +@click.pass_context +def benchmark_list_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras, + language, build_extras, benchmark_extras, java_home, + java_options, **kwargs): + """ List cpp benchmark suite. + """ with tmpdir(preserve=preserve) as root: logger.debug("Running benchmark {}".format(rev_or_path)) @@ -431,6 +448,28 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, click.echo(b, file=output) +@benchmark.command(name="list_java") +@click.pass_context +def benchmark_list_java(ctx, rev_or_path, src, preserve, output, language, + java_home, java_options, build_extras, benchmark_extras, + cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix, + **kwargs): + """ List java benchmark suite. + """ + with tmpdir(preserve=preserve) as root: + logger.debug("Running benchmark {}".format(rev_or_path)) + + conf = JavaBenchmarkRunner.default_configuration( + java_home=java_home, java_options=java_options, + build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs) + + runner_base = JavaBenchmarkRunner.from_rev_or_path( + src, root, rev_or_path, conf) + + for b in runner_base.list_benchmarks: + click.echo(b, file=output) + + @benchmark.command(name="run", short_help="Run benchmark suite") @click.argument("rev_or_path", metavar="[]", default="WORKSPACE", required=False) @@ -441,7 +480,8 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, "may improve result precision.")) @click.pass_context def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, - suite_filter, benchmark_filter, repetitions, **kwargs): + language, suite_filter, benchmark_filter, repetations, + **kwargs): """ Run benchmark suite. This command will run the benchmark suite for a single build. This is @@ -474,6 +514,20 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, \b archery benchmark run --output=run.json """ + if language == "cpp": + ctx.forward(benchmark_run_cpp, **kwargs) + elif language == "java": + ctx.forward(benchmark_run_java, **kwargs) + + +@benchmark.command(name="run_cpp") +@click.pass_context +def benchmark_run_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras, + language, build_extras, benchmark_extras, java_home, + java_options, suite_filter, benchmark_filter, **kwargs): + """ Run cpp benchmark suite. + """ + with tmpdir(preserve=preserve) as root: logger.debug("Running benchmark {}".format(rev_or_path)) @@ -488,6 +542,29 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, json.dump(runner_base, output, cls=JsonEncoder) +@benchmark.command(name="run_java") +@click.pass_context +def benchmark_run_java(ctx, rev_or_path, src, preserve, output, language, + java_home, java_options, build_extras, benchmark_extras, + cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix, + suite_filter, benchmark_filter, **kwargs): + """ Run Java benchmark suite. + """ + + with tmpdir(preserve=preserve) as root: + logger.debug("Running benchmark {}".format(rev_or_path)) + + conf = JavaBenchmarkRunner.default_configuration( + java_home=java_home, java_options=java_options, + build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs) + + runner_base = JavaBenchmarkRunner.from_rev_or_path( + src, root, rev_or_path, conf, + benchmark_filter=benchmark_filter) + + json.dump(runner_base, output, cls=JsonEncoder) + + @benchmark.command(name="diff", short_help="Compare benchmark suites") @benchmark_common_options @benchmark_filter_options @@ -588,7 +665,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras, suite_filter, benchmark_filter, repetitions, threshold, contender, baseline, - mvn_extras, java_home, java_flags, + build_extras, benchmark_extras, java_home, java_options, **kwargs): """Compare (diff) cpp benchmark runs.""" @@ -623,14 +700,38 @@ def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras, @benchmark.command(name="diff_java") @click.pass_context def benchmark_diff_java(ctx, src, preserve, output, language, + java_home, java_options, build_extras, benchmark_extras, suite_filter, benchmark_filter, repetitions, threshold, contender, baseline, - cmake_extras, cc, cxx, cxx_flags, cxx_package_prefix, + cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix, **kwargs): """Compare (diff) java benchmark runs.""" - print("Not supported yet") - exit(1) + with tmpdir(preserve=preserve) as root: + logger.debug("Comparing {} (contender) with {} (baseline)" + .format(contender, baseline)) + + conf = JavaBenchmarkRunner.default_configuration( + java_home=java_home, java_options=java_options, + build_extras=build_extras, benchmark_extras=benchmark_extras, + **kwargs) + + runner_cont = JavaBenchmarkRunner.from_rev_or_path( + src, root, contender, conf, + repetitions=repetitions, + benchmark_filter=benchmark_filter) + runner_base = JavaBenchmarkRunner.from_rev_or_path( + src, root, baseline, conf, + repetitions=repetitions, + benchmark_filter=benchmark_filter) + + runner_comp = RunnerComparator(runner_cont, runner_base, threshold) + + # TODO(kszucs): test that the output is properly formatted jsonlines + comparisons_json = _get_comparisons_as_json(runner_comp.comparisons) + formatted = _format_comparisons_with_pandas(comparisons_json) + output.write(formatted) + output.write('\n') def _get_comparisons_as_json(comparisons): @@ -652,6 +753,7 @@ def _format_comparisons_with_pandas(comparisons_json, no_counters): fields = ['benchmark', 'baseline', 'contender', 'change %'] if not no_counters: fields += ['counters'] + # fields += ['configurations'] df = df[fields].sort_values(by='change %', ascending=False) diff --git a/dev/archery/archery/lang/java.py b/dev/archery/archery/lang/java.py index 24743b67fd7..472fb800f22 100644 --- a/dev/archery/archery/lang/java.py +++ b/dev/archery/archery/lang/java.py @@ -15,7 +15,10 @@ # specific language governing permissions and limitations # under the License. +import os + from ..utils.command import Command, CommandStackMixin, default_bin +from ..utils.maven import MavenDefinition class Java(Command): @@ -28,3 +31,48 @@ def __init__(self, jar, *args, **kwargs): self.jar = jar self.argv = ("-jar", jar) Java.__init__(self, *args, **kwargs) + + +class JavaConfiguration: + def __init__(self, + + # toolchain + java_home=None, java_options=None, + # build & benchmark + build_extras=None, benchmark_extras=None): + self.java_home = java_home + self.java_options = java_options + + self.build_extras = build_extras + self.benchmark_extras = benchmark_extras + + @property + def build_definitions(self): + extras = list(self.build_extras) if self.build_extras else [] + return extras + + @property + def benchmark_definitions(self): + extras = list(self.benchmark_extras) if self.benchmark_extras else [] + return extras + + @property + def environment(self): + env = os.environ.copy() + + if self.java_home: + env["JAVA_HOME"] = self.java_home + + if self.java_options: + env["JAVA_OPTIONS"] = self.java_options + + return env + + +class JavaMavenDefinition(MavenDefinition): + def __init__(self, source, conf, **kwargs): + self.configuration = conf + super().__init__(source, **kwargs, + build_definitions=conf.build_definitions, + benchmark_definitions=conf.benchmark_definitions, + env=conf.environment) diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py new file mode 100644 index 00000000000..a750c6972fc --- /dev/null +++ b/dev/archery/archery/utils/maven.py @@ -0,0 +1,200 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os + +from .command import Command, default_bin + + +class Maven(Command): + def __init__(self, maven_bin=None): + self.bin = default_bin(maven_bin, "mvn") + + +maven = Maven() + + +class MavenDefinition: + """ MavenDefinition captures the maven invocation arguments. + + It allows creating build directories with the same definition, e.g. + ``` + build_1 = maven_def.build("/tmp/build-1") + build_2 = maven_def.build("/tmp/build-2") + + ... + + build1.install() + build2.install() + """ + + def __init__(self, source, build_definitions=None, + benchmark_definitions=None, env=None): + """ Initialize a MavenDefinition + + Parameters + ---------- + source : str + Source directory where the top-level pom.xml is + located. This is usually the root of the project. + build_definitions: list(str), optional + benchmark_definitions: list(str), optional + """ + self.source = os.path.abspath(source) + self.build_definitions = build_definitions if build_definitions else [] + self.benchmark_definitions = benchmark_definitions if benchmark_definitions else [] + self.env = env + + @property + def build_arguments(self): + """" Return the arguments to maven invocation for build. """ + arguments = self.build_definitions + [ + "-B", "-DskipTests", "-Drat.skip=true", + "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn", + "-T", "2C", "install" + ] + return arguments + + def build(self, build_dir, force=False, cmd_kwargs=None, **kwargs): + """ Invoke maven into a build directory. + + Parameters + ---------- + build_dir : str + Directory in which the Maven build will be instantiated. + force : bool + not used now + """ + if os.path.exists(build_dir): + # Extra safety to ensure we're deleting a build folder. + if not MavenBuild.is_build_dir(build_dir): + raise FileExistsError( + "{} is not a maven build".format(build_dir) + ) + + cmd_kwargs = cmd_kwargs if cmd_kwargs else {} + maven(*self.build_arguments, cwd=build_dir, env=self.env, **cmd_kwargs) + return MavenBuild(build_dir, definition=self, **kwargs) + + @property + def list_arguments(self): + """" Return the arguments to maven invocation for list """ + arguments = [ + "-Dskip.perf.benchmarks=false", "-Dbenchmark.list=-lp", "install" + ] + return arguments + + @property + def benchmark_arguments(self): + """" Return the arguments to maven invocation for benchmark """ + arguments = self.benchmark_definitions + [ + "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1", + "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false -Darrow.enable_unsafe_memory_access=true\"", + "install" + ] + return arguments + + def __repr__(self): + return "MavenDefinition[source={}]".format(self.source) + + +class MavenBuild(Maven): + """ MavenBuild represents a build directory initialized by maven. + + The build instance can be used to build/test/install. It alleviates the + user to know which generator is used. + """ + + def __init__(self, build_dir, definition=None): + """ Initialize a MavenBuild. + + The caller must ensure that maven was invoked in the build directory. + + Parameters + ---------- + definition : MavenDefinition + The definition to build from. + build_dir : str + The build directory to setup into. + """ + assert MavenBuild.is_build_dir(build_dir) + super().__init__() + self.build_dir = os.path.abspath(build_dir) + self.definition = definition + + @property + def binaries_dir(self): + return self.build_dir + + def run(self, *argv, verbose=False, cwd=None, **kwargs): + extra = [] + if verbose: + extra.append("-X") + if cwd is None: + cwd = self.build_dir + # Commands must be ran under the directory where pom.xml exists + return super().run(*extra, *argv, **kwargs, cwd=cwd) + + def build(self, *argv, verbose=False, **kwargs): + definition_args = self.definition.build_arguments + cwd = self.binaries_dir + return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd, + env=self.definition.env, **kwargs) + + def list(self, *argv, verbose=False, **kwargs): + definition_args = self.definition.list_arguments + cwd = self.binaries_dir + "/performance" + return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd, + env=self.definition.env, **kwargs) + + def benchmark(self, *argv, verbose=False, **kwargs): + definition_args = self.definition.benchmark_arguments + cwd = self.binaries_dir + "/performance" + return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd, + env=self.definition.env, **kwargs) + + @staticmethod + def is_build_dir(path): + """ Indicate if a path is Maven top directory. + + This method only checks for the existence of paths and does not do any + validation whatsoever. + """ + pom_xml = os.path.join(path, "pom.xml") + performance_dir = os.path.join(path, "performance") + return os.path.exists(pom_xml) and os.path.isdir(performance_dir) + + @staticmethod + def from_path(path): + """ Instantiate a Maven from a path. + + This is used to recover from an existing physical directory (created + with or without Maven). + + Note that this method is not idempotent as the original definition will + be lost. + """ + if not MavenBuild.is_build_dir(path): + raise ValueError("Not a valid MavenBuild path: {}".format(path)) + + return MavenBuild(path, definition=none) + + def __repr__(self): + return ("MavenBuild[" + "build = {}," + "definition = {}]".format(self.build_dir, + self.definition)) diff --git a/dev/archery/archery/utils/source.py b/dev/archery/archery/utils/source.py index d30b4f152e5..f7e47a5a1b6 100644 --- a/dev/archery/archery/utils/source.py +++ b/dev/archery/archery/utils/source.py @@ -68,6 +68,11 @@ def dev(self): """ Returns the dev directory of an Arrow sources. """ return self.path / "dev" + @property + def java(self): + """ Returns the java directory of an Arrow sources. """ + return self.path / "java" + @property def python(self): """ Returns the python directory of an Arrow sources. """ diff --git a/java/performance/pom.xml b/java/performance/pom.xml index dffe7f2cbd2..81e268d9769 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -99,8 +99,12 @@ true .* 1 + 5 5 + + jmh-result.json + json @@ -169,10 +173,17 @@ ${benchmark.filter} -f ${benchmark.forks} + -jvmArgs + ${benchmark.jvmargs} -wi ${benchmark.warmups} -i ${benchmark.runs} + ${benchmark.list} + -rff + ${benchmark.resultfile} + -rf + ${benchmark.resultformat} From 8148205302b4c5a6951f3bb3acb0c75dbb18ed2a Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 13 Nov 2020 03:43:16 +0900 Subject: [PATCH 05/14] fix typo --- dev/archery/archery/benchmark/google.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py index c1644dcbd9c..51d81dccde3 100644 --- a/dev/archery/archery/benchmark/google.py +++ b/dev/archery/archery/benchmark/google.py @@ -161,7 +161,7 @@ def __init__(self, name, runs): super().__init__(name, unit, less_is_better, values, time_unit, times) def __repr__(self): - return "GoogleBenchmark[name={},runs={}]".format(self.names, self.runs) + return "GoogleBenchmark[name={},runs={}]".format(self.name, self.runs) @classmethod def from_json(cls, payload): From 31f7c0bb1b9ea6f477e73024f05e0a9cf15fe249 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 13 Nov 2020 10:32:21 +0900 Subject: [PATCH 06/14] fix lint errors remove unnecessary comments --- dev/archery/archery/benchmark/jmh.py | 26 ++++++++++++------------- dev/archery/archery/benchmark/runner.py | 3 ++- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py index 4a4a6369bf5..e2d43e7b7bf 100644 --- a/dev/archery/archery/benchmark/jmh.py +++ b/dev/archery/archery/benchmark/jmh.py @@ -38,7 +38,6 @@ class JavaMicrobenchmarkHarnessCommand(Maven): """ def __init__(self, build, benchmark_filter=None): - #self.performance_dir = build.binaries_dir + "/performance" self.benchmark_filter = benchmark_filter self.build = build self.maven = Maven() @@ -47,9 +46,8 @@ def list_benchmarks(self): argv = [] if self.benchmark_filter: argv.append("-Dbenchmark.filter={}".format(self.benchmark_filter)) - #result = self.maven.run(*argv, cwd=self.performance_dir, - # stdout=subprocess.PIPE, stderr=subprocess.PIPE) - result = self.build.list(*argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + result = self.build.list( + *argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) """ Extract benchmark names from output. Assume the following output ... @@ -86,7 +84,6 @@ def results(self, repetitions=1): "-Dbenchmark.filter={}".format(self.benchmark_filter) ) - #self.maven.run(*argv, cwd=self.performance_dir, check=True) self.build.benchmark(*argv, check=True) return json.load(out) @@ -100,18 +97,18 @@ def __init__(self, benchmark, primaryMetric, self.name = benchmark self.primaryMetric = primaryMetric self.score = primaryMetric["score"] - self.scoreUnit = primaryMetric["scoreUnit"] + self.scoreUnit = primaryMetric["scoreUnit"] self.forks = forks self.warmups = warmupIterations self.runs = measurementIterations self.counters = { - "mode" : counters["mode"], - "threads" : counters["threads"], - "warmups" : warmupIterations, - "warmupTime" : counters["warmupTime"], - "measurements" : measurementIterations, - "measurementTime" : counters["measurementTime"], - "jvmArgs" : counters["jvmArgs"] + "mode": counters["mode"], + "threads": counters["threads"], + "warmups": warmupIterations, + "warmupTime": counters["warmupTime"], + "measurements": measurementIterations, + "measurementTime": counters["measurementTime"], + "jvmArgs": counters["jvmArgs"] } @property @@ -161,6 +158,7 @@ def from_json(cls, payload): def group_key(x): return x.name - benchmarks = map(lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload) + benchmarks = map( + lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload) groups = groupby(sorted(benchmarks, key=group_key), group_key) return [cls(k, list(bs)) for k, bs in groups] diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index a0880537280..b842fee9c3a 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -239,7 +239,8 @@ def suite(self, name): # update .m2 directory, which installs target jars self.build.build() - suite_cmd = JavaMicrobenchmarkHarnessCommand(self.build, self.benchmark_filter) + suite_cmd = JavaMicrobenchmarkHarnessCommand( + self.build, self.benchmark_filter) # Ensure there will be data benchmark_names = suite_cmd.list_benchmarks() From da46fca4de7701886f43e1807a17a7ec5dc57273 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 13 Nov 2020 11:41:40 +0900 Subject: [PATCH 07/14] fix lint errors --- dev/archery/archery/benchmark/jmh.py | 3 ++- dev/archery/archery/benchmark/runner.py | 1 - dev/archery/archery/cli.py | 16 +++++++++------- dev/archery/archery/utils/maven.py | 11 +++++++---- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py index e2d43e7b7bf..11a2d0e0d77 100644 --- a/dev/archery/archery/benchmark/jmh.py +++ b/dev/archery/archery/benchmark/jmh.py @@ -151,7 +151,8 @@ def __init__(self, name, runs): super().__init__(name, unit, less_is_better, values) def __repr__(self): - return "JavaMicrobenchmarkHarness[name={},runs={}]".format(self.name, self.runs) + return "JavaMicrobenchmarkHarness[name={},runs={}]".format( + self.name, self.runs) @classmethod def from_json(cls, payload): diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index b842fee9c3a..06aedbba75f 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -292,7 +292,6 @@ def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs): Otherwise, it assumes `rev_or_path` is a revision and clone/checkout the given revision and create a fresh MavenBuild. """ - build = None if StaticBenchmarkRunner.is_json_result(rev_or_path): return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs) elif MavenBuild.is_build_dir(rev_or_path): diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index cd7ec595e44..be7737de9e9 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -451,9 +451,9 @@ def benchmark_list_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras, @benchmark.command(name="list_java") @click.pass_context def benchmark_list_java(ctx, rev_or_path, src, preserve, output, language, - java_home, java_options, build_extras, benchmark_extras, - cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix, - **kwargs): + java_home, java_options, build_extras, + benchmark_extras, cmake_extras, cc, cxx, cxx_flags, + cpp_package_prefix, **kwargs): """ List java benchmark suite. """ with tmpdir(preserve=preserve) as root: @@ -461,7 +461,8 @@ def benchmark_list_java(ctx, rev_or_path, src, preserve, output, language, conf = JavaBenchmarkRunner.default_configuration( java_home=java_home, java_options=java_options, - build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs) + build_extras=build_extras, benchmark_extras=benchmark_extras, + **kwargs) runner_base = JavaBenchmarkRunner.from_rev_or_path( src, root, rev_or_path, conf) @@ -556,7 +557,8 @@ def benchmark_run_java(ctx, rev_or_path, src, preserve, output, language, conf = JavaBenchmarkRunner.default_configuration( java_home=java_home, java_options=java_options, - build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs) + build_extras=build_extras, benchmark_extras=benchmark_extras, + **kwargs) runner_base = JavaBenchmarkRunner.from_rev_or_path( src, root, rev_or_path, conf, @@ -700,8 +702,8 @@ def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras, @benchmark.command(name="diff_java") @click.pass_context def benchmark_diff_java(ctx, src, preserve, output, language, - java_home, java_options, build_extras, benchmark_extras, - suite_filter, benchmark_filter, + java_home, java_options, build_extras, + benchmark_extras, suite_filter, benchmark_filter, repetitions, threshold, contender, baseline, cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix, **kwargs): diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py index a750c6972fc..6b027f0015e 100644 --- a/dev/archery/archery/utils/maven.py +++ b/dev/archery/archery/utils/maven.py @@ -56,7 +56,8 @@ def __init__(self, source, build_definitions=None, """ self.source = os.path.abspath(source) self.build_definitions = build_definitions if build_definitions else [] - self.benchmark_definitions = benchmark_definitions if benchmark_definitions else [] + self.benchmark_definitions =\ + benchmark_definitions if benchmark_definitions else [] self.env = env @property @@ -64,7 +65,8 @@ def build_arguments(self): """" Return the arguments to maven invocation for build. """ arguments = self.build_definitions + [ "-B", "-DskipTests", "-Drat.skip=true", - "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn", + "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer."\ + "Slf4jMavenTransferListener=warn", "-T", "2C", "install" ] return arguments @@ -103,7 +105,8 @@ def benchmark_arguments(self): """" Return the arguments to maven invocation for benchmark """ arguments = self.benchmark_definitions + [ "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1", - "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false -Darrow.enable_unsafe_memory_access=true\"", + "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false"\ + "-Darrow.enable_unsafe_memory_access=true\"", "install" ] return arguments @@ -191,7 +194,7 @@ def from_path(path): if not MavenBuild.is_build_dir(path): raise ValueError("Not a valid MavenBuild path: {}".format(path)) - return MavenBuild(path, definition=none) + return MavenBuild(path, definition=None) def __repr__(self): return ("MavenBuild[" From c17f50cf2c75313e716ec99c09840ffa675d8c48 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 13 Nov 2020 12:56:54 +0900 Subject: [PATCH 08/14] fix lint errors --- dev/archery/archery/utils/maven.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py index 6b027f0015e..f18fb0b2da0 100644 --- a/dev/archery/archery/utils/maven.py +++ b/dev/archery/archery/utils/maven.py @@ -65,7 +65,7 @@ def build_arguments(self): """" Return the arguments to maven invocation for build. """ arguments = self.build_definitions + [ "-B", "-DskipTests", "-Drat.skip=true", - "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer."\ + "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer." "Slf4jMavenTransferListener=warn", "-T", "2C", "install" ] @@ -105,7 +105,7 @@ def benchmark_arguments(self): """" Return the arguments to maven invocation for benchmark """ arguments = self.benchmark_definitions + [ "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1", - "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false"\ + "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false" "-Darrow.enable_unsafe_memory_access=true\"", "install" ] From e924ba2dc13b172cb33baf4a56b44518faa98f3c Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Sat, 10 Apr 2021 09:08:04 +0000 Subject: [PATCH 09/14] address trivial comments --- dev/archery/archery/benchmark/jmh.py | 30 ++++++++++++------------- dev/archery/archery/benchmark/runner.py | 2 +- dev/archery/archery/cli.py | 6 ++--- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py index 11a2d0e0d77..aa80240a816 100644 --- a/dev/archery/archery/benchmark/jmh.py +++ b/dev/archery/archery/benchmark/jmh.py @@ -21,6 +21,7 @@ from tempfile import NamedTemporaryFile from .core import Benchmark +from ..utils.command import Command from ..utils.maven import Maven @@ -30,11 +31,11 @@ def partition(pred, iterable): return list(filter(pred, t1)), list(filterfalse(pred, t2)) -class JavaMicrobenchmarkHarnessCommand(Maven): +class JavaMicrobenchmarkHarnessCommand(Command): """ Run a Java Micro Benchmark Harness This assumes the binary supports the standard command line options, - notably `--benchmark_filter`, `--benchmark_format`, etc... + notably `-Dbenchmark_filter` """ def __init__(self, build, benchmark_filter=None): @@ -42,6 +43,17 @@ def __init__(self, build, benchmark_filter=None): self.build = build self.maven = Maven() + """ Extract benchmark names from output between "Benchmarks:" and "[INFO]". + Assume the following output: + ... + Benchmarks: + org.apache.arrow.vector.IntBenchmarks.setIntDirectly + ... + org.apache.arrow.vector.IntBenchmarks.setWithValueHolder + org.apache.arrow.vector.IntBenchmarks.setWithWriter + ... + [INFO] + """ def list_benchmarks(self): argv = [] if self.benchmark_filter: @@ -49,17 +61,6 @@ def list_benchmarks(self): result = self.build.list( *argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - """ Extract benchmark names from output. Assume the following output - ... - Benchmarks: - org.apache.arrow.vector.IntBenchmarks.setIntDirectly - ... - org.apache.arrow.vector.IntBenchmarks.setWithValueHolder - org.apache.arrow.vector.IntBenchmarks.setWithWriter - ... - [INFO] - """ - lists = [] benchmarks = False for line in str.splitlines(result.stdout.decode("utf-8")): @@ -70,7 +71,6 @@ def list_benchmarks(self): if line.startswith("org.apache.arrow"): lists.append(line) if line.startswith("[INFO]"): - benchmarks = False break return lists @@ -151,7 +151,7 @@ def __init__(self, name, runs): super().__init__(name, unit, less_is_better, values) def __repr__(self): - return "JavaMicrobenchmarkHarness[name={},runs={}]".format( + return "JavaMicrobenchmark[name={},runs={}]".format( self.name, self.runs) @classmethod diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index 06aedbba75f..13bc95d2ef5 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -286,7 +286,7 @@ def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs): run in a file or a string. Second, it checks if `rev_or_path` points to a valid Maven build - directory. If so, it creates a JavaenchmarkRunner with this existing + directory. If so, it creates a JavaBenchmarkRunner with this existing MavenBuild. Otherwise, it assumes `rev_or_path` is a revision and clone/checkout diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index be7737de9e9..056bb300058 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -367,7 +367,7 @@ def benchmark(ctx): def benchmark_common_options(cmd): def check_language(ctx, param, value): - if value != "cpp" and value != "java": + if value not in {"cpp", "java"}: raise click.BadParameter("cpp or java is supported now") return value @@ -386,10 +386,10 @@ def check_language(ctx, param, value): help="Specify target language for the benchmark"), click.option("--build-extras", type=str, multiple=True, help="Extra flags/options to pass to mvn build. " - "Can be stackd. For language=java"), + "Can be stacked. For language=java"), click.option("--benchmark-extras", type=str, multiple=True, help="Extra flags/options to pass to mvn benchmark. " - "Can be stackd. For language=java"), + "Can be stacked. For language=java"), click.option("--cmake-extras", type=str, multiple=True, help="Extra flags/options to pass to cmake invocation. " "Can be stacked. For language=cpp"), From 6e1161bcabcfad14c7301ed9d92ef6243162a8ff Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Sat, 10 Apr 2021 10:24:20 +0000 Subject: [PATCH 10/14] address trivial comments --- dev/archery/archery/lang/java.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/dev/archery/archery/lang/java.py b/dev/archery/archery/lang/java.py index 472fb800f22..6cda0194aae 100644 --- a/dev/archery/archery/lang/java.py +++ b/dev/archery/archery/lang/java.py @@ -43,18 +43,16 @@ def __init__(self, self.java_home = java_home self.java_options = java_options - self.build_extras = build_extras - self.benchmark_extras = benchmark_extras + self.build_extras = list(build_extras) if build_extras else [] + self.benchmark_extras = list(self.benchmark_extras) if self.benchmark_extras else [] @property def build_definitions(self): - extras = list(self.build_extras) if self.build_extras else [] - return extras + return self.build_extras @property def benchmark_definitions(self): - extras = list(self.benchmark_extras) if self.benchmark_extras else [] - return extras + return self.benchmark_extras @property def environment(self): From 513a7ca6b20869f6b78c424e8b1d9fbe926a9b7f Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Sat, 10 Apr 2021 10:45:55 +0000 Subject: [PATCH 11/14] normalize unit --- dev/archery/archery/benchmark/compare.py | 8 ------ dev/archery/archery/benchmark/jmh.py | 36 ++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/dev/archery/archery/benchmark/compare.py b/dev/archery/archery/benchmark/compare.py index 46633525490..622b8017917 100644 --- a/dev/archery/archery/benchmark/compare.py +++ b/dev/archery/archery/benchmark/compare.py @@ -54,14 +54,6 @@ def formatter_for_unit(unit): return bytes_per_seconds_fmt elif unit == "items_per_second": return items_per_seconds_fmt - elif unit.startswith("ops/"): - def ops_per_time_fmt(value): - return "{:.3f} {}".format(value, unit) - return ops_per_time_fmt - elif unit.endswith("/op"): - def time_per_op_fmt(value): - return "{:.3f} {}".format(value, unit) - return time_per_op_fmt else: return lambda x: x diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py index aa80240a816..c3e9ca90ff2 100644 --- a/dev/archery/archery/benchmark/jmh.py +++ b/dev/archery/archery/benchmark/jmh.py @@ -110,15 +110,45 @@ def __init__(self, benchmark, primaryMetric, "measurementTime": counters["measurementTime"], "jvmArgs": counters["jvmArgs"] } + self.reciprocalValue = True if self.scoreUnit.endswith("/op") else False + if self.scoreUnit.startswith("ops/"): + idx = self.scoreUnit.find("/") + self.normalizePerSec(self.scoreUnit[idx+1:]) + elif self.scoreUnit.endswith("/op"): + idx = self.scoreUnit.find("/") + self.normalizePerSec(self.scoreUnit[:idx]) + else: + self.normalizeFactor = 1 @property def value(self): """ Return the benchmark value.""" - return self.score + val = 1 / self.score if self.reciprocalValue else self.score + return val * self.normalzeFactor + + def normalizePerSec(self, unit): + if unit == "ns": + self.normalizeFactor = 1000 * 1000 * 1000 + elif unit == "us": + self.normalizeFactor = 1000 * 1000 + elif unit == "ms": + self.normalizeFactor = 1000 + elif unit == "min": + self.normalizeFactor = 1 / 60 + elif unit == "hr": + self.normalizeFactor = 1 / (60 * 60) + elif unit == "day": + self.normalizeFactor = 1 / (60 * 60 * 24) + else: + self.normalizeFactor = 1 @property def unit(self): - return self.scoreUnit + if self.scoreUnit.startswith("ops/"): + return "bytes_per_second" + elif self.scoreUnit.endswith("/op"): + return "items_per_second" + return "?" def __repr__(self): return str(self.value) @@ -144,7 +174,7 @@ def __init__(self, name, runs): self.name = name self.runs = sorted(runs, key=lambda b: b.value) unit = self.runs[0].unit - less_is_better = unit.endswith("/op") + less_is_better = False values = [b.value for b in self.runs] # Slight kludge to extract the UserCounters for each benchmark self.counters = self.runs[0].counters From 152255874a0ccf96852080d5cddbc1e971018c6e Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Sat, 10 Apr 2021 16:47:28 +0000 Subject: [PATCH 12/14] refactor to reduce more splits --- dev/archery/archery/cli.py | 209 +++++++++-------------------- dev/archery/archery/lang/java.py | 2 +- dev/archery/archery/utils/maven.py | 1 + 3 files changed, 64 insertions(+), 148 deletions(-) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 056bb300058..55fa6856689 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -141,6 +141,7 @@ def _apply_options(cmd, options): help="Specify Arrow source directory") # toolchain @cpp_toolchain_options +@java_toolchain_options @click.option("--build-type", default=None, type=build_type, help="CMake's CMAKE_BUILD_TYPE") @click.option("--warn-level", default="production", type=warn_level_type, @@ -418,54 +419,30 @@ def benchmark_filter_options(cmd): @benchmark_common_options @click.pass_context def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, - language, build_extras, benchmark_extras, java_home, - java_options, **kwargs): + java_home, java_options, build_extras, benchmark_extras, + language, **kwargs): """ List benchmark suite. """ - if language == "cpp": - ctx.forward(benchmark_list_cpp, **kwargs) - elif language == "java": - ctx.forward(benchmark_list_java, **kwargs) - - -@benchmark.command(name="list_cpp") -@click.pass_context -def benchmark_list_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras, - language, build_extras, benchmark_extras, java_home, - java_options, **kwargs): - """ List cpp benchmark suite. - """ with tmpdir(preserve=preserve) as root: logger.debug("Running benchmark {}".format(rev_or_path)) - conf = CppBenchmarkRunner.default_configuration( - cmake_extras=cmake_extras, **kwargs) - - runner_base = CppBenchmarkRunner.from_rev_or_path( - src, root, rev_or_path, conf) - - for b in runner_base.list_benchmarks: - click.echo(b, file=output) - + if language == "cpp": + conf = CppBenchmarkRunner.default_configuration( + cmake_extras=cmake_extras, **kwargs) -@benchmark.command(name="list_java") -@click.pass_context -def benchmark_list_java(ctx, rev_or_path, src, preserve, output, language, - java_home, java_options, build_extras, - benchmark_extras, cmake_extras, cc, cxx, cxx_flags, - cpp_package_prefix, **kwargs): - """ List java benchmark suite. - """ - with tmpdir(preserve=preserve) as root: - logger.debug("Running benchmark {}".format(rev_or_path)) + runner_base = CppBenchmarkRunner.from_rev_or_path( + src, root, rev_or_path, conf) - conf = JavaBenchmarkRunner.default_configuration( - java_home=java_home, java_options=java_options, - build_extras=build_extras, benchmark_extras=benchmark_extras, - **kwargs) + elif language == "java": + for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}: + del kwargs[key] + conf = JavaBenchmarkRunner.default_configuration( + java_home=java_home, java_options=java_options, + build_extras=build_extras, benchmark_extras=benchmark_extras, + **kwargs) - runner_base = JavaBenchmarkRunner.from_rev_or_path( - src, root, rev_or_path, conf) + runner_base = JavaBenchmarkRunner.from_rev_or_path( + src, root, rev_or_path, conf) for b in runner_base.list_benchmarks: click.echo(b, file=output) @@ -515,54 +492,28 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, \b archery benchmark run --output=run.json """ - if language == "cpp": - ctx.forward(benchmark_run_cpp, **kwargs) - elif language == "java": - ctx.forward(benchmark_run_java, **kwargs) - - -@benchmark.command(name="run_cpp") -@click.pass_context -def benchmark_run_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras, - language, build_extras, benchmark_extras, java_home, - java_options, suite_filter, benchmark_filter, **kwargs): - """ Run cpp benchmark suite. - """ - with tmpdir(preserve=preserve) as root: logger.debug("Running benchmark {}".format(rev_or_path)) - conf = CppBenchmarkRunner.default_configuration( - cmake_extras=cmake_extras, **kwargs) - - runner_base = CppBenchmarkRunner.from_rev_or_path( - src, root, rev_or_path, conf, - repetitions=repetitions, - suite_filter=suite_filter, benchmark_filter=benchmark_filter) - - json.dump(runner_base, output, cls=JsonEncoder) - - -@benchmark.command(name="run_java") -@click.pass_context -def benchmark_run_java(ctx, rev_or_path, src, preserve, output, language, - java_home, java_options, build_extras, benchmark_extras, - cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix, - suite_filter, benchmark_filter, **kwargs): - """ Run Java benchmark suite. - """ + if language == "cpp": + conf = CppBenchmarkRunner.default_configuration( + cmake_extras=cmake_extras, **kwargs) - with tmpdir(preserve=preserve) as root: - logger.debug("Running benchmark {}".format(rev_or_path)) + runner_base = CppBenchmarkRunner.from_rev_or_path( + src, root, rev_or_path, conf, + repetitions=repetitions, + suite_filter=suite_filter, benchmark_filter=benchmark_filter) - conf = JavaBenchmarkRunner.default_configuration( - java_home=java_home, java_options=java_options, - build_extras=build_extras, benchmark_extras=benchmark_extras, - **kwargs) + elif language == "java": + conf = JavaBenchmarkRunner.default_configuration( + java_home=java_home, java_options=java_options, + build_extras=build_extras, benchmark_extras=benchmark_extras, + **kwargs) - runner_base = JavaBenchmarkRunner.from_rev_or_path( - src, root, rev_or_path, conf, - benchmark_filter=benchmark_filter) + runner_base = JavaBenchmarkRunner.from_rev_or_path( + src, root, rev_or_path, conf, + repetitions=repetitions, + benchmark_filter=benchmark_filter) json.dump(runner_base, output, cls=JsonEncoder) @@ -656,38 +607,39 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, # This should not recompute the benchmark from run.json archery --quiet benchmark diff WORKSPACE run.json > result.json """ - if language == "cpp": - ctx.forward(benchmark_diff_cpp, **kwargs) - elif language == "java": - ctx.forward(benchmark_diff_java, **kwargs) - - -@benchmark.command(name="diff_cpp") -@click.pass_context -def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras, - suite_filter, benchmark_filter, - repetitions, threshold, contender, baseline, - build_extras, benchmark_extras, java_home, java_options, - **kwargs): - """Compare (diff) cpp benchmark runs.""" - with tmpdir(preserve=preserve) as root: logger.debug("Comparing {} (contender) with {} (baseline)" .format(contender, baseline)) - conf = CppBenchmarkRunner.default_configuration( - cmake_extras=cmake_extras, **kwargs) - - runner_cont = CppBenchmarkRunner.from_rev_or_path( - src, root, contender, conf, - repetitions=repetitions, - suite_filter=suite_filter, - benchmark_filter=benchmark_filter) - runner_base = CppBenchmarkRunner.from_rev_or_path( - src, root, baseline, conf, - repetitions=repetitions, - suite_filter=suite_filter, - benchmark_filter=benchmark_filter) + if language == "cpp": + conf = CppBenchmarkRunner.default_configuration( + cmake_extras=cmake_extras, **kwargs) + + runner_cont = CppBenchmarkRunner.from_rev_or_path( + src, root, contender, conf, + repetitions=repetitions, + suite_filter=suite_filter, + benchmark_filter=benchmark_filter) + runner_base = CppBenchmarkRunner.from_rev_or_path( + src, root, baseline, conf, + repetitions=repetitions, + suite_filter=suite_filter, + benchmark_filter=benchmark_filter) + + elif language == "java": + conf = JavaBenchmarkRunner.default_configuration( + java_home=java_home, java_options=java_options, + build_extras=build_extras, benchmark_extras=benchmark_extras, + **kwargs) + + runner_cont = JavaBenchmarkRunner.from_rev_or_path( + src, root, contender, conf, + repetitions=repetitions, + benchmark_filter=benchmark_filter) + runner_base = JavaBenchmarkRunner.from_rev_or_path( + src, root, baseline, conf, + repetitions=repetitions, + benchmark_filter=benchmark_filter) runner_comp = RunnerComparator(runner_cont, runner_base, threshold) @@ -699,43 +651,6 @@ def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras, output.write('\n') -@benchmark.command(name="diff_java") -@click.pass_context -def benchmark_diff_java(ctx, src, preserve, output, language, - java_home, java_options, build_extras, - benchmark_extras, suite_filter, benchmark_filter, - repetitions, threshold, contender, baseline, - cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix, - **kwargs): - """Compare (diff) java benchmark runs.""" - - with tmpdir(preserve=preserve) as root: - logger.debug("Comparing {} (contender) with {} (baseline)" - .format(contender, baseline)) - - conf = JavaBenchmarkRunner.default_configuration( - java_home=java_home, java_options=java_options, - build_extras=build_extras, benchmark_extras=benchmark_extras, - **kwargs) - - runner_cont = JavaBenchmarkRunner.from_rev_or_path( - src, root, contender, conf, - repetitions=repetitions, - benchmark_filter=benchmark_filter) - runner_base = JavaBenchmarkRunner.from_rev_or_path( - src, root, baseline, conf, - repetitions=repetitions, - benchmark_filter=benchmark_filter) - - runner_comp = RunnerComparator(runner_cont, runner_base, threshold) - - # TODO(kszucs): test that the output is properly formatted jsonlines - comparisons_json = _get_comparisons_as_json(runner_comp.comparisons) - formatted = _format_comparisons_with_pandas(comparisons_json) - output.write(formatted) - output.write('\n') - - def _get_comparisons_as_json(comparisons): buf = StringIO() for comparator in comparisons: diff --git a/dev/archery/archery/lang/java.py b/dev/archery/archery/lang/java.py index 6cda0194aae..20a9594df68 100644 --- a/dev/archery/archery/lang/java.py +++ b/dev/archery/archery/lang/java.py @@ -44,7 +44,7 @@ def __init__(self, self.java_options = java_options self.build_extras = list(build_extras) if build_extras else [] - self.benchmark_extras = list(self.benchmark_extras) if self.benchmark_extras else [] + self.benchmark_extras = list(benchmark_extras) if benchmark_extras else [] @property def build_definitions(self): diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py index f18fb0b2da0..1eea11e4c95 100644 --- a/dev/archery/archery/utils/maven.py +++ b/dev/archery/archery/utils/maven.py @@ -89,6 +89,7 @@ def build(self, build_dir, force=False, cmd_kwargs=None, **kwargs): ) cmd_kwargs = cmd_kwargs if cmd_kwargs else {} + assert MavenBuild.is_build_dir(build_dir) maven(*self.build_arguments, cwd=build_dir, env=self.env, **cmd_kwargs) return MavenBuild(build_dir, definition=self, **kwargs) From 150d19c4a8636ee091345f543814f2f8b243f1a3 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Mon, 12 Apr 2021 10:01:41 +0000 Subject: [PATCH 13/14] updates --- dev/archery/archery/benchmark/google.py | 2 +- dev/archery/archery/benchmark/jmh.py | 40 ++++++++++++++----------- dev/archery/archery/benchmark/runner.py | 2 +- dev/archery/archery/cli.py | 37 ++++++++++++++++------- dev/archery/archery/utils/maven.py | 2 +- 5 files changed, 52 insertions(+), 31 deletions(-) diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py index 51d81dccde3..c1644dcbd9c 100644 --- a/dev/archery/archery/benchmark/google.py +++ b/dev/archery/archery/benchmark/google.py @@ -161,7 +161,7 @@ def __init__(self, name, runs): super().__init__(name, unit, less_is_better, values, time_unit, times) def __repr__(self): - return "GoogleBenchmark[name={},runs={}]".format(self.name, self.runs) + return "GoogleBenchmark[name={},runs={}]".format(self.names, self.runs) @classmethod def from_json(cls, payload): diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py index c3e9ca90ff2..c093ec37fb7 100644 --- a/dev/archery/archery/benchmark/jmh.py +++ b/dev/archery/archery/benchmark/jmh.py @@ -74,7 +74,7 @@ def list_benchmarks(self): break return lists - def results(self, repetitions=1): + def results(self, repetitions): with NamedTemporaryFile(suffix=".json") as out: argv = ["-Dbenchmark.runs={}".format(repetitions), "-Dbenchmark.resultfile={}".format(out.name), @@ -97,7 +97,7 @@ def __init__(self, benchmark, primaryMetric, self.name = benchmark self.primaryMetric = primaryMetric self.score = primaryMetric["score"] - self.scoreUnit = primaryMetric["scoreUnit"] + self.score_unit = primaryMetric["scoreUnit"] self.forks = forks self.warmups = warmupIterations self.runs = measurementIterations @@ -110,21 +110,21 @@ def __init__(self, benchmark, primaryMetric, "measurementTime": counters["measurementTime"], "jvmArgs": counters["jvmArgs"] } - self.reciprocalValue = True if self.scoreUnit.endswith("/op") else False - if self.scoreUnit.startswith("ops/"): - idx = self.scoreUnit.find("/") - self.normalizePerSec(self.scoreUnit[idx+1:]) - elif self.scoreUnit.endswith("/op"): - idx = self.scoreUnit.find("/") - self.normalizePerSec(self.scoreUnit[:idx]) + self.reciprocal_value = True if self.score_unit.endswith("/op") else False + if self.score_unit.startswith("ops/"): + idx = self.score_unit.find("/") + self.normalizePerSec(self.score_unit[idx+1:]) + elif self.score_unit.endswith("/op"): + idx = self.score_unit.find("/") + self.normalizePerSec(self.score_unit[:idx]) else: self.normalizeFactor = 1 @property def value(self): """ Return the benchmark value.""" - val = 1 / self.score if self.reciprocalValue else self.score - return val * self.normalzeFactor + val = 1 / self.score if self.reciprocal_value else self.score + return val * self.normalizeFactor def normalizePerSec(self, unit): if unit == "ns": @@ -144,11 +144,12 @@ def normalizePerSec(self, unit): @property def unit(self): - if self.scoreUnit.startswith("ops/"): - return "bytes_per_second" - elif self.scoreUnit.endswith("/op"): + if self.score_unit.startswith("ops/"): return "items_per_second" - return "?" + elif self.score_unit.endswith("/op"): + return "items_per_second" + else: + return "?" def __repr__(self): return str(self.value) @@ -174,11 +175,14 @@ def __init__(self, name, runs): self.name = name self.runs = sorted(runs, key=lambda b: b.value) unit = self.runs[0].unit - less_is_better = False + time_unit = "N/A" + less_is_better = not unit.endswith("per_second") values = [b.value for b in self.runs] + times = [] # Slight kludge to extract the UserCounters for each benchmark - self.counters = self.runs[0].counters - super().__init__(name, unit, less_is_better, values) + counters = self.runs[0].counters + super().__init__(name, unit, less_is_better, values, time_unit, times, + counters) def __repr__(self): return "JavaMicrobenchmark[name={},runs={}]".format( diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index 13bc95d2ef5..fc6d354b180 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -223,10 +223,10 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs): class JavaBenchmarkRunner(BenchmarkRunner): """ Run suites for Java. """ + # default repetitions is 5 for Java microbenchmark harness def __init__(self, build, **kwargs): """ Initialize a JavaBenchmarkRunner. """ self.build = build - self.repetitions = 5 # default repetitions for Java super().__init__(**kwargs) @staticmethod diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 55fa6856689..8bc6a805f57 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -393,7 +393,7 @@ def check_language(ctx, param, value): "Can be stacked. For language=java"), click.option("--cmake-extras", type=str, multiple=True, help="Extra flags/options to pass to cmake invocation. " - "Can be stacked. For language=cpp"), + "Can be stacked. For language=cpp") ] cmd = java_toolchain_options(cmd) @@ -417,6 +417,7 @@ def benchmark_filter_options(cmd): @click.argument("rev_or_path", metavar="[]", default="WORKSPACE", required=False) @benchmark_common_options +@benchmark_filter_options @click.pass_context def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, java_home, java_options, build_extras, benchmark_extras, @@ -453,12 +454,14 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, default="WORKSPACE", required=False) @benchmark_common_options @benchmark_filter_options -@click.option("--repetitions", type=int, default=1, show_default=True, +@click.option("--repetitions", type=int, default=-1, help=("Number of repetitions of each benchmark. Increasing " - "may improve result precision.")) + "may improve result precision. " + "[default: 1 for cpp, 5 for java")) @click.pass_context def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, - language, suite_filter, benchmark_filter, repetations, + java_home, java_options, build_extras, benchmark_extras, + language, suite_filter, benchmark_filter, repetitions, **kwargs): """ Run benchmark suite. @@ -499,17 +502,21 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, conf = CppBenchmarkRunner.default_configuration( cmake_extras=cmake_extras, **kwargs) + repetitions = repetitions if repetitions != -1 else 1 runner_base = CppBenchmarkRunner.from_rev_or_path( src, root, rev_or_path, conf, repetitions=repetitions, suite_filter=suite_filter, benchmark_filter=benchmark_filter) elif language == "java": + for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}: + del kwargs[key] conf = JavaBenchmarkRunner.default_configuration( java_home=java_home, java_options=java_options, build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs) + repetitions = repetitions if repetitions != -1 else 5 runner_base = JavaBenchmarkRunner.from_rev_or_path( src, root, rev_or_path, conf, repetitions=repetitions, @@ -526,7 +533,8 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, help="Regression failure threshold in percentage.") @click.option("--repetitions", type=int, default=1, show_default=True, help=("Number of repetitions of each benchmark. Increasing " - "may improve result precision.")) + "may improve result precision. " + "[default: 1 for cpp, 5 for java")) @click.option("--no-counters", type=BOOL, default=False, is_flag=True, help="Hide counters field in diff report.") @click.argument("contender", metavar="[", @@ -535,7 +543,8 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, required=False) @click.pass_context def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, - suite_filter, benchmark_filter, repitations, no_counters, + suite_filter, benchmark_filter, repetitions, no_counters, + java_home, java_options, build_extras, benchmark_extras, threshold, contender, baseline, **kwargs): """Compare (diff) benchmark runs. @@ -615,6 +624,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, conf = CppBenchmarkRunner.default_configuration( cmake_extras=cmake_extras, **kwargs) + repetitions = repetitions if repetitions != -1 else 1 runner_cont = CppBenchmarkRunner.from_rev_or_path( src, root, contender, conf, repetitions=repetitions, @@ -627,11 +637,14 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, benchmark_filter=benchmark_filter) elif language == "java": + for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}: + del kwargs[key] conf = JavaBenchmarkRunner.default_configuration( java_home=java_home, java_options=java_options, build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs) + repetitions = repetitions if repetitions != -1 else 5 runner_cont = JavaBenchmarkRunner.from_rev_or_path( src, root, contender, conf, repetitions=repetitions, @@ -645,8 +658,9 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, # TODO(kszucs): test that the output is properly formatted jsonlines comparisons_json = _get_comparisons_as_json(runner_comp.comparisons) + ren_counters = language == "java" formatted = _format_comparisons_with_pandas(comparisons_json, - no_counters) + no_counters, ren_counters) output.write(formatted) output.write('\n') @@ -660,7 +674,8 @@ def _get_comparisons_as_json(comparisons): return buf.getvalue() -def _format_comparisons_with_pandas(comparisons_json, no_counters): +def _format_comparisons_with_pandas(comparisons_json, no_counters, + ren_counters): import pandas as pd df = pd.read_json(StringIO(comparisons_json), lines=True) # parse change % so we can sort by it @@ -670,9 +685,11 @@ def _format_comparisons_with_pandas(comparisons_json, no_counters): fields = ['benchmark', 'baseline', 'contender', 'change %'] if not no_counters: fields += ['counters'] - # fields += ['configurations'] - df = df[fields].sort_values(by='change %', ascending=False) + df = df[fields] + if ren_counters: + df = df.rename(columns={'counters': 'configurations'}) + df = df.sort_values(by='change %', ascending=False) def labelled(title, df): if len(df) == 0: diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py index 1eea11e4c95..96a3bf5bd99 100644 --- a/dev/archery/archery/utils/maven.py +++ b/dev/archery/archery/utils/maven.py @@ -106,7 +106,7 @@ def benchmark_arguments(self): """" Return the arguments to maven invocation for benchmark """ arguments = self.benchmark_definitions + [ "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1", - "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false" + "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false " "-Darrow.enable_unsafe_memory_access=true\"", "install" ] From 5d70561ea8d33828b7c5c49d380bafe0e3586225 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Mon, 12 Apr 2021 10:18:24 +0000 Subject: [PATCH 14/14] fix lint errors --- dev/archery/archery/benchmark/jmh.py | 4 +++- dev/archery/archery/cli.py | 6 +++--- dev/archery/archery/lang/java.py | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py index c093ec37fb7..f531b6de163 100644 --- a/dev/archery/archery/benchmark/jmh.py +++ b/dev/archery/archery/benchmark/jmh.py @@ -54,6 +54,7 @@ def __init__(self, build, benchmark_filter=None): ... [INFO] """ + def list_benchmarks(self): argv = [] if self.benchmark_filter: @@ -110,7 +111,8 @@ def __init__(self, benchmark, primaryMetric, "measurementTime": counters["measurementTime"], "jvmArgs": counters["jvmArgs"] } - self.reciprocal_value = True if self.score_unit.endswith("/op") else False + self.reciprocal_value = True if self.score_unit.endswith( + "/op") else False if self.score_unit.startswith("ops/"): idx = self.score_unit.find("/") self.normalizePerSec(self.score_unit[idx+1:]) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 8bc6a805f57..06dd6b60370 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -435,7 +435,7 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras, src, root, rev_or_path, conf) elif language == "java": - for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}: + for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}: del kwargs[key] conf = JavaBenchmarkRunner.default_configuration( java_home=java_home, java_options=java_options, @@ -509,7 +509,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, suite_filter=suite_filter, benchmark_filter=benchmark_filter) elif language == "java": - for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}: + for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}: del kwargs[key] conf = JavaBenchmarkRunner.default_configuration( java_home=java_home, java_options=java_options, @@ -637,7 +637,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, benchmark_filter=benchmark_filter) elif language == "java": - for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}: + for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}: del kwargs[key] conf = JavaBenchmarkRunner.default_configuration( java_home=java_home, java_options=java_options, diff --git a/dev/archery/archery/lang/java.py b/dev/archery/archery/lang/java.py index 20a9594df68..bc169adf647 100644 --- a/dev/archery/archery/lang/java.py +++ b/dev/archery/archery/lang/java.py @@ -44,7 +44,8 @@ def __init__(self, self.java_options = java_options self.build_extras = list(build_extras) if build_extras else [] - self.benchmark_extras = list(benchmark_extras) if benchmark_extras else [] + self.benchmark_extras = list( + benchmark_extras) if benchmark_extras else [] @property def build_definitions(self):