From 96cdc394a23412fad29a2fc086873d531ab3618b Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Thu, 17 Sep 2020 15:48:06 +0000
Subject: [PATCH 01/14] add --language option

---
 dev/archery/archery/cli.py | 52 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 50 insertions(+), 2 deletions(-)
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index bcaddf1c795..6e1b6fe0e09 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -120,6 +120,15 @@ def cpp_toolchain_options(cmd):
     return _apply_options(cmd, options)
 
 
+def java_toolchain_options(cmd):
+    options = [
+        click.option("--java-home", metavar="<java_home>",
+                     help="Path to Java Developers Kit."),
+        click.option("--java-flags", help="java compiler flags."),
+    ]
+    return _apply_options(cmd, options)
+
+
 def _apply_options(cmd, options):
     for option in options:
         cmd = option(cmd)
@@ -357,6 +366,11 @@ def benchmark(ctx):
 
 
 def benchmark_common_options(cmd):
+    def check_language(ctx, param, value):
+        if value != "cpp" and value != "java":
+            raise click.BadParameter("cpp or java is supported now")
+        return value
+
     options = [
         click.option("--src", metavar="<arrow_src>", show_default=True,
                      default=None, callback=validate_arrow_sources,
@@ -367,11 +381,18 @@ def benchmark_common_options(cmd):
         click.option("--output", metavar="<output>",
                      type=click.File("w", encoding="utf8"), default="-",
                      help="Capture output result into file."),
+        click.option("--language", metavar="<lang>", type=str, default="cpp",
+                     show_default=True, callback=check_language,
+                     help="Specify target language for the benchmark"),
+        click.option("--mvn-extras", type=str, multiple=True,
+                     help="Extra flags/options to pass to mvn. "
+                     "Can be stacked"),
         click.option("--cmake-extras", type=str, multiple=True,
                      help="Extra flags/options to pass to cmake invocation. "
                      "Can be stacked"),
     ]
 
+    cmd = java_toolchain_options(cmd)
     cmd = cpp_toolchain_options(cmd)
     return _apply_options(cmd, options)
 
@@ -483,8 +504,8 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
 @click.argument("baseline", metavar="[<baseline>]]", default="origin/master",
                 required=False)
 @click.pass_context
-def benchmark_diff(ctx, src, preserve, output, cmake_extras,
-                   suite_filter, benchmark_filter, repetitions, no_counters,
+def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
+                   suite_filter, benchmark_filter, repitations, no_counters,
                    threshold, contender, baseline, **kwargs):
     """Compare (diff) benchmark runs.
 
@@ -556,6 +577,20 @@ def benchmark_diff(ctx, src, preserve, output, cmake_extras,
     # This should not recompute the benchmark from run.json
     archery --quiet benchmark diff WORKSPACE run.json > result.json
     """
+    if language == "cpp":
+        ctx.forward(benchmark_diff_cpp, **kwargs)
+    elif language == "java":
+        ctx.forward(benchmark_diff_jav, **kwargs)
+
+@benchmark.command(name="diff_cpp")
+@click.pass_context
+def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras,
+                       suite_filter, benchmark_filter,
+                       repetitions, threshold, contender, baseline,
+                       mvn_extras, java_home, java_flags,
+                       **kwargs):
+    """Compare (diff) cpp benchmark runs."""
+
     with tmpdir(preserve=preserve) as root:
         logger.debug("Comparing {} (contender) with {} (baseline)"
                      .format(contender, baseline))
@@ -584,6 +619,19 @@ def benchmark_diff(ctx, src, preserve, output, cmake_extras,
         output.write('\n')
 
 
+@benchmark.command(name="diff_java")
+@click.pass_context
+def benchmark_diff_java(ctx, src, preserve, output, language,
+                        suite_filter, benchmark_filter,
+                        repetitions, threshold, contender, baseline,
+                        cmake_extras, cc, cxx, cxx_flags, cxx_package_prefix,
+                        **kwargs):
+    """Compare (diff) cpp benchmark runs."""
+
+    print("Not supported yet")
+    exit(1)
+
+
 def _get_comparisons_as_json(comparisons):
     buf = StringIO()
     for comparator in comparisons:

From a44677878e7dcea39d4f00fe8847bc1ca79fe6d7 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Thu, 17 Sep 2020 15:50:40 +0000
Subject: [PATCH 02/14] refactor from_rev_or_path method

---
 dev/archery/archery/benchmark/runner.py | 72 +++++++++++++------------
 dev/archery/archery/cli.py              |  8 +--
 2 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index 5718bcaf108..a213ee8cc58 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -50,40 +50,7 @@ def suites(self):
 
     @staticmethod
     def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
-        """ Returns a BenchmarkRunner from a path or a git revision.
-
-        First, it checks if `rev_or_path` is a valid path (or string) of a json
-        object that can deserialize to a BenchmarkRunner. If so, it initialize
-        a StaticBenchmarkRunner from it. This allows memoizing the result of a
-        run in a file or a string.
-
-        Second, it checks if `rev_or_path` points to a valid CMake build
-        directory.  If so, it creates a CppBenchmarkRunner with this existing
-        CMakeBuild.
-
-        Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
-        the given revision and create a fresh CMakeBuild.
-        """
-        build = None
-        if StaticBenchmarkRunner.is_json_result(rev_or_path):
-            return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
-        elif CMakeBuild.is_build_dir(rev_or_path):
-            build = CMakeBuild.from_path(rev_or_path)
-            return CppBenchmarkRunner(build, **kwargs)
-        else:
-            # Revisions can references remote via the `/` character, ensure
-            # that the revision is path friendly
-            path_rev = rev_or_path.replace("/", "_")
-            root_rev = os.path.join(root, path_rev)
-            os.mkdir(root_rev)
-
-            clone_dir = os.path.join(root_rev, "arrow")
-            # Possibly checkout the sources at given revision, no need to
-            # perform cleanup on cloned repository as root_rev is reclaimed.
-            src_rev, _ = src.at_revision(rev_or_path, clone_dir)
-            cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
-            build_dir = os.path.join(root_rev, "build")
-            return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+        raise NotImplementedError("BenchmarkRunner must implement from_rev_or_path")
 
 
 class StaticBenchmarkRunner(BenchmarkRunner):
@@ -210,3 +177,40 @@ def suites(self):
                 continue
 
             yield suite
+
+    @staticmethod
+    def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
+        """ Returns a BenchmarkRunner from a path or a git revision.
+
+        First, it checks if `rev_or_path` is a valid path (or string) of a json
+        object that can deserialize to a BenchmarkRunner. If so, it initialize
+        a StaticBenchmarkRunner from it. This allows memoizing the result of a
+        run in a file or a string.
+
+        Second, it checks if `rev_or_path` points to a valid CMake build
+        directory.  If so, it creates a CppBenchmarkRunner with this existing
+        CMakeBuild.
+
+        Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
+        the given revision and create a fresh CMakeBuild.
+        """
+        build = None
+        if StaticBenchmarkRunner.is_json_result(rev_or_path):
+            return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
+        elif CMakeBuild.is_build_dir(rev_or_path):
+            build = CMakeBuild.from_path(rev_or_path)
+            return CppBenchmarkRunner(build, **kwargs)
+        else:
+            # Revisions can references remote via the `/` character, ensure
+            # that the revision is path friendly
+            path_rev = rev_or_path.replace("/", "_")
+            root_rev = os.path.join(root, path_rev)
+            os.mkdir(root_rev)
+
+            clone_dir = os.path.join(root_rev, "arrow")
+            # Possibly checkout the sources at given revision, no need to
+            # perform cleanup on cloned repository as root_rev is reclaimed.
+            src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+            cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
+            build_dir = os.path.join(root_rev, "build")
+            return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 6e1b6fe0e09..079754eb76e 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -424,7 +424,7 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
         conf = CppBenchmarkRunner.default_configuration(
             cmake_extras=cmake_extras, **kwargs)
 
-        runner_base = BenchmarkRunner.from_rev_or_path(
+        runner_base = CppBenchmarkRunner.from_rev_or_path(
             src, root, rev_or_path, conf)
 
         for b in runner_base.list_benchmarks:
@@ -480,7 +480,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
         conf = CppBenchmarkRunner.default_configuration(
             cmake_extras=cmake_extras, **kwargs)
 
-        runner_base = BenchmarkRunner.from_rev_or_path(
+        runner_base = CppBenchmarkRunner.from_rev_or_path(
             src, root, rev_or_path, conf,
             repetitions=repetitions,
             suite_filter=suite_filter, benchmark_filter=benchmark_filter)
@@ -598,12 +598,12 @@ def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras,
         conf = CppBenchmarkRunner.default_configuration(
             cmake_extras=cmake_extras, **kwargs)
 
-        runner_cont = BenchmarkRunner.from_rev_or_path(
+        runner_cont = CppBenchmarkRunner.from_rev_or_path(
             src, root, contender, conf,
             repetitions=repetitions,
             suite_filter=suite_filter,
             benchmark_filter=benchmark_filter)
-        runner_base = BenchmarkRunner.from_rev_or_path(
+        runner_base = CppBenchmarkRunner.from_rev_or_path(
             src, root, baseline, conf,
             repetitions=repetitions,
             suite_filter=suite_filter,

From 63fae86d77adf042cc27ceda37f0e58624db61f1 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Fri, 18 Sep 2020 01:15:55 +0000
Subject: [PATCH 03/14] fix lint errors

---
 dev/archery/archery/benchmark/runner.py | 3 ++-
 dev/archery/archery/cli.py              | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index a213ee8cc58..83eeb8a789f 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -50,7 +50,8 @@ def suites(self):
 
     @staticmethod
     def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
-        raise NotImplementedError("BenchmarkRunner must implement from_rev_or_path")
+        raise NotImplementedError(
+            "BenchmarkRunner must implement from_rev_or_path")
 
 
 class StaticBenchmarkRunner(BenchmarkRunner):
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 079754eb76e..a79ed1a8ace 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -27,7 +27,7 @@
 
 from .benchmark.codec import JsonEncoder
 from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
-from .benchmark.runner import BenchmarkRunner, CppBenchmarkRunner
+from .benchmark.runner import CppBenchmarkRunner
 from .lang.cpp import CppCMakeDefinition, CppConfiguration
 from .utils.lint import linter, python_numpydoc, LintValidationException
 from .utils.logger import logger, ctx as log_ctx
@@ -580,7 +580,8 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
     if language == "cpp":
         ctx.forward(benchmark_diff_cpp, **kwargs)
     elif language == "java":
-        ctx.forward(benchmark_diff_jav, **kwargs)
+        ctx.forward(benchmark_diff_java, **kwargs)
+
 
 @benchmark.command(name="diff_cpp")
 @click.pass_context
@@ -626,7 +627,7 @@ def benchmark_diff_java(ctx, src, preserve, output, language,
                         repetitions, threshold, contender, baseline,
                         cmake_extras, cc, cxx, cxx_flags, cxx_package_prefix,
                         **kwargs):
-    """Compare (diff) cpp benchmark runs."""
+    """Compare (diff) java benchmark runs."""
 
     print("Not supported yet")
     exit(1)

From 23bc0b34ca58fb568e6272e298d32a0a6ce5d7c0 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Fri, 13 Nov 2020 03:43:10 +0900
Subject: [PATCH 04/14] support benchmark list, run, and diff with --language
 java

---
 dev/archery/archery/benchmark/compare.py |   8 +
 dev/archery/archery/benchmark/jmh.py     | 166 +++++++++++++++++++
 dev/archery/archery/benchmark/runner.py  |  96 +++++++++++
 dev/archery/archery/cli.py               | 126 ++++++++++++--
 dev/archery/archery/lang/java.py         |  48 ++++++
 dev/archery/archery/utils/maven.py       | 200 +++++++++++++++++++++++
 dev/archery/archery/utils/source.py      |   5 +
 java/performance/pom.xml                 |  11 ++
 8 files changed, 648 insertions(+), 12 deletions(-)
 create mode 100644 dev/archery/archery/benchmark/jmh.py
 create mode 100644 dev/archery/archery/utils/maven.py

diff --git a/dev/archery/archery/benchmark/compare.py b/dev/archery/archery/benchmark/compare.py
index 622b8017917..46633525490 100644
--- a/dev/archery/archery/benchmark/compare.py
+++ b/dev/archery/archery/benchmark/compare.py
@@ -54,6 +54,14 @@ def formatter_for_unit(unit):
         return bytes_per_seconds_fmt
     elif unit == "items_per_second":
         return items_per_seconds_fmt
+    elif unit.startswith("ops/"):
+        def ops_per_time_fmt(value):
+            return "{:.3f} {}".format(value, unit)
+        return ops_per_time_fmt
+    elif unit.endswith("/op"):
+        def time_per_op_fmt(value):
+            return "{:.3f} {}".format(value, unit)
+        return time_per_op_fmt
     else:
         return lambda x: x
 
diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py
new file mode 100644
index 00000000000..4a4a6369bf5
--- /dev/null
+++ b/dev/archery/archery/benchmark/jmh.py
@@ -0,0 +1,166 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from itertools import filterfalse, groupby, tee
+import json
+import subprocess
+from tempfile import NamedTemporaryFile
+
+from .core import Benchmark
+from ..utils.maven import Maven
+
+
+def partition(pred, iterable):
+    # adapted from python's examples
+    t1, t2 = tee(iterable)
+    return list(filter(pred, t1)), list(filterfalse(pred, t2))
+
+
+class JavaMicrobenchmarkHarnessCommand(Maven):
+    """ Run a Java Micro Benchmark Harness
+
+    This assumes the binary supports the standard command line options,
+    notably `--benchmark_filter`, `--benchmark_format`, etc...
+    """
+
+    def __init__(self, build, benchmark_filter=None):
+        #self.performance_dir = build.binaries_dir + "/performance"
+        self.benchmark_filter = benchmark_filter
+        self.build = build
+        self.maven = Maven()
+
+    def list_benchmarks(self):
+        argv = []
+        if self.benchmark_filter:
+            argv.append("-Dbenchmark.filter={}".format(self.benchmark_filter))
+        #result = self.maven.run(*argv, cwd=self.performance_dir,
+        #                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        result = self.build.list(*argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+        """ Extract benchmark names from output. Assume the following output
+          ...
+          Benchmarks:
+          org.apache.arrow.vector.IntBenchmarks.setIntDirectly
+          ...
+          org.apache.arrow.vector.IntBenchmarks.setWithValueHolder
+          org.apache.arrow.vector.IntBenchmarks.setWithWriter
+          ...
+          [INFO]
+        """
+
+        lists = []
+        benchmarks = False
+        for line in str.splitlines(result.stdout.decode("utf-8")):
+            if not benchmarks:
+                if line.startswith("Benchmarks:"):
+                    benchmarks = True
+            else:
+                if line.startswith("org.apache.arrow"):
+                    lists.append(line)
+                if line.startswith("[INFO]"):
+                    benchmarks = False
+                    break
+        return lists
+
+    def results(self, repetitions=1):
+        with NamedTemporaryFile(suffix=".json") as out:
+            argv = ["-Dbenchmark.runs={}".format(repetitions),
+                    "-Dbenchmark.resultfile={}".format(out.name),
+                    "-Dbenchmark.resultformat=json"]
+            if self.benchmark_filter:
+                argv.append(
+                    "-Dbenchmark.filter={}".format(self.benchmark_filter)
+                )
+
+            #self.maven.run(*argv, cwd=self.performance_dir, check=True)
+            self.build.benchmark(*argv, check=True)
+            return json.load(out)
+
+
+class JavaMicrobenchmarkHarnessObservation:
+    """ Represents one run of a single Java Microbenchmark Harness
+    """
+
+    def __init__(self, benchmark, primaryMetric,
+                 forks, warmupIterations, measurementIterations, **counters):
+        self.name = benchmark
+        self.primaryMetric = primaryMetric
+        self.score = primaryMetric["score"]
+        self.scoreUnit =  primaryMetric["scoreUnit"]
+        self.forks = forks
+        self.warmups = warmupIterations
+        self.runs = measurementIterations
+        self.counters = {
+            "mode" : counters["mode"],
+            "threads" : counters["threads"],
+            "warmups" : warmupIterations,
+            "warmupTime" : counters["warmupTime"],
+            "measurements" : measurementIterations,
+            "measurementTime" : counters["measurementTime"],
+            "jvmArgs" : counters["jvmArgs"]
+        }
+
+    @property
+    def value(self):
+        """ Return the benchmark value."""
+        return self.score
+
+    @property
+    def unit(self):
+        return self.scoreUnit
+
+    def __repr__(self):
+        return str(self.value)
+
+
+class JavaMicrobenchmarkHarness(Benchmark):
+    """ A set of JavaMicrobenchmarkHarnessObservations. """
+
+    def __init__(self, name, runs):
+        """ Initialize a JavaMicrobenchmarkHarness.
+
+        Parameters
+        ----------
+        name: str
+              Name of the benchmark
+        forks: int
+        warmups: int
+        runs: int
+        runs: list(JavaMicrobenchmarkHarnessObservation)
+              Repetitions of JavaMicrobenchmarkHarnessObservation run.
+
+        """
+        self.name = name
+        self.runs = sorted(runs, key=lambda b: b.value)
+        unit = self.runs[0].unit
+        less_is_better = unit.endswith("/op")
+        values = [b.value for b in self.runs]
+        # Slight kludge to extract the UserCounters for each benchmark
+        self.counters = self.runs[0].counters
+        super().__init__(name, unit, less_is_better, values)
+
+    def __repr__(self):
+        return "JavaMicrobenchmarkHarness[name={},runs={}]".format(self.name, self.runs)
+
+    @classmethod
+    def from_json(cls, payload):
+        def group_key(x):
+            return x.name
+
+        benchmarks = map(lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload)
+        groups = groupby(sorted(benchmarks, key=group_key), group_key)
+        return [cls(k, list(bs)) for k, bs in groups]
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index 83eeb8a789f..a0880537280 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -22,8 +22,11 @@
 
 from .core import BenchmarkSuite
 from .google import GoogleBenchmarkCommand, GoogleBenchmark
+from .jmh import JavaMicrobenchmarkHarnessCommand, JavaMicrobenchmarkHarness
 from ..lang.cpp import CppCMakeDefinition, CppConfiguration
+from ..lang.java import JavaMavenDefinition, JavaConfiguration
 from ..utils.cmake import CMakeBuild
+from ..utils.maven import MavenBuild
 from ..utils.logger import logger
 
 
@@ -215,3 +218,96 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
             cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf)
             build_dir = os.path.join(root_rev, "build")
             return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs)
+
+
+class JavaBenchmarkRunner(BenchmarkRunner):
+    """ Run suites for Java. """
+
+    def __init__(self, build, **kwargs):
+        """ Initialize a JavaBenchmarkRunner. """
+        self.build = build
+        self.repetitions = 5  # default repetitions for Java
+        super().__init__(**kwargs)
+
+    @staticmethod
+    def default_configuration(**kwargs):
+        """ Returns the default benchmark configuration. """
+        return JavaConfiguration(**kwargs)
+
+    def suite(self, name):
+        """ Returns the resulting benchmarks for a given suite. """
+        # update .m2 directory, which installs target jars
+        self.build.build()
+
+        suite_cmd = JavaMicrobenchmarkHarnessCommand(self.build, self.benchmark_filter)
+
+        # Ensure there will be data
+        benchmark_names = suite_cmd.list_benchmarks()
+        if not benchmark_names:
+            return None
+
+        results = suite_cmd.results(repetitions=self.repetitions)
+        benchmarks = JavaMicrobenchmarkHarness.from_json(results)
+        return BenchmarkSuite(name, benchmarks)
+
+    @property
+    def list_benchmarks(self):
+        """ Returns all suite names """
+        # Ensure build is up-to-date to run benchmarks
+        self.build.build()
+
+        suite_cmd = JavaMicrobenchmarkHarnessCommand(self.build)
+        benchmark_names = suite_cmd.list_benchmarks()
+        for benchmark_name in benchmark_names:
+            yield "{}".format(benchmark_name)
+
+    @property
+    def suites(self):
+        """ Returns all suite for a runner. """
+        suite_name = "JavaBenchmark"
+        suite = self.suite(suite_name)
+
+        # Filter may exclude all benchmarks
+        if not suite:
+            logger.debug("Suite {} executed but no results"
+                         .format(suite_name))
+            return
+
+        yield suite
+
+    @staticmethod
+    def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs):
+        """ Returns a BenchmarkRunner from a path or a git revision.
+
+        First, it checks if `rev_or_path` is a valid path (or string) of a json
+        object that can deserialize to a BenchmarkRunner. If so, it initialize
+        a StaticBenchmarkRunner from it. This allows memoizing the result of a
+        run in a file or a string.
+
+        Second, it checks if `rev_or_path` points to a valid Maven build
+        directory.  If so, it creates a JavaenchmarkRunner with this existing
+        MavenBuild.
+
+        Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
+        the given revision and create a fresh MavenBuild.
+        """
+        build = None
+        if StaticBenchmarkRunner.is_json_result(rev_or_path):
+            return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
+        elif MavenBuild.is_build_dir(rev_or_path):
+            maven_def = JavaMavenDefinition(rev_or_path, maven_conf)
+            return JavaBenchmarkRunner(maven_def.build(rev_or_path), **kwargs)
+        else:
+            # Revisions can references remote via the `/` character, ensure
+            # that the revision is path friendly
+            path_rev = rev_or_path.replace("/", "_")
+            root_rev = os.path.join(root, path_rev)
+            os.mkdir(root_rev)
+
+            clone_dir = os.path.join(root_rev, "arrow")
+            # Possibly checkout the sources at given revision, no need to
+            # perform cleanup on cloned repository as root_rev is reclaimed.
+            src_rev, _ = src.at_revision(rev_or_path, clone_dir)
+            maven_def = JavaMavenDefinition(src_rev.java, maven_conf)
+            build_dir = os.path.join(root_rev, "arrow/java")
+            return JavaBenchmarkRunner(maven_def.build(build_dir), **kwargs)
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index a79ed1a8ace..cd7ec595e44 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -27,7 +27,7 @@
 
 from .benchmark.codec import JsonEncoder
 from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
-from .benchmark.runner import CppBenchmarkRunner
+from .benchmark.runner import CppBenchmarkRunner, JavaBenchmarkRunner
 from .lang.cpp import CppCMakeDefinition, CppConfiguration
 from .utils.lint import linter, python_numpydoc, LintValidationException
 from .utils.logger import logger, ctx as log_ctx
@@ -124,7 +124,7 @@ def java_toolchain_options(cmd):
     options = [
         click.option("--java-home", metavar="<java_home>",
                      help="Path to Java Developers Kit."),
-        click.option("--java-flags", help="java compiler flags."),
+        click.option("--java-options", help="java compiler options."),
     ]
     return _apply_options(cmd, options)
 
@@ -384,12 +384,15 @@ def check_language(ctx, param, value):
         click.option("--language", metavar="<lang>", type=str, default="cpp",
                      show_default=True, callback=check_language,
                      help="Specify target language for the benchmark"),
-        click.option("--mvn-extras", type=str, multiple=True,
-                     help="Extra flags/options to pass to mvn. "
-                     "Can be stacked"),
+        click.option("--build-extras", type=str, multiple=True,
+                     help="Extra flags/options to pass to mvn build. "
+                     "Can be stackd. For language=java"),
+        click.option("--benchmark-extras", type=str, multiple=True,
+                     help="Extra flags/options to pass to mvn benchmark. "
+                     "Can be stackd. For language=java"),
         click.option("--cmake-extras", type=str, multiple=True,
                      help="Extra flags/options to pass to cmake invocation. "
-                     "Can be stacked"),
+                     "Can be stacked. For language=cpp"),
     ]
 
     cmd = java_toolchain_options(cmd)
@@ -415,9 +418,23 @@ def benchmark_filter_options(cmd):
 @benchmark_common_options
 @click.pass_context
 def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
-                   **kwargs):
+                   language, build_extras, benchmark_extras, java_home,
+                   java_options, **kwargs):
     """ List benchmark suite.
     """
+    if language == "cpp":
+        ctx.forward(benchmark_list_cpp, **kwargs)
+    elif language == "java":
+        ctx.forward(benchmark_list_java, **kwargs)
+
+
+@benchmark.command(name="list_cpp")
+@click.pass_context
+def benchmark_list_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras,
+                       language, build_extras, benchmark_extras, java_home,
+                       java_options, **kwargs):
+    """ List cpp benchmark suite.
+    """
     with tmpdir(preserve=preserve) as root:
         logger.debug("Running benchmark {}".format(rev_or_path))
 
@@ -431,6 +448,28 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
             click.echo(b, file=output)
 
 
+@benchmark.command(name="list_java")
+@click.pass_context
+def benchmark_list_java(ctx, rev_or_path, src, preserve, output, language,
+                        java_home, java_options, build_extras, benchmark_extras,
+                        cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix,
+                        **kwargs):
+    """ List java benchmark suite.
+    """
+    with tmpdir(preserve=preserve) as root:
+        logger.debug("Running benchmark {}".format(rev_or_path))
+
+        conf = JavaBenchmarkRunner.default_configuration(
+            java_home=java_home, java_options=java_options,
+            build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs)
+
+        runner_base = JavaBenchmarkRunner.from_rev_or_path(
+            src, root, rev_or_path, conf)
+
+        for b in runner_base.list_benchmarks:
+            click.echo(b, file=output)
+
+
 @benchmark.command(name="run", short_help="Run benchmark suite")
 @click.argument("rev_or_path", metavar="[<rev_or_path>]",
                 default="WORKSPACE", required=False)
@@ -441,7 +480,8 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
                     "may improve result precision."))
 @click.pass_context
 def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
-                  suite_filter, benchmark_filter, repetitions, **kwargs):
+                  language, suite_filter, benchmark_filter, repetations,
+                  **kwargs):
     """ Run benchmark suite.
 
     This command will run the benchmark suite for a single build. This is
@@ -474,6 +514,20 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
     \b
     archery benchmark run --output=run.json
     """
+    if language == "cpp":
+        ctx.forward(benchmark_run_cpp, **kwargs)
+    elif language == "java":
+        ctx.forward(benchmark_run_java, **kwargs)
+
+
+@benchmark.command(name="run_cpp")
+@click.pass_context
+def benchmark_run_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras,
+                      language, build_extras, benchmark_extras, java_home,
+                      java_options, suite_filter, benchmark_filter, **kwargs):
+    """ Run cpp benchmark suite.
+    """
+
     with tmpdir(preserve=preserve) as root:
         logger.debug("Running benchmark {}".format(rev_or_path))
 
@@ -488,6 +542,29 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
         json.dump(runner_base, output, cls=JsonEncoder)
 
 
+@benchmark.command(name="run_java")
+@click.pass_context
+def benchmark_run_java(ctx, rev_or_path, src, preserve, output, language,
+                       java_home, java_options, build_extras, benchmark_extras,
+                       cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix,
+                       suite_filter, benchmark_filter, **kwargs):
+    """ Run Java benchmark suite.
+    """
+
+    with tmpdir(preserve=preserve) as root:
+        logger.debug("Running benchmark {}".format(rev_or_path))
+
+        conf = JavaBenchmarkRunner.default_configuration(
+            java_home=java_home, java_options=java_options,
+            build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs)
+
+        runner_base = JavaBenchmarkRunner.from_rev_or_path(
+            src, root, rev_or_path, conf,
+            benchmark_filter=benchmark_filter)
+
+        json.dump(runner_base, output, cls=JsonEncoder)
+
+
 @benchmark.command(name="diff", short_help="Compare benchmark suites")
 @benchmark_common_options
 @benchmark_filter_options
@@ -588,7 +665,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
 def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras,
                        suite_filter, benchmark_filter,
                        repetitions, threshold, contender, baseline,
-                       mvn_extras, java_home, java_flags,
+                       build_extras, benchmark_extras, java_home, java_options,
                        **kwargs):
     """Compare (diff) cpp benchmark runs."""
 
@@ -623,14 +700,38 @@ def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras,
 @benchmark.command(name="diff_java")
 @click.pass_context
 def benchmark_diff_java(ctx, src, preserve, output, language,
+                        java_home, java_options, build_extras, benchmark_extras,
                         suite_filter, benchmark_filter,
                         repetitions, threshold, contender, baseline,
-                        cmake_extras, cc, cxx, cxx_flags, cxx_package_prefix,
+                        cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix,
                         **kwargs):
     """Compare (diff) java benchmark runs."""
 
-    print("Not supported yet")
-    exit(1)
+    with tmpdir(preserve=preserve) as root:
+        logger.debug("Comparing {} (contender) with {} (baseline)"
+                     .format(contender, baseline))
+
+        conf = JavaBenchmarkRunner.default_configuration(
+            java_home=java_home, java_options=java_options,
+            build_extras=build_extras, benchmark_extras=benchmark_extras,
+            **kwargs)
+
+        runner_cont = JavaBenchmarkRunner.from_rev_or_path(
+            src, root, contender, conf,
+            repetitions=repetitions,
+            benchmark_filter=benchmark_filter)
+        runner_base = JavaBenchmarkRunner.from_rev_or_path(
+            src, root, baseline, conf,
+            repetitions=repetitions,
+            benchmark_filter=benchmark_filter)
+
+        runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
+
+        # TODO(kszucs): test that the output is properly formatted jsonlines
+        comparisons_json = _get_comparisons_as_json(runner_comp.comparisons)
+        formatted = _format_comparisons_with_pandas(comparisons_json)
+        output.write(formatted)
+        output.write('\n')
 
 
 def _get_comparisons_as_json(comparisons):
@@ -652,6 +753,7 @@ def _format_comparisons_with_pandas(comparisons_json, no_counters):
     fields = ['benchmark', 'baseline', 'contender', 'change %']
     if not no_counters:
         fields += ['counters']
+        # fields += ['configurations']
 
     df = df[fields].sort_values(by='change %', ascending=False)
 
diff --git a/dev/archery/archery/lang/java.py b/dev/archery/archery/lang/java.py
index 24743b67fd7..472fb800f22 100644
--- a/dev/archery/archery/lang/java.py
+++ b/dev/archery/archery/lang/java.py
@@ -15,7 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import os
+
 from ..utils.command import Command, CommandStackMixin, default_bin
+from ..utils.maven import MavenDefinition
 
 
 class Java(Command):
@@ -28,3 +31,48 @@ def __init__(self, jar, *args, **kwargs):
         self.jar = jar
         self.argv = ("-jar", jar)
         Java.__init__(self, *args, **kwargs)
+
+
+class JavaConfiguration:
+    def __init__(self,
+
+                 # toolchain
+                 java_home=None, java_options=None,
+                 # build & benchmark
+                 build_extras=None, benchmark_extras=None):
+        self.java_home = java_home
+        self.java_options = java_options
+
+        self.build_extras = build_extras
+        self.benchmark_extras = benchmark_extras
+
+    @property
+    def build_definitions(self):
+        extras = list(self.build_extras) if self.build_extras else []
+        return extras
+
+    @property
+    def benchmark_definitions(self):
+        extras = list(self.benchmark_extras) if self.benchmark_extras else []
+        return extras
+
+    @property
+    def environment(self):
+        env = os.environ.copy()
+
+        if self.java_home:
+            env["JAVA_HOME"] = self.java_home
+
+        if self.java_options:
+            env["JAVA_OPTIONS"] = self.java_options
+
+        return env
+
+
+class JavaMavenDefinition(MavenDefinition):
+    def __init__(self, source, conf, **kwargs):
+        self.configuration = conf
+        super().__init__(source, **kwargs,
+                         build_definitions=conf.build_definitions,
+                         benchmark_definitions=conf.benchmark_definitions,
+                         env=conf.environment)
diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py
new file mode 100644
index 00000000000..a750c6972fc
--- /dev/null
+++ b/dev/archery/archery/utils/maven.py
@@ -0,0 +1,200 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+
+from .command import Command, default_bin
+
+
+class Maven(Command):
+    def __init__(self, maven_bin=None):
+        self.bin = default_bin(maven_bin, "mvn")
+
+
+maven = Maven()
+
+
+class MavenDefinition:
+    """ MavenDefinition captures the maven invocation arguments.
+
+    It allows creating build directories with the same definition, e.g.
+    ```
+    build_1 = maven_def.build("/tmp/build-1")
+    build_2 = maven_def.build("/tmp/build-2")
+
+    ...
+
+    build1.install()
+    build2.install()
+    """
+
+    def __init__(self, source, build_definitions=None,
+                 benchmark_definitions=None, env=None):
+        """ Initialize a MavenDefinition
+
+        Parameters
+        ----------
+        source : str
+                 Source directory where the top-level pom.xml is
+                 located. This is usually the root of the project.
+        build_definitions: list(str), optional
+        benchmark_definitions: list(str), optional
+        """
+        self.source = os.path.abspath(source)
+        self.build_definitions = build_definitions if build_definitions else []
+        self.benchmark_definitions = benchmark_definitions if benchmark_definitions else []
+        self.env = env
+
+    @property
+    def build_arguments(self):
+        """" Return the arguments to maven invocation for build. """
+        arguments = self.build_definitions + [
+            "-B", "-DskipTests", "-Drat.skip=true",
+            "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn",
+            "-T", "2C", "install"
+        ]
+        return arguments
+
+    def build(self, build_dir, force=False, cmd_kwargs=None, **kwargs):
+        """ Invoke maven into a build directory.
+
+        Parameters
+        ----------
+        build_dir : str
+                    Directory in which the Maven build will be instantiated.
+        force : bool
+                not used now
+        """
+        if os.path.exists(build_dir):
+            # Extra safety to ensure we're deleting a build folder.
+            if not MavenBuild.is_build_dir(build_dir):
+                raise FileExistsError(
+                    "{} is not a maven build".format(build_dir)
+                )
+
+        cmd_kwargs = cmd_kwargs if cmd_kwargs else {}
+        maven(*self.build_arguments, cwd=build_dir, env=self.env, **cmd_kwargs)
+        return MavenBuild(build_dir, definition=self, **kwargs)
+
+    @property
+    def list_arguments(self):
+        """" Return the arguments to maven invocation for list """
+        arguments = [
+            "-Dskip.perf.benchmarks=false", "-Dbenchmark.list=-lp", "install"
+        ]
+        return arguments
+
+    @property
+    def benchmark_arguments(self):
+        """" Return the arguments to maven invocation for benchmark """
+        arguments = self.benchmark_definitions + [
+            "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1",
+            "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false -Darrow.enable_unsafe_memory_access=true\"",
+            "install"
+        ]
+        return arguments
+
+    def __repr__(self):
+        return "MavenDefinition[source={}]".format(self.source)
+
+
+class MavenBuild(Maven):
+    """ MavenBuild represents a build directory initialized by maven.
+
+    The build instance can be used to build/test/install. It alleviates the
+    user to know which generator is used.
+    """
+
+    def __init__(self, build_dir, definition=None):
+        """ Initialize a MavenBuild.
+
+        The caller must ensure that maven was invoked in the build directory.
+
+        Parameters
+        ----------
+        definition : MavenDefinition
+                     The definition to build from.
+        build_dir : str
+                    The build directory to setup into.
+        """
+        assert MavenBuild.is_build_dir(build_dir)
+        super().__init__()
+        self.build_dir = os.path.abspath(build_dir)
+        self.definition = definition
+
+    @property
+    def binaries_dir(self):
+        return self.build_dir
+
+    def run(self, *argv, verbose=False, cwd=None, **kwargs):
+        extra = []
+        if verbose:
+            extra.append("-X")
+        if cwd is None:
+            cwd = self.build_dir
+        # Commands must be ran under the directory where pom.xml exists
+        return super().run(*extra, *argv, **kwargs, cwd=cwd)
+
+    def build(self, *argv, verbose=False, **kwargs):
+        definition_args = self.definition.build_arguments
+        cwd = self.binaries_dir
+        return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd,
+                        env=self.definition.env, **kwargs)
+
+    def list(self, *argv, verbose=False, **kwargs):
+        definition_args = self.definition.list_arguments
+        cwd = self.binaries_dir + "/performance"
+        return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd,
+                        env=self.definition.env, **kwargs)
+
+    def benchmark(self, *argv, verbose=False, **kwargs):
+        definition_args = self.definition.benchmark_arguments
+        cwd = self.binaries_dir + "/performance"
+        return self.run(*argv, *definition_args, verbose=verbose, cwd=cwd,
+                        env=self.definition.env, **kwargs)
+
+    @staticmethod
+    def is_build_dir(path):
+        """ Indicate if a path is Maven top directory.
+
+        This method only checks for the existence of paths and does not do any
+        validation whatsoever.
+        """
+        pom_xml = os.path.join(path, "pom.xml")
+        performance_dir = os.path.join(path, "performance")
+        return os.path.exists(pom_xml) and os.path.isdir(performance_dir)
+
+    @staticmethod
+    def from_path(path):
+        """ Instantiate a Maven from a path.
+
+        This is used to recover from an existing physical directory (created
+        with or without Maven).
+
+        Note that this method is not idempotent as the original definition will
+        be lost.
+        """
+        if not MavenBuild.is_build_dir(path):
+            raise ValueError("Not a valid MavenBuild path: {}".format(path))
+
+        return MavenBuild(path, definition=none)
+
+    def __repr__(self):
+        return ("MavenBuild["
+                "build = {},"
+                "definition = {}]".format(self.build_dir,
+                                          self.definition))
diff --git a/dev/archery/archery/utils/source.py b/dev/archery/archery/utils/source.py
index d30b4f152e5..f7e47a5a1b6 100644
--- a/dev/archery/archery/utils/source.py
+++ b/dev/archery/archery/utils/source.py
@@ -68,6 +68,11 @@ def dev(self):
         """ Returns the dev directory of an Arrow sources. """
         return self.path / "dev"
 
+    @property
+    def java(self):
+        """ Returns the java directory of an Arrow sources. """
+        return self.path / "java"
+
     @property
     def python(self):
         """ Returns the python directory of an Arrow sources. """
diff --git a/java/performance/pom.xml b/java/performance/pom.xml
index dffe7f2cbd2..81e268d9769 100644
--- a/java/performance/pom.xml
+++ b/java/performance/pom.xml
@@ -99,8 +99,12 @@
         <skip.perf.benchmarks>true</skip.perf.benchmarks>
         <benchmark.filter>.*</benchmark.filter>
         <benchmark.forks>1</benchmark.forks>
+        <benchmark.jvmargs> </benchmark.jvmargs>
         <benchmark.warmups>5</benchmark.warmups>
         <benchmark.runs>5</benchmark.runs>
+        <benchmark.list> </benchmark.list>
+        <benchmark.resultfile>jmh-result.json</benchmark.resultfile>
+        <benchmark.resultformat>json</benchmark.resultformat>
     </properties>
 
     <build>
@@ -169,10 +173,17 @@
                         <argument>${benchmark.filter}</argument>
                         <argument>-f</argument>
                         <argument>${benchmark.forks}</argument>
+                        <argument>-jvmArgs</argument>
+                        <argument>${benchmark.jvmargs}</argument>
                         <argument>-wi</argument>
                         <argument>${benchmark.warmups}</argument>
                         <argument>-i</argument>
                         <argument>${benchmark.runs}</argument>
+                        <argument>${benchmark.list}</argument>
+                        <argument>-rff</argument>
+                        <argument>${benchmark.resultfile}</argument>
+                        <argument>-rf</argument>
+                        <argument>${benchmark.resultformat}</argument>
                     </arguments>
                 </configuration>
             </plugin>

From 8148205302b4c5a6951f3bb3acb0c75dbb18ed2a Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Fri, 13 Nov 2020 03:43:16 +0900
Subject: [PATCH 05/14] fix typo

---
 dev/archery/archery/benchmark/google.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py
index c1644dcbd9c..51d81dccde3 100644
--- a/dev/archery/archery/benchmark/google.py
+++ b/dev/archery/archery/benchmark/google.py
@@ -161,7 +161,7 @@ def __init__(self, name, runs):
         super().__init__(name, unit, less_is_better, values, time_unit, times)
 
     def __repr__(self):
-        return "GoogleBenchmark[name={},runs={}]".format(self.names, self.runs)
+        return "GoogleBenchmark[name={},runs={}]".format(self.name, self.runs)
 
     @classmethod
     def from_json(cls, payload):

From 31f7c0bb1b9ea6f477e73024f05e0a9cf15fe249 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Fri, 13 Nov 2020 10:32:21 +0900
Subject: [PATCH 06/14] fix lint errors

remove unnecessary comments
---
 dev/archery/archery/benchmark/jmh.py    | 26 ++++++++++++-------------
 dev/archery/archery/benchmark/runner.py |  3 ++-
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py
index 4a4a6369bf5..e2d43e7b7bf 100644
--- a/dev/archery/archery/benchmark/jmh.py
+++ b/dev/archery/archery/benchmark/jmh.py
@@ -38,7 +38,6 @@ class JavaMicrobenchmarkHarnessCommand(Maven):
     """
 
     def __init__(self, build, benchmark_filter=None):
-        #self.performance_dir = build.binaries_dir + "/performance"
         self.benchmark_filter = benchmark_filter
         self.build = build
         self.maven = Maven()
@@ -47,9 +46,8 @@ def list_benchmarks(self):
         argv = []
         if self.benchmark_filter:
             argv.append("-Dbenchmark.filter={}".format(self.benchmark_filter))
-        #result = self.maven.run(*argv, cwd=self.performance_dir,
-        #                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        result = self.build.list(*argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        result = self.build.list(
+            *argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
         """ Extract benchmark names from output. Assume the following output
           ...
@@ -86,7 +84,6 @@ def results(self, repetitions=1):
                     "-Dbenchmark.filter={}".format(self.benchmark_filter)
                 )
 
-            #self.maven.run(*argv, cwd=self.performance_dir, check=True)
             self.build.benchmark(*argv, check=True)
             return json.load(out)
 
@@ -100,18 +97,18 @@ def __init__(self, benchmark, primaryMetric,
         self.name = benchmark
         self.primaryMetric = primaryMetric
         self.score = primaryMetric["score"]
-        self.scoreUnit =  primaryMetric["scoreUnit"]
+        self.scoreUnit = primaryMetric["scoreUnit"]
         self.forks = forks
         self.warmups = warmupIterations
         self.runs = measurementIterations
         self.counters = {
-            "mode" : counters["mode"],
-            "threads" : counters["threads"],
-            "warmups" : warmupIterations,
-            "warmupTime" : counters["warmupTime"],
-            "measurements" : measurementIterations,
-            "measurementTime" : counters["measurementTime"],
-            "jvmArgs" : counters["jvmArgs"]
+            "mode": counters["mode"],
+            "threads": counters["threads"],
+            "warmups": warmupIterations,
+            "warmupTime": counters["warmupTime"],
+            "measurements": measurementIterations,
+            "measurementTime": counters["measurementTime"],
+            "jvmArgs": counters["jvmArgs"]
         }
 
     @property
@@ -161,6 +158,7 @@ def from_json(cls, payload):
         def group_key(x):
             return x.name
 
-        benchmarks = map(lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload)
+        benchmarks = map(
+            lambda x: JavaMicrobenchmarkHarnessObservation(**x), payload)
         groups = groupby(sorted(benchmarks, key=group_key), group_key)
         return [cls(k, list(bs)) for k, bs in groups]
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index a0880537280..b842fee9c3a 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -239,7 +239,8 @@ def suite(self, name):
         # update .m2 directory, which installs target jars
         self.build.build()
 
-        suite_cmd = JavaMicrobenchmarkHarnessCommand(self.build, self.benchmark_filter)
+        suite_cmd = JavaMicrobenchmarkHarnessCommand(
+            self.build, self.benchmark_filter)
 
         # Ensure there will be data
         benchmark_names = suite_cmd.list_benchmarks()

From da46fca4de7701886f43e1807a17a7ec5dc57273 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Fri, 13 Nov 2020 11:41:40 +0900
Subject: [PATCH 07/14] fix lint errors

---
 dev/archery/archery/benchmark/jmh.py    |  3 ++-
 dev/archery/archery/benchmark/runner.py |  1 -
 dev/archery/archery/cli.py              | 16 +++++++++-------
 dev/archery/archery/utils/maven.py      | 11 +++++++----
 4 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py
index e2d43e7b7bf..11a2d0e0d77 100644
--- a/dev/archery/archery/benchmark/jmh.py
+++ b/dev/archery/archery/benchmark/jmh.py
@@ -151,7 +151,8 @@ def __init__(self, name, runs):
         super().__init__(name, unit, less_is_better, values)
 
     def __repr__(self):
-        return "JavaMicrobenchmarkHarness[name={},runs={}]".format(self.name, self.runs)
+        return "JavaMicrobenchmarkHarness[name={},runs={}]".format(
+            self.name, self.runs)
 
     @classmethod
     def from_json(cls, payload):
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index b842fee9c3a..06aedbba75f 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -292,7 +292,6 @@ def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs):
         Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
         the given revision and create a fresh MavenBuild.
         """
-        build = None
         if StaticBenchmarkRunner.is_json_result(rev_or_path):
             return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs)
         elif MavenBuild.is_build_dir(rev_or_path):
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index cd7ec595e44..be7737de9e9 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -451,9 +451,9 @@ def benchmark_list_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras,
 @benchmark.command(name="list_java")
 @click.pass_context
 def benchmark_list_java(ctx, rev_or_path, src, preserve, output, language,
-                        java_home, java_options, build_extras, benchmark_extras,
-                        cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix,
-                        **kwargs):
+                        java_home, java_options, build_extras,
+                        benchmark_extras, cmake_extras, cc, cxx, cxx_flags,
+                        cpp_package_prefix, **kwargs):
     """ List java benchmark suite.
     """
     with tmpdir(preserve=preserve) as root:
@@ -461,7 +461,8 @@ def benchmark_list_java(ctx, rev_or_path, src, preserve, output, language,
 
         conf = JavaBenchmarkRunner.default_configuration(
             java_home=java_home, java_options=java_options,
-            build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs)
+            build_extras=build_extras, benchmark_extras=benchmark_extras,
+            **kwargs)
 
         runner_base = JavaBenchmarkRunner.from_rev_or_path(
             src, root, rev_or_path, conf)
@@ -556,7 +557,8 @@ def benchmark_run_java(ctx, rev_or_path, src, preserve, output, language,
 
         conf = JavaBenchmarkRunner.default_configuration(
             java_home=java_home, java_options=java_options,
-            build_extras=build_extras, benchmark_extras=benchmark_extras, **kwargs)
+            build_extras=build_extras, benchmark_extras=benchmark_extras,
+            **kwargs)
 
         runner_base = JavaBenchmarkRunner.from_rev_or_path(
             src, root, rev_or_path, conf,
@@ -700,8 +702,8 @@ def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras,
 @benchmark.command(name="diff_java")
 @click.pass_context
 def benchmark_diff_java(ctx, src, preserve, output, language,
-                        java_home, java_options, build_extras, benchmark_extras,
-                        suite_filter, benchmark_filter,
+                        java_home, java_options, build_extras,
+                        benchmark_extras, suite_filter, benchmark_filter,
                         repetitions, threshold, contender, baseline,
                         cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix,
                         **kwargs):
diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py
index a750c6972fc..6b027f0015e 100644
--- a/dev/archery/archery/utils/maven.py
+++ b/dev/archery/archery/utils/maven.py
@@ -56,7 +56,8 @@ def __init__(self, source, build_definitions=None,
         """
         self.source = os.path.abspath(source)
         self.build_definitions = build_definitions if build_definitions else []
-        self.benchmark_definitions = benchmark_definitions if benchmark_definitions else []
+        self.benchmark_definitions =\
+            benchmark_definitions if benchmark_definitions else []
         self.env = env
 
     @property
@@ -64,7 +65,8 @@ def build_arguments(self):
         """" Return the arguments to maven invocation for build. """
         arguments = self.build_definitions + [
             "-B", "-DskipTests", "-Drat.skip=true",
-            "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn",
+            "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer."\
+            "Slf4jMavenTransferListener=warn",
             "-T", "2C", "install"
         ]
         return arguments
@@ -103,7 +105,8 @@ def benchmark_arguments(self):
         """" Return the arguments to maven invocation for benchmark """
         arguments = self.benchmark_definitions + [
             "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1",
-            "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false -Darrow.enable_unsafe_memory_access=true\"",
+            "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false"\
+            "-Darrow.enable_unsafe_memory_access=true\"",
             "install"
         ]
         return arguments
@@ -191,7 +194,7 @@ def from_path(path):
         if not MavenBuild.is_build_dir(path):
             raise ValueError("Not a valid MavenBuild path: {}".format(path))
 
-        return MavenBuild(path, definition=none)
+        return MavenBuild(path, definition=None)
 
     def __repr__(self):
         return ("MavenBuild["

From c17f50cf2c75313e716ec99c09840ffa675d8c48 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Fri, 13 Nov 2020 12:56:54 +0900
Subject: [PATCH 08/14] fix lint errors

---
 dev/archery/archery/utils/maven.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py
index 6b027f0015e..f18fb0b2da0 100644
--- a/dev/archery/archery/utils/maven.py
+++ b/dev/archery/archery/utils/maven.py
@@ -65,7 +65,7 @@ def build_arguments(self):
         """" Return the arguments to maven invocation for build. """
         arguments = self.build_definitions + [
             "-B", "-DskipTests", "-Drat.skip=true",
-            "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer."\
+            "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer."
             "Slf4jMavenTransferListener=warn",
             "-T", "2C", "install"
         ]
@@ -105,7 +105,7 @@ def benchmark_arguments(self):
         """" Return the arguments to maven invocation for benchmark """
         arguments = self.benchmark_definitions + [
             "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1",
-            "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false"\
+            "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false"
             "-Darrow.enable_unsafe_memory_access=true\"",
             "install"
         ]

From e924ba2dc13b172cb33baf4a56b44518faa98f3c Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Sat, 10 Apr 2021 09:08:04 +0000
Subject: [PATCH 09/14] address trivial comments

---
 dev/archery/archery/benchmark/jmh.py    | 30 ++++++++++++-------------
 dev/archery/archery/benchmark/runner.py |  2 +-
 dev/archery/archery/cli.py              |  6 ++---
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py
index 11a2d0e0d77..aa80240a816 100644
--- a/dev/archery/archery/benchmark/jmh.py
+++ b/dev/archery/archery/benchmark/jmh.py
@@ -21,6 +21,7 @@
 from tempfile import NamedTemporaryFile
 
 from .core import Benchmark
+from ..utils.command import Command
 from ..utils.maven import Maven
 
 
@@ -30,11 +31,11 @@ def partition(pred, iterable):
     return list(filter(pred, t1)), list(filterfalse(pred, t2))
 
 
-class JavaMicrobenchmarkHarnessCommand(Maven):
+class JavaMicrobenchmarkHarnessCommand(Command):
     """ Run a Java Micro Benchmark Harness
 
     This assumes the binary supports the standard command line options,
-    notably `--benchmark_filter`, `--benchmark_format`, etc...
+    notably `-Dbenchmark_filter`
     """
 
     def __init__(self, build, benchmark_filter=None):
@@ -42,6 +43,17 @@ def __init__(self, build, benchmark_filter=None):
         self.build = build
         self.maven = Maven()
 
+    """ Extract benchmark names from output between "Benchmarks:" and "[INFO]".
+    Assume the following output:
+      ...
+      Benchmarks:
+      org.apache.arrow.vector.IntBenchmarks.setIntDirectly
+      ...
+      org.apache.arrow.vector.IntBenchmarks.setWithValueHolder
+      org.apache.arrow.vector.IntBenchmarks.setWithWriter
+      ...
+      [INFO]
+    """
     def list_benchmarks(self):
         argv = []
         if self.benchmark_filter:
@@ -49,17 +61,6 @@ def list_benchmarks(self):
         result = self.build.list(
             *argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
-        """ Extract benchmark names from output. Assume the following output
-          ...
-          Benchmarks:
-          org.apache.arrow.vector.IntBenchmarks.setIntDirectly
-          ...
-          org.apache.arrow.vector.IntBenchmarks.setWithValueHolder
-          org.apache.arrow.vector.IntBenchmarks.setWithWriter
-          ...
-          [INFO]
-        """
-
         lists = []
         benchmarks = False
         for line in str.splitlines(result.stdout.decode("utf-8")):
@@ -70,7 +71,6 @@ def list_benchmarks(self):
                 if line.startswith("org.apache.arrow"):
                     lists.append(line)
                 if line.startswith("[INFO]"):
-                    benchmarks = False
                     break
         return lists
 
@@ -151,7 +151,7 @@ def __init__(self, name, runs):
         super().__init__(name, unit, less_is_better, values)
 
     def __repr__(self):
-        return "JavaMicrobenchmarkHarness[name={},runs={}]".format(
+        return "JavaMicrobenchmark[name={},runs={}]".format(
             self.name, self.runs)
 
     @classmethod
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index 06aedbba75f..13bc95d2ef5 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -286,7 +286,7 @@ def from_rev_or_path(src, root, rev_or_path, maven_conf, **kwargs):
         run in a file or a string.
 
         Second, it checks if `rev_or_path` points to a valid Maven build
-        directory.  If so, it creates a JavaenchmarkRunner with this existing
+        directory.  If so, it creates a JavaBenchmarkRunner with this existing
         MavenBuild.
 
         Otherwise, it assumes `rev_or_path` is a revision and clone/checkout
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index be7737de9e9..056bb300058 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -367,7 +367,7 @@ def benchmark(ctx):
 
 def benchmark_common_options(cmd):
     def check_language(ctx, param, value):
-        if value != "cpp" and value != "java":
+        if value not in {"cpp", "java"}:
             raise click.BadParameter("cpp or java is supported now")
         return value
 
@@ -386,10 +386,10 @@ def check_language(ctx, param, value):
                      help="Specify target language for the benchmark"),
         click.option("--build-extras", type=str, multiple=True,
                      help="Extra flags/options to pass to mvn build. "
-                     "Can be stackd. For language=java"),
+                     "Can be stacked. For language=java"),
         click.option("--benchmark-extras", type=str, multiple=True,
                      help="Extra flags/options to pass to mvn benchmark. "
-                     "Can be stackd. For language=java"),
+                     "Can be stacked. For language=java"),
         click.option("--cmake-extras", type=str, multiple=True,
                      help="Extra flags/options to pass to cmake invocation. "
                      "Can be stacked. For language=cpp"),

From 6e1161bcabcfad14c7301ed9d92ef6243162a8ff Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Sat, 10 Apr 2021 10:24:20 +0000
Subject: [PATCH 10/14] address trivial comments

---
 dev/archery/archery/lang/java.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/dev/archery/archery/lang/java.py b/dev/archery/archery/lang/java.py
index 472fb800f22..6cda0194aae 100644
--- a/dev/archery/archery/lang/java.py
+++ b/dev/archery/archery/lang/java.py
@@ -43,18 +43,16 @@ def __init__(self,
         self.java_home = java_home
         self.java_options = java_options
 
-        self.build_extras = build_extras
-        self.benchmark_extras = benchmark_extras
+        self.build_extras = list(build_extras) if build_extras else []
+        self.benchmark_extras = list(self.benchmark_extras) if self.benchmark_extras else []
 
     @property
     def build_definitions(self):
-        extras = list(self.build_extras) if self.build_extras else []
-        return extras
+        return self.build_extras
 
     @property
     def benchmark_definitions(self):
-        extras = list(self.benchmark_extras) if self.benchmark_extras else []
-        return extras
+        return self.benchmark_extras
 
     @property
     def environment(self):

From 513a7ca6b20869f6b78c424e8b1d9fbe926a9b7f Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Sat, 10 Apr 2021 10:45:55 +0000
Subject: [PATCH 11/14] normalize unit

---
 dev/archery/archery/benchmark/compare.py |  8 ------
 dev/archery/archery/benchmark/jmh.py     | 36 ++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/dev/archery/archery/benchmark/compare.py b/dev/archery/archery/benchmark/compare.py
index 46633525490..622b8017917 100644
--- a/dev/archery/archery/benchmark/compare.py
+++ b/dev/archery/archery/benchmark/compare.py
@@ -54,14 +54,6 @@ def formatter_for_unit(unit):
         return bytes_per_seconds_fmt
     elif unit == "items_per_second":
         return items_per_seconds_fmt
-    elif unit.startswith("ops/"):
-        def ops_per_time_fmt(value):
-            return "{:.3f} {}".format(value, unit)
-        return ops_per_time_fmt
-    elif unit.endswith("/op"):
-        def time_per_op_fmt(value):
-            return "{:.3f} {}".format(value, unit)
-        return time_per_op_fmt
     else:
         return lambda x: x
 
diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py
index aa80240a816..c3e9ca90ff2 100644
--- a/dev/archery/archery/benchmark/jmh.py
+++ b/dev/archery/archery/benchmark/jmh.py
@@ -110,15 +110,45 @@ def __init__(self, benchmark, primaryMetric,
             "measurementTime": counters["measurementTime"],
             "jvmArgs": counters["jvmArgs"]
         }
+        self.reciprocalValue = True if self.scoreUnit.endswith("/op") else False
+        if self.scoreUnit.startswith("ops/"):
+            idx = self.scoreUnit.find("/")
+            self.normalizePerSec(self.scoreUnit[idx+1:])
+        elif self.scoreUnit.endswith("/op"):
+            idx = self.scoreUnit.find("/")
+            self.normalizePerSec(self.scoreUnit[:idx])
+        else:
+            self.normalizeFactor = 1
 
     @property
     def value(self):
         """ Return the benchmark value."""
-        return self.score
+        val = 1 / self.score if self.reciprocalValue else self.score
+        return val * self.normalzeFactor
+
+    def normalizePerSec(self, unit):
+        if unit == "ns":
+            self.normalizeFactor = 1000 * 1000 * 1000
+        elif unit == "us":
+            self.normalizeFactor = 1000 * 1000
+        elif unit == "ms":
+            self.normalizeFactor = 1000
+        elif unit == "min":
+            self.normalizeFactor = 1 / 60
+        elif unit == "hr":
+            self.normalizeFactor = 1 / (60 * 60)
+        elif unit == "day":
+            self.normalizeFactor = 1 / (60 * 60 * 24)
+        else:
+            self.normalizeFactor = 1
 
     @property
     def unit(self):
-        return self.scoreUnit
+        if self.scoreUnit.startswith("ops/"):
+            return "bytes_per_second"
+        elif self.scoreUnit.endswith("/op"):
+            return "items_per_second"
+        return "?"
 
     def __repr__(self):
         return str(self.value)
@@ -144,7 +174,7 @@ def __init__(self, name, runs):
         self.name = name
         self.runs = sorted(runs, key=lambda b: b.value)
         unit = self.runs[0].unit
-        less_is_better = unit.endswith("/op")
+        less_is_better = False
         values = [b.value for b in self.runs]
         # Slight kludge to extract the UserCounters for each benchmark
         self.counters = self.runs[0].counters

From 152255874a0ccf96852080d5cddbc1e971018c6e Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Sat, 10 Apr 2021 16:47:28 +0000
Subject: [PATCH 12/14] refactor to reduce more splits

---
 dev/archery/archery/cli.py         | 209 +++++++++--------------------
 dev/archery/archery/lang/java.py   |   2 +-
 dev/archery/archery/utils/maven.py |   1 +
 3 files changed, 64 insertions(+), 148 deletions(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 056bb300058..55fa6856689 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -141,6 +141,7 @@ def _apply_options(cmd, options):
               help="Specify Arrow source directory")
 # toolchain
 @cpp_toolchain_options
+@java_toolchain_options
 @click.option("--build-type", default=None, type=build_type,
               help="CMake's CMAKE_BUILD_TYPE")
 @click.option("--warn-level", default="production", type=warn_level_type,
@@ -418,54 +419,30 @@ def benchmark_filter_options(cmd):
 @benchmark_common_options
 @click.pass_context
 def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
-                   language, build_extras, benchmark_extras, java_home,
-                   java_options, **kwargs):
+                   java_home, java_options, build_extras, benchmark_extras,
+                   language, **kwargs):
     """ List benchmark suite.
     """
-    if language == "cpp":
-        ctx.forward(benchmark_list_cpp, **kwargs)
-    elif language == "java":
-        ctx.forward(benchmark_list_java, **kwargs)
-
-
-@benchmark.command(name="list_cpp")
-@click.pass_context
-def benchmark_list_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras,
-                       language, build_extras, benchmark_extras, java_home,
-                       java_options, **kwargs):
-    """ List cpp benchmark suite.
-    """
     with tmpdir(preserve=preserve) as root:
         logger.debug("Running benchmark {}".format(rev_or_path))
 
-        conf = CppBenchmarkRunner.default_configuration(
-            cmake_extras=cmake_extras, **kwargs)
-
-        runner_base = CppBenchmarkRunner.from_rev_or_path(
-            src, root, rev_or_path, conf)
-
-        for b in runner_base.list_benchmarks:
-            click.echo(b, file=output)
-
+        if language == "cpp":
+            conf = CppBenchmarkRunner.default_configuration(
+                cmake_extras=cmake_extras, **kwargs)
 
-@benchmark.command(name="list_java")
-@click.pass_context
-def benchmark_list_java(ctx, rev_or_path, src, preserve, output, language,
-                        java_home, java_options, build_extras,
-                        benchmark_extras, cmake_extras, cc, cxx, cxx_flags,
-                        cpp_package_prefix, **kwargs):
-    """ List java benchmark suite.
-    """
-    with tmpdir(preserve=preserve) as root:
-        logger.debug("Running benchmark {}".format(rev_or_path))
+            runner_base = CppBenchmarkRunner.from_rev_or_path(
+                src, root, rev_or_path, conf)
 
-        conf = JavaBenchmarkRunner.default_configuration(
-            java_home=java_home, java_options=java_options,
-            build_extras=build_extras, benchmark_extras=benchmark_extras,
-            **kwargs)
+        elif language == "java":
+            for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+                del kwargs[key]
+            conf = JavaBenchmarkRunner.default_configuration(
+                java_home=java_home, java_options=java_options,
+                build_extras=build_extras, benchmark_extras=benchmark_extras,
+                **kwargs)
 
-        runner_base = JavaBenchmarkRunner.from_rev_or_path(
-            src, root, rev_or_path, conf)
+            runner_base = JavaBenchmarkRunner.from_rev_or_path(
+                src, root, rev_or_path, conf)
 
         for b in runner_base.list_benchmarks:
             click.echo(b, file=output)
@@ -515,54 +492,28 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
     \b
     archery benchmark run --output=run.json
     """
-    if language == "cpp":
-        ctx.forward(benchmark_run_cpp, **kwargs)
-    elif language == "java":
-        ctx.forward(benchmark_run_java, **kwargs)
-
-
-@benchmark.command(name="run_cpp")
-@click.pass_context
-def benchmark_run_cpp(ctx, rev_or_path, src, preserve, output, cmake_extras,
-                      language, build_extras, benchmark_extras, java_home,
-                      java_options, suite_filter, benchmark_filter, **kwargs):
-    """ Run cpp benchmark suite.
-    """
-
     with tmpdir(preserve=preserve) as root:
         logger.debug("Running benchmark {}".format(rev_or_path))
 
-        conf = CppBenchmarkRunner.default_configuration(
-            cmake_extras=cmake_extras, **kwargs)
-
-        runner_base = CppBenchmarkRunner.from_rev_or_path(
-            src, root, rev_or_path, conf,
-            repetitions=repetitions,
-            suite_filter=suite_filter, benchmark_filter=benchmark_filter)
-
-        json.dump(runner_base, output, cls=JsonEncoder)
-
-
-@benchmark.command(name="run_java")
-@click.pass_context
-def benchmark_run_java(ctx, rev_or_path, src, preserve, output, language,
-                       java_home, java_options, build_extras, benchmark_extras,
-                       cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix,
-                       suite_filter, benchmark_filter, **kwargs):
-    """ Run Java benchmark suite.
-    """
+        if language == "cpp":
+            conf = CppBenchmarkRunner.default_configuration(
+                cmake_extras=cmake_extras, **kwargs)
 
-    with tmpdir(preserve=preserve) as root:
-        logger.debug("Running benchmark {}".format(rev_or_path))
+            runner_base = CppBenchmarkRunner.from_rev_or_path(
+                src, root, rev_or_path, conf,
+                repetitions=repetitions,
+                suite_filter=suite_filter, benchmark_filter=benchmark_filter)
 
-        conf = JavaBenchmarkRunner.default_configuration(
-            java_home=java_home, java_options=java_options,
-            build_extras=build_extras, benchmark_extras=benchmark_extras,
-            **kwargs)
+        elif language == "java":
+            conf = JavaBenchmarkRunner.default_configuration(
+                java_home=java_home, java_options=java_options,
+                build_extras=build_extras, benchmark_extras=benchmark_extras,
+                **kwargs)
 
-        runner_base = JavaBenchmarkRunner.from_rev_or_path(
-            src, root, rev_or_path, conf,
-            benchmark_filter=benchmark_filter)
+            runner_base = JavaBenchmarkRunner.from_rev_or_path(
+                src, root, rev_or_path, conf,
+                repetitions=repetitions,
+                benchmark_filter=benchmark_filter)
 
         json.dump(runner_base, output, cls=JsonEncoder)
 
@@ -656,38 +607,39 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
     # This should not recompute the benchmark from run.json
     archery --quiet benchmark diff WORKSPACE run.json > result.json
     """
-    if language == "cpp":
-        ctx.forward(benchmark_diff_cpp, **kwargs)
-    elif language == "java":
-        ctx.forward(benchmark_diff_java, **kwargs)
-
-
-@benchmark.command(name="diff_cpp")
-@click.pass_context
-def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras,
-                       suite_filter, benchmark_filter,
-                       repetitions, threshold, contender, baseline,
-                       build_extras, benchmark_extras, java_home, java_options,
-                       **kwargs):
-    """Compare (diff) cpp benchmark runs."""
-
     with tmpdir(preserve=preserve) as root:
         logger.debug("Comparing {} (contender) with {} (baseline)"
                      .format(contender, baseline))
 
-        conf = CppBenchmarkRunner.default_configuration(
-            cmake_extras=cmake_extras, **kwargs)
-
-        runner_cont = CppBenchmarkRunner.from_rev_or_path(
-            src, root, contender, conf,
-            repetitions=repetitions,
-            suite_filter=suite_filter,
-            benchmark_filter=benchmark_filter)
-        runner_base = CppBenchmarkRunner.from_rev_or_path(
-            src, root, baseline, conf,
-            repetitions=repetitions,
-            suite_filter=suite_filter,
-            benchmark_filter=benchmark_filter)
+        if language == "cpp":
+            conf = CppBenchmarkRunner.default_configuration(
+                cmake_extras=cmake_extras, **kwargs)
+
+            runner_cont = CppBenchmarkRunner.from_rev_or_path(
+                src, root, contender, conf,
+                repetitions=repetitions,
+                suite_filter=suite_filter,
+                benchmark_filter=benchmark_filter)
+            runner_base = CppBenchmarkRunner.from_rev_or_path(
+                src, root, baseline, conf,
+                repetitions=repetitions,
+                suite_filter=suite_filter,
+                benchmark_filter=benchmark_filter)
+
+        elif language == "java":
+            conf = JavaBenchmarkRunner.default_configuration(
+                java_home=java_home, java_options=java_options,
+                build_extras=build_extras, benchmark_extras=benchmark_extras,
+                **kwargs)
+
+            runner_cont = JavaBenchmarkRunner.from_rev_or_path(
+                src, root, contender, conf,
+                repetitions=repetitions,
+                benchmark_filter=benchmark_filter)
+            runner_base = JavaBenchmarkRunner.from_rev_or_path(
+                src, root, baseline, conf,
+                repetitions=repetitions,
+                benchmark_filter=benchmark_filter)
 
         runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
 
@@ -699,43 +651,6 @@ def benchmark_diff_cpp(ctx, src, preserve, output, language, cmake_extras,
         output.write('\n')
 
 
-@benchmark.command(name="diff_java")
-@click.pass_context
-def benchmark_diff_java(ctx, src, preserve, output, language,
-                        java_home, java_options, build_extras,
-                        benchmark_extras, suite_filter, benchmark_filter,
-                        repetitions, threshold, contender, baseline,
-                        cmake_extras, cc, cxx, cxx_flags, cpp_package_prefix,
-                        **kwargs):
-    """Compare (diff) java benchmark runs."""
-
-    with tmpdir(preserve=preserve) as root:
-        logger.debug("Comparing {} (contender) with {} (baseline)"
-                     .format(contender, baseline))
-
-        conf = JavaBenchmarkRunner.default_configuration(
-            java_home=java_home, java_options=java_options,
-            build_extras=build_extras, benchmark_extras=benchmark_extras,
-            **kwargs)
-
-        runner_cont = JavaBenchmarkRunner.from_rev_or_path(
-            src, root, contender, conf,
-            repetitions=repetitions,
-            benchmark_filter=benchmark_filter)
-        runner_base = JavaBenchmarkRunner.from_rev_or_path(
-            src, root, baseline, conf,
-            repetitions=repetitions,
-            benchmark_filter=benchmark_filter)
-
-        runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
-
-        # TODO(kszucs): test that the output is properly formatted jsonlines
-        comparisons_json = _get_comparisons_as_json(runner_comp.comparisons)
-        formatted = _format_comparisons_with_pandas(comparisons_json)
-        output.write(formatted)
-        output.write('\n')
-
-
 def _get_comparisons_as_json(comparisons):
     buf = StringIO()
     for comparator in comparisons:
diff --git a/dev/archery/archery/lang/java.py b/dev/archery/archery/lang/java.py
index 6cda0194aae..20a9594df68 100644
--- a/dev/archery/archery/lang/java.py
+++ b/dev/archery/archery/lang/java.py
@@ -44,7 +44,7 @@ def __init__(self,
         self.java_options = java_options
 
         self.build_extras = list(build_extras) if build_extras else []
-        self.benchmark_extras = list(self.benchmark_extras) if self.benchmark_extras else []
+        self.benchmark_extras = list(benchmark_extras) if benchmark_extras else []
 
     @property
     def build_definitions(self):
diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py
index f18fb0b2da0..1eea11e4c95 100644
--- a/dev/archery/archery/utils/maven.py
+++ b/dev/archery/archery/utils/maven.py
@@ -89,6 +89,7 @@ def build(self, build_dir, force=False, cmd_kwargs=None, **kwargs):
                 )
 
         cmd_kwargs = cmd_kwargs if cmd_kwargs else {}
+        assert MavenBuild.is_build_dir(build_dir)
         maven(*self.build_arguments, cwd=build_dir, env=self.env, **cmd_kwargs)
         return MavenBuild(build_dir, definition=self, **kwargs)
 

From 150d19c4a8636ee091345f543814f2f8b243f1a3 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Mon, 12 Apr 2021 10:01:41 +0000
Subject: [PATCH 13/14] updates

---
 dev/archery/archery/benchmark/google.py |  2 +-
 dev/archery/archery/benchmark/jmh.py    | 40 ++++++++++++++-----------
 dev/archery/archery/benchmark/runner.py |  2 +-
 dev/archery/archery/cli.py              | 37 ++++++++++++++++-------
 dev/archery/archery/utils/maven.py      |  2 +-
 5 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py
index 51d81dccde3..c1644dcbd9c 100644
--- a/dev/archery/archery/benchmark/google.py
+++ b/dev/archery/archery/benchmark/google.py
@@ -161,7 +161,7 @@ def __init__(self, name, runs):
         super().__init__(name, unit, less_is_better, values, time_unit, times)
 
     def __repr__(self):
-        return "GoogleBenchmark[name={},runs={}]".format(self.name, self.runs)
+        return "GoogleBenchmark[name={},runs={}]".format(self.names, self.runs)
 
     @classmethod
     def from_json(cls, payload):
diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py
index c3e9ca90ff2..c093ec37fb7 100644
--- a/dev/archery/archery/benchmark/jmh.py
+++ b/dev/archery/archery/benchmark/jmh.py
@@ -74,7 +74,7 @@ def list_benchmarks(self):
                     break
         return lists
 
-    def results(self, repetitions=1):
+    def results(self, repetitions):
         with NamedTemporaryFile(suffix=".json") as out:
             argv = ["-Dbenchmark.runs={}".format(repetitions),
                     "-Dbenchmark.resultfile={}".format(out.name),
@@ -97,7 +97,7 @@ def __init__(self, benchmark, primaryMetric,
         self.name = benchmark
         self.primaryMetric = primaryMetric
         self.score = primaryMetric["score"]
-        self.scoreUnit = primaryMetric["scoreUnit"]
+        self.score_unit = primaryMetric["scoreUnit"]
         self.forks = forks
         self.warmups = warmupIterations
         self.runs = measurementIterations
@@ -110,21 +110,21 @@ def __init__(self, benchmark, primaryMetric,
             "measurementTime": counters["measurementTime"],
             "jvmArgs": counters["jvmArgs"]
         }
-        self.reciprocalValue = True if self.scoreUnit.endswith("/op") else False
-        if self.scoreUnit.startswith("ops/"):
-            idx = self.scoreUnit.find("/")
-            self.normalizePerSec(self.scoreUnit[idx+1:])
-        elif self.scoreUnit.endswith("/op"):
-            idx = self.scoreUnit.find("/")
-            self.normalizePerSec(self.scoreUnit[:idx])
+        self.reciprocal_value = True if self.score_unit.endswith("/op") else False
+        if self.score_unit.startswith("ops/"):
+            idx = self.score_unit.find("/")
+            self.normalizePerSec(self.score_unit[idx+1:])
+        elif self.score_unit.endswith("/op"):
+            idx = self.score_unit.find("/")
+            self.normalizePerSec(self.score_unit[:idx])
         else:
             self.normalizeFactor = 1
 
     @property
     def value(self):
         """ Return the benchmark value."""
-        val = 1 / self.score if self.reciprocalValue else self.score
-        return val * self.normalzeFactor
+        val = 1 / self.score if self.reciprocal_value else self.score
+        return val * self.normalizeFactor
 
     def normalizePerSec(self, unit):
         if unit == "ns":
@@ -144,11 +144,12 @@ def normalizePerSec(self, unit):
 
     @property
     def unit(self):
-        if self.scoreUnit.startswith("ops/"):
-            return "bytes_per_second"
-        elif self.scoreUnit.endswith("/op"):
+        if self.score_unit.startswith("ops/"):
             return "items_per_second"
-        return "?"
+        elif self.score_unit.endswith("/op"):
+            return "items_per_second"
+        else:
+            return "?"
 
     def __repr__(self):
         return str(self.value)
@@ -174,11 +175,14 @@ def __init__(self, name, runs):
         self.name = name
         self.runs = sorted(runs, key=lambda b: b.value)
         unit = self.runs[0].unit
-        less_is_better = False
+        time_unit = "N/A"
+        less_is_better = not unit.endswith("per_second")
         values = [b.value for b in self.runs]
+        times = []
         # Slight kludge to extract the UserCounters for each benchmark
-        self.counters = self.runs[0].counters
-        super().__init__(name, unit, less_is_better, values)
+        counters = self.runs[0].counters
+        super().__init__(name, unit, less_is_better, values, time_unit, times,
+                         counters)
 
     def __repr__(self):
         return "JavaMicrobenchmark[name={},runs={}]".format(
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index 13bc95d2ef5..fc6d354b180 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -223,10 +223,10 @@ def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
 class JavaBenchmarkRunner(BenchmarkRunner):
     """ Run suites for Java. """
 
+    # default repetitions is 5 for Java microbenchmark harness
     def __init__(self, build, **kwargs):
         """ Initialize a JavaBenchmarkRunner. """
         self.build = build
-        self.repetitions = 5  # default repetitions for Java
         super().__init__(**kwargs)
 
     @staticmethod
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 55fa6856689..8bc6a805f57 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -393,7 +393,7 @@ def check_language(ctx, param, value):
                      "Can be stacked. For language=java"),
         click.option("--cmake-extras", type=str, multiple=True,
                      help="Extra flags/options to pass to cmake invocation. "
-                     "Can be stacked. For language=cpp"),
+                     "Can be stacked. For language=cpp")
     ]
 
     cmd = java_toolchain_options(cmd)
@@ -417,6 +417,7 @@ def benchmark_filter_options(cmd):
 @click.argument("rev_or_path", metavar="[<rev_or_path>]",
                 default="WORKSPACE", required=False)
 @benchmark_common_options
+@benchmark_filter_options
 @click.pass_context
 def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
                    java_home, java_options, build_extras, benchmark_extras,
@@ -453,12 +454,14 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
                 default="WORKSPACE", required=False)
 @benchmark_common_options
 @benchmark_filter_options
-@click.option("--repetitions", type=int, default=1, show_default=True,
+@click.option("--repetitions", type=int, default=-1,
               help=("Number of repetitions of each benchmark. Increasing "
-                    "may improve result precision."))
+                    "may improve result precision. "
+                    "[default: 1 for cpp, 5 for java"))
 @click.pass_context
 def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
-                  language, suite_filter, benchmark_filter, repetations,
+                  java_home, java_options, build_extras, benchmark_extras,
+                  language, suite_filter, benchmark_filter, repetitions,
                   **kwargs):
     """ Run benchmark suite.
 
@@ -499,17 +502,21 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
             conf = CppBenchmarkRunner.default_configuration(
                 cmake_extras=cmake_extras, **kwargs)
 
+            repetitions = repetitions if repetitions != -1 else 1
             runner_base = CppBenchmarkRunner.from_rev_or_path(
                 src, root, rev_or_path, conf,
                 repetitions=repetitions,
                 suite_filter=suite_filter, benchmark_filter=benchmark_filter)
 
         elif language == "java":
+            for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+                del kwargs[key]
             conf = JavaBenchmarkRunner.default_configuration(
                 java_home=java_home, java_options=java_options,
                 build_extras=build_extras, benchmark_extras=benchmark_extras,
                 **kwargs)
 
+            repetitions = repetitions if repetitions != -1 else 5
             runner_base = JavaBenchmarkRunner.from_rev_or_path(
                 src, root, rev_or_path, conf,
                 repetitions=repetitions,
@@ -526,7 +533,8 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
               help="Regression failure threshold in percentage.")
 @click.option("--repetitions", type=int, default=1, show_default=True,
               help=("Number of repetitions of each benchmark. Increasing "
-                    "may improve result precision."))
+                    "may improve result precision. "
+                    "[default: 1 for cpp, 5 for java"))
 @click.option("--no-counters", type=BOOL, default=False, is_flag=True,
               help="Hide counters field in diff report.")
 @click.argument("contender", metavar="[<contender>",
@@ -535,7 +543,8 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
                 required=False)
 @click.pass_context
 def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
-                   suite_filter, benchmark_filter, repitations, no_counters,
+                   suite_filter, benchmark_filter, repetitions, no_counters,
+                   java_home, java_options, build_extras, benchmark_extras,
                    threshold, contender, baseline, **kwargs):
     """Compare (diff) benchmark runs.
 
@@ -615,6 +624,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
             conf = CppBenchmarkRunner.default_configuration(
                 cmake_extras=cmake_extras, **kwargs)
 
+            repetitions = repetitions if repetitions != -1 else 1
             runner_cont = CppBenchmarkRunner.from_rev_or_path(
                 src, root, contender, conf,
                 repetitions=repetitions,
@@ -627,11 +637,14 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
                 benchmark_filter=benchmark_filter)
 
         elif language == "java":
+            for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+                del kwargs[key]
             conf = JavaBenchmarkRunner.default_configuration(
                 java_home=java_home, java_options=java_options,
                 build_extras=build_extras, benchmark_extras=benchmark_extras,
                 **kwargs)
 
+            repetitions = repetitions if repetitions != -1 else 5
             runner_cont = JavaBenchmarkRunner.from_rev_or_path(
                 src, root, contender, conf,
                 repetitions=repetitions,
@@ -645,8 +658,9 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
 
         # TODO(kszucs): test that the output is properly formatted jsonlines
         comparisons_json = _get_comparisons_as_json(runner_comp.comparisons)
+        ren_counters = language == "java"
         formatted = _format_comparisons_with_pandas(comparisons_json,
-                                                    no_counters)
+                                                    no_counters, ren_counters)
         output.write(formatted)
         output.write('\n')
 
@@ -660,7 +674,8 @@ def _get_comparisons_as_json(comparisons):
     return buf.getvalue()
 
 
-def _format_comparisons_with_pandas(comparisons_json, no_counters):
+def _format_comparisons_with_pandas(comparisons_json, no_counters,
+                                    ren_counters):
     import pandas as pd
     df = pd.read_json(StringIO(comparisons_json), lines=True)
     # parse change % so we can sort by it
@@ -670,9 +685,11 @@ def _format_comparisons_with_pandas(comparisons_json, no_counters):
     fields = ['benchmark', 'baseline', 'contender', 'change %']
     if not no_counters:
         fields += ['counters']
-        # fields += ['configurations']
 
-    df = df[fields].sort_values(by='change %', ascending=False)
+    df = df[fields]
+    if ren_counters:
+        df = df.rename(columns={'counters': 'configurations'})
+    df = df.sort_values(by='change %', ascending=False)
 
     def labelled(title, df):
         if len(df) == 0:
diff --git a/dev/archery/archery/utils/maven.py b/dev/archery/archery/utils/maven.py
index 1eea11e4c95..96a3bf5bd99 100644
--- a/dev/archery/archery/utils/maven.py
+++ b/dev/archery/archery/utils/maven.py
@@ -106,7 +106,7 @@ def benchmark_arguments(self):
         """" Return the arguments to maven invocation for benchmark """
         arguments = self.benchmark_definitions + [
             "-Dskip.perf.benchmarks=false", "-Dbenchmark.fork=1",
-            "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false"
+            "-Dbenchmark.jvmargs=\"-Darrow.enable_null_check_for_get=false "
             "-Darrow.enable_unsafe_memory_access=true\"",
             "install"
         ]

From 5d70561ea8d33828b7c5c49d380bafe0e3586225 Mon Sep 17 00:00:00 2001
From: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Date: Mon, 12 Apr 2021 10:18:24 +0000
Subject: [PATCH 14/14] fix lint errors

---
 dev/archery/archery/benchmark/jmh.py | 4 +++-
 dev/archery/archery/cli.py           | 6 +++---
 dev/archery/archery/lang/java.py     | 3 ++-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/dev/archery/archery/benchmark/jmh.py b/dev/archery/archery/benchmark/jmh.py
index c093ec37fb7..f531b6de163 100644
--- a/dev/archery/archery/benchmark/jmh.py
+++ b/dev/archery/archery/benchmark/jmh.py
@@ -54,6 +54,7 @@ def __init__(self, build, benchmark_filter=None):
       ...
       [INFO]
     """
+
     def list_benchmarks(self):
         argv = []
         if self.benchmark_filter:
@@ -110,7 +111,8 @@ def __init__(self, benchmark, primaryMetric,
             "measurementTime": counters["measurementTime"],
             "jvmArgs": counters["jvmArgs"]
         }
-        self.reciprocal_value = True if self.score_unit.endswith("/op") else False
+        self.reciprocal_value = True if self.score_unit.endswith(
+            "/op") else False
         if self.score_unit.startswith("ops/"):
             idx = self.score_unit.find("/")
             self.normalizePerSec(self.score_unit[idx+1:])
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 8bc6a805f57..06dd6b60370 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -435,7 +435,7 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
                 src, root, rev_or_path, conf)
 
         elif language == "java":
-            for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+            for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
                 del kwargs[key]
             conf = JavaBenchmarkRunner.default_configuration(
                 java_home=java_home, java_options=java_options,
@@ -509,7 +509,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
                 suite_filter=suite_filter, benchmark_filter=benchmark_filter)
 
         elif language == "java":
-            for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+            for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
                 del kwargs[key]
             conf = JavaBenchmarkRunner.default_configuration(
                 java_home=java_home, java_options=java_options,
@@ -637,7 +637,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras,
                 benchmark_filter=benchmark_filter)
 
         elif language == "java":
-            for key in { 'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
+            for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
                 del kwargs[key]
             conf = JavaBenchmarkRunner.default_configuration(
                 java_home=java_home, java_options=java_options,
diff --git a/dev/archery/archery/lang/java.py b/dev/archery/archery/lang/java.py
index 20a9594df68..bc169adf647 100644
--- a/dev/archery/archery/lang/java.py
+++ b/dev/archery/archery/lang/java.py
@@ -44,7 +44,8 @@ def __init__(self,
         self.java_options = java_options
 
         self.build_extras = list(build_extras) if build_extras else []
-        self.benchmark_extras = list(benchmark_extras) if benchmark_extras else []
+        self.benchmark_extras = list(
+            benchmark_extras) if benchmark_extras else []
 
     @property
     def build_definitions(self):