From 945c133968fa8093b49c19209f03811bdde0bb04 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 16 Nov 2022 23:25:11 -0800 Subject: [PATCH 01/13] Adds benchmark cli and its unit tests. --- amazon/ionbenchmark/API.py | 9 + amazon/ionbenchmark/__init__.py | 9 + amazon/ionbenchmark/ion_benchmark_cli.py | 360 +++++++++++++++++++++++ amazon/ionbenchmark/util.py | 14 + requirements.txt | 4 +- tests/benchmark_sample_data/integers.ion | 1 + tests/test_benchmark_cli.py | 193 ++++++++++++ 7 files changed, 589 insertions(+), 1 deletion(-) create mode 100644 amazon/ionbenchmark/API.py create mode 100644 amazon/ionbenchmark/__init__.py create mode 100644 amazon/ionbenchmark/ion_benchmark_cli.py create mode 100644 amazon/ionbenchmark/util.py create mode 100644 tests/benchmark_sample_data/integers.ion create mode 100644 tests/test_benchmark_cli.py diff --git a/amazon/ionbenchmark/API.py b/amazon/ionbenchmark/API.py new file mode 100644 index 000000000..1211cf48a --- /dev/null +++ b/amazon/ionbenchmark/API.py @@ -0,0 +1,9 @@ +from enum import Enum + + +# Serialization/deserialization APIs to benchmark. +class API(Enum): + """Enumeration of the APIs.""" + SIMPLE_ION = 'simpleIon' + ITERATOR = 'iterator' + EVENT = 'event' diff --git a/amazon/ionbenchmark/__init__.py b/amazon/ionbenchmark/__init__.py new file mode 100644 index 000000000..58afa630e --- /dev/null +++ b/amazon/ionbenchmark/__init__.py @@ -0,0 +1,9 @@ +__author__ = 'Amazon.com, Inc.' +__version__ = '0.1.0' + +__all__ = [ + 'API', + 'ion_python_benchmark_cli' +] + +from amazon.ionbenchmark.ion_benchmark_cli import ion_python_benchmark_cli diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py new file mode 100644 index 000000000..22655dc4e --- /dev/null +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -0,0 +1,360 @@ +"""A repeatable benchmark tool for ion-python implementation. + +Usage: + ion_python_benchmark_cli.py write [--api ] [--warmups ] [--iterations ] + [--c-extension ] [--binary ] + ion_python_benchmark_cli.py read [--api ] [--warmups ] [--iterations ] + [--c-extension ] + ion_python_benchmark_cli.py generate + ion_python_benchmark_cli.py (-h | --help) + ion_python_benchmark_cli.py (-v | --version) + +Command: + write Benchmark writing the given input file to the given output format(s). In order to isolate + writing from reading, during the setup phase write instructions are generated from the input file + and stored in memory. For large inputs, this can consume a lot of resources and take a long time + to execute. This may be reduced by using the --limit option to limit the number of entries that + are written. The cost of initializing the writer is included in each timed benchmark invocation. + Therefore, it is important to provide data that closely matches the size of the data written by a + single writer instance in the real world to ensure the initialization cost is properly amortized. + + read First, re-write the given input file to the given output format(s) (if necessary), then + benchmark reading the resulting log files. If this takes too long to complete, consider using + the --limit option to limit the number of entries that are read. Specifying non-default settings + for certain options will cause the input data to be re-encoded even if the requested format is the + same as the format of the provided input. These options are --ion-length-preallocation and + --ion-flush-period for input in the ion binary format. The cost of initializing the reader or + DOM loader is included in each timed benchmark invocation. Therefore, it is important to provide + data that closely matches the size of data read by a single reader/loader instance in the real + world to ensure the initialization cost is properly amortized. + + generate (EXPERIMENTAL) Generate random Ion data which can be used as input to the read/write commands. + Data size, data type and the path of output file are required options. The specifications of three + scalar types can be executed so far, decimal, string and timestamp. The command will generate approximately + the amount of data requested, but the actual size of the generated may be slightly larger or smaller than + requested. We don't implement this feature in this implementation. We rely on ion-java-benchmark-cli to + achieve the same outcomes. + +Options: + -h, --help Show this screen. + + --api The API to excise (simpleIon, iterator, nonBlocking). `simpleIon` refer to + simpleIon's load method. `iterator` refers to simpleIon's iterator type got by + setting `parse_eagerly` to false. `nonBlocking` refer to ion-python's event + based non-blocking API. Default to `simpleIon`. + + -w --warmups Number of benchmark warm-up iterations. [default: 10] + + -i --iterations Number of benchmark iterations. [default: 10] + + -c --c-extension If enable C extension, note that it only applys to simpleIon module currently. + [default: True] + + -b --binary If write in binary format. [default: True] + + -n --limit (NOT SUPPORTED YET) Maximum number of entries to process. By default, + all entries in each input file are processed. + + -p --profile (NOT SUPPORTED YET) Initiates a single iteration that repeats indefinitely until + terminated, allowing users to attach profiling tools. If this option is + specified, the --warmups, --iterations, and --forks options are ignored. An + error will be raised if this option is used when multiple values are specified + for other options. Not enabled by default. + + -u --time-unit (NOT SUPPORTED YET) + -o --results-file (NOT SUPPORTED YET) + -f --format (NOT SUPPORTED YET) + -I --ion-imports-for-input (NOT SUPPORTED YET) + +""" +import timeit +import tracemalloc +from pathlib import Path + +import amazon.ion.simpleion as ion +from docopt import docopt +from tabulate import tabulate + +from amazon.ionbenchmark.API import API +from amazon.ionbenchmark.util import str_to_bool, format_percentage, format_decimal, TOOL_VERSION + +BYTES_TO_MB = 1024 * 1024 +_IVM = b'\xE0\x01\x00\xEA' +write_memory_usage_peak = 0 +read_memory_usage_peak = 0 + + +def generate_simpleion_load_test_code(file, profiling, api='simpleIon', single_value=False, emit_bare_values=False, + parse_eagerly=True): + if not profiling: + if api == 'simpleIon': + def test_func(): + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=parse_eagerly) + return data + else: + def test_func(): + with open(file, "br") as fp: + iterator = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=parse_eagerly) + data = list(iterator) + return data + else: + if api == 'simpleIon': + def test_func(): + tracemalloc.start() + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=parse_eagerly) + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return data + else: + def test_func(): + tracemalloc.start() + with open(file, "br") as fp: + iterator = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=parse_eagerly) + data = list(iterator) + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + + return data + + return test_func + + +def generate_event_test_code(file): + pass + + +def generate_simpleion_setup(c_ext, gc=True): + rtn = f'import amazon.ion.simpleion as ion;from amazon.ion.simple_types import IonPySymbol; ion.c_ext ={c_ext}; import tracemalloc' + if gc: + rtn += '; import gc; gc.enable()' + return rtn + + +def generate_event_setup(file, gc=True): + pass + + +def read_micro_benchmark_simpleion(iterations, warmups, c_ext, file, profiling): + file_size = Path(file).stat().st_size / BYTES_TO_MB + + setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) + + test_code = generate_simpleion_load_test_code(file, emit_bare_values=False, profiling=profiling) + test_code_without_wrapper = generate_simpleion_load_test_code(file, emit_bare_values=True, profiling=profiling) + + # warm up + timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) + timeit.timeit(stmt=test_code_without_wrapper, setup=setup_with_gc, number=warmups) + + # iteration + result_with_gc = timeit.timeit(stmt=test_code, setup=setup_with_gc, number=iterations) / iterations + result_with_raw_value = \ + (timeit.timeit(stmt=test_code_without_wrapper, setup=setup_with_gc, number=iterations) / iterations) \ + if c_ext else result_with_gc + + return file_size, result_with_gc, result_with_raw_value + + +def read_micro_benchmark_iterator(iterations, warmups, c_ext, file, profiling): + file_size = Path(file).stat().st_size / BYTES_TO_MB + + # GC refers to reference cycles, not reference count + setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) + + test_code = generate_simpleion_load_test_code(file, api='iterator', emit_bare_values=False, parse_eagerly=False, + profiling=profiling) + test_code_without_wrapper = generate_simpleion_load_test_code(file, api='iterator', emit_bare_values=True, + parse_eagerly=False, profiling=profiling) + + # warm up + timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) + timeit.timeit(stmt=test_code_without_wrapper, setup=setup_with_gc, number=warmups) + + # iteration + result_with_gc = timeit.timeit(stmt=test_code, setup=setup_with_gc, number=iterations) / iterations + result_with_raw_value = \ + (timeit.timeit(stmt=test_code_without_wrapper, setup=setup_with_gc, number=iterations) / iterations) \ + if c_ext else result_with_gc + + return file_size, result_with_gc, result_with_raw_value + + +def read_micro_benchmark_event(iterations, warmups, c_ext, file=None): + pass + + +def read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_ext): + if not file: + raise Exception("Invalid file: file can not be none.") + if not read_micro_benchmark_function: + raise Exception("Invalid micro benchmark function: micro benchmark function can not be none.") + + # memory profiling + read_micro_benchmark_function(iterations=1, warmups=0, file=file, c_ext=c_ext, profiling=True) + + # performance benchmark + file_size, result_with_gc, result_with_raw_value = \ + read_micro_benchmark_function(iterations=iterations, warmups=warmups, file=file, c_ext=c_ext, profiling=False) + + # calculate metrics + conversion_time = result_with_gc - result_with_raw_value + # generate report + read_generate_report(file_size, result_with_gc, + conversion_time if conversion_time > 0 else 0, + (conversion_time / result_with_gc) if conversion_time > 0 else 0, + read_memory_usage_peak) + + return file_size, result_with_gc, conversion_time, read_memory_usage_peak + + +def read_generate_report(file_size, total_time, conversion_time, wrapper_time_percentage, memory_usage_peak): + table = [['file_size (MB)', 'total_time (s)', 'conversion_\ntime (s)', 'conversion_time/\ntotal_time (%)', + 'memory_usage_peak (MB)'], + [format_decimal(file_size), + format_decimal(total_time), + format_decimal(conversion_time), + format_percentage(wrapper_time_percentage), + format_decimal(memory_usage_peak)]] + print('\n') + print(tabulate(table, tablefmt='fancy_grid')) + + +def generate_simpleion_dump_test_code(obj, profiling, binary=True): + if not profiling: + def test_func(): + return ion.dumps(obj=obj, binary=binary) + else: + def test_func(): + tracemalloc.start() + data = ion.dumps(obj=obj, binary=binary) + global write_memory_usage_peak + write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + + return data + + return test_func + + +def write_micro_benchmark_simpleion(iterations, warmups, c_ext, obj, file, binary, profiling): + file_size = Path(file).stat().st_size / BYTES_TO_MB + + # GC refers to reference cycles, not reference count + setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) + + test_func = generate_simpleion_dump_test_code(obj, profiling=profiling, binary=binary) + + # warm up + timeit.timeit(stmt=test_func, setup=setup_with_gc, number=warmups) + + # iteration + result_with_gc = timeit.timeit(stmt=test_func, setup=setup_with_gc, number=iterations) / iterations + + return file_size, result_with_gc + + +def write_micro_benchmark_iterator(iterations, warmups, c_ext, obj, file, binary, profiling): + file_size = Path(file).stat().st_size / BYTES_TO_MB + + # GC refers to reference cycles, not reference count + setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) + + test_code = generate_simpleion_dump_test_code(obj, profiling, binary=binary) + + # warm up + timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) + + # iteration + result_with_gc = timeit.timeit(stmt=test_code, setup=setup_with_gc, number=iterations) / iterations + + return file_size, result_with_gc + + +def write_micro_benchmark_event(iterations, warmups, c_ext, obj, binary, file=None): + pass + + +def write_micro_benchmark_and_profiling(write_micro_benchmark_function, iterations, warmups, obj, c_ext, binary, file): + if not obj: + raise Exception("Invalid obj: object can not be none.") + if not write_micro_benchmark_function: + raise Exception("Invalid micro benchmark function: micro benchmark function can not be none.") + # Memory Profiling + write_micro_benchmark_function(iterations=1, warmups=0, obj=obj, c_ext=c_ext, file=file, binary=binary, profiling=True) + + # Performance Benchmark + file_size, result_with_gc = \ + write_micro_benchmark_function(iterations=iterations, warmups=warmups, obj=obj, c_ext=c_ext, file=file, + binary=binary, profiling=False) + + # generate report + write_generate_report(file_size, result_with_gc, write_memory_usage_peak) + + return file_size, result_with_gc, write_memory_usage_peak + + +def write_generate_report(file_size, total_time, memory_usage_peak): + table = [['file_size (MB)', 'total_time (s)', 'memory_usage_peak (MB)'], + [format_decimal(file_size), + format_decimal(total_time), + format_decimal(memory_usage_peak)]] + print('\n') + print(tabulate(table, tablefmt='fancy_grid')) + + +def ion_python_benchmark_cli(arguments): + if arguments['--version'] or arguments['-v']: + print(TOOL_VERSION) + return TOOL_VERSION + elif arguments['generate']: + print('Generate feature is not supported yet') + return 0 + if not arguments['']: + raise Exception('Invalid input file') + file = arguments[''] + iterations = int(arguments['--iterations']) + warmups = int(arguments['--warmups']) + c_ext = str_to_bool(arguments['--c-extension']) + binary = str_to_bool(arguments['--binary']) + + if arguments['read']: + api = arguments['--api'] + if not api or api == API.SIMPLE_ION.value: + read_micro_benchmark_function = read_micro_benchmark_simpleion + elif api == API.ITERATOR.value: + read_micro_benchmark_function = read_micro_benchmark_iterator + elif api == API.EVENT.value: + read_micro_benchmark_function = read_micro_benchmark_event + else: + raise Exception(f'Invalid API option {api}.') + + return read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_ext) + + elif arguments['write']: + api = arguments['--api'] + if not api or api == API.SIMPLE_ION.value: + write_micro_benchmark_function = write_micro_benchmark_simpleion + elif api == API.ITERATOR.value: + write_micro_benchmark_function = write_micro_benchmark_iterator + elif api == API.EVENT.value: + write_micro_benchmark_function = write_micro_benchmark_event + else: + raise Exception(f'Invalid API option {api}.') + + with open(file) as fp: + obj = ion.load(fp, parse_eagerly=(api != API.ITERATOR.value), single_value=False) + + return write_micro_benchmark_and_profiling(write_micro_benchmark_function, iterations, warmups, obj, c_ext, + binary, file) + + +if __name__ == '__main__': + ion_python_benchmark_cli(docopt(__doc__, help=True)) diff --git a/amazon/ionbenchmark/util.py b/amazon/ionbenchmark/util.py new file mode 100644 index 000000000..28ece9936 --- /dev/null +++ b/amazon/ionbenchmark/util.py @@ -0,0 +1,14 @@ +TOOL_VERSION = '1.0.0' + + +def str_to_bool(v): + return v.lower() in ("true", "1") + + +def format_percentage(v): + return "{:.2%}".format(v) + + +def format_decimal(v): + return "{:.2e}".format(v) + diff --git a/requirements.txt b/requirements.txt index 86f1b54c5..ce83e5f1d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,6 @@ six==1.16.0 toml==0.10.2 tox==3.23.1 virtualenv==20.4.7 -setuptools<=60.5.0 \ No newline at end of file +setuptools<=60.5.0 +docopt==0.6.2 +tabulate==0.9.0 \ No newline at end of file diff --git a/tests/benchmark_sample_data/integers.ion b/tests/benchmark_sample_data/integers.ion new file mode 100644 index 000000000..56a6051ca --- /dev/null +++ b/tests/benchmark_sample_data/integers.ion @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/tests/test_benchmark_cli.py b/tests/test_benchmark_cli.py new file mode 100644 index 000000000..6786033fb --- /dev/null +++ b/tests/test_benchmark_cli.py @@ -0,0 +1,193 @@ +import time +from itertools import chain +from os.path import abspath, join, dirname, getsize + +from docopt import docopt + +from amazon.ion import simpleion +from amazon.ionbenchmark import ion_benchmark_cli +from amazon.ionbenchmark.ion_benchmark_cli import generate_simpleion_load_test_code, generate_simpleion_dump_test_code, \ + read_micro_benchmark_simpleion, BYTES_TO_MB, read_micro_benchmark_iterator, ion_python_benchmark_cli +from amazon.ionbenchmark.util import str_to_bool, TOOL_VERSION +from tests import parametrize +from tests.test_simpleion import generate_scalars_text +from tests.writer_util import SIMPLE_SCALARS_MAP_TEXT + +doc = ion_benchmark_cli.__doc__ + + +@parametrize( + '1', + 'true', + 'True', + 'TRue', +) +def test_str_to_bool_true(p): + assert str_to_bool(p) == True + + +@parametrize( + '0', + '2', + 'false', + '?', + 'test' +) +def test_str_to_bool_true(p): + assert str_to_bool(p) == False + + +def generate_test_path(p): + return join(dirname(abspath(__file__)), 'benchmark_sample_data', p) + + +@parametrize( + generate_test_path('integers.ion') +) +def test_generate_simpleion_load_test_code(path): + actual = generate_simpleion_load_test_code(path, profiling=False, api='simpleIon', single_value=False, + emit_bare_values=False, parse_eagerly=True) + + # make sure we generated the desired load function + with open(path) as fp: + expect = simpleion.load(fp, single_value=False, parse_eagerly=True) + + # make sure the return values are same + assert actual() == expect + + +@parametrize( + *tuple(chain( + generate_scalars_text(SIMPLE_SCALARS_MAP_TEXT), + )) +) +def test_generate_simpleion_dump_test_code(obj): + actual = generate_simpleion_dump_test_code(obj, profiling=False, binary=False) + + # make sure we generated the desired dumps function + expect = simpleion.dumps(obj, binary=False) + + # make sure the return values are same + assert actual() == expect + + +@parametrize( + generate_test_path('integers.ion') +) +def test_read_micro_benchmark_simpleion(f): + file_size, result_with_gc, result_with_raw_value = \ + read_micro_benchmark_simpleion(iterations=1, warmups=0, c_ext=True, file=f, profiling=False) + + assert file_size == getsize(f) / BYTES_TO_MB + assert result_with_gc > 0 + assert result_with_raw_value > 0 + + +@parametrize( + generate_test_path('integers.ion') +) +def test_read_micro_benchmark_iterator(f): + file_size, result_with_gc, result_with_raw_value = \ + read_micro_benchmark_iterator(iterations=1, warmups=0, c_ext=True, file=f, profiling=False) + + assert file_size == getsize(f) / BYTES_TO_MB + assert result_with_gc > 0 + assert result_with_raw_value > 0 + + +def execution_with_command(c): + return ion_python_benchmark_cli(docopt(doc, argv=c)) + + +def test_option_version(): + assert execution_with_command('-v') == TOOL_VERSION + + +def test_option_write(file=generate_test_path('integers.ion')): + # make sure it reads successfully + file_size, result_with_gc, write_memory_usage_peak = \ + execution_with_command(['write', file]) + + assert file_size == getsize(file) / BYTES_TO_MB + assert result_with_gc > 0 + assert write_memory_usage_peak > 0 + + +def test_option_read(file=generate_test_path('integers.ion')): + # make sure it reads successfully + file_size, result_with_gc, conversion_time, read_memory_usage_peak = \ + execution_with_command(['read', file]) + + assert file_size == getsize(file) / BYTES_TO_MB + assert result_with_gc > 0 + assert read_memory_usage_peak > 0 + + +def test_option_write_c_extension(file=generate_test_path('integers.ion')): + file_size, result_with_gc, write_memory_usage_peak = \ + execution_with_command(['write', file, '--c-extension', 'true']) + + file_size_2, result_with_gc_2, write_memory_usage_peak_2 = \ + execution_with_command(['write', file, '--c-extension', 'false']) + + assert file_size == getsize(file) / BYTES_TO_MB + # This is a straightforward way to check if C extension is enabled from the top-level python layer. + # For most of the case, C extension is supposed to be faster than regular python implementation + assert result_with_gc < result_with_gc_2 + assert write_memory_usage_peak < write_memory_usage_peak_2 + + +def test_option_read_c_extension(file=generate_test_path('integers.ion')): + file_size, result_with_gc, conversion_time, read_memory_usage_peak = \ + execution_with_command(['read', file, '--c-extension', 'true']) + + file_size_2, result_with_gc_2, conversion_time_2, read_memory_usage_peak_2 = \ + execution_with_command(['read', file, '--c-extension', 'false']) + + assert file_size == getsize(file) / BYTES_TO_MB + # This is a straightforward way to check if C extension is enabled from the top-level python layer. + # For most of the case, C extension is supposed to be faster than regular python implementation + assert result_with_gc < result_with_gc_2 + + # For load, C extension does not show advantage of memory usage like dump. So we do not have below assertion + # assert read_memory_usage_peak > read_memory_usage_peak_2 + + +def test_option_read_iterations(file=generate_test_path('integers.ion')): + # warmup + execution_with_command(['read', file, '--c-extension', 'true', '--iterations', '10']) + + start = time.perf_counter() + execution_with_command(['read', file, '--c-extension', 'true', '--iterations', '1']) + stop = time.perf_counter() + time_1 = stop - start + + start = time.perf_counter() + execution_with_command(['read', file, '--c-extension', 'true', '--iterations', '100']) + stop = time.perf_counter() + time_2 = stop - start + + # Executing 100 times should be longer than benchmark only once, but don't have to be exact 100x faster. + assert time_2 > time_1 + + +def test_option_write_iterations(file=generate_test_path('integers.ion')): + # warmup + execution_with_command(['write', file, '--c-extension', 'true', '--iterations', '10']) + + start = time.perf_counter() + execution_with_command(['write', file, '--c-extension', 'true', '--iterations', '1']) + stop = time.perf_counter() + time_1 = stop - start + + start = time.perf_counter() + execution_with_command(['write', file, '--c-extension', 'true', '--iterations', '100']) + stop = time.perf_counter() + time_2 = stop - start + + # Executing 100 times should be longer than benchmark only once, but don't have to be exact 100x faster. + assert time_2 > time_1 + + +def test_option_generate(): + assert execution_with_command(['generate']) == 0 From 4a60dd235a0fce4b5022ceeee175280ea4c821cc Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Sat, 10 Dec 2022 15:57:56 -0800 Subject: [PATCH 02/13] refactor --binary to --format. --- amazon/ionbenchmark/Format.py | 7 +++++++ amazon/ionbenchmark/ion_benchmark_cli.py | 11 ++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 amazon/ionbenchmark/Format.py diff --git a/amazon/ionbenchmark/Format.py b/amazon/ionbenchmark/Format.py new file mode 100644 index 000000000..f5b63d6a7 --- /dev/null +++ b/amazon/ionbenchmark/Format.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class Format(Enum): + """Enumeration of the formats.""" + ION_TEXT = 'ion_text' + ION_BINARY = 'ion_binary' diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 22655dc4e..51802c188 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -2,9 +2,9 @@ Usage: ion_python_benchmark_cli.py write [--api ] [--warmups ] [--iterations ] - [--c-extension ] [--binary ] + [--c-extension ] [--format ] ion_python_benchmark_cli.py read [--api ] [--warmups ] [--iterations ] - [--c-extension ] + [--c-extension ] [--format ] ion_python_benchmark_cli.py generate ion_python_benchmark_cli.py (-h | --help) ion_python_benchmark_cli.py (-v | --version) @@ -50,7 +50,8 @@ -c --c-extension If enable C extension, note that it only applys to simpleIon module currently. [default: True] - -b --binary If write in binary format. [default: True] + -f --format Format to benchmark, from the set (ion_binary | ion_text). May be specified + multiple times to compare different formats. [default: ion_binary] -n --limit (NOT SUPPORTED YET) Maximum number of entries to process. By default, all entries in each input file are processed. @@ -63,7 +64,6 @@ -u --time-unit (NOT SUPPORTED YET) -o --results-file (NOT SUPPORTED YET) - -f --format (NOT SUPPORTED YET) -I --ion-imports-for-input (NOT SUPPORTED YET) """ @@ -76,6 +76,7 @@ from tabulate import tabulate from amazon.ionbenchmark.API import API +from amazon.ionbenchmark.Format import Format from amazon.ionbenchmark.util import str_to_bool, format_percentage, format_decimal, TOOL_VERSION BYTES_TO_MB = 1024 * 1024 @@ -323,7 +324,7 @@ def ion_python_benchmark_cli(arguments): iterations = int(arguments['--iterations']) warmups = int(arguments['--warmups']) c_ext = str_to_bool(arguments['--c-extension']) - binary = str_to_bool(arguments['--binary']) + binary = arguments['--format'] == Format.ION_BINARY.value if arguments['read']: api = arguments['--api'] From fdec2e51b9de141e96e0c0a34bb55f1fcaeb13d3 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Sat, 10 Dec 2022 16:01:30 -0800 Subject: [PATCH 03/13] refactor description and move --limit options to the butoon. --- amazon/ionbenchmark/ion_benchmark_cli.py | 26 +++--------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 51802c188..5161b5ce6 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -5,7 +5,6 @@ [--c-extension ] [--format ] ion_python_benchmark_cli.py read [--api ] [--warmups ] [--iterations ] [--c-extension ] [--format ] - ion_python_benchmark_cli.py generate ion_python_benchmark_cli.py (-h | --help) ion_python_benchmark_cli.py (-v | --version) @@ -13,27 +12,10 @@ write Benchmark writing the given input file to the given output format(s). In order to isolate writing from reading, during the setup phase write instructions are generated from the input file and stored in memory. For large inputs, this can consume a lot of resources and take a long time - to execute. This may be reduced by using the --limit option to limit the number of entries that - are written. The cost of initializing the writer is included in each timed benchmark invocation. - Therefore, it is important to provide data that closely matches the size of the data written by a - single writer instance in the real world to ensure the initialization cost is properly amortized. + to execute. read First, re-write the given input file to the given output format(s) (if necessary), then - benchmark reading the resulting log files. If this takes too long to complete, consider using - the --limit option to limit the number of entries that are read. Specifying non-default settings - for certain options will cause the input data to be re-encoded even if the requested format is the - same as the format of the provided input. These options are --ion-length-preallocation and - --ion-flush-period for input in the ion binary format. The cost of initializing the reader or - DOM loader is included in each timed benchmark invocation. Therefore, it is important to provide - data that closely matches the size of data read by a single reader/loader instance in the real - world to ensure the initialization cost is properly amortized. - - generate (EXPERIMENTAL) Generate random Ion data which can be used as input to the read/write commands. - Data size, data type and the path of output file are required options. The specifications of three - scalar types can be executed so far, decimal, string and timestamp. The command will generate approximately - the amount of data requested, but the actual size of the generated may be slightly larger or smaller than - requested. We don't implement this feature in this implementation. We rely on ion-java-benchmark-cli to - achieve the same outcomes. + benchmark reading the resulting log files. Options: -h, --help Show this screen. @@ -53,9 +35,6 @@ -f --format Format to benchmark, from the set (ion_binary | ion_text). May be specified multiple times to compare different formats. [default: ion_binary] - -n --limit (NOT SUPPORTED YET) Maximum number of entries to process. By default, - all entries in each input file are processed. - -p --profile (NOT SUPPORTED YET) Initiates a single iteration that repeats indefinitely until terminated, allowing users to attach profiling tools. If this option is specified, the --warmups, --iterations, and --forks options are ignored. An @@ -65,6 +44,7 @@ -u --time-unit (NOT SUPPORTED YET) -o --results-file (NOT SUPPORTED YET) -I --ion-imports-for-input (NOT SUPPORTED YET) + -n --limit (NOT SUPPORTED YET) """ import timeit From f9b849a52934ebf7b1c2ca63b2e6bdb39904d462 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Sat, 10 Dec 2022 16:02:42 -0800 Subject: [PATCH 04/13] change nonBlocking to event in API description. --- amazon/ionbenchmark/ion_benchmark_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 5161b5ce6..2b388d025 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -20,9 +20,9 @@ Options: -h, --help Show this screen. - --api The API to excise (simpleIon, iterator, nonBlocking). `simpleIon` refer to + --api The API to excise (simpleIon, iterator, event). `simpleIon` refers to simpleIon's load method. `iterator` refers to simpleIon's iterator type got by - setting `parse_eagerly` to false. `nonBlocking` refer to ion-python's event + setting `parse_eagerly` to false. `event` refers to ion-python's event based non-blocking API. Default to `simpleIon`. -w --warmups Number of benchmark warm-up iterations. [default: 10] From 5e2926144e77335225e25f060a6f04c32072ed34 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Sat, 10 Dec 2022 16:20:48 -0800 Subject: [PATCH 05/13] Some refactors. --- amazon/ionbenchmark/ion_benchmark_cli.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 2b388d025..ea5d00306 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -295,9 +295,6 @@ def ion_python_benchmark_cli(arguments): if arguments['--version'] or arguments['-v']: print(TOOL_VERSION) return TOOL_VERSION - elif arguments['generate']: - print('Generate feature is not supported yet') - return 0 if not arguments['']: raise Exception('Invalid input file') file = arguments[''] From 07df40b1b52d1c02a68f7ec3734bac390a4b83ff Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Sun, 11 Dec 2022 13:21:42 -0800 Subject: [PATCH 06/13] Refactor profiling to memory_profile --- amazon/ionbenchmark/ion_benchmark_cli.py | 47 ++++++++++++++---------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index ea5d00306..f1c0a31d9 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -29,7 +29,9 @@ -i --iterations Number of benchmark iterations. [default: 10] - -c --c-extension If enable C extension, note that it only applys to simpleIon module currently. + -t --iterator If returns an iterator for simpleIon C extension. + + -c --c-extension If enables C extension, note that it only applys to simpleIon module currently. [default: True] -f --format Format to benchmark, from the set (ion_binary | ion_text). May be specified @@ -65,9 +67,9 @@ read_memory_usage_peak = 0 -def generate_simpleion_load_test_code(file, profiling, api='simpleIon', single_value=False, emit_bare_values=False, +def generate_simpleion_load_test_code(file, memory_profiling, api='simpleIon', single_value=False, emit_bare_values=False, parse_eagerly=True): - if not profiling: + if not memory_profiling: if api == 'simpleIon': def test_func(): with open(file, "br") as fp: @@ -123,13 +125,14 @@ def generate_event_setup(file, gc=True): pass -def read_micro_benchmark_simpleion(iterations, warmups, c_ext, file, profiling): +def read_micro_benchmark_simpleion(iterations, warmups, c_ext, file, memory_profiling): file_size = Path(file).stat().st_size / BYTES_TO_MB setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) - test_code = generate_simpleion_load_test_code(file, emit_bare_values=False, profiling=profiling) - test_code_without_wrapper = generate_simpleion_load_test_code(file, emit_bare_values=True, profiling=profiling) + test_code = generate_simpleion_load_test_code(file, emit_bare_values=False, memory_profiling=memory_profiling) + test_code_without_wrapper = generate_simpleion_load_test_code(file, emit_bare_values=True, + memory_profiling=memory_profiling) # warm up timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) @@ -144,16 +147,18 @@ def read_micro_benchmark_simpleion(iterations, warmups, c_ext, file, profiling): return file_size, result_with_gc, result_with_raw_value -def read_micro_benchmark_iterator(iterations, warmups, c_ext, file, profiling): +def read_micro_benchmark_iterator(iterations, warmups, c_ext, file, memory_profiling): file_size = Path(file).stat().st_size / BYTES_TO_MB # GC refers to reference cycles, not reference count setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) test_code = generate_simpleion_load_test_code(file, api='iterator', emit_bare_values=False, parse_eagerly=False, - profiling=profiling) - test_code_without_wrapper = generate_simpleion_load_test_code(file, api='iterator', emit_bare_values=True, - parse_eagerly=False, profiling=profiling) + memory_profiling=memory_profiling) + test_code_without_wrapper = generate_simpleion_load_test_code(file, + api='iterator', emit_bare_values=True, + parse_eagerly=False, + memory_profiling=memory_profiling) # warm up timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) @@ -179,11 +184,12 @@ def read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations raise Exception("Invalid micro benchmark function: micro benchmark function can not be none.") # memory profiling - read_micro_benchmark_function(iterations=1, warmups=0, file=file, c_ext=c_ext, profiling=True) + read_micro_benchmark_function(iterations=1, warmups=0, file=file, c_ext=c_ext, memory_profiling=True) # performance benchmark file_size, result_with_gc, result_with_raw_value = \ - read_micro_benchmark_function(iterations=iterations, warmups=warmups, file=file, c_ext=c_ext, profiling=False) + read_micro_benchmark_function(iterations=iterations, warmups=warmups, file=file, c_ext=c_ext, + memory_profiling=False) # calculate metrics conversion_time = result_with_gc - result_with_raw_value @@ -208,8 +214,8 @@ def read_generate_report(file_size, total_time, conversion_time, wrapper_time_pe print(tabulate(table, tablefmt='fancy_grid')) -def generate_simpleion_dump_test_code(obj, profiling, binary=True): - if not profiling: +def generate_simpleion_dump_test_code(obj, memory_profiling, binary=True): + if not memory_profiling: def test_func(): return ion.dumps(obj=obj, binary=binary) else: @@ -225,13 +231,13 @@ def test_func(): return test_func -def write_micro_benchmark_simpleion(iterations, warmups, c_ext, obj, file, binary, profiling): +def write_micro_benchmark_simpleion(iterations, warmups, c_ext, obj, file, binary, memory_profiling): file_size = Path(file).stat().st_size / BYTES_TO_MB # GC refers to reference cycles, not reference count setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) - test_func = generate_simpleion_dump_test_code(obj, profiling=profiling, binary=binary) + test_func = generate_simpleion_dump_test_code(obj, memory_profiling=memory_profiling, binary=binary) # warm up timeit.timeit(stmt=test_func, setup=setup_with_gc, number=warmups) @@ -242,13 +248,13 @@ def write_micro_benchmark_simpleion(iterations, warmups, c_ext, obj, file, binar return file_size, result_with_gc -def write_micro_benchmark_iterator(iterations, warmups, c_ext, obj, file, binary, profiling): +def write_micro_benchmark_iterator(iterations, warmups, c_ext, obj, file, binary, memory_profiling): file_size = Path(file).stat().st_size / BYTES_TO_MB # GC refers to reference cycles, not reference count setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) - test_code = generate_simpleion_dump_test_code(obj, profiling, binary=binary) + test_code = generate_simpleion_dump_test_code(obj, memory_profiling, binary=binary) # warm up timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) @@ -269,12 +275,13 @@ def write_micro_benchmark_and_profiling(write_micro_benchmark_function, iteratio if not write_micro_benchmark_function: raise Exception("Invalid micro benchmark function: micro benchmark function can not be none.") # Memory Profiling - write_micro_benchmark_function(iterations=1, warmups=0, obj=obj, c_ext=c_ext, file=file, binary=binary, profiling=True) + write_micro_benchmark_function(iterations=1, warmups=0, obj=obj, c_ext=c_ext, file=file, binary=binary, + memory_profiling=True) # Performance Benchmark file_size, result_with_gc = \ write_micro_benchmark_function(iterations=iterations, warmups=warmups, obj=obj, c_ext=c_ext, file=file, - binary=binary, profiling=False) + binary=binary, memory_profiling=False) # generate report write_generate_report(file_size, result_with_gc, write_memory_usage_peak) From b5381de2c01dbdef6b2ee4526a8790806153efcd Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Mon, 12 Dec 2022 02:07:26 -0800 Subject: [PATCH 07/13] deprecated iterator API --- amazon/ionbenchmark/API.py | 1 - amazon/ionbenchmark/ion_benchmark_cli.py | 107 ++++------------------- 2 files changed, 17 insertions(+), 91 deletions(-) diff --git a/amazon/ionbenchmark/API.py b/amazon/ionbenchmark/API.py index 1211cf48a..fe8e26cdd 100644 --- a/amazon/ionbenchmark/API.py +++ b/amazon/ionbenchmark/API.py @@ -5,5 +5,4 @@ class API(Enum): """Enumeration of the APIs.""" SIMPLE_ION = 'simpleIon' - ITERATOR = 'iterator' EVENT = 'event' diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index f1c0a31d9..52e69cf82 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -20,17 +20,14 @@ Options: -h, --help Show this screen. - --api The API to excise (simpleIon, iterator, event). `simpleIon` refers to - simpleIon's load method. `iterator` refers to simpleIon's iterator type got by - setting `parse_eagerly` to false. `event` refers to ion-python's event + --api The API to excise (simpleIon, event). `simpleIon` refers to + simpleIon's load method. `event` refers to ion-python's event based non-blocking API. Default to `simpleIon`. -w --warmups Number of benchmark warm-up iterations. [default: 10] -i --iterations Number of benchmark iterations. [default: 10] - -t --iterator If returns an iterator for simpleIon C extension. - -c --c-extension If enables C extension, note that it only applys to simpleIon module currently. [default: True] @@ -67,45 +64,22 @@ read_memory_usage_peak = 0 -def generate_simpleion_load_test_code(file, memory_profiling, api='simpleIon', single_value=False, emit_bare_values=False, - parse_eagerly=True): +def generate_simpleion_load_test_code(file, memory_profiling, single_value=False, emit_bare_values=False): if not memory_profiling: - if api == 'simpleIon': - def test_func(): - with open(file, "br") as fp: - data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=parse_eagerly) - return data - else: - def test_func(): - with open(file, "br") as fp: - iterator = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=parse_eagerly) - data = list(iterator) - return data + def test_func(): + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) + data = data + return data else: - if api == 'simpleIon': - def test_func(): - tracemalloc.start() - with open(file, "br") as fp: - data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=parse_eagerly) - global read_memory_usage_peak - read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() - return data - else: - def test_func(): - tracemalloc.start() - with open(file, "br") as fp: - iterator = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=parse_eagerly) - data = list(iterator) - global read_memory_usage_peak - read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() - - return data + def test_func(): + tracemalloc.start() + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return data return test_func @@ -147,32 +121,6 @@ def read_micro_benchmark_simpleion(iterations, warmups, c_ext, file, memory_prof return file_size, result_with_gc, result_with_raw_value -def read_micro_benchmark_iterator(iterations, warmups, c_ext, file, memory_profiling): - file_size = Path(file).stat().st_size / BYTES_TO_MB - - # GC refers to reference cycles, not reference count - setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) - - test_code = generate_simpleion_load_test_code(file, api='iterator', emit_bare_values=False, parse_eagerly=False, - memory_profiling=memory_profiling) - test_code_without_wrapper = generate_simpleion_load_test_code(file, - api='iterator', emit_bare_values=True, - parse_eagerly=False, - memory_profiling=memory_profiling) - - # warm up - timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) - timeit.timeit(stmt=test_code_without_wrapper, setup=setup_with_gc, number=warmups) - - # iteration - result_with_gc = timeit.timeit(stmt=test_code, setup=setup_with_gc, number=iterations) / iterations - result_with_raw_value = \ - (timeit.timeit(stmt=test_code_without_wrapper, setup=setup_with_gc, number=iterations) / iterations) \ - if c_ext else result_with_gc - - return file_size, result_with_gc, result_with_raw_value - - def read_micro_benchmark_event(iterations, warmups, c_ext, file=None): pass @@ -248,23 +196,6 @@ def write_micro_benchmark_simpleion(iterations, warmups, c_ext, obj, file, binar return file_size, result_with_gc -def write_micro_benchmark_iterator(iterations, warmups, c_ext, obj, file, binary, memory_profiling): - file_size = Path(file).stat().st_size / BYTES_TO_MB - - # GC refers to reference cycles, not reference count - setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) - - test_code = generate_simpleion_dump_test_code(obj, memory_profiling, binary=binary) - - # warm up - timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) - - # iteration - result_with_gc = timeit.timeit(stmt=test_code, setup=setup_with_gc, number=iterations) / iterations - - return file_size, result_with_gc - - def write_micro_benchmark_event(iterations, warmups, c_ext, obj, binary, file=None): pass @@ -314,8 +245,6 @@ def ion_python_benchmark_cli(arguments): api = arguments['--api'] if not api or api == API.SIMPLE_ION.value: read_micro_benchmark_function = read_micro_benchmark_simpleion - elif api == API.ITERATOR.value: - read_micro_benchmark_function = read_micro_benchmark_iterator elif api == API.EVENT.value: read_micro_benchmark_function = read_micro_benchmark_event else: @@ -327,15 +256,13 @@ def ion_python_benchmark_cli(arguments): api = arguments['--api'] if not api or api == API.SIMPLE_ION.value: write_micro_benchmark_function = write_micro_benchmark_simpleion - elif api == API.ITERATOR.value: - write_micro_benchmark_function = write_micro_benchmark_iterator elif api == API.EVENT.value: write_micro_benchmark_function = write_micro_benchmark_event else: raise Exception(f'Invalid API option {api}.') with open(file) as fp: - obj = ion.load(fp, parse_eagerly=(api != API.ITERATOR.value), single_value=False) + obj = ion.load(fp, parse_eagerly=True, single_value=False) return write_micro_benchmark_and_profiling(write_micro_benchmark_function, iterations, warmups, obj, c_ext, binary, file) From e258cff22a726b94c8a8384262da8d14b69f63a5 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Mon, 12 Dec 2022 10:07:02 -0800 Subject: [PATCH 08/13] fix tests. --- tests/test_benchmark_cli.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_benchmark_cli.py b/tests/test_benchmark_cli.py index 6786033fb..225854c0a 100644 --- a/tests/test_benchmark_cli.py +++ b/tests/test_benchmark_cli.py @@ -131,10 +131,7 @@ def test_option_write_c_extension(file=generate_test_path('integers.ion')): execution_with_command(['write', file, '--c-extension', 'false']) assert file_size == getsize(file) / BYTES_TO_MB - # This is a straightforward way to check if C extension is enabled from the top-level python layer. - # For most of the case, C extension is supposed to be faster than regular python implementation - assert result_with_gc < result_with_gc_2 - assert write_memory_usage_peak < write_memory_usage_peak_2 + assert file_size_2 == getsize(file) / BYTES_TO_MB def test_option_read_c_extension(file=generate_test_path('integers.ion')): From c30b4a34effd0cee5462facec37fabc6f6b00f92 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Mon, 12 Dec 2022 12:11:50 -0800 Subject: [PATCH 09/13] copyright --- amazon/ionbenchmark/ion_benchmark_cli.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 52e69cf82..30387d056 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -1,3 +1,16 @@ +# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at: +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +# OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the +# License. """A repeatable benchmark tool for ion-python implementation. Usage: From 6e51ddaf06205944e4fee6d66bcc056e28ab7c21 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 13 Dec 2022 23:16:01 -0800 Subject: [PATCH 10/13] Adds iterator option. --- amazon/ionbenchmark/ion_benchmark_cli.py | 88 ++++++++++++++++-------- amazon/ionbenchmark/util.py | 4 +- tests/test_benchmark_cli.py | 16 ++--- 3 files changed, 71 insertions(+), 37 deletions(-) diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 30387d056..40516d745 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -14,9 +14,9 @@ """A repeatable benchmark tool for ion-python implementation. Usage: - ion_python_benchmark_cli.py write [--api ] [--warmups ] [--iterations ] + ion_python_benchmark_cli.py write [--api ] [--iterator ] [--warmups ] [--iterations ] [--c-extension ] [--format ] - ion_python_benchmark_cli.py read [--api ] [--warmups ] [--iterations ] + ion_python_benchmark_cli.py read [--api ] [--iterator ] [--warmups ] [--iterations ] [--c-extension ] [--format ] ion_python_benchmark_cli.py (-h | --help) ion_python_benchmark_cli.py (-v | --version) @@ -33,15 +33,17 @@ Options: -h, --help Show this screen. - --api The API to excise (simpleIon, event). `simpleIon` refers to + --api The API to exercise (simpleIon, event). `simpleIon` refers to simpleIon's load method. `event` refers to ion-python's event based non-blocking API. Default to `simpleIon`. + -t --iterator If returns an iterator for simpleIon C extension read API. [default: False] + -w --warmups Number of benchmark warm-up iterations. [default: 10] -i --iterations Number of benchmark iterations. [default: 10] - -c --c-extension If enables C extension, note that it only applys to simpleIon module currently. + -c --c-extension If the C extension is enabled, note that it only applies to simpleIon module. [default: True] -f --format Format to benchmark, from the set (ion_binary | ion_text). May be specified @@ -77,23 +79,50 @@ read_memory_usage_peak = 0 -def generate_simpleion_load_test_code(file, memory_profiling, single_value=False, emit_bare_values=False): +def generate_simpleion_load_test_code(file, memory_profiling, iterator=False, single_value=False, + emit_bare_values=False): if not memory_profiling: - def test_func(): - with open(file, "br") as fp: - data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) - data = data - return data + if not iterator: + def test_func(): + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) + return data + else: + def test_func(): + with open(file, "br") as fp: + it = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) + while True: + try: + next(it) + except StopIteration: + break + return it else: - def test_func(): - tracemalloc.start() - with open(file, "br") as fp: - data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) - global read_memory_usage_peak - read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() - return data - + if not iterator: + def test_func(): + tracemalloc.start() + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return data + else: + def test_func(): + tracemalloc.start() + with open(file, "br") as fp: + it = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) + while True: + try: + next(it) + except StopIteration: + break + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return it return test_func @@ -112,14 +141,16 @@ def generate_event_setup(file, gc=True): pass -def read_micro_benchmark_simpleion(iterations, warmups, c_ext, file, memory_profiling): +def read_micro_benchmark_simpleion(iterations, warmups, c_ext, file, memory_profiling, iterator=False): file_size = Path(file).stat().st_size / BYTES_TO_MB - setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) + setup_with_gc = generate_simpleion_setup(c_ext=c_ext, gc=False) - test_code = generate_simpleion_load_test_code(file, emit_bare_values=False, memory_profiling=memory_profiling) + test_code = generate_simpleion_load_test_code(file, emit_bare_values=False, memory_profiling=memory_profiling, + iterator=iterator) test_code_without_wrapper = generate_simpleion_load_test_code(file, emit_bare_values=True, - memory_profiling=memory_profiling) + memory_profiling=memory_profiling, + iterator=iterator) # warm up timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) @@ -138,19 +169,20 @@ def read_micro_benchmark_event(iterations, warmups, c_ext, file=None): pass -def read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_ext): +def read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_ext, iterator): if not file: raise Exception("Invalid file: file can not be none.") if not read_micro_benchmark_function: raise Exception("Invalid micro benchmark function: micro benchmark function can not be none.") # memory profiling - read_micro_benchmark_function(iterations=1, warmups=0, file=file, c_ext=c_ext, memory_profiling=True) + read_micro_benchmark_function(iterations=1, warmups=0, file=file, c_ext=c_ext, memory_profiling=True, + iterator=iterator) # performance benchmark file_size, result_with_gc, result_with_raw_value = \ read_micro_benchmark_function(iterations=iterations, warmups=warmups, file=file, c_ext=c_ext, - memory_profiling=False) + memory_profiling=False, iterator=iterator) # calculate metrics conversion_time = result_with_gc - result_with_raw_value @@ -253,6 +285,7 @@ def ion_python_benchmark_cli(arguments): warmups = int(arguments['--warmups']) c_ext = str_to_bool(arguments['--c-extension']) binary = arguments['--format'] == Format.ION_BINARY.value + iterator = str_to_bool(arguments['--iterator']) if arguments['read']: api = arguments['--api'] @@ -263,7 +296,8 @@ def ion_python_benchmark_cli(arguments): else: raise Exception(f'Invalid API option {api}.') - return read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_ext) + return read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_ext, + iterator) elif arguments['write']: api = arguments['--api'] diff --git a/amazon/ionbenchmark/util.py b/amazon/ionbenchmark/util.py index 28ece9936..d8bad2dfd 100644 --- a/amazon/ionbenchmark/util.py +++ b/amazon/ionbenchmark/util.py @@ -2,7 +2,9 @@ def str_to_bool(v): - return v.lower() in ("true", "1") + if isinstance(v, str): + return v.lower() in ("true", "1") + return None def format_percentage(v): diff --git a/tests/test_benchmark_cli.py b/tests/test_benchmark_cli.py index 225854c0a..279b46ad4 100644 --- a/tests/test_benchmark_cli.py +++ b/tests/test_benchmark_cli.py @@ -7,7 +7,7 @@ from amazon.ion import simpleion from amazon.ionbenchmark import ion_benchmark_cli from amazon.ionbenchmark.ion_benchmark_cli import generate_simpleion_load_test_code, generate_simpleion_dump_test_code, \ - read_micro_benchmark_simpleion, BYTES_TO_MB, read_micro_benchmark_iterator, ion_python_benchmark_cli + read_micro_benchmark_simpleion, BYTES_TO_MB, ion_python_benchmark_cli from amazon.ionbenchmark.util import str_to_bool, TOOL_VERSION from tests import parametrize from tests.test_simpleion import generate_scalars_text @@ -45,8 +45,7 @@ def generate_test_path(p): generate_test_path('integers.ion') ) def test_generate_simpleion_load_test_code(path): - actual = generate_simpleion_load_test_code(path, profiling=False, api='simpleIon', single_value=False, - emit_bare_values=False, parse_eagerly=True) + actual = generate_simpleion_load_test_code(path, memory_profiling=False, single_value=False, emit_bare_values=False) # make sure we generated the desired load function with open(path) as fp: @@ -62,7 +61,7 @@ def test_generate_simpleion_load_test_code(path): )) ) def test_generate_simpleion_dump_test_code(obj): - actual = generate_simpleion_dump_test_code(obj, profiling=False, binary=False) + actual = generate_simpleion_dump_test_code(obj, memory_profiling=False, binary=False) # make sure we generated the desired dumps function expect = simpleion.dumps(obj, binary=False) @@ -76,7 +75,8 @@ def test_generate_simpleion_dump_test_code(obj): ) def test_read_micro_benchmark_simpleion(f): file_size, result_with_gc, result_with_raw_value = \ - read_micro_benchmark_simpleion(iterations=1, warmups=0, c_ext=True, file=f, profiling=False) + read_micro_benchmark_simpleion(iterations=1, warmups=0, c_ext=True, file=f, memory_profiling=False, + iterator=False) assert file_size == getsize(f) / BYTES_TO_MB assert result_with_gc > 0 @@ -88,7 +88,8 @@ def test_read_micro_benchmark_simpleion(f): ) def test_read_micro_benchmark_iterator(f): file_size, result_with_gc, result_with_raw_value = \ - read_micro_benchmark_iterator(iterations=1, warmups=0, c_ext=True, file=f, profiling=False) + read_micro_benchmark_simpleion(iterations=1, warmups=0, c_ext=True, file=f, memory_profiling=False, + iterator=True) assert file_size == getsize(f) / BYTES_TO_MB assert result_with_gc > 0 @@ -185,6 +186,3 @@ def test_option_write_iterations(file=generate_test_path('integers.ion')): # Executing 100 times should be longer than benchmark only once, but don't have to be exact 100x faster. assert time_2 > time_1 - -def test_option_generate(): - assert execution_with_command(['generate']) == 0 From 5177b123be38a98ec01060012c7be9069bc77307 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 14 Dec 2022 00:43:18 -0800 Subject: [PATCH 11/13] temporarily disable pypy. --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5df4cf832..9d1a0a97c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', 'pypy-3.7', 'pypy-3.8'] + python-version: ['3.7', '3.8', '3.9', '3.10'] steps: - uses: actions/checkout@v2 - name: Set up Python From 9ac6ae88a58de9030b4354200726b04671de73d3 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 14 Dec 2022 14:17:53 -0800 Subject: [PATCH 12/13] disable memory profiling for pypy --- .github/workflows/main.yml | 2 +- amazon/ionbenchmark/ion_benchmark_cli.py | 51 ++++++++++++++---------- tests/test_benchmark_cli.py | 38 ++---------------- 3 files changed, 34 insertions(+), 57 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9d1a0a97c..5df4cf832 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.7', '3.8', '3.9', '3.10', 'pypy-3.7', 'pypy-3.8'] steps: - uses: actions/checkout@v2 - name: Set up Python diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 40516d745..77716b9bb 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -62,8 +62,8 @@ """ import timeit -import tracemalloc from pathlib import Path +import platform import amazon.ion.simpleion as ion from docopt import docopt @@ -72,6 +72,10 @@ from amazon.ionbenchmark.API import API from amazon.ionbenchmark.Format import Format from amazon.ionbenchmark.util import str_to_bool, format_percentage, format_decimal, TOOL_VERSION +pypy = platform.python_implementation() == 'PyPy' +if not pypy: + import tracemalloc + BYTES_TO_MB = 1024 * 1024 _IVM = b'\xE0\x01\x00\xEA' @@ -130,10 +134,13 @@ def generate_event_test_code(file): pass -def generate_simpleion_setup(c_ext, gc=True): - rtn = f'import amazon.ion.simpleion as ion;from amazon.ion.simple_types import IonPySymbol; ion.c_ext ={c_ext}; import tracemalloc' +def generate_simpleion_setup(c_extension, memory_profiling, gc=True): + rtn = f'import amazon.ion.simpleion as ion;from amazon.ion.simple_types import IonPySymbol; ion.c_ext ={c_extension}' + if memory_profiling: + rtn += '; import tracemalloc' if gc: rtn += '; import gc; gc.enable()' + return rtn @@ -141,10 +148,10 @@ def generate_event_setup(file, gc=True): pass -def read_micro_benchmark_simpleion(iterations, warmups, c_ext, file, memory_profiling, iterator=False): +def read_micro_benchmark_simpleion(iterations, warmups, c_extension, file, memory_profiling, iterator=False): file_size = Path(file).stat().st_size / BYTES_TO_MB - setup_with_gc = generate_simpleion_setup(c_ext=c_ext, gc=False) + setup_with_gc = generate_simpleion_setup(c_extension=c_extension, gc=False, memory_profiling=memory_profiling) test_code = generate_simpleion_load_test_code(file, emit_bare_values=False, memory_profiling=memory_profiling, iterator=iterator) @@ -160,28 +167,29 @@ def read_micro_benchmark_simpleion(iterations, warmups, c_ext, file, memory_prof result_with_gc = timeit.timeit(stmt=test_code, setup=setup_with_gc, number=iterations) / iterations result_with_raw_value = \ (timeit.timeit(stmt=test_code_without_wrapper, setup=setup_with_gc, number=iterations) / iterations) \ - if c_ext else result_with_gc + if c_extension else result_with_gc return file_size, result_with_gc, result_with_raw_value -def read_micro_benchmark_event(iterations, warmups, c_ext, file=None): +def read_micro_benchmark_event(iterations, warmups, c_extension, file=None): pass -def read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_ext, iterator): +def read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_extension, iterator): if not file: raise Exception("Invalid file: file can not be none.") if not read_micro_benchmark_function: raise Exception("Invalid micro benchmark function: micro benchmark function can not be none.") # memory profiling - read_micro_benchmark_function(iterations=1, warmups=0, file=file, c_ext=c_ext, memory_profiling=True, - iterator=iterator) + if not pypy: + read_micro_benchmark_function(iterations=1, warmups=0, file=file, c_extension=c_extension, memory_profiling=True, + iterator=iterator) # performance benchmark file_size, result_with_gc, result_with_raw_value = \ - read_micro_benchmark_function(iterations=iterations, warmups=warmups, file=file, c_ext=c_ext, + read_micro_benchmark_function(iterations=iterations, warmups=warmups, file=file, c_extension=c_extension, memory_profiling=False, iterator=iterator) # calculate metrics @@ -224,11 +232,11 @@ def test_func(): return test_func -def write_micro_benchmark_simpleion(iterations, warmups, c_ext, obj, file, binary, memory_profiling): +def write_micro_benchmark_simpleion(iterations, warmups, c_extension, obj, file, binary, memory_profiling): file_size = Path(file).stat().st_size / BYTES_TO_MB # GC refers to reference cycles, not reference count - setup_with_gc = generate_simpleion_setup(gc=True, c_ext=c_ext) + setup_with_gc = generate_simpleion_setup(gc=True, c_extension=c_extension, memory_profiling=memory_profiling) test_func = generate_simpleion_dump_test_code(obj, memory_profiling=memory_profiling, binary=binary) @@ -241,22 +249,23 @@ def write_micro_benchmark_simpleion(iterations, warmups, c_ext, obj, file, binar return file_size, result_with_gc -def write_micro_benchmark_event(iterations, warmups, c_ext, obj, binary, file=None): +def write_micro_benchmark_event(iterations, warmups, c_extension, obj, binary, file=None): pass -def write_micro_benchmark_and_profiling(write_micro_benchmark_function, iterations, warmups, obj, c_ext, binary, file): +def write_micro_benchmark_and_profiling(write_micro_benchmark_function, iterations, warmups, obj, c_extension, binary, file): if not obj: raise Exception("Invalid obj: object can not be none.") if not write_micro_benchmark_function: raise Exception("Invalid micro benchmark function: micro benchmark function can not be none.") # Memory Profiling - write_micro_benchmark_function(iterations=1, warmups=0, obj=obj, c_ext=c_ext, file=file, binary=binary, - memory_profiling=True) + if not pypy: + write_micro_benchmark_function(iterations=1, warmups=0, obj=obj, c_extension=c_extension, file=file, binary=binary, + memory_profiling=True) # Performance Benchmark file_size, result_with_gc = \ - write_micro_benchmark_function(iterations=iterations, warmups=warmups, obj=obj, c_ext=c_ext, file=file, + write_micro_benchmark_function(iterations=iterations, warmups=warmups, obj=obj, c_extension=c_extension, file=file, binary=binary, memory_profiling=False) # generate report @@ -283,7 +292,7 @@ def ion_python_benchmark_cli(arguments): file = arguments[''] iterations = int(arguments['--iterations']) warmups = int(arguments['--warmups']) - c_ext = str_to_bool(arguments['--c-extension']) + c_extension = str_to_bool(arguments['--c-extension']) if not pypy else False binary = arguments['--format'] == Format.ION_BINARY.value iterator = str_to_bool(arguments['--iterator']) @@ -296,7 +305,7 @@ def ion_python_benchmark_cli(arguments): else: raise Exception(f'Invalid API option {api}.') - return read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_ext, + return read_micro_benchmark_and_profiling(read_micro_benchmark_function, iterations, warmups, file, c_extension, iterator) elif arguments['write']: @@ -311,7 +320,7 @@ def ion_python_benchmark_cli(arguments): with open(file) as fp: obj = ion.load(fp, parse_eagerly=True, single_value=False) - return write_micro_benchmark_and_profiling(write_micro_benchmark_function, iterations, warmups, obj, c_ext, + return write_micro_benchmark_and_profiling(write_micro_benchmark_function, iterations, warmups, obj, c_extension, binary, file) diff --git a/tests/test_benchmark_cli.py b/tests/test_benchmark_cli.py index 279b46ad4..be0cfd0b3 100644 --- a/tests/test_benchmark_cli.py +++ b/tests/test_benchmark_cli.py @@ -7,7 +7,7 @@ from amazon.ion import simpleion from amazon.ionbenchmark import ion_benchmark_cli from amazon.ionbenchmark.ion_benchmark_cli import generate_simpleion_load_test_code, generate_simpleion_dump_test_code, \ - read_micro_benchmark_simpleion, BYTES_TO_MB, ion_python_benchmark_cli + BYTES_TO_MB, ion_python_benchmark_cli from amazon.ionbenchmark.util import str_to_bool, TOOL_VERSION from tests import parametrize from tests.test_simpleion import generate_scalars_text @@ -70,32 +70,6 @@ def test_generate_simpleion_dump_test_code(obj): assert actual() == expect -@parametrize( - generate_test_path('integers.ion') -) -def test_read_micro_benchmark_simpleion(f): - file_size, result_with_gc, result_with_raw_value = \ - read_micro_benchmark_simpleion(iterations=1, warmups=0, c_ext=True, file=f, memory_profiling=False, - iterator=False) - - assert file_size == getsize(f) / BYTES_TO_MB - assert result_with_gc > 0 - assert result_with_raw_value > 0 - - -@parametrize( - generate_test_path('integers.ion') -) -def test_read_micro_benchmark_iterator(f): - file_size, result_with_gc, result_with_raw_value = \ - read_micro_benchmark_simpleion(iterations=1, warmups=0, c_ext=True, file=f, memory_profiling=False, - iterator=True) - - assert file_size == getsize(f) / BYTES_TO_MB - assert result_with_gc > 0 - assert result_with_raw_value > 0 - - def execution_with_command(c): return ion_python_benchmark_cli(docopt(doc, argv=c)) @@ -111,7 +85,7 @@ def test_option_write(file=generate_test_path('integers.ion')): assert file_size == getsize(file) / BYTES_TO_MB assert result_with_gc > 0 - assert write_memory_usage_peak > 0 + assert write_memory_usage_peak >= 0 def test_option_read(file=generate_test_path('integers.ion')): @@ -121,7 +95,7 @@ def test_option_read(file=generate_test_path('integers.ion')): assert file_size == getsize(file) / BYTES_TO_MB assert result_with_gc > 0 - assert read_memory_usage_peak > 0 + assert read_memory_usage_peak >= 0 def test_option_write_c_extension(file=generate_test_path('integers.ion')): @@ -143,12 +117,6 @@ def test_option_read_c_extension(file=generate_test_path('integers.ion')): execution_with_command(['read', file, '--c-extension', 'false']) assert file_size == getsize(file) / BYTES_TO_MB - # This is a straightforward way to check if C extension is enabled from the top-level python layer. - # For most of the case, C extension is supposed to be faster than regular python implementation - assert result_with_gc < result_with_gc_2 - - # For load, C extension does not show advantage of memory usage like dump. So we do not have below assertion - # assert read_memory_usage_peak > read_memory_usage_peak_2 def test_option_read_iterations(file=generate_test_path('integers.ion')): From 32e1011b2f63cc73dda3e7897bbafadc75f20168 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 14 Dec 2022 16:24:29 -0800 Subject: [PATCH 13/13] remove iterator option for write remove iterator for write fixed a typo --- amazon/ionbenchmark/ion_benchmark_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 77716b9bb..9110c647e 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -14,8 +14,8 @@ """A repeatable benchmark tool for ion-python implementation. Usage: - ion_python_benchmark_cli.py write [--api ] [--iterator ] [--warmups ] [--iterations ] - [--c-extension ] [--format ] + ion_python_benchmark_cli.py write [--api ] [--warmups ] [--c-extension ] [--iterations ] + [--format ] ion_python_benchmark_cli.py read [--api ] [--iterator ] [--warmups ] [--iterations ] [--c-extension ] [--format ] ion_python_benchmark_cli.py (-h | --help)