diff --git a/barretenberg/README.md b/barretenberg/README.md index a6487a68e6d2..c8a3d1fab03d 100644 --- a/barretenberg/README.md +++ b/barretenberg/README.md @@ -7,6 +7,39 @@ As the spec solidifies, this should be less of an issue. Aztec and Barretenberg **This code is highly experimental, use at your own risk!** +### Benchmarks! + +Table represents time in ms to build circuit and proof for each test on n threads. +Ignores proving key construction. + +#### x86_64 + +``` ++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+ +| Test | Gate Count | Subgroup Size | 1 | 4 | 16 | 32 | 64 | ++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+ +| sha256 | 38799 | 65536 | 5947 | 1653 | 729 | 476 | 388 | +| ecdsa_secp256k1 | 41049 | 65536 | 6005 | 2060 | 963 | 693 | 583 | +| ecdsa_secp256r1 | 67331 | 131072 | 12186 | 3807 | 1612 | 1351 | 1137 | +| schnorr | 33740 | 65536 | 5817 | 1696 | 688 | 532 | 432 | +| double_verify_proof | 505513 | 524288 | 47841 | 15824 | 7970 | 6784 | 6082 | ++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+ +``` + +#### WASM + +``` ++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+ +| Test | Gate Count | Subgroup Size | 1 | 4 | 16 | 32 | 64 | ++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+ +| sha256 | 38799 | 65536 | 18764 | 5116 | 1854 | 1524 | 1635 | +| ecdsa_secp256k1 | 41049 | 65536 | 19129 | 5595 | 2255 | 2097 | 2166 | +| ecdsa_secp256r1 | 67331 | 131072 | 38815 | 11257 | 4744 | 3633 | 3702 | +| schnorr | 33740 | 65536 | 18649 | 5244 | 2019 | 1498 | 1702 | +| double_verify_proof | 505513 | 524288 | 149652 | 45702 | 20811 | 16979 | 15679 | 
++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+ +``` + ### Dependencies - cmake >= 3.24 diff --git a/barretenberg/acir_tests/bench_acir_tests.sh b/barretenberg/acir_tests/bench_acir_tests.sh new file mode 100755 index 000000000000..b338e82e30b2 --- /dev/null +++ b/barretenberg/acir_tests/bench_acir_tests.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +TEST_NAMES=("$@") +THREADS=(1 4 16 32 64) +BENCHMARKS=$(mktemp) + +if [ "${#TEST_NAMES[@]}" -eq 0 ]; then + TEST_NAMES=(sha256 ecdsa_secp256k1 ecdsa_secp256r1 schnorr double_verify_proof) +fi + +for TEST in ${TEST_NAMES[@]}; do + for HC in ${THREADS[@]}; do + HARDWARE_CONCURRENCY=$HC BENCHMARK_FD=3 ./run_acir_tests.sh $TEST 3>>$BENCHMARKS + done +done + +# Build results into string with \n delimited rows and space delimited values. +TABLE_DATA="" +for TEST in ${TEST_NAMES[@]}; do + GATE_COUNT=$(jq -r --arg test "$TEST" 'select(.name == "gate_count" and .acir_test == $test) | .value' $BENCHMARKS | uniq) + SUBGROUP_SIZE=$(jq -r --arg test "$TEST" 'select(.name == "subgroup_size" and .acir_test == $test) | .value' $BENCHMARKS | uniq) + # Name in col 1, gate count in col 2, subgroup size in col 3. + TABLE_DATA+="$TEST $GATE_COUNT $SUBGROUP_SIZE" + # Each thread timing in subsequent cols. + for HC in "${THREADS[@]}"; do + RESULT=$(cat $BENCHMARKS | jq -r --arg test "$TEST" --argjson hc $HC 'select(.name == "proof_construction_time" and .acir_test == $test and .threads == $hc) | .value') + TABLE_DATA+=" $RESULT" + done + TABLE_DATA+=$'\n' +done + +# Trim the trailing newline. +TABLE_DATA="${TABLE_DATA%$'\n'}" + +echo +echo Table represents time in ms to build circuit and proof for each test on n threads. +echo Ignores proving key construction. 
+echo +# Use awk to print the table +echo -e "$TABLE_DATA" | awk -v threads="${THREADS[*]}" 'BEGIN { + split(threads, t, " "); + len_threads = length(t); + print "+--------------------------+------------+---------------+" genseparator(len_threads); + print "| Test | Gate Count | Subgroup Size |" genthreadheaders(t, len_threads); + print "+--------------------------+------------+---------------+" genseparator(len_threads); +} +{ + printf("| %-24s | %-10s | %-13s |", $1, $2, $3); + for (i = 4; i <= len_threads+3; i++) { + printf " %9s |", $(i); + } + print ""; +} +END { + print "+--------------------------+------------+---------------+" genseparator(len_threads); +} +function genseparator(len, res) { + for (i = 1; i <= len; i++) res = res "-----------+"; + return res; +} +function genthreadheaders(t, len, res) { + for (i = 1; i <= len; i++) res = res sprintf(" %9s |", t[i]); + return res; +} +' + +rm $BENCHMARKS \ No newline at end of file diff --git a/barretenberg/acir_tests/flows/prove_and_verify.sh b/barretenberg/acir_tests/flows/prove_and_verify.sh index 216c3742a65a..ac78ecc53d7d 100755 --- a/barretenberg/acir_tests/flows/prove_and_verify.sh +++ b/barretenberg/acir_tests/flows/prove_and_verify.sh @@ -4,5 +4,5 @@ set -eu if [ -n "$VERBOSE" ]; then $BIN prove_and_verify -v -c $CRS_PATH -b ./target/acir.gz else - $BIN prove_and_verify -c $CRS_PATH -b ./target/acir.gz > /dev/null 2>&1 + $BIN prove_and_verify -c $CRS_PATH -b ./target/acir.gz fi \ No newline at end of file diff --git a/barretenberg/acir_tests/run_acir_tests.sh b/barretenberg/acir_tests/run_acir_tests.sh index d7f5a73aa7d8..16395b06f7e4 100755 --- a/barretenberg/acir_tests/run_acir_tests.sh +++ b/barretenberg/acir_tests/run_acir_tests.sh @@ -9,7 +9,7 @@ FLOW=${FLOW:-prove_and_verify} CRS_PATH=~/.bb-crs BRANCH="master" VERBOSE=${VERBOSE:-} -NAMED_TEST=${1:-} +TEST_NAMES=("$@") FLOW_SCRIPT=$(realpath ./flows/${FLOW}.sh) @@ -47,12 +47,15 @@ function test() { cd $1 set +e + start=$(date +%s%3N) 
$FLOW_SCRIPT result=$? + end=$(date +%s%3N) + duration=$((end - start)) set -eu if [ $result -eq 0 ]; then - echo -e "\033[32mPASSED\033[0m" + echo -e "\033[32mPASSED\033[0m ($duration ms)" else echo -e "\033[31mFAILED\033[0m" exit 1 @@ -61,9 +64,11 @@ function test() { cd .. } -if [ -n "$NAMED_TEST" ]; then - echo -n "Testing $NAMED_TEST... " - test $NAMED_TEST +if [ "${#TEST_NAMES[@]}" -ne 0 ]; then + for NAMED_TEST in "${TEST_NAMES[@]}"; do + echo -n "Testing $NAMED_TEST... " + test $NAMED_TEST + done else for TEST_NAME in $(find -maxdepth 1 -type d -not -path '.' | sed 's|^\./||'); do echo -n "Testing $TEST_NAME... " diff --git a/barretenberg/cpp/dockerfiles/Dockerfile.wasm-linux-clang b/barretenberg/cpp/dockerfiles/Dockerfile.wasm-linux-clang index 8be67fce55d4..bdcd6d3edc77 100644 --- a/barretenberg/cpp/dockerfiles/Dockerfile.wasm-linux-clang +++ b/barretenberg/cpp/dockerfiles/Dockerfile.wasm-linux-clang @@ -4,11 +4,8 @@ WORKDIR /usr/src/barretenberg/cpp COPY ./scripts/install-wasi-sdk.sh ./scripts/install-wasi-sdk.sh RUN ./scripts/install-wasi-sdk.sh COPY . . -# Building both wasm's in parallel reduces build from 120s to 80s. 
-RUN (cmake --preset wasm && cmake --build --preset wasm) & \ - (cmake --preset wasm-threads && cmake --build --preset wasm-threads) & \ - wait - +RUN cmake --preset wasm && cmake --build --preset wasm +RUN cmake --preset wasm-threads && cmake --build --preset wasm-threads FROM scratch WORKDIR /usr/src/barretenberg/cpp diff --git a/barretenberg/cpp/src/barretenberg/bb/main.cpp b/barretenberg/cpp/src/barretenberg/bb/main.cpp index 169dc6a10f49..57289874c849 100644 --- a/barretenberg/cpp/src/barretenberg/bb/main.cpp +++ b/barretenberg/cpp/src/barretenberg/bb/main.cpp @@ -1,9 +1,13 @@ +#include "barretenberg/dsl/acir_format/acir_format.hpp" +#include "barretenberg/dsl/types.hpp" #include "config.hpp" #include "get_bytecode.hpp" #include "get_crs.hpp" #include "get_witness.hpp" #include "log.hpp" +#include #include +#include #include #include #include @@ -16,6 +20,9 @@ using namespace barretenberg; std::string CRS_PATH = "./crs"; bool verbose = false; +const std::filesystem::path current_path = std::filesystem::current_path(); +const auto current_dir = current_path.filename().string(); + acir_proofs::AcirComposer init(acir_format::acir_format& constraint_system) { acir_proofs::AcirComposer acir_composer(0, verbose); @@ -69,7 +76,20 @@ bool proveAndVerify(const std::string& bytecodePath, const std::string& witnessP auto witness = get_witness(witnessPath); auto acir_composer = init(constraint_system); + Timer pk_timer; + acir_composer.init_proving_key(constraint_system); + write_benchmark("pk_construction_time", pk_timer.milliseconds(), "acir_test", current_dir); + write_benchmark("gate_count", acir_composer.get_total_circuit_size(), "acir_test", current_dir); + write_benchmark("subgroup_size", acir_composer.get_circuit_subgroup_size(), "acir_test", current_dir); + + Timer proof_timer; auto proof = acir_composer.create_proof(constraint_system, witness, recursive); + write_benchmark("proof_construction_time", proof_timer.milliseconds(), "acir_test", current_dir); + + 
 Timer vk_timer; + acir_composer.init_verification_key(); + write_benchmark("vk_construction_time", vk_timer.milliseconds(), "acir_test", current_dir); + auto verified = acir_composer.verify_proof(proof, recursive); vinfo("verified: ", verified); diff --git a/barretenberg/cpp/src/barretenberg/common/benchmark.hpp b/barretenberg/cpp/src/barretenberg/common/benchmark.hpp new file mode 100644 index 000000000000..5d3034bbe6fd --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/common/benchmark.hpp @@ -0,0 +1,105 @@ +#include "barretenberg/common/throw_or_abort.hpp" +#include "barretenberg/env/hardware_concurrency.hpp" +#include +#include +#include +#include +#include +#include +#include + +namespace { +/** + * If the user provides the env var BENCHMARK_FD, benchmarks are written to that fd; otherwise it defaults to -1 (disabled). + * e.g.: + * BENCHMARK_FD=3 bb 3> benchmarks.jsonl + */ +auto bfd = []() { + try { + static auto bfd_str = std::getenv("BENCHMARK_FD"); + int bfd = bfd_str ? (int)std::stoul(bfd_str) : -1; + if (bfd >= 0 && (fcntl(bfd, F_GETFD) == -1 || errno == EBADF)) { + throw_or_abort("fd is not open. 
Did you redirect in your shell?"); + } + return bfd; + } catch (std::exception const& e) { + std::string inner_msg = e.what(); + throw_or_abort("Invalid BENCHMARK_FD: " + inner_msg); + } +}(); +} // namespace + +template struct TypeTraits; + +template struct TypeTraits::value>::type> { + static const char* type; +}; + +template +const char* TypeTraits::value>::type>::type = "number"; + +template <> struct TypeTraits { + static const char* type; +}; + +const char* TypeTraits::type = "string"; + +template <> struct TypeTraits { + static const char* type; +}; + +const char* TypeTraits::type = "number"; + +template <> struct TypeTraits { + static const char* type; +}; + +const char* TypeTraits::type = "bool"; + +// Helper function to get the current timestamp in the desired format +std::string getCurrentTimestamp() +{ + std::time_t now = std::time(nullptr); + std::tm* now_tm = std::gmtime(&now); + char buf[21] = { 0 }; + strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", now_tm); + return std::string(buf); +} + +template std::string toString(const T& value) +{ + std::ostringstream oss; + oss << value; + return oss.str(); +} + +void appendToStream(std::ostringstream&) +{ + // base case: do nothing +} + +template +void appendToStream(std::ostringstream& oss, const K& key, const V& value, Args... args) +{ + oss << ", \"" << key << "\": \"" << toString(value) << "\""; + appendToStream(oss, args...); // recursively process the remaining arguments +} + +template void write_benchmark(const std::string& name, const T& value, Args... 
 args) +{ + if (bfd == -1) { + return; + } + std::ostringstream oss; + oss << "{\"timestamp\": \"" << getCurrentTimestamp() << "\", " + << "\"name\": \"" << name << "\", " + << "\"type\": \"" << TypeTraits::type << "\", " + << "\"value\": " << value << ", " + << "\"threads\": " << env_hardware_concurrency(); + + appendToStream(oss, args...); // unpack and append the key-value pairs + + oss << "}" << std::endl; + const std::string& tmp = oss.str(); + write((int)bfd, tmp.c_str(), tmp.size()); +} \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/timer.hpp b/barretenberg/cpp/src/barretenberg/common/timer.hpp index b291b3f213e4..b4d760e4393d 100644 --- a/barretenberg/cpp/src/barretenberg/common/timer.hpp +++ b/barretenberg/cpp/src/barretenberg/common/timer.hpp @@ -55,6 +55,15 @@ class Timer { return nanos; } + /** + * @brief Return the number of milliseconds elapsed since the start of the timer. + */ + [[nodiscard]] int64_t milliseconds() const + { + int64_t nanos = nanoseconds(); + return nanos / 1000000; + } + /** * @brief Return the number of seconds elapsed since the start of the timer. */ diff --git a/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp b/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp index 82df4687674b..3aedfcbd83c9 100644 --- a/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp +++ b/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp @@ -1,10 +1,24 @@ #include "hardware_concurrency.hpp" +#include +#include +#include +#include #include extern "C" { uint32_t env_hardware_concurrency() { - return std::thread::hardware_concurrency(); +#ifndef __wasm__ + try { +#endif + static auto val = std::getenv("HARDWARE_CONCURRENCY"); + static const uint32_t cores = val ? 
 (uint32_t)std::stoul(val) : std::thread::hardware_concurrency(); + return cores; +#ifndef __wasm__ + } catch (std::exception const&) { + throw std::runtime_error("HARDWARE_CONCURRENCY invalid."); + } +#endif } } \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp b/barretenberg/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp index 0d365a6c0c4c..5dd90ca3492a 100644 --- a/barretenberg/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp @@ -26,8 +26,6 @@ class UltraComposer { std::shared_ptr circuit_proving_key; std::shared_ptr circuit_verification_key; - // The crs_factory holds the path to the srs and exposes methods to extract the srs elements - bool computed_witness = false; // This variable controls the amount with which the lookup table and witness values need to be shifted diff --git a/barretenberg/ts/README.md b/barretenberg/ts/README.md index 2993b8c91322..42cd716059af 100644 --- a/barretenberg/ts/README.md +++ b/barretenberg/ts/README.md @@ -13,17 +13,22 @@ Note there are two independent WASM builds, one with threading enabled and one w memory flag is set within the WASM itself. If you're running in a context where you can't have shared memory, we want to fallback to single threaded performance. -Performance for 2^19 (small witness generation phase): +The following output is from the `bench_acir_tests.sh` script. -- 16 core (not hyperthreads) x86: ~15s. -- 10 core M1 Mac Pro: ~20s. +Table represents time in ms to build circuit and proof for each test on n threads. +Ignores proving key construction. -Linear scaling was observed up to 32 cores. - -Witness generation phase is not multithreaded, and an interesting 512k circuit can take ~12s. This results in: - -- 16 core (not hyperthreads) x86: ~28s. -- 10 core M1 Mac Pro: ~32s. 
+``` ++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+ +| Test | Gate Count | Subgroup Size | 1 | 4 | 16 | 32 | 64 | ++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+ +| sha256 | 38799 | 65536 | 18764 | 5116 | 1854 | 1524 | 1635 | +| ecdsa_secp256k1 | 41049 | 65536 | 19129 | 5595 | 2255 | 2097 | 2166 | +| ecdsa_secp256r1 | 67331 | 131072 | 38815 | 11257 | 4744 | 3633 | 3702 | +| schnorr | 33740 | 65536 | 18649 | 5244 | 2019 | 1498 | 1702 | +| double_verify_proof | 505513 | 524288 | 149652 | 45702 | 20811 | 16979 | 15679 | ++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+ +``` ## Using as a standalone binary diff --git a/barretenberg/ts/src/benchmark/index.ts b/barretenberg/ts/src/benchmark/index.ts new file mode 100644 index 000000000000..20752349100c --- /dev/null +++ b/barretenberg/ts/src/benchmark/index.ts @@ -0,0 +1,26 @@ +import * as fs from 'fs'; +export * from './timer.js'; + +const bfd = (() => { + const bfdStr = process.env.BENCHMARK_FD; + const bfd = bfdStr ? parseInt(bfdStr) : -1; + if (bfd >= 0 && !fs.fstatSync(bfd)) { + throw new Error('fd is not open. Did you redirect in your shell?'); + } + return bfd; +})(); + +export function writeBenchmark(name: string, value: T, labels: Record = {}) { + if (bfd === -1) { + return; + } + const data = { + timestamp: new Date().toISOString(), + name, + type: typeof value, + value, + ...labels, + }; + const jsonl = JSON.stringify(data) + '\n'; + fs.writeSync(bfd, jsonl); +} diff --git a/barretenberg/ts/src/benchmark/timer.ts b/barretenberg/ts/src/benchmark/timer.ts new file mode 100644 index 000000000000..b74be9819ea6 --- /dev/null +++ b/barretenberg/ts/src/benchmark/timer.ts @@ -0,0 +1,41 @@ +/** + * Timer class to measure time intervals in milliseconds and seconds. 
+ * Upon instantiation, it stores the current timestamp as the starting point. + * The 'ms()' method returns the elapsed time in milliseconds, + * while the 's()' method returns the elapsed time in seconds. + * + * @example + * const timer = new Timer(); + * setTimeout(() =\> \{ + * console.log(`Elapsed time: ${timer.ms()} ms`); + * \}, 1000); + */ +export class Timer { + private start: number; + + constructor() { + this.start = new Date().getTime(); + } + + /** + * Returns the elapsed time in milliseconds since the Timer instance was created. + * Provides a simple and convenient way to measure the time duration between two events + * or monitor performance of specific code sections. + * + * @returns The elapsed time in milliseconds. + */ + public ms() { + return new Date().getTime() - this.start; + } + + /** + * Returns the time elapsed since the Timer instance was created, in seconds. + * The value is calculated by subtracting the initial start time from the current time + * and dividing the result by 1000 to convert milliseconds to seconds. + * + * @returns The elapsed time in seconds. + */ + public s() { + return (new Date().getTime() - this.start) / 1000; + } +} diff --git a/barretenberg/ts/src/main.ts b/barretenberg/ts/src/main.ts index e362fddee820..a985ea59e796 100755 --- a/barretenberg/ts/src/main.ts +++ b/barretenberg/ts/src/main.ts @@ -5,6 +5,8 @@ import { readFileSync, writeFileSync } from 'fs'; import { gunzipSync } from 'zlib'; import { Command } from 'commander'; import { acvmInfoJson } from './info.js'; +import { Timer, writeBenchmark } from './benchmark/index.js'; +import path from 'path'; createDebug.log = console.error.bind(console); const debug = createDebug('bb.js'); @@ -15,6 +17,7 @@ const debug = createDebug('bb.js'); // aware of this discrepancy, when creating proofs in bb versus // creating the same proofs in the node CLI. const MAX_CIRCUIT_SIZE = 2 ** 19; +const threads = +process.env.HARDWARE_CONCURRENCY! 
|| undefined; function getBytecode(bytecodePath: string) { const encodedCircuit = readFileSync(bytecodePath); @@ -41,7 +44,7 @@ async function computeCircuitSize(bytecodePath: string, api: Barretenberg) { } async function init(bytecodePath: string, crsPath: string) { - const api = await Barretenberg.new(); + const api = await Barretenberg.new(threads); const circuitSize = await getGates(bytecodePath, api); const subgroupSize = Math.pow(2, Math.ceil(Math.log2(circuitSize))); @@ -63,7 +66,7 @@ async function init(bytecodePath: string, crsPath: string) { await api.srsInitSrs(new RawBuffer(crs.getG1Data()), crs.numPoints, new RawBuffer(crs.getG2Data())); const acirComposer = await api.acirNewAcirComposer(subgroupSize); - return { api, acirComposer, circuitSize: subgroupSize }; + return { api, acirComposer, circuitSize, subgroupSize }; } async function initLite() { @@ -80,12 +83,24 @@ async function initLite() { } export async function proveAndVerify(bytecodePath: string, witnessPath: string, crsPath: string, isRecursive: boolean) { - const { api, acirComposer } = await init(bytecodePath, crsPath); + /* eslint-disable camelcase */ + const acir_test = path.basename(process.cwd()); + + const { api, acirComposer, circuitSize, subgroupSize } = await init(bytecodePath, crsPath); try { debug(`creating proof...`); const bytecode = getBytecode(bytecodePath); const witness = getWitness(witnessPath); + + const pkTimer = new Timer(); + await api.acirInitProvingKey(acirComposer, bytecode); + writeBenchmark('pk_construction_time', pkTimer.ms(), { acir_test, threads }); + writeBenchmark('gate_count', circuitSize, { acir_test, threads }); + writeBenchmark('subgroup_size', subgroupSize, { acir_test, threads }); + + const proofTimer = new Timer(); const proof = await api.acirCreateProof(acirComposer, bytecode, witness, isRecursive); + writeBenchmark('proof_construction_time', proofTimer.ms(), { acir_test, threads }); debug(`verifying...`); const verified = await 
api.acirVerifyProof(acirComposer, proof, isRecursive); @@ -94,6 +109,7 @@ export async function proveAndVerify(bytecodePath: string, witnessPath: string, } finally { await api.destroy(); } + /* eslint-enable camelcase */ } export async function prove(