diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index c54e0b3419ee..7927b0ba494a 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -124,6 +124,18 @@ "SMT": "ON" } }, + { + "name": "tsan", + "displayName": "Debugging build with thread sanitizer on Clang-16", + "description": "Build with thread sanitizer on clang16 with debugging information", + "inherits": "clang16-dbg", + "binaryDir": "build-tsan", + "environment": { + "CFLAGS": "-fsanitize=thread", + "CXXFLAGS": "-fsanitize=thread", + "LDFLAGS": "-fsanitize=thread" + } + }, { "name": "coverage", "displayName": "Build with coverage", @@ -190,9 +202,9 @@ "generator": "Unix Makefiles", "inherits": "clang16", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100" + "CFLAGS": "-fxray-instrument", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=500 -DXRAY=1", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=500 -DXRAY=1" }, "binaryDir": "build-xray" }, @@ -202,9 +214,9 @@ "description": "Build with Clang and enable detailed LLVM XRay for profiling", "inherits": "xray", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150" + "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1" }, "binaryDir": "build-xray-verbose" }, @@ -276,6 +288,11 @@ "inherits": "clang16", 
"configurePreset": "smt-verification" }, + { + "name": "tsan", + "inherits": "default", + "configurePreset": "tsan" + }, { "name": "coverage", "inherits": "default", diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index 0b7d79ef8edd..28ca73fe4d2a 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -1,8 +1,9 @@ #!/bin/bash set -eu -PRESET=${1:-xray-1thread} # can also be 'xray' +PRESET=${1:-xray} # can also be 'xray-1thread' ONLY_PROCESS=${2:-} +EXECUTABLE=${3:-ultra_honk_rounds_bench} # Move above script dir. cd $(dirname $0)/.. @@ -15,10 +16,10 @@ cd build-$PRESET if [ -z "$ONLY_PROCESS" ]; then # Clear old profile data. - rm -f xray-log.honk_bench_main_simple.* + rm -f xray-log.$EXECUTABLE.* # Run benchmark with profiling. - XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/honk_bench_main_simple + XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE fi function shorten_cpp_names() { @@ -37,8 +38,8 @@ function shorten_cpp_names() { } # Process benchmark file. 
-llvm-xray-16 stack xray-log.honk_bench_main_simple.* \ - --instr_map=./bin/honk_bench_main_simple --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \ +llvm-xray-16 stack xray-log.$EXECUTABLE.* \ + --instr_map=./bin/$EXECUTABLE --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \ | node ../scripts/llvm_xray_stack_flame_corrector.js \ | shorten_cpp_names \ | ../scripts/flamegraph.pl --width 1200 --fontsize 10 \ diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt index fad8d9f141df..ed6122bb41e5 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt @@ -2,6 +2,7 @@ set(BENCHMARK_SOURCES standard_plonk.bench.cpp ultra_honk.bench.cpp + ultra_honk_rounds.bench.cpp ultra_plonk.bench.cpp ) @@ -19,17 +20,4 @@ foreach(BENCHMARK_SOURCE ${BENCHMARK_SOURCES}) add_executable(${BENCHMARK_NAME}_bench main.bench.cpp ${BENCHMARK_SOURCE} benchmark_utilities.hpp) target_link_libraries(${BENCHMARK_NAME}_bench ${LINKED_LIBRARIES}) add_custom_target(run_${BENCHMARK_NAME} COMMAND ${BENCHMARK_NAME} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) -endforeach() - -add_executable( - honk_bench_main_simple - main.simple.cpp -) - -target_link_libraries( - honk_bench_main_simple - PRIVATE - stdlib_sha256 - stdlib_keccak - stdlib_merkle_tree -) +endforeach() \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp index 242aa86764d5..5053d89bd541 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp @@ -1,7 +1,10 @@ #pragma once #include +#include #include 
"barretenberg/honk/composer/ultra_composer.hpp" +#include "barretenberg/honk/proof_system/ultra_prover.hpp" +#include "barretenberg/plonk/composer/ultra_composer.hpp" #include "barretenberg/proof_system/types/circuit_type.hpp" #include "barretenberg/stdlib/encryption/ecdsa/ecdsa.hpp" #include "barretenberg/stdlib/hash/keccak/keccak.hpp" @@ -203,6 +206,26 @@ void construct_proof_with_specified_num_gates(State& state, } } +inline proof_system::honk::UltraProver get_prover( + proof_system::honk::UltraComposer& composer, + void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t), + size_t num_iterations) +{ + proof_system::honk::UltraComposer::CircuitBuilder builder; + test_circuit_function(builder, num_iterations); + std::shared_ptr instance = composer.create_instance(builder); + return composer.create_prover(instance); +} + +inline proof_system::plonk::UltraProver get_prover( + proof_system::plonk::UltraComposer& composer, + void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t), + size_t num_iterations) +{ + proof_system::plonk::UltraComposer::CircuitBuilder builder; + test_circuit_function(builder, num_iterations); + return composer.create_prover(builder); +} /** * @brief Performs proof constuction for benchmarks based on a provided circuit function * @@ -219,29 +242,18 @@ void construct_proof_with_specified_num_iterations(State& state, size_t)) noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); + + Composer composer; + auto num_iterations = static_cast(state.range(0)); for (auto _ : state) { // Constuct circuit and prover; don't include this part in measurement state.PauseTiming(); - auto builder = typename Composer::CircuitBuilder(); - test_circuit_function(builder, num_iterations); - - auto composer = Composer(); - if constexpr (proof_system::IsAnyOf) { - auto instance = composer.create_instance(builder); - auto ext_prover = composer.create_prover(instance); - 
state.ResumeTiming(); - - // Construct proof - auto proof = ext_prover.construct_proof(); - - } else { - auto ext_prover = composer.create_prover(builder); - state.ResumeTiming(); + auto prover = get_prover(composer, test_circuit_function, num_iterations); + state.ResumeTiming(); - // Construct proof - auto proof = ext_prover.construct_proof(); - } + // Construct proof + auto proof = prover.construct_proof(); } } diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp deleted file mode 100644 index 84a2f3c8c88c..000000000000 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* Entry point for profiling with e.g. LLVM xray. - * This provides a simple entrypoint to bypass artifacts with - * TODO(AD): Consider if we can directly profile the bench executables. - */ -#include -#include -#include - -#include "barretenberg/honk/composer/ultra_composer.hpp" -#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp" -#include "barretenberg/proof_system/types/circuit_type.hpp" -#include "barretenberg/stdlib/encryption/ecdsa/ecdsa.hpp" -#include "barretenberg/stdlib/hash/keccak/keccak.hpp" -#include "barretenberg/stdlib/hash/sha256/sha256.hpp" -#include "barretenberg/stdlib/merkle_tree/membership.hpp" -#include "barretenberg/stdlib/merkle_tree/memory_store.hpp" -#include "barretenberg/stdlib/merkle_tree/memory_tree.hpp" -#include "barretenberg/stdlib/merkle_tree/merkle_tree.hpp" -#include "barretenberg/stdlib/primitives/bool/bool.hpp" -#include "barretenberg/stdlib/primitives/curves/secp256k1.hpp" -#include "barretenberg/stdlib/primitives/field/field.hpp" -#include "barretenberg/stdlib/primitives/packed_byte_array/packed_byte_array.hpp" -#include "barretenberg/stdlib/primitives/witness/witness.hpp" - -using namespace proof_system; - -template void generate_sha256_test_circuit(Builder& builder, 
size_t num_iterations) -{ - std::string in; - in.resize(32); - plonk::stdlib::packed_byte_array input(&builder, in); - for (size_t i = 0; i < num_iterations; i++) { - input = plonk::stdlib::sha256(input); - } -} - -BBERG_INSTRUMENT BBERG_NOINLINE void sumcheck_profiling(honk::UltraProver& ext_prover) -{ - ext_prover.construct_proof(); - for (size_t i = 0; i < 200; i++) { - // Bench sumcheck - ext_prover.execute_relation_check_rounds(); - } -} - -/** - * @brief Benchmark: Construction of a Ultra Honk proof for a circuit determined by the provided circuit function - */ -void construct_proof_ultra() noexcept -{ - barretenberg::srs::init_crs_factory("../srs_db/ignition"); - // Constuct circuit and prover; don't include this part in measurement - honk::UltraComposer::CircuitBuilder builder; - generate_sha256_test_circuit(builder, 1); - std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; - - honk::UltraComposer composer; - std::shared_ptr instance = composer.create_instance(builder); - honk::UltraProver ext_prover = composer.create_prover(instance); - sumcheck_profiling(ext_prover); -} - -int main() -{ - construct_proof_ultra(); -} diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp new file mode 100644 index 000000000000..8b4e7145596b --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp @@ -0,0 +1,68 @@ +#include + +#include "barretenberg/benchmark/honk_bench/benchmark_utilities.hpp" +#include "barretenberg/honk/composer/ultra_composer.hpp" +#include "barretenberg/honk/proof_system/ultra_prover.hpp" +#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp" + +using namespace benchmark; +using namespace proof_system; + +// The rounds to measure +enum { PREAMBLE, WIRE_COMMITMENTS, SORTED_LIST_ACCUMULATOR, GRAND_PRODUCT_COMPUTATION, RELATION_CHECK, 
ZEROMORPH }; + +/** + * @details Benchmark ultrahonk by performing all the rounds, but only measuring one. + * Note: As a result the very short rounds take a long time for statistical significance, so it is recommended to set their + * iterations to 1. + * @param state - The google benchmark state. + * @param prover - The ultrahonk prover. + * @param index - The round to measure. + **/ +BBERG_PROFILE static void test_round_inner(State& state, honk::UltraProver& prover, size_t index) noexcept +{ + auto time_if_index = [&](size_t target_index, auto&& func) -> void { + if (index == target_index) { + state.ResumeTiming(); + func(); + state.PauseTiming(); + } else { + func(); + } + }; + for (auto _ : state) { + state.PauseTiming(); + time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); }); + time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); }); + time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); }); + time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); }); + time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); }); + time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); }); + state.ResumeTiming(); + } +} +BBERG_PROFILE static void test_round(State& state, size_t index) noexcept +{ + barretenberg::srs::init_crs_factory("../srs_db/ignition"); + + honk::UltraComposer composer; + // TODO(AD) benchmark both sparse and dense circuits? + honk::UltraProver prover = + bench_utils::get_prover(composer, &bench_utils::generate_keccak_test_circuit, 1); + test_round_inner(state, prover, index); +} +#define ROUND_BENCHMARK(round) \ + static void ROUND_##round(State& state) noexcept \ + { \ + test_round(state, round); \ + } \ + BENCHMARK(ROUND_##round)->Unit(::benchmark::kMillisecond) + +// Fast rounds take a long time to benchmark because of how we compute statistical significance. 
+// Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part. +ROUND_BENCHMARK(PREAMBLE)->Iterations(1); +ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1); +ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1); +ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1); +ROUND_BENCHMARK(RELATION_CHECK); +ROUND_BENCHMARK(ZEROMORPH); \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp index c3bdf3cf6b94..1815816a3c41 100644 --- a/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp +++ b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp @@ -6,13 +6,11 @@ #define BBERG_INLINE __attribute__((always_inline)) inline #endif -// TODO(AD): Other compilers -#if defined(__clang__) -#define BBERG_INSTRUMENT [[clang::xray_always_instrument]] -#define BBERG_NO_INSTRUMENT [[clang::xray_never_instrument]] -#define BBERG_NOINLINE [[clang::noinline]] +// TODO(AD): Other instrumentation? 
+#ifdef XRAY +#define BBERG_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]] +#define BBERG_NO_PROFILE [[clang::xray_never_instrument]] #else -#define BBERG_INSTRUMENT -#define BBERG_NO_INSTRUMENT -#define BBERG_NOINLINE +#define BBERG_PROFILE +#define BBERG_NO_PROFILE #endif \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp index 47e03b5ea85d..d3a1afac5099 100644 --- a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp +++ b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp @@ -52,7 +52,7 @@ class ThreadPool { std::condition_variable complete_condition_; bool stop = false; - BBERG_NO_INSTRUMENT void worker_loop(size_t thread_index); + BBERG_NO_PROFILE void worker_loop(size_t thread_index); void do_iterations() { diff --git a/barretenberg/cpp/src/barretenberg/common/thread.cpp b/barretenberg/cpp/src/barretenberg/common/thread.cpp index 04fdbe184547..04a71f5746b6 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.cpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.cpp @@ -85,4 +85,4 @@ void parallel_for(size_t num_iterations, const std::function& func // parallel_for_queued(num_iterations, func); #endif #endif -} +} \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/thread.hpp b/barretenberg/cpp/src/barretenberg/common/thread.hpp index c28e85951234..96e3df740929 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.hpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -21,4 +22,4 @@ inline size_t get_num_cpus_pow2() return static_cast(1ULL << numeric::get_msb(get_num_cpus())); } -void parallel_for(size_t num_iterations, const std::function& func); \ No newline at end of file +void parallel_for(size_t num_iterations, const std::function& func); diff --git 
a/barretenberg/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp b/barretenberg/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp index 4be07e693a87..a592fde47555 100644 --- a/barretenberg/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp @@ -23,12 +23,12 @@ template class UltraProver_ { public: explicit UltraProver_(std::shared_ptr); - void execute_preamble_round(); - void execute_wire_commitments_round(); - void execute_sorted_list_accumulator_round(); - void execute_grand_product_computation_round(); - void execute_relation_check_rounds(); - void execute_zeromorph_rounds(); + BBERG_PROFILE void execute_preamble_round(); + BBERG_PROFILE void execute_wire_commitments_round(); + BBERG_PROFILE void execute_sorted_list_accumulator_round(); + BBERG_PROFILE void execute_grand_product_computation_round(); + BBERG_PROFILE void execute_relation_check_rounds(); + BBERG_PROFILE void execute_zeromorph_rounds(); plonk::proof& export_proof(); plonk::proof& construct_proof(); diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index e3f94e9c8720..727575b7d561 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -18,10 +18,16 @@ namespace barretenberg { /** * Constructors / Destructors **/ + +/** + * @brief Initialize a Polynomial to size 'initial_size', zeroing memory. + * + * @param initial_size The initial size of the polynomial. 
+ */ template -Polynomial::Polynomial(const size_t size_) +Polynomial::Polynomial(size_t initial_size) : coefficients_(nullptr) - , size_(size_) + , size_(initial_size) { if (capacity() > 0) { coefficients_ = allocate_aligned_memory(sizeof(Fr) * capacity()); @@ -29,6 +35,25 @@ Polynomial::Polynomial(const size_t size_) memset(static_cast(coefficients_.get()), 0, sizeof(Fr) * capacity()); } +/** + * @brief Initialize a Polynomial to size 'initial_size'. + * Important: This does NOT zero memory. + * + * @param initial_size The initial size of the polynomial. + * @param flag Signals that we do not zero memory. + */ +template +Polynomial::Polynomial(size_t initial_size, DontZeroMemory flag) + : coefficients_(nullptr) + , size_(initial_size) +{ + // Flag is unused, but we don't memset 0 if passed. + (void)flag; + if (capacity() > 0) { + coefficients_ = allocate_aligned_memory(sizeof(Fr) * capacity()); + } +} + template Polynomial::Polynomial(const Polynomial& other) : Polynomial(other, other.size()) @@ -433,29 +458,28 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: size_t n_l = 1 << (n - 1); // Temporary buffer of half the size of the polynomial - pointer tmp_ptr = allocate_aligned_memory(sizeof(Fr) * n_l); - auto tmp = tmp_ptr.get(); - - Fr* prev = coefficients_.get(); + Polynomial intermediate(n_l, DontZeroMemory::FLAG); // Evaluate variable X_{n-1} at u_{m-1} Fr u_l = evaluation_points[m - 1]; - for (size_t i = 0; i < n_l; ++i) { - tmp[i] = prev[i] + u_l * (prev[i + n_l] - prev[i]); + + for (size_t i = 0; i < n_l; i++) { + // Initiate our intermediate results using this polynomial. 
+ intermediate[i] = at(i) + u_l * (at(i + n_l) - at(i)); } // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) for (size_t l = 1; l < m; ++l) { n_l = 1 << (n - l - 1); u_l = evaluation_points[m - l - 1]; for (size_t i = 0; i < n_l; ++i) { - tmp[i] = tmp[i] + u_l * (tmp[i + n_l] - tmp[i]); + intermediate[i] += u_l * (intermediate[i + n_l] - intermediate[i]); } } // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer - auto result = Polynomial(n_l); + Polynomial result(n_l, DontZeroMemory::FLAG); for (size_t idx = 0; idx < n_l; ++idx) { - result[idx] = tmp[idx]; + result[idx] = intermediate[idx]; } return result; diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp index b0ebaae33f62..63caded798a3 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp @@ -7,6 +7,8 @@ #include namespace barretenberg { +enum class DontZeroMemory { FLAG }; + template class Polynomial { public: /** @@ -21,9 +23,11 @@ template class Polynomial { using const_iterator = Fr const*; using FF = Fr; - Polynomial(const size_t initial_size); + Polynomial(size_t initial_size); + // Constructor that leaves the coefficients uninitialized (no zeroing) to save time; use with caution. + Polynomial(size_t initial_size, DontZeroMemory flag); Polynomial(const Polynomial& other); - Polynomial(const Polynomial& other, const size_t target_size); + Polynomial(const Polynomial& other, size_t target_size); Polynomial(Polynomial&& other) noexcept;