From 84993aa622070aae7b7bd112789fb8af57131e13 Mon Sep 17 00:00:00 2001 From: ledwards2225 Date: Thu, 22 Jun 2023 19:08:26 +0000 Subject: [PATCH 1/7] basic multithreading working --- .../composer/standard_honk_composer.test.cpp | 3 +- .../honk/sumcheck/sumcheck_round.hpp | 134 ++++++++++++++++-- .../honk/sumcheck/sumcheck_round.test.cpp | 35 +++++ 3 files changed, 156 insertions(+), 16 deletions(-) diff --git a/cpp/src/barretenberg/honk/composer/standard_honk_composer.test.cpp b/cpp/src/barretenberg/honk/composer/standard_honk_composer.test.cpp index 04ada2ef9b..76c632124f 100644 --- a/cpp/src/barretenberg/honk/composer/standard_honk_composer.test.cpp +++ b/cpp/src/barretenberg/honk/composer/standard_honk_composer.test.cpp @@ -391,7 +391,8 @@ TEST_F(StandardHonkComposerTests, SumcheckEvaluations) uint32_t b_idx = circuit_constructor.add_variable(b); uint32_t c_idx = circuit_constructor.add_variable(c); uint32_t d_idx = circuit_constructor.add_variable(d); - for (size_t i = 0; i < 16; i++) { + size_t num_iterations = 1 << 16; + for (size_t i = 0; i < num_iterations; i++) { circuit_constructor.create_add_gate( { a_idx, b_idx, c_idx, fr::one(), fr::one(), fr::neg_one(), fr::zero() }); circuit_constructor.create_add_gate( diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index a90d422e51..82733a0ed9 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -2,7 +2,9 @@ #include "barretenberg/common/log.hpp" #include #include +#include #include +#include "barretenberg/common/thread.hpp" #include "polynomials/barycentric_data.hpp" #include "polynomials/univariate.hpp" #include "polynomials/pow.hpp" @@ -120,7 +122,7 @@ template class SumcheckRound { * In practice, multivariates is one of ProverPolynomials or FoldedPolynomials. * */ - void extend_edges(auto& multivariates, size_t edge_idx) + void extend_edges(auto& extended_edges, auto& multivariates, size_t edge_idx) { size_t univariate_idx = 0; // TODO(#391) zip for (auto& poly : multivariates) { @@ -140,20 +142,82 @@ template class SumcheckRound { const PowUnivariate& pow_univariate, const FF alpha) { - // For each edge_idx = 2i, we need to multiply the whole contribution by zeta^{2^{2i}} - // This means that each univariate for each relation needs an extra multiplication. - FF pow_challenge = pow_univariate.partial_evaluation_constant; - for (size_t edge_idx = 0; edge_idx < round_size; edge_idx += 2) { - extend_edges(polynomials, edge_idx); - - // Compute the i-th edge's univariate contribution, - // scale it by the pow polynomial's constant and zeta power "c_l ⋅ ζ_{l+1}ⁱ" - // and add it to the accumulators for Sˡ(Xₗ) - accumulate_relation_univariates<>(relation_parameters, pow_challenge); - // Update the pow polynomial's contribution c_l ⋅ ζ_{l+1}ⁱ for the next edge. - pow_challenge *= pow_univariate.zeta_pow_sqr; + // Precompute the vector of required powers of zeta + std::vector pow_challenges; + pow_challenges.resize(round_size >> 1); + pow_challenges[0] = pow_univariate.partial_evaluation_constant; + for (size_t i = 1; i < (round_size >> 1); ++i) { + pow_challenges[i] = pow_challenges[i - 1] * pow_univariate.zeta_pow_sqr; } + // TODO(luke): should we always use multiple threads but simply reduce the number of threads to ensure + // sufficient iterations per thread? + size_t num_threads = get_num_cpus(); + size_t iterations_per_thread = round_size / num_threads; + size_t min_iterations_per_thread = 1 << 6; + + // use multithreading only if operations per thread meets minimum threshold + bool use_multithreading = (iterations_per_thread >= min_iterations_per_thread); + + if (use_multithreading) { + info("round size = ", round_size); + + std::vector partial_univariate_accumulators; + partial_univariate_accumulators.resize(num_threads); + for (auto& accum : partial_univariate_accumulators) { + zero_univariates(accum); + } + + std::vector> partial_extended_edges; + partial_extended_edges.resize(num_threads); + + omp_set_num_threads(int(num_threads)); + + // std::array idxs = { 3, 0, 2, 1 }; + // for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) + parallel_for(num_threads, [&](size_t thread_idx) { + // info("omp_get_num_threads() = ", omp_get_num_threads()); + size_t start = thread_idx * iterations_per_thread; + size_t end = (thread_idx + 1) * iterations_per_thread; + + // For each edge_idx = 2i, we need to multiply the whole contribution by zeta^{2^{2i}} + // This means that each univariate for each relation needs an extra multiplication. + for (size_t edge_idx = start; edge_idx < end; edge_idx += 2) { + extend_edges(partial_extended_edges[thread_idx], polynomials, edge_idx); + + // Update the pow polynomial's contribution c_l ⋅ ζ_{l+1}ⁱ for the next edge. + FF pow_challenge = pow_challenges[edge_idx >> 1]; + + // Compute the i-th edge's univariate contribution, + // scale it by the pow polynomial's constant and zeta power "c_l ⋅ ζ_{l+1}ⁱ" + // and add it to the accumulators for Sˡ(Xₗ) + accumulate_relation_univariates<>(partial_univariate_accumulators[thread_idx], + partial_extended_edges[thread_idx], + relation_parameters, + pow_challenge); + } + }); // parallel_for + + // Accumulate the partial univariate accumulators from each thread into the full accumulators + for (auto& partial_accum : partial_univariate_accumulators) { + add_nested_tuples(univariate_accumulators, partial_accum); + } + } else { + // For each edge_idx = 2i, we need to multiply the whole contribution by zeta^{2^{2i}} + // This means that each univariate for each relation needs an extra multiplication. + for (size_t edge_idx = 0; edge_idx < round_size; edge_idx += 2) { + extend_edges(extended_edges, polynomials, edge_idx); + + // Update the pow polynomial's contribution c_l ⋅ ζ_{l+1}ⁱ for the next edge. + FF pow_challenge = pow_challenges[edge_idx >> 1]; + + // Compute the i-th edge's univariate contribution, + // scale it by the pow polynomial's constant and zeta power "c_l ⋅ ζ_{l+1}ⁱ" + // and add it to the accumulators for Sˡ(Xₗ) + accumulate_relation_univariates<>( + univariate_accumulators, extended_edges, relation_parameters, pow_challenge); + } + } return batch_over_relations(alpha); } @@ -238,14 +302,18 @@ template class SumcheckRound { * appropriate scaling factors, produces S_l. */ template - void accumulate_relation_univariates(const RelationParameters& relation_parameters, const FF& scaling_factor) + void accumulate_relation_univariates(RelationUnivariates& univariate_accumulators, + const auto& extended_edges, + const RelationParameters& relation_parameters, + const FF& scaling_factor) { std::get(relations).add_edge_contribution( std::get(univariate_accumulators), extended_edges, relation_parameters, scaling_factor); // Repeat for the next relation. if constexpr (relation_idx + 1 < NUM_RELATIONS) { - accumulate_relation_univariates(relation_parameters, scaling_factor); + accumulate_relation_univariates( + univariate_accumulators, extended_edges, relation_parameters, scaling_factor); } } @@ -401,5 +469,41 @@ template class SumcheckRound { apply_to_tuple_of_arrays(operation, tuple); } } + + /** + * @brief Componentwise addition of two tuples + * @details Used for adding tuples of Univariates but in general works for any object for which += is + * defined. The result is stored in the first tuple. + * + * @tparam T Type of the elements contained in the tuples + * @param tuple_1 First summand. Result stored in this tuple + * @param tuple_2 Second summand + */ + template + static constexpr void add_tuples(std::tuple& tuple_1, const std::tuple& tuple_2) + { + [&](std::index_sequence) { ((std::get(tuple_1) += std::get(tuple_2)), ...); } + (std::make_index_sequence{}); + } + + /** + * @brief Componentwise addition of nested tuples (tuples of tuples) + * @details Used for summing tuples of tuples of Univariates. Needed for Sumcheck multithreading. Each thread + * accumulates realtion contributions across a portion of the hypecube and then the results are accumulated into a + * single nested tuple. + * + * @tparam Tuple + * @tparam Index Index into outer tuple + * @param tuple_1 First nested tuple summand. Result stored here + * @param tuple_2 Second summand + */ + template + static constexpr void add_nested_tuples(Tuple& tuple_1, const Tuple& tuple_2) + { + if constexpr (Index < std::tuple_size::value) { + add_tuples(std::get(tuple_1), std::get(tuple_2)); + add_nested_tuples(tuple_1, tuple_2); + } + } }; } // namespace proof_system::honk::sumcheck diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp index c5e7260be4..cdc1db9219 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp @@ -361,4 +361,39 @@ TEST(SumcheckRound, TuplesOfEvaluationArrays) EXPECT_EQ(std::get<1>(tuple_of_arrays)[1], 0); } +/** + * @brief Test utility functions for adding two tuples of tuples of Univariates + * + */ +TEST(SumcheckRound, AddTuplesOfTuplesOfUnivariates) +{ + using Flavor = proof_system::honk::flavor::Standard; + using FF = typename Flavor::FF; + + // Define some arbitrary univariates + Univariate univariate_1({ 1, 2 }); + Univariate univariate_2({ 2, 4 }); + Univariate univariate_3({ 3, 4, 5 }); + + Univariate univariate_4({ 3, 6 }); + Univariate univariate_5({ 8, 1 }); + Univariate univariate_6({ 3, 7, 1 }); + + Univariate expected_sum_1 = univariate_1 + univariate_4; + Univariate expected_sum_2 = univariate_2 + univariate_5; + Univariate expected_sum_3 = univariate_3 + univariate_6; + + // Construct two tuples of tuples + auto tuple_of_tuples_1 = + std::make_tuple(std::make_tuple(univariate_1), std::make_tuple(univariate_2, univariate_3)); + auto tuple_of_tuples_2 = + std::make_tuple(std::make_tuple(univariate_4), std::make_tuple(univariate_5, univariate_6)); + + SumcheckRound::add_nested_tuples(tuple_of_tuples_1, tuple_of_tuples_2); + + EXPECT_EQ(std::get<0>(std::get<0>(tuple_of_tuples_1)), expected_sum_1); + EXPECT_EQ(std::get<0>(std::get<1>(tuple_of_tuples_1)), expected_sum_2); + EXPECT_EQ(std::get<1>(std::get<1>(tuple_of_tuples_1)), expected_sum_3); +} + } // namespace test_sumcheck_round From 5f3c23e11a8f738491e1d6b8b1da5f0336184fc1 Mon Sep 17 00:00:00 2001 From: ledwards2225 Date: Thu, 22 Jun 2023 21:27:42 +0000 Subject: [PATCH 2/7] cleanup --- .../compare_honk_branch_vs_baseline.sh | 1 + .../honk/proof_system/ultra_prover.cpp | 21 +++++ .../honk/sumcheck/sumcheck_round.hpp | 91 +++++++------------ 3 files changed, 57 insertions(+), 56 deletions(-) diff --git a/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_branch_vs_baseline.sh b/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_branch_vs_baseline.sh index 71bf3ee5b9..612514c414 100755 --- a/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_branch_vs_baseline.sh +++ b/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_branch_vs_baseline.sh @@ -35,6 +35,7 @@ bin/$BENCH_TARGET --benchmark_format=json > $BRANCH_RESULTS # Checkout baseline branch, run benchmarks, save results in json format echo -e "\nConfiguring and building $BENCH_TARGET in $BASELINE_BRANCH branch..\n" git checkout master > /dev/null +cd $BASE_DIR rm -rf $BUILD_DIR cmake --preset bench > /dev/null && cmake --build --preset bench --target $BENCH_TARGET cd build-bench diff --git a/cpp/src/barretenberg/honk/proof_system/ultra_prover.cpp b/cpp/src/barretenberg/honk/proof_system/ultra_prover.cpp index ba61525755..2f90b42736 100644 --- a/cpp/src/barretenberg/honk/proof_system/ultra_prover.cpp +++ b/cpp/src/barretenberg/honk/proof_system/ultra_prover.cpp @@ -1,6 +1,7 @@ #include "ultra_prover.hpp" #include #include +#include "barretenberg/common/timer.hpp" #include "barretenberg/honk/proof_system/prover_library.hpp" #include "barretenberg/honk/sumcheck/sumcheck.hpp" #include @@ -292,47 +293,67 @@ template plonk::proof& UltraProver_::export_proof() template plonk::proof& UltraProver_::construct_proof() { + Timer timer; // Add circuit size public input size and public inputs to transcript. execute_preamble_round(); + info("execute_preamble_round: ", timer.seconds(), "s"); + Timer timer2; // Compute first three wire commitments execute_wire_commitments_round(); queue.process_queue(); + info("execute_wire_commitments_round: ", timer2.seconds(), "s"); + Timer timer3; // Compute sorted list accumulator and commitment execute_sorted_list_accumulator_round(); queue.process_queue(); + info("execute_sorted_list_accumulator_round: ", timer3.seconds(), "s"); + Timer timer4; // Fiat-Shamir: beta & gamma // Compute grand product(s) and commitments. execute_grand_product_computation_round(); queue.process_queue(); + info("execute_grand_product_computation_round: ", timer4.seconds(), "s"); + Timer timer5; // Fiat-Shamir: alpha // Run sumcheck subprotocol. execute_relation_check_rounds(); + info("execute_relation_check_rounds: ", timer5.seconds(), "s"); + Timer timer6; // Fiat-Shamir: rho // Compute Fold polynomials and their commitments. execute_univariatization_round(); queue.process_queue(); + info("execute_univariatization_round: ", timer6.seconds(), "s"); + Timer timer7; // Fiat-Shamir: r // Compute Fold evaluations execute_pcs_evaluation_round(); + info("execute_pcs_evaluation_round: ", timer7.seconds(), "s"); + Timer timer8; // Fiat-Shamir: nu // Compute Shplonk batched quotient commitment Q execute_shplonk_batched_quotient_round(); queue.process_queue(); + info("execute_shplonk_batched_quotient_round: ", timer8.seconds(), "s"); + Timer timer9; // Fiat-Shamir: z // Compute partial evaluation Q_z execute_shplonk_partial_evaluation_round(); + info("execute_shplonk_partial_evaluation_round: ", timer9.seconds(), "s"); + Timer timer10; // Fiat-Shamir: z // Compute PCS opening proof (either KZG quotient commitment or IPA opening proof) execute_final_pcs_round(); + info("execute_final_pcs_round: ", timer10.seconds(), "s"); return export_proof(); } diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index 82733a0ed9..338a314282 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -80,8 +80,6 @@ template class SumcheckRound { RelationUnivariates univariate_accumulators; RelationEvaluations relation_evaluations; - ExtendedEdges extended_edges; - // TODO(#224)(Cody): this should go away BarycentricData barycentric_2_to_max = BarycentricData(); @@ -150,63 +148,36 @@ template class SumcheckRound { pow_challenges[i] = pow_challenges[i - 1] * pow_univariate.zeta_pow_sqr; } - // TODO(luke): should we always use multiple threads but simply reduce the number of threads to ensure - // sufficient iterations per thread? - size_t num_threads = get_num_cpus(); - size_t iterations_per_thread = round_size / num_threads; - size_t min_iterations_per_thread = 1 << 6; - - // use multithreading only if operations per thread meets minimum threshold - bool use_multithreading = (iterations_per_thread >= min_iterations_per_thread); + // Determine number of threads for multithreading. + // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based + // on a specified minimum number of iterations per thread. This eventually leads to the use of a single thread. + size_t max_num_threads = get_num_cpus(); // number of available threads + size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread + size_t desired_num_threads = round_size / min_iterations_per_thread; + size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified + num_threads = num_threads > 0 ? num_threads : 1; // ensure num threads is >= 1 + size_t iterations_per_thread = round_size / num_threads; // actual iterations per thread + + // Constuct univariate accumulator containers; one per thread + std::vector thread_univariate_accumulators; + thread_univariate_accumulators.resize(num_threads); + for (auto& accum : thread_univariate_accumulators) { + zero_univariates(accum); + } - if (use_multithreading) { - info("round size = ", round_size); + // Constuct extended edge containers; one per thread + std::vector> extended_edges; + extended_edges.resize(num_threads); - std::vector partial_univariate_accumulators; - partial_univariate_accumulators.resize(num_threads); - for (auto& accum : partial_univariate_accumulators) { - zero_univariates(accum); - } + // Accumulate the contribution from each sub-relation accross each edge of the hyper-cube + parallel_for(num_threads, [&](size_t thread_idx) { + size_t start = thread_idx * iterations_per_thread; + size_t end = (thread_idx + 1) * iterations_per_thread; - std::vector> partial_extended_edges; - partial_extended_edges.resize(num_threads); - - omp_set_num_threads(int(num_threads)); - - // std::array idxs = { 3, 0, 2, 1 }; - // for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) - parallel_for(num_threads, [&](size_t thread_idx) { - // info("omp_get_num_threads() = ", omp_get_num_threads()); - size_t start = thread_idx * iterations_per_thread; - size_t end = (thread_idx + 1) * iterations_per_thread; - - // For each edge_idx = 2i, we need to multiply the whole contribution by zeta^{2^{2i}} - // This means that each univariate for each relation needs an extra multiplication. - for (size_t edge_idx = start; edge_idx < end; edge_idx += 2) { - extend_edges(partial_extended_edges[thread_idx], polynomials, edge_idx); - - // Update the pow polynomial's contribution c_l ⋅ ζ_{l+1}ⁱ for the next edge. - FF pow_challenge = pow_challenges[edge_idx >> 1]; - - // Compute the i-th edge's univariate contribution, - // scale it by the pow polynomial's constant and zeta power "c_l ⋅ ζ_{l+1}ⁱ" - // and add it to the accumulators for Sˡ(Xₗ) - accumulate_relation_univariates<>(partial_univariate_accumulators[thread_idx], - partial_extended_edges[thread_idx], - relation_parameters, - pow_challenge); - } - }); // parallel_for - - // Accumulate the partial univariate accumulators from each thread into the full accumulators - for (auto& partial_accum : partial_univariate_accumulators) { - add_nested_tuples(univariate_accumulators, partial_accum); - } - } else { // For each edge_idx = 2i, we need to multiply the whole contribution by zeta^{2^{2i}} // This means that each univariate for each relation needs an extra multiplication. - for (size_t edge_idx = 0; edge_idx < round_size; edge_idx += 2) { - extend_edges(extended_edges, polynomials, edge_idx); + for (size_t edge_idx = start; edge_idx < end; edge_idx += 2) { + extend_edges(extended_edges[thread_idx], polynomials, edge_idx); // Update the pow polynomial's contribution c_l ⋅ ζ_{l+1}ⁱ for the next edge. FF pow_challenge = pow_challenges[edge_idx >> 1]; @@ -214,10 +185,18 @@ template class SumcheckRound { // Compute the i-th edge's univariate contribution, // scale it by the pow polynomial's constant and zeta power "c_l ⋅ ζ_{l+1}ⁱ" // and add it to the accumulators for Sˡ(Xₗ) - accumulate_relation_univariates<>( - univariate_accumulators, extended_edges, relation_parameters, pow_challenge); + accumulate_relation_univariates<>(thread_univariate_accumulators[thread_idx], + extended_edges[thread_idx], + relation_parameters, + pow_challenge); } + }); // parallel_for + + // Accumulate the per-thread univariate accumulators into a single accumulator + for (auto& accumulators : thread_univariate_accumulators) { + add_nested_tuples(univariate_accumulators, accumulators); } + // Batch the univariate contributions from each sub-relation to obtain the round univariate return batch_over_relations(alpha); } From 61691e5bd1dc7081278e7a6f306ec5dac8c178dc Mon Sep 17 00:00:00 2001 From: ledwards2225 Date: Thu, 22 Jun 2023 21:45:19 +0000 Subject: [PATCH 3/7] remove omp header --- cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index 338a314282..994f5680aa 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -2,7 +2,6 @@ #include "barretenberg/common/log.hpp" #include #include -#include #include #include "barretenberg/common/thread.hpp" #include "polynomials/barycentric_data.hpp" From 336cabc11533e063abaa56679489ed2348caa765 Mon Sep 17 00:00:00 2001 From: ledwards2225 Date: Thu, 22 Jun 2023 22:53:42 +0000 Subject: [PATCH 4/7] fix bug in sumcheck round test --- cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp index cdc1db9219..30fbbe543e 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp @@ -36,7 +36,7 @@ static Univariate compute_round_univariate( const RelationParameters& relation_parameters, const FF alpha) { - size_t round_size = 1; + size_t round_size = 2; // Improvement(Cody): This is ugly? Maye supply some/all of this data through "flavor" class? auto round = SumcheckRound(round_size); From 5b76ea49264c4c130d57909b16fa9a5a98608793 Mon Sep 17 00:00:00 2001 From: ledwards2225 Date: Fri, 23 Jun 2023 15:01:35 +0000 Subject: [PATCH 5/7] remove debug code --- .../honk/proof_system/ultra_prover.cpp | 21 ------------------- .../honk/sumcheck/sumcheck_round.hpp | 7 ++++--- 2 files changed, 4 insertions(+), 24 deletions(-) diff --git a/cpp/src/barretenberg/honk/proof_system/ultra_prover.cpp b/cpp/src/barretenberg/honk/proof_system/ultra_prover.cpp index 2f90b42736..ba61525755 100644 --- a/cpp/src/barretenberg/honk/proof_system/ultra_prover.cpp +++ b/cpp/src/barretenberg/honk/proof_system/ultra_prover.cpp @@ -1,7 +1,6 @@ #include "ultra_prover.hpp" #include #include -#include "barretenberg/common/timer.hpp" #include "barretenberg/honk/proof_system/prover_library.hpp" #include "barretenberg/honk/sumcheck/sumcheck.hpp" #include @@ -293,67 +292,47 @@ template plonk::proof& UltraProver_::export_proof() template plonk::proof& UltraProver_::construct_proof() { - Timer timer; // Add circuit size public input size and public inputs to transcript. execute_preamble_round(); - info("execute_preamble_round: ", timer.seconds(), "s"); - Timer timer2; // Compute first three wire commitments execute_wire_commitments_round(); queue.process_queue(); - info("execute_wire_commitments_round: ", timer2.seconds(), "s"); - Timer timer3; // Compute sorted list accumulator and commitment execute_sorted_list_accumulator_round(); queue.process_queue(); - info("execute_sorted_list_accumulator_round: ", timer3.seconds(), "s"); - Timer timer4; // Fiat-Shamir: beta & gamma // Compute grand product(s) and commitments. execute_grand_product_computation_round(); queue.process_queue(); - info("execute_grand_product_computation_round: ", timer4.seconds(), "s"); - Timer timer5; // Fiat-Shamir: alpha // Run sumcheck subprotocol. execute_relation_check_rounds(); - info("execute_relation_check_rounds: ", timer5.seconds(), "s"); - Timer timer6; // Fiat-Shamir: rho // Compute Fold polynomials and their commitments. execute_univariatization_round(); queue.process_queue(); - info("execute_univariatization_round: ", timer6.seconds(), "s"); - Timer timer7; // Fiat-Shamir: r // Compute Fold evaluations execute_pcs_evaluation_round(); - info("execute_pcs_evaluation_round: ", timer7.seconds(), "s"); - Timer timer8; // Fiat-Shamir: nu // Compute Shplonk batched quotient commitment Q execute_shplonk_batched_quotient_round(); queue.process_queue(); - info("execute_shplonk_batched_quotient_round: ", timer8.seconds(), "s"); - Timer timer9; // Fiat-Shamir: z // Compute partial evaluation Q_z execute_shplonk_partial_evaluation_round(); - info("execute_shplonk_partial_evaluation_round: ", timer9.seconds(), "s"); - Timer timer10; // Fiat-Shamir: z // Compute PCS opening proof (either KZG quotient commitment or IPA opening proof) execute_final_pcs_round(); - info("execute_final_pcs_round: ", timer10.seconds(), "s"); return export_proof(); } diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index 994f5680aa..c8c4b0c7f0 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -150,7 +150,8 @@ template class SumcheckRound { // Determine number of threads for multithreading. // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based // on a specified minimum number of iterations per thread. This eventually leads to the use of a single thread. - size_t max_num_threads = get_num_cpus(); // number of available threads + // For now we use a power of 2 number of threads simply to ensure the round size is evenly divided. + size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2) size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread size_t desired_num_threads = round_size / min_iterations_per_thread; size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified @@ -189,9 +190,9 @@ template class SumcheckRound { relation_parameters, pow_challenge); } - }); // parallel_for + }); - // Accumulate the per-thread univariate accumulators into a single accumulator + // Accumulate the per-thread univariate accumulators into a single set of accumulators for (auto& accumulators : thread_univariate_accumulators) { add_nested_tuples(univariate_accumulators, accumulators); } From 3fa8623fb76d2c477ffad74ebd03596ac0fc0265 Mon Sep 17 00:00:00 2001 From: ledwards2225 Date: Fri, 23 Jun 2023 15:17:15 +0000 Subject: [PATCH 6/7] some cleanup --- .../honk/composer/standard_honk_composer.test.cpp | 3 +-- cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/src/barretenberg/honk/composer/standard_honk_composer.test.cpp b/cpp/src/barretenberg/honk/composer/standard_honk_composer.test.cpp index 76c632124f..04ada2ef9b 100644 --- a/cpp/src/barretenberg/honk/composer/standard_honk_composer.test.cpp +++ b/cpp/src/barretenberg/honk/composer/standard_honk_composer.test.cpp @@ -391,8 +391,7 @@ TEST_F(StandardHonkComposerTests, SumcheckEvaluations) uint32_t b_idx = circuit_constructor.add_variable(b); uint32_t c_idx = circuit_constructor.add_variable(c); uint32_t d_idx = circuit_constructor.add_variable(d); - size_t num_iterations = 1 << 16; - for (size_t i = 0; i < num_iterations; i++) { + for (size_t i = 0; i < 16; i++) { circuit_constructor.create_add_gate( { a_idx, b_idx, c_idx, fr::one(), fr::one(), fr::neg_one(), fr::zero() }); circuit_constructor.create_add_gate( diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index c8c4b0c7f0..16ede729a1 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -140,6 +140,7 @@ template class SumcheckRound { const FF alpha) { // Precompute the vector of required powers of zeta + // TODO(luke): Parallelize this std::vector pow_challenges; pow_challenges.resize(round_size >> 1); pow_challenges[0] = pow_univariate.partial_evaluation_constant; @@ -320,6 +321,9 @@ template class SumcheckRound { } public: + // TODO(luke): Potentially make RelationUnivarites (tuple of tuples of Univariates) a class and make these utility + // functions class methods. Alternatively, move all of these tuple utilities (and the ones living elsewhere) to + // their own module. /** * Utility methods for tuple of tuples of Univariates */ From a8588fde1aa0391146a89b1f94dbf57cf4f0c180 Mon Sep 17 00:00:00 2001 From: ledwards2225 Date: Fri, 23 Jun 2023 19:15:39 +0000 Subject: [PATCH 7/7] resize --- cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index 16ede729a1..6c81dc30bc 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -141,8 +141,7 @@ template class SumcheckRound { { // Precompute the vector of required powers of zeta // TODO(luke): Parallelize this - std::vector pow_challenges; - pow_challenges.resize(round_size >> 1); + std::vector pow_challenges(round_size >> 1); pow_challenges[0] = pow_univariate.partial_evaluation_constant; for (size_t i = 1; i < (round_size >> 1); ++i) { pow_challenges[i] = pow_challenges[i - 1] * pow_univariate.zeta_pow_sqr; @@ -160,8 +159,7 @@ template class SumcheckRound { size_t iterations_per_thread = round_size / num_threads; // actual iterations per thread // Constuct univariate accumulator containers; one per thread - std::vector thread_univariate_accumulators; - thread_univariate_accumulators.resize(num_threads); + std::vector thread_univariate_accumulators(num_threads); for (auto& accum : thread_univariate_accumulators) { zero_univariates(accum); }