diff --git a/barretenberg/cpp/scripts/benchmark_remote.sh b/barretenberg/cpp/scripts/benchmark_remote.sh index e1d5907eab2d..05a61392584b 100755 --- a/barretenberg/cpp/scripts/benchmark_remote.sh +++ b/barretenberg/cpp/scripts/benchmark_remote.sh @@ -9,20 +9,19 @@ set -eu BENCHMARK=${1:-goblin_bench} COMMAND=${2:-./$BENCHMARK} +PRESET=${3:-clang16} +BUILD_DIR=${4:-build} # Move above script dir. cd $(dirname $0)/.. -# Create lock file -ssh $BB_SSH_KEY $BB_SSH_INSTANCE "touch ~/BENCHMARKING_IN_PROGRESS" - # Configure and build. -cmake --preset clang16 -cmake --build --preset clang16 --target $BENCHMARK +cmake --preset $PRESET +cmake --build --preset $PRESET --target $BENCHMARK source scripts/_benchmark_remote_lock.sh -cd build +cd $BUILD_DIR scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build ssh $BB_SSH_KEY $BB_SSH_INSTANCE \ "cd $BB_SSH_CPP_PATH/build ; $COMMAND" diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp index af92a45ae168..3e7f433be6e0 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp @@ -326,11 +326,34 @@ template struct alignas(32) field { **/ static void split_into_endomorphism_scalars(const field& k, field& k1, field& k2) { - // if the modulus is a 256-bit integer, we need to use a basis where g1, g2 have been shifted by 2^384 + // if the modulus is a >= 255-bit integer, we need to use a basis where g1, g2 have been shifted by 2^384 if constexpr (Params::modulus_3 >= 0x4000000000000000ULL) { split_into_endomorphism_scalars_384(k, k1, k2); - return; + } else { + std::pair, std::array> ret = split_into_endomorphism_scalars(k); + k1.data[0] = ret.first[0]; + k1.data[1] = ret.first[1]; + + // TODO(https://github.com/AztecProtocol/barretenberg/issues/851): We should move away from this hack by + // returning pair of uint64_t[2] instead of a half-set field +#if !defined(__clang__) && defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif + k2.data[0] = ret.second[0]; // NOLINT + k2.data[1] = ret.second[1]; +#if !defined(__clang__) && defined(__GNUC__) +#pragma GCC diagnostic pop +#endif } + } + + // NOTE: this form is only usable if the modulus is 254 bits or less, otherwise see + // split_into_endomorphism_scalars_384. + // TODO(https://github.com/AztecProtocol/barretenberg/issues/851): Unify these APIs. + static std::pair, std::array> split_into_endomorphism_scalars(const field& k) + { + static_assert(Params::modulus_3 < 0x4000000000000000ULL); field input = k.reduce_once(); constexpr field endo_g1 = { Params::endo_g1_lo, Params::endo_g1_mid, Params::endo_g1_hi, 0 }; @@ -369,15 +392,14 @@ template struct alignas(32) field { field t1 = (q2_lo - q1_lo).reduce_once(); field beta = cube_root_of_unity(); field t2 = (t1 * beta + input).reduce_once(); - k2.data[0] = t1.data[0]; - k2.data[1] = t1.data[1]; - k1.data[0] = t2.data[0]; - k1.data[1] = t2.data[1]; + return { + { t2.data[0], t2.data[1] }, + { t1.data[0], t1.data[1] }, + }; } static void split_into_endomorphism_scalars_384(const field& input, field& k1_out, field& k2_out) { - constexpr field minus_b1f{ Params::endo_minus_b1_lo, Params::endo_minus_b1_mid, diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp index 4dd06af4c38f..8dc10905f5a6 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp @@ -4,6 +4,7 @@ #include "barretenberg/ecc/curves/grumpkin/grumpkin.hpp" #include "barretenberg/ecc/curves/secp256k1/secp256k1.hpp" #include "barretenberg/ecc/curves/secp256r1/secp256r1.hpp" +#include "barretenberg/ecc/groups/element.hpp" #include "barretenberg/serialize/test_helper.hpp" #include @@ -129,3 +130,80 @@ TEST(AffineElement, Msgpack) auto [actual, expected] = msgpack_roundtrip(secp256k1::g1::affine_element{ 1, 1 }); EXPECT_EQ(actual, expected); } + +namespace bb::group_elements { +// mul_with_endomorphism and mul_without_endomorphism are private in affine_element. +// We could make those public to test or create other public utilities, but to keep the API intact we +// instead mark TestElementPrivate as a friend class so that our test functions can have access. +class TestElementPrivate { + public: + template + static Element mul_without_endomorphism(const Element& element, const Scalar& scalar) + { + return element.mul_without_endomorphism(scalar); + } + template + static Element mul_with_endomorphism(const Element& element, const Scalar& scalar) + { + return element.mul_with_endomorphism(scalar); + } +}; +} // namespace bb::group_elements + +// Our endomorphism-specialized multiplication should match our generic multiplication +TEST(AffineElement, MulWithEndomorphismMatchesMulWithoutEndomorphism) +{ + for (int i = 0; i < 100; i++) { + auto x1 = bb::group_elements::element(grumpkin::g1::affine_element::random_element()); + auto f1 = grumpkin::fr::random_element(); + auto r1 = bb::group_elements::TestElementPrivate::mul_without_endomorphism(x1, f1); + auto r2 = bb::group_elements::TestElementPrivate::mul_with_endomorphism(x1, f1); + EXPECT_EQ(r1, r2); + } +} + +// Multiplication of a point at infinity by a scalar should be a point at infinity +TEST(AffineElement, InfinityMulByScalarIsInfinity) +{ + auto result = grumpkin::g1::affine_element::infinity() * grumpkin::fr::random_element(); + EXPECT_TRUE(result.is_point_at_infinity()); +} + +// Batched multiplication of points should match +TEST(AffineElement, BatchMulMatchesNonBatchMul) +{ + constexpr size_t num_points = 512; + std::vector affine_points; + for (size_t i = 0; i < num_points - 1; ++i) { + affine_points.emplace_back(grumpkin::g1::affine_element::random_element()); + } + // Include a point at infinity to test the mixed infinity + non-infinity case + affine_points.emplace_back(grumpkin::g1::affine_element::infinity()); + grumpkin::fr exponent = grumpkin::fr::random_element(); + std::vector result = + grumpkin::g1::element::batch_mul_with_endomorphism(affine_points, exponent); + size_t i = 0; + for (grumpkin::g1::affine_element& el : result) { + EXPECT_EQ(el, affine_points[i] * exponent); + i++; + } +} + +// Batched multiplication of a point at infinity by a scalar should result in points at infinity +TEST(AffineElement, InfinityBatchMulByScalarIsInfinity) +{ + constexpr size_t num_points = 1024; + std::vector affine_points; + for (size_t i = 0; i < num_points; ++i) { + affine_points.emplace_back(grumpkin::g1::affine_element::infinity()); + } + grumpkin::fr exponent = grumpkin::fr::random_element(); + std::vector result = + grumpkin::g1::element::batch_mul_with_endomorphism(affine_points, exponent); + for (grumpkin::g1::affine_element& el : result) { + EXPECT_TRUE(el.is_point_at_infinity()); + if (!el.is_point_at_infinity()) { + break; // dont spam with errors + } + } +} \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp index 0c34effbfaa0..f73f7ab5fe0f 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp @@ -95,15 +95,17 @@ template class alignas(32) element { const std::span>& second_group, const std::span>& results) noexcept; static std::vector> batch_mul_with_endomorphism( - const std::span>& points, const Fr& exponent) noexcept; + const std::span>& points, const Fr& scalar) noexcept; Fq x; Fq y; Fq z; private: - element mul_without_endomorphism(const Fr& exponent) const noexcept; - element mul_with_endomorphism(const Fr& exponent) const noexcept; + // For test access to mul_without_endomorphism + friend class TestElementPrivate; + element mul_without_endomorphism(const Fr& scalar) const noexcept; + element mul_with_endomorphism(const Fr& scalar) const noexcept; template > static element random_coordinates_on_curve(numeric::RNG* engine = nullptr) noexcept; diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp index e97884a66bea..24e45600ae46 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp @@ -3,6 +3,7 @@ #include "barretenberg/common/thread.hpp" #include "barretenberg/ecc/groups/element.hpp" #include "element.hpp" +#include // NOLINTBEGIN(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays) namespace bb::group_elements { @@ -594,101 +595,115 @@ element element::random_element(numeric::RNG* engine) noex } template -element element::mul_without_endomorphism(const Fr& exponent) const noexcept +element element::mul_without_endomorphism(const Fr& scalar) const noexcept { - const uint256_t converted_scalar(exponent); + const uint256_t converted_scalar(scalar); if (converted_scalar == 0) { - element result{ Fq::zero(), Fq::zero(), Fq::zero() }; - result.self_set_infinity(); - return result; + return element::infinity(); } - element work_element(*this); + element accumulator(*this); const uint64_t maximum_set_bit = converted_scalar.get_msb(); // This is simpler and doublings of infinity should be fast. We should think if we want to defend against the // timing leak here (if used with ECDSA it can sometimes lead to private key compromise) for (uint64_t i = maximum_set_bit - 1; i < maximum_set_bit; --i) { - work_element.self_dbl(); + accumulator.self_dbl(); if (converted_scalar.get_bit(i)) { - work_element += *this; + accumulator += *this; } } - return work_element; + return accumulator; } +namespace detail { +// Represents the result of +using EndoScalars = std::pair, std::array>; + +/** + * @brief Handles the WNAF computation for scalars that are split using an endomorphism, + * achieved through `split_into_endomorphism_scalars`. It facilitates efficient computation of elliptic curve + * point multiplication by optimizing the representation of these scalars. + * + * @tparam Element The data type of elements in the elliptic curve. + * @tparam NUM_ROUNDS The number of computation rounds for WNAF. + */ +template struct EndomorphismWnaf { + // NUM_WNAF_BITS: Number of bits per window in the WNAF representation. + static constexpr size_t NUM_WNAF_BITS = 4; + // table: Stores the WNAF representation of the scalars. + std::array table; + // skew and endo_skew: Indicate if our original scalar is even or odd. + bool skew = false; + bool endo_skew = false; + + /** + * @param scalars A pair of 128-bit scalars (as two uint64_t arrays), split using an endomorphism. + */ + EndomorphismWnaf(const EndoScalars& scalars) + { + wnaf::fixed_wnaf(&scalars.first[0], &table[0], skew, 0, 2, NUM_WNAF_BITS); + wnaf::fixed_wnaf(&scalars.second[0], &table[1], endo_skew, 0, 2, NUM_WNAF_BITS); + } +}; + +} // namespace detail + template -element element::mul_with_endomorphism(const Fr& exponent) const noexcept +element element::mul_with_endomorphism(const Fr& scalar) const noexcept { - const Fr converted_scalar = exponent.from_montgomery_form(); + // Consider the infinity flag, return infinity if set + if (is_point_at_infinity()) { + return element::infinity(); + } + constexpr size_t NUM_ROUNDS = 32; + const Fr converted_scalar = scalar.from_montgomery_form(); if (converted_scalar.is_zero()) { - element result{ Fq::zero(), Fq::zero(), Fq::zero() }; - result.self_set_infinity(); - return result; + return element::infinity(); } + static constexpr size_t LOOKUP_SIZE = 8; + std::array lookup_table; - constexpr size_t lookup_size = 8; - constexpr size_t num_rounds = 32; - constexpr size_t num_wnaf_bits = 4; - std::array lookup_table; - - element d2 = element(*this); - d2.self_dbl(); + element d2 = dbl(); lookup_table[0] = element(*this); - for (size_t i = 1; i < lookup_size; ++i) { + for (size_t i = 1; i < LOOKUP_SIZE; ++i) { lookup_table[i] = lookup_table[i - 1] + d2; } - uint64_t wnaf_table[num_rounds * 2]; - Fr endo_scalar; - Fr::split_into_endomorphism_scalars(converted_scalar, endo_scalar, *(Fr*)&endo_scalar.data[2]); // NOLINT - - bool skew = false; - bool endo_skew = false; - - wnaf::fixed_wnaf(&endo_scalar.data[0], &wnaf_table[0], skew, 0, 2, num_wnaf_bits); - wnaf::fixed_wnaf(&endo_scalar.data[2], &wnaf_table[1], endo_skew, 0, 2, num_wnaf_bits); - - element work_element{ T::one_x, T::one_y, Fq::one() }; - work_element.self_set_infinity(); - - uint64_t wnaf_entry = 0; - uint64_t index = 0; - bool sign = false; + detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars(converted_scalar); + detail::EndomorphismWnaf wnaf{ endo_scalars }; + element accumulator{ T::one_x, T::one_y, Fq::one() }; + accumulator.self_set_infinity(); Fq beta = Fq::cube_root_of_unity(); - for (size_t i = 0; i < num_rounds * 2; ++i) { - wnaf_entry = wnaf_table[i]; - index = wnaf_entry & 0x0fffffffU; - sign = static_cast((wnaf_entry >> 31) & 1); + for (size_t i = 0; i < NUM_ROUNDS * 2; ++i) { + uint64_t wnaf_entry = wnaf.table[i]; + uint64_t index = wnaf_entry & 0x0fffffffU; + bool sign = static_cast((wnaf_entry >> 31) & 1); const bool is_odd = ((i & 1) == 1); auto to_add = lookup_table[static_cast(index)]; to_add.y.self_conditional_negate(sign ^ is_odd); if (is_odd) { to_add.x *= beta; } - work_element += to_add; + accumulator += to_add; - if (i != ((2 * num_rounds) - 1) && is_odd) { + if (i != ((2 * NUM_ROUNDS) - 1) && is_odd) { for (size_t j = 0; j < 4; ++j) { - work_element.self_dbl(); + accumulator.self_dbl(); } } } - auto temporary = -lookup_table[0]; - if (skew) { - work_element += temporary; + if (wnaf.skew) { + accumulator += -lookup_table[0]; } - - temporary = { lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z }; - - if (endo_skew) { - work_element += temporary; + if (wnaf.endo_skew) { + accumulator += element{ lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z }; } - return work_element; + return accumulator; } /** @@ -775,18 +790,18 @@ void element::batch_affine_add(const std::span> Vector of new points where each point is exponentâ‹…points[i] */ template std::vector> element::batch_mul_with_endomorphism( - const std::span>& points, const Fr& exponent) noexcept + const std::span>& points, const Fr& scalar) noexcept { BB_OP_COUNT_TIME(); typedef affine_element affine_element; @@ -887,7 +902,7 @@ std::vector> element::batch_mul_with_endomo /*finite_field_multiplications_per_iteration=*/6); }; // Compute wnaf for scalar - const Fr converted_scalar = exponent.from_montgomery_form(); + const Fr converted_scalar = scalar.from_montgomery_form(); // If the scalar is zero, just set results to the point at infinity if (converted_scalar.is_zero()) { @@ -911,10 +926,9 @@ std::vector> element::batch_mul_with_endomo return results; } - constexpr size_t lookup_size = 8; - constexpr size_t num_rounds = 32; - constexpr size_t num_wnaf_bits = 4; - std::array, lookup_size> lookup_table; + constexpr size_t LOOKUP_SIZE = 8; + constexpr size_t NUM_ROUNDS = 32; + std::array, LOOKUP_SIZE> lookup_table; for (auto& table : lookup_table) { table.resize(num_points); } @@ -924,8 +938,10 @@ std::vector> element::batch_mul_with_endomo num_points, [&temp_point_vector, &lookup_table, &points](size_t start, size_t end) { for (size_t i = start; i < end; ++i) { - temp_point_vector[i] = points[i]; - lookup_table[0][i] = points[i]; + // If the point is at infinity we fix-up the result later + // To avoid 'trying to invert zero in the field' we set the point to 'one' here + temp_point_vector[i] = points[i].is_point_at_infinity() ? affine_element::one() : points[i]; + lookup_table[0][i] = points[i].is_point_at_infinity() ? affine_element::one() : points[i]; } }, /*finite_field_additions_per_iteration=*/0, @@ -938,7 +954,7 @@ std::vector> element::batch_mul_with_endomo // Construct lookup table batch_affine_double(&temp_point_vector[0]); - for (size_t j = 1; j < lookup_size; ++j) { + for (size_t j = 1; j < LOOKUP_SIZE; ++j) { run_loop_in_parallel_if_effective( num_points, [j, &lookup_table](size_t start, size_t end) { @@ -956,18 +972,8 @@ std::vector> element::batch_mul_with_endomo batch_affine_add_internal(&temp_point_vector[0], &lookup_table[j][0]); } - uint64_t wnaf_table[num_rounds * 2]; - Fr endo_scalar; - - // Split single scalar into 2 scalar in endo form - Fr::split_into_endomorphism_scalars(converted_scalar, endo_scalar, *(Fr*)&endo_scalar.data[2]); // NOLINT - - bool skew = false; - bool endo_skew = false; - - // Construct wnaf forms from scalars - wnaf::fixed_wnaf(&endo_scalar.data[0], &wnaf_table[0], skew, 0, 2, num_wnaf_bits); - wnaf::fixed_wnaf(&endo_scalar.data[2], &wnaf_table[1], endo_skew, 0, 2, num_wnaf_bits); + detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars(converted_scalar); + detail::EndomorphismWnaf wnaf{ endo_scalars }; std::vector work_elements(num_points); @@ -977,7 +983,7 @@ std::vector> element::batch_mul_with_endomo bool sign = 0; // Prepare elements for the first batch addition for (size_t j = 0; j < 2; ++j) { - wnaf_entry = wnaf_table[j]; + wnaf_entry = wnaf.table[j]; index = wnaf_entry & 0x0fffffffU; sign = static_cast((wnaf_entry >> 31) & 1); const bool is_odd = ((j & 1) == 1); @@ -1010,8 +1016,8 @@ std::vector> element::batch_mul_with_endomo // First cycle of addition batch_affine_add_internal(&temp_point_vector[0], &work_elements[0]); // Run through SM logic in wnaf form (excluding the skew) - for (size_t j = 2; j < num_rounds * 2; ++j) { - wnaf_entry = wnaf_table[j]; + for (size_t j = 2; j < NUM_ROUNDS * 2; ++j) { + wnaf_entry = wnaf.table[j]; index = wnaf_entry & 0x0fffffffU; sign = static_cast((wnaf_entry >> 31) & 1); const bool is_odd = ((j & 1) == 1); @@ -1044,7 +1050,7 @@ std::vector> element::batch_mul_with_endomo } // Apply skew for the first endo scalar - if (skew) { + if (wnaf.skew) { run_loop_in_parallel_if_effective( num_points, [&lookup_table, &temp_point_vector](size_t start, size_t end) { @@ -1063,12 +1069,11 @@ std::vector> element::batch_mul_with_endomo batch_affine_add_internal(&temp_point_vector[0], &work_elements[0]); } // Apply skew for the second endo scalar - if (endo_skew) { + if (wnaf.endo_skew) { run_loop_in_parallel_if_effective( num_points, [beta, &lookup_table, &temp_point_vector](size_t start, size_t end) { for (size_t i = start; i < end; ++i) { - temp_point_vector[i] = lookup_table[0][i]; temp_point_vector[i].x *= beta; } @@ -1082,6 +1087,22 @@ std::vector> element::batch_mul_with_endomo /*sequential_copy_ops_per_iteration=*/1); batch_affine_add_internal(&temp_point_vector[0], &work_elements[0]); } + // handle points at infinity explicitly + run_loop_in_parallel_if_effective( + num_points, + [&](size_t start, size_t end) { + for (size_t i = start; i < end; ++i) { + work_elements[i] = + points[i].is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i]; + } + }, + /*finite_field_additions_per_iteration=*/0, + /*finite_field_multiplications_per_iteration=*/1, + /*finite_field_inversions_per_iteration=*/0, + /*group_element_additions_per_iteration=*/0, + /*group_element_doublings_per_iteration=*/0, + /*scalar_multiplications_per_iteration=*/0, + /*sequential_copy_ops_per_iteration=*/1); return work_elements; } diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/wnaf.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/wnaf.hpp index e7da2ad084aa..c022809208aa 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/groups/wnaf.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/groups/wnaf.hpp @@ -156,6 +156,19 @@ inline void fixed_wnaf_packed( wnaf[0] = ((slice + predicate) >> 1UL) | (point_index); } +/** + * @brief Performs fixed-window non-adjacent form (WNAF) computation for scalar multiplication. + * + * WNAF is a method for representing integers which optimizes the number of non-zero terms, which in turn optimizes + * the number of point doublings in scalar multiplication, in turn aiding efficiency. + * + * @param scalar Pointer to 128-bit scalar for which WNAF is to be computed. + * @param wnaf Pointer to num_points+1 size array where the computed WNAF will be stored. + * @param skew_map Reference to a boolean variable which will be set based on the least significant bit of the scalar. + * @param point_index The index of the point being computed in the context of multiple point multiplication. + * @param num_points The number of points being computed in parallel. + * @param wnaf_bits The number of bits to use in each window of the WNAF representation. + */ inline void fixed_wnaf(const uint64_t* scalar, uint64_t* wnaf, bool& skew_map,