diff --git a/cpp/src/aztec/plonk/composer/ultra_composer.cpp b/cpp/src/aztec/plonk/composer/ultra_composer.cpp index 3114533ecc..40f3ee1bbe 100644 --- a/cpp/src/aztec/plonk/composer/ultra_composer.cpp +++ b/cpp/src/aztec/plonk/composer/ultra_composer.cpp @@ -1238,8 +1238,18 @@ std::vector UltraComposer::decompose_into_default_range(const uint32_t return sublimb_indices; } +/** + * @brief Constrain a variable to a range + * + * @details Checks if the range [0, target_range] already exists. If it doesn't, then creates a new range. Then tags + * variable as belonging to this set. + * + * @param variable_index + * @param target_range + */ void UltraComposer::create_new_range_constraint(const uint32_t variable_index, const uint64_t target_range) { + ASSERT(target_range != 0); if (range_lists.count(target_range) == 0) { range_lists.insert({ target_range, create_range_list(target_range) }); } @@ -1677,24 +1687,50 @@ void UltraComposer::apply_aux_selectors(const AUX_SELECTORS type) * Applies range constraints to two 70-bit limbs, splititng each into 5 14-bit sublimbs. * We can efficiently chain together two 70-bit limb checks in 3 gates, using auxiliary gates **/ -void UltraComposer::range_constrain_two_limbs(const uint32_t lo_idx, const uint32_t hi_idx) +void UltraComposer::range_constrain_two_limbs(const uint32_t lo_idx, + const uint32_t hi_idx, + const size_t lo_limb_bits, + const size_t hi_limb_bits) { - constexpr uint64_t SUBLIMB_SIZE = 1ULL << 14; + // Validate limbs are <= 70 bits. If limbs are larger we require more witnesses and cannot use our limb accumulation + // custom gate + ASSERT(lo_limb_bits <= (14 * 5)); + ASSERT(hi_limb_bits <= (14 * 5)); - const auto get_sublimbs = [&](const uint32_t& limb_idx) { + // Sometimes we try to use limbs that are too large. 
It's easier to catch this issue here + const auto get_sublimbs = [&](const uint32_t& limb_idx, const std::array& sublimb_masks) { const uint256_t limb = get_variable(limb_idx); - constexpr uint256_t SUBLIMB_MASK = (uint256_t(1) << 14) - 1; + // we can use constant 2^14 - 1 mask here. If the sublimb value exceeds the expected value then witness will + // fail the range check below + // We also use zero_idx to substitute variables that should be zero + constexpr uint256_t MAX_SUBLIMB_MASK = (uint256_t(1) << 14) - 1; std::array sublimb_indices; - sublimb_indices[0] = add_variable(limb & SUBLIMB_MASK); - sublimb_indices[1] = add_variable((limb >> 14) & SUBLIMB_MASK); - sublimb_indices[2] = add_variable((limb >> 28) & SUBLIMB_MASK); - sublimb_indices[3] = add_variable((limb >> 42) & SUBLIMB_MASK); - sublimb_indices[4] = add_variable((limb >> 56) & SUBLIMB_MASK); + sublimb_indices[0] = sublimb_masks[0] != 0 ? add_variable(limb & MAX_SUBLIMB_MASK) : zero_idx; + sublimb_indices[1] = sublimb_masks[1] != 0 ? add_variable((limb >> 14) & MAX_SUBLIMB_MASK) : zero_idx; + sublimb_indices[2] = sublimb_masks[2] != 0 ? add_variable((limb >> 28) & MAX_SUBLIMB_MASK) : zero_idx; + sublimb_indices[3] = sublimb_masks[3] != 0 ? add_variable((limb >> 42) & MAX_SUBLIMB_MASK) : zero_idx; + sublimb_indices[4] = sublimb_masks[4] != 0 ? add_variable((limb >> 56) & MAX_SUBLIMB_MASK) : zero_idx; return sublimb_indices; }; - const std::array lo_sublimbs = get_sublimbs(lo_idx); - const std::array hi_sublimbs = get_sublimbs(hi_idx); + const auto get_limb_masks = [](size_t limb_bits) { + std::array sublimb_masks; + sublimb_masks[0] = limb_bits >= 14 ? 14 : limb_bits; + sublimb_masks[1] = limb_bits >= 28 ? 14 : (limb_bits > 14 ? limb_bits - 14 : 0); + sublimb_masks[2] = limb_bits >= 42 ? 14 : (limb_bits > 28 ? limb_bits - 28 : 0); + sublimb_masks[3] = limb_bits >= 56 ? 14 : (limb_bits > 42 ? limb_bits - 42 : 0); + sublimb_masks[4] = (limb_bits > 56 ? 
limb_bits - 56 : 0); + + for (auto& mask : sublimb_masks) { + mask = (1ULL << mask) - 1ULL; + } + return sublimb_masks; + }; + + const auto lo_masks = get_limb_masks(lo_limb_bits); + const auto hi_masks = get_limb_masks(hi_limb_bits); + const std::array lo_sublimbs = get_sublimbs(lo_idx, lo_masks); + const std::array hi_sublimbs = get_sublimbs(hi_idx, hi_masks); w_l.emplace_back(lo_sublimbs[0]); w_r.emplace_back(lo_sublimbs[1]); @@ -1716,31 +1752,43 @@ void UltraComposer::range_constrain_two_limbs(const uint32_t lo_idx, const uint3 apply_aux_selectors(AUX_SELECTORS::NONE); n += 3; - create_new_range_constraint(lo_sublimbs[0], SUBLIMB_SIZE - 1); - create_new_range_constraint(lo_sublimbs[1], SUBLIMB_SIZE - 1); - create_new_range_constraint(lo_sublimbs[2], SUBLIMB_SIZE - 1); - create_new_range_constraint(lo_sublimbs[3], SUBLIMB_SIZE - 1); - create_new_range_constraint(lo_sublimbs[4], SUBLIMB_SIZE - 1); - - create_new_range_constraint(hi_sublimbs[0], SUBLIMB_SIZE - 1); - create_new_range_constraint(hi_sublimbs[1], SUBLIMB_SIZE - 1); - create_new_range_constraint(hi_sublimbs[2], SUBLIMB_SIZE - 1); - create_new_range_constraint(hi_sublimbs[3], SUBLIMB_SIZE - 1); - create_new_range_constraint(hi_sublimbs[4], SUBLIMB_SIZE - 1); + for (size_t i = 0; i < 5; i++) { + if (lo_masks[i] != 0) { + create_new_range_constraint(lo_sublimbs[i], lo_masks[i]); + } + if (hi_masks[i] != 0) { + create_new_range_constraint(hi_sublimbs[i], hi_masks[i]); + } + } }; -std::array UltraComposer::decompose_non_native_field_double_width_limb(const uint32_t limb_idx) +/** + * @brief Decompose a single witness into two, where the lowest is DEFAULT_NON_NATIVE_FIELD_LIMB_BITS (68) range + * constrained and the highest is (num_limb_bits - DEFAULT_NON_NATIVE_FIELD_LIMB_BITS) range constrained. + * + * @details Doesn't create gates constraining the limbs to each other.
+ * + * @param limb_idx The index of the limb that will be decomposed + * @param num_limb_bits The range we want to constrain the original limb to + * @return std::array The indices of new limbs. + */ +std::array UltraComposer::decompose_non_native_field_double_width_limb(const uint32_t limb_idx, + const size_t num_limb_bits) { - constexpr barretenberg::fr LIMB_MASK = (uint256_t(1) << 68) - 1; + ASSERT(uint256_t(get_variable_reference(limb_idx)) < (uint256_t(1) << num_limb_bits)); + constexpr barretenberg::fr LIMB_MASK = (uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS) - 1; const uint256_t value = get_variable(limb_idx); const uint256_t low = value & LIMB_MASK; - const uint256_t hi = value >> 68; - ASSERT(low + (hi << 68) == value); + const uint256_t hi = value >> DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; + ASSERT(low + (hi << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS) == value); const uint32_t low_idx = add_variable(low); const uint32_t hi_idx = add_variable(hi); - range_constrain_two_limbs(low_idx, hi_idx); + ASSERT(num_limb_bits > DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + const size_t lo_bits = DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; + const size_t hi_bits = num_limb_bits - DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; + range_constrain_two_limbs(low_idx, hi_idx, lo_bits, hi_bits); return std::array{ low_idx, hi_idx }; } @@ -1758,9 +1806,7 @@ std::array UltraComposer::decompose_non_native_field_double_width_l * N.B. 
this method does NOT evaluate the prime field component of non-native field multiplications **/ std::array UltraComposer::evaluate_non_native_field_multiplication( - const non_native_field_witnesses& input, - const bool range_constrain_quotient_and_remainder, - const bool range_constrain_outputs) + const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder) { std::array a{ @@ -1788,11 +1834,13 @@ std::array UltraComposer::evaluate_non_native_field_multiplication( get_variable(input.r[3]), }; - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << 68; - constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << 136; - constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << 204; - constexpr barretenberg::fr LIMB_RSHIFT = barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << 68); - constexpr barretenberg::fr LIMB_RSHIFT_2 = barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << 136); + constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; + constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << (3 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + constexpr barretenberg::fr LIMB_RSHIFT = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + constexpr barretenberg::fr LIMB_RSHIFT_2 = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); barretenberg::fr lo_0 = a[0] * b[0] - r[0] + (a[1] * b[0] + a[0] * b[1]) * LIMB_SHIFT; barretenberg::fr lo_1 = (lo_0 + q[0] * input.neg_modulus[0] + @@ -1914,20 +1962,14 @@ std::array UltraComposer::evaluate_non_native_field_multiplication( 0, }); - // Sometimes we may want to apply this step separately, after adding additional terms into lo_1 and hi_2 - // For example, if we want to evaluate a field multiplication combined with several field additions. 
- if (range_constrain_outputs) { - range_constrain_two_limbs(hi_3_idx, lo_1_idx); - } - return std::array{ lo_1_idx, hi_3_idx }; } /** * Compute the limb-multiplication part of a non native field mul * - * i.e. compute the low 204 and high 204 bit components of `a * b` where `a, b` are nnf elements composed of 4 68-bit - *limbs + * i.e. compute the low 204 and high 204 bit components of `a * b` where `a, b` are nnf elements composed of 4 + * limbs with size DEFAULT_NON_NATIVE_FIELD_LIMB_BITS * **/ std::array UltraComposer::evaluate_partial_non_native_field_multiplication( @@ -1947,7 +1989,7 @@ std::array UltraComposer::evaluate_partial_non_native_field_multipl get_variable(input.b[3]), }; - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << 68; + constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; barretenberg::fr lo_0 = a[0] * b[0] + (a[1] * b[0] + a[0] * b[1]) * LIMB_SHIFT; diff --git a/cpp/src/aztec/plonk/composer/ultra_composer.hpp b/cpp/src/aztec/plonk/composer/ultra_composer.hpp index b899e6b91c..98530b8870 100644 --- a/cpp/src/aztec/plonk/composer/ultra_composer.hpp +++ b/cpp/src/aztec/plonk/composer/ultra_composer.hpp @@ -17,9 +17,10 @@ class UltraComposer : public ComposerBase { // The plookup range proof requires work linear in range size, thus cannot be used directly for // large ranges such as 2^64. 
For such ranges the element will be decomposed into smaller // chuncks according to the parameter below - static constexpr size_t DEFAULT_PLOOKUP_RANGE_BITNUM = 9; + static constexpr size_t DEFAULT_PLOOKUP_RANGE_BITNUM = 14; static constexpr size_t DEFAULT_PLOOKUP_RANGE_STEP_SIZE = 3; static constexpr size_t DEFAULT_PLOOKUP_RANGE_SIZE = (1 << DEFAULT_PLOOKUP_RANGE_BITNUM) - 1; + static constexpr size_t DEFAULT_NON_NATIVE_FIELD_LIMB_BITS = 68; static constexpr uint32_t UNINITIALIZED_MEMORY_RECORD = UINT32_MAX; struct non_native_field_witnesses { @@ -313,12 +314,14 @@ class UltraComposer : public ComposerBase { /** * Non Native Field Arithmetic **/ - void range_constrain_two_limbs(const uint32_t lo_idx, const uint32_t hi_idx); - std::array decompose_non_native_field_double_width_limb(const uint32_t limb_idx); + void range_constrain_two_limbs(const uint32_t lo_idx, + const uint32_t hi_idx, + const size_t lo_limb_bits = DEFAULT_NON_NATIVE_FIELD_LIMB_BITS, + const size_t hi_limb_bits = DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + std::array decompose_non_native_field_double_width_limb( + const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); std::array evaluate_non_native_field_multiplication( - const non_native_field_witnesses& input, - const bool range_constrain_quotient_and_remainder = true, - const bool range_constrain_remainders = true); + const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true); std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& input); typedef std::pair scaled_witness; typedef std::tuple add_simple; diff --git a/cpp/src/aztec/plonk/composer/ultra_composer.test.cpp b/cpp/src/aztec/plonk/composer/ultra_composer.test.cpp index c7618744bf..ce04995c98 100644 --- a/cpp/src/aztec/plonk/composer/ultra_composer.test.cpp +++ b/cpp/src/aztec/plonk/composer/ultra_composer.test.cpp @@ -704,7 +704,8 @@ TEST(ultra_composer, 
non_native_field_multiplication) waffle::UltraComposer::non_native_field_witnesses inputs{ a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)), }; - composer.evaluate_non_native_field_multiplication(inputs); + const auto [lo_1_idx, hi_1_idx] = composer.evaluate_non_native_field_multiplication(inputs); + composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70); auto prover = composer.create_prover(); auto verifier = composer.create_verifier(); diff --git a/cpp/src/aztec/stdlib/encryption/ecdsa/ecdsa.test.cpp b/cpp/src/aztec/stdlib/encryption/ecdsa/ecdsa.test.cpp index a84d954e77..1e1c2c8f19 100644 --- a/cpp/src/aztec/stdlib/encryption/ecdsa/ecdsa.test.cpp +++ b/cpp/src/aztec/stdlib/encryption/ecdsa/ecdsa.test.cpp @@ -13,7 +13,7 @@ namespace test_stdlib_ecdsa { using Composer = waffle::UltraComposer; using curve = stdlib::secp256k1; -HEAVY_TEST(stdlib_ecdsa, verify_signature) +TEST(stdlib_ecdsa, verify_signature) { Composer composer = Composer(); diff --git a/cpp/src/aztec/stdlib/primitives/bigfield/bigfield.hpp b/cpp/src/aztec/stdlib/primitives/bigfield/bigfield.hpp index 69fafc867f..1e03582c34 100644 --- a/cpp/src/aztec/stdlib/primitives/bigfield/bigfield.hpp +++ b/cpp/src/aztec/stdlib/primitives/bigfield/bigfield.hpp @@ -99,7 +99,10 @@ template class bigfield { bigfield(const bigfield& other); bigfield(bigfield&& other); - static bigfield create_from_u512_as_witness(Composer* ctx, const uint512_t& value, const bool can_overflow = false); + static bigfield create_from_u512_as_witness(Composer* ctx, + const uint512_t& value, + const bool can_overflow = false, + const size_t maximum_bitlength = 0); static bigfield from_witness(Composer* ctx, const barretenberg::field& input) { @@ -166,6 +169,7 @@ template class bigfield { uint512_t get_value() const; uint512_t get_maximum_value() const; + bigfield add_to_lower_limb(const field_t& other, uint256_t other_maximum_value) const; bigfield operator+(const bigfield& other) const; bigfield 
operator-(const bigfield& other) const; bigfield operator*(const bigfield& other) const; diff --git a/cpp/src/aztec/stdlib/primitives/bigfield/bigfield.test.cpp b/cpp/src/aztec/stdlib/primitives/bigfield/bigfield.test.cpp index 9325a51e77..378ea9012f 100644 --- a/cpp/src/aztec/stdlib/primitives/bigfield/bigfield.test.cpp +++ b/cpp/src/aztec/stdlib/primitives/bigfield/bigfield.test.cpp @@ -19,9 +19,9 @@ #include #define GET_COMPOSER_NAME_STRING(composer) \ - (typeid(composer) == typeid(waffle::StandardComposer) ? "StandardPlonk" \ - : typeid(composer) == typeid(waffle::TurboComposer) ? "TurboPlonk" \ - : "NULLPlonk") + (typeid(composer) == typeid(waffle::StandardComposer) \ + ? "StandardPlonk" \ + : typeid(composer) == typeid(waffle::TurboComposer) ? "TurboPlonk" : "NULLPlonk") namespace test_stdlib_bigfield { using namespace barretenberg; @@ -103,7 +103,7 @@ template class stdlib_bigfield : public testing::Test { static void test_mul() { auto composer = Composer(); - size_t num_repetitions = 1; + size_t num_repetitions = 4; for (size_t i = 0; i < num_repetitions; ++i) { fq inputs[3]{ fq::random_element(), fq::random_element(), fq::random_element() }; fq_ct a(witness_ct(&composer, fr(uint256_t(inputs[0]).slice(0, fq_ct::NUM_LIMB_BITS * 2))), @@ -115,7 +115,8 @@ template class stdlib_bigfield : public testing::Test { uint64_t before = composer.get_num_gates(); fq_ct c = a * b; uint64_t after = composer.get_num_gates(); - if (i == num_repetitions - 1) { + // Don't profile 1st repetition. 
It sets up a lookup table, cost is not representative of a typical mul + if (i == num_repetitions - 2) { std::cerr << "num gates per mul = " << after - before << std::endl; benchmark_info(GET_COMPOSER_NAME_STRING(Composer), "Bigfield", "MUL", "Gate Count", after - before); } diff --git a/cpp/src/aztec/stdlib/primitives/bigfield/bigfield_impl.hpp b/cpp/src/aztec/stdlib/primitives/bigfield/bigfield_impl.hpp index d32dbdd0df..105c2a7d3e 100644 --- a/cpp/src/aztec/stdlib/primitives/bigfield/bigfield_impl.hpp +++ b/cpp/src/aztec/stdlib/primitives/bigfield/bigfield_impl.hpp @@ -42,6 +42,13 @@ bigfield::bigfield(const field_t& low_bits_in, const bool can_overflow, const size_t maximum_bitlength) { + ASSERT((can_overflow == true && maximum_bitlength == 0) || + (can_overflow == false && (maximum_bitlength == 0 || maximum_bitlength > (3 * NUM_LIMB_BITS)))); + + // Check that the values of two parts are within specified bounds + ASSERT(uint256_t(low_bits_in.get_value()) < (uint256_t(1) << (NUM_LIMB_BITS * 2))); + ASSERT(uint256_t(high_bits_in.get_value()) < (uint256_t(1) << (NUM_LIMB_BITS * 2))); + context = low_bits_in.context == nullptr ? 
high_bits_in.context : low_bits_in.context; field_t limb_0(context); field_t limb_1(context); @@ -101,8 +108,8 @@ bigfield::bigfield(const field_t& low_bits_in, std::vector high_accumulator; if constexpr (C::type == waffle::PLOOKUP) { - const auto limb_witnesses = - context->decompose_non_native_field_double_width_limb(high_bits_in.normalize().witness_index); + const auto limb_witnesses = context->decompose_non_native_field_double_width_limb( + high_bits_in.normalize().witness_index, (size_t)num_high_limb_bits); limb_2.witness_index = limb_witnesses[0]; limb_3.witness_index = limb_witnesses[1]; field_t::evaluate_linear_identity(high_bits_in, -limb_2, -limb_3 * shift_1, field_t(0)); @@ -123,7 +130,13 @@ bigfield::bigfield(const field_t& low_bits_in, binary_basis_limbs[0] = Limb(limb_0, DEFAULT_MAXIMUM_LIMB); binary_basis_limbs[1] = Limb(limb_1, DEFAULT_MAXIMUM_LIMB); binary_basis_limbs[2] = Limb(limb_2, DEFAULT_MAXIMUM_LIMB); - binary_basis_limbs[3] = Limb(limb_3, can_overflow ? DEFAULT_MAXIMUM_LIMB : DEFAULT_MAXIMUM_MOST_SIGNIFICANT_LIMB); + if (maximum_bitlength > 0) { + uint256_t max_limb_value = (uint256_t(1) << (maximum_bitlength - (3 * NUM_LIMB_BITS))) - 1; + binary_basis_limbs[3] = Limb(limb_3, max_limb_value); + } else { + binary_basis_limbs[3] = + Limb(limb_3, can_overflow ? DEFAULT_MAXIMUM_LIMB : DEFAULT_MAXIMUM_MOST_SIGNIFICANT_LIMB); + } prime_basis_limb = low_bits_in + (high_bits_in * shift_2); } @@ -147,9 +160,27 @@ bigfield::bigfield(bigfield&& other) , prime_basis_limb(other.prime_basis_limb) {} +/** + * @brief Creates a bigfield element from a uint512_t. + * Bigfield element is constructed as a witness and not a circuit constant + * + * @param ctx + * @param value + * @param can_overflow Can the input value have more than log2(modulus) bits? + * @param maximum_bitlength Provide the explicit maximum bitlength if known. 
Otherwise bigfield max value will be either + * log2(modulus) bits iff can_overflow = false, or (4 * NUM_LIMB_BITS) iff can_overflow = true + * @return bigfield + * + * @details This method is 1 gate more efficient than constructing from 2 field_ct elements. + */ template -bigfield bigfield::create_from_u512_as_witness(C* ctx, const uint512_t& value, const bool can_overflow) +bigfield bigfield::create_from_u512_as_witness(C* ctx, + const uint512_t& value, + const bool can_overflow, + const size_t maximum_bitlength) { + ASSERT((can_overflow == true && maximum_bitlength == 0) || + (can_overflow == false && (maximum_bitlength == 0 || maximum_bitlength > (3 * NUM_LIMB_BITS)))); std::array limbs; limbs[0] = value.slice(0, NUM_LIMB_BITS).lo; limbs[1] = value.slice(NUM_LIMB_BITS, NUM_LIMB_BITS * 2).lo; @@ -179,8 +210,8 @@ bigfield bigfield::create_from_u512_as_witness(C* ctx, const uint512 -1, 0 }, true); - ctx->range_constrain_two_limbs(limb_0.witness_index, limb_1.witness_index); - ctx->range_constrain_two_limbs(limb_2.witness_index, limb_3.witness_index); + + uint64_t num_last_limb_bits = (can_overflow) ? NUM_LIMB_BITS : NUM_LAST_LIMB_BITS; bigfield result(ctx); result.binary_basis_limbs[0] = Limb(limb_0, DEFAULT_MAXIMUM_LIMB); @@ -188,12 +219,26 @@ bigfield bigfield::create_from_u512_as_witness(C* ctx, const uint512 result.binary_basis_limbs[2] = Limb(limb_2, DEFAULT_MAXIMUM_LIMB); result.binary_basis_limbs[3] = Limb(limb_3, can_overflow ? 
DEFAULT_MAXIMUM_LIMB : DEFAULT_MAXIMUM_MOST_SIGNIFICANT_LIMB); + + // if maximum_bitlength is set, this supersedes can_overflow + if (maximum_bitlength > 0) { + ASSERT(maximum_bitlength > 3 * NUM_LIMB_BITS); + num_last_limb_bits = maximum_bitlength - (3 * NUM_LIMB_BITS); + uint256_t max_limb_value = (uint256_t(1) << num_last_limb_bits) - 1; + result.binary_basis_limbs[3].maximum_value = max_limb_value; + } result.prime_basis_limb = prime_limb; + ctx->range_constrain_two_limbs( + limb_0.witness_index, limb_1.witness_index, (size_t)NUM_LIMB_BITS, (size_t)NUM_LIMB_BITS); + ctx->range_constrain_two_limbs( + limb_2.witness_index, limb_3.witness_index, (size_t)NUM_LIMB_BITS, (size_t)num_last_limb_bits); + return result; } else { return bigfield(witness_t(ctx, fr(limbs[0] + limbs[1] * shift_1)), witness_t(ctx, fr(limbs[2] + limbs[3] * shift_1)), - can_overflow); + can_overflow, + maximum_bitlength); } } @@ -283,6 +328,40 @@ template uint512_t bigfield::get_maximum_value() return t0 + t1 + t2 + t3; } +/** + * @brief Add a field element to the lower limb. CAUTION (the element has to be constrained before using this function) + * + * @details Sometimes we need to add a small constrained value to a bigfield element (for example, a boolean value), but + * we don't want to construct a full bigfield element for that as it would take too many gates. If the maximum value of + * the field element being added is small enough, we can simply add it to the lowest limb and increase its maximum + * value. That will create 2 additional constraints instead of 5/3 needed to add 2 bigfield elements and several needed + * to construct a bigfield element.
+ * + * @tparam C Composer + * @tparam T Field Parameters + * @param other Field element that will be added to the lower + * @param other_maximum_value The maximum value of other + * @return bigfield Result + */ +template +bigfield bigfield::add_to_lower_limb(const field_t& other, uint256_t other_maximum_value) const +{ + reduction_check(); + ASSERT((other_maximum_value + binary_basis_limbs[0].maximum_value) <= get_maximum_unreduced_limb_value()); + // needed cause a constant doesn't have a valid context + C* ctx = context ? context : other.context; + + if (is_constant() && other.is_constant()) { + return bigfield(ctx, uint256_t((get_value() + uint256_t(other.get_value())) % modulus_u512)); + } + bigfield result = *this; + result.binary_basis_limbs[0].maximum_value = binary_basis_limbs[0].maximum_value + other_maximum_value; + + result.binary_basis_limbs[0].element = binary_basis_limbs[0].element + other; + result.prime_basis_limb = prime_basis_limb + other; + return result; +} + template bigfield bigfield::operator+(const bigfield& other) const { reduction_check(); @@ -621,13 +700,8 @@ template bigfield bigfield::operator*(const } return (*this).operator*(other); } - quotient = bigfield(witness_t(ctx, fr(quotient_value.slice(0, NUM_LIMB_BITS * 2).lo)), - witness_t(ctx, fr(quotient_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 4).lo)), - false, - num_quotient_bits); - remainder = bigfield( - witness_t(ctx, fr(remainder_value.slice(0, NUM_LIMB_BITS * 2).lo)), - witness_t(ctx, fr(remainder_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 3 + NUM_LAST_LIMB_BITS).lo))); + quotient = create_from_u512_as_witness(ctx, quotient_value, false, num_quotient_bits); + remainder = create_from_u512_as_witness(ctx, remainder_value); }; // Call `evaluate_multiply_add` to validate the correctness of our computed quotient and remainder @@ -739,13 +813,9 @@ bigfield bigfield::internal_div(const std::vector& numerat if (check_for_zero) { denominator.assert_is_not_equal(zero()); } - 
quotient = bigfield(witness_t(ctx, fr(quotient_value.slice(0, NUM_LIMB_BITS * 2).lo)), - witness_t(ctx, fr(quotient_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 4).lo)), - false, - num_quotient_bits); - inverse = bigfield( - witness_t(ctx, fr(inverse_value.slice(0, NUM_LIMB_BITS * 2).lo)), - witness_t(ctx, fr(inverse_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 3 + NUM_LAST_LIMB_BITS).lo))); + + quotient = create_from_u512_as_witness(ctx, quotient_value, false, num_quotient_bits); + inverse = create_from_u512_as_witness(ctx, inverse_value); } unsafe_evaluate_multiply_add(denominator, inverse, { unreduced_zero() }, quotient, numerators); @@ -804,13 +874,8 @@ template bigfield bigfield::sqr() const return sqr(); } - quotient = bigfield(witness_t(ctx, fr(quotient_value.slice(0, NUM_LIMB_BITS * 2).lo)), - witness_t(ctx, fr(quotient_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 4).lo)), - false, - num_quotient_bits); - remainder = bigfield( - witness_t(ctx, fr(remainder_value.slice(0, NUM_LIMB_BITS * 2).lo)), - witness_t(ctx, fr(remainder_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 3 + NUM_LAST_LIMB_BITS).lo))); + quotient = create_from_u512_as_witness(ctx, quotient_value, false, num_quotient_bits); + remainder = create_from_u512_as_witness(ctx, remainder_value); }; unsafe_evaluate_square_add(*this, {}, quotient, remainder); @@ -877,13 +942,8 @@ template bigfield bigfield::sqradd(const st uint512_t quotient_value = quotient_1024.lo; uint256_t remainder_value = remainder_1024.lo.lo; - quotient = bigfield(witness_t(ctx, fr(quotient_value.slice(0, NUM_LIMB_BITS * 2).lo)), - witness_t(ctx, fr(quotient_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 4).lo)), - false, - num_quotient_bits); - remainder = bigfield( - witness_t(ctx, fr(remainder_value.slice(0, NUM_LIMB_BITS * 2))), - witness_t(ctx, fr(remainder_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 3 + NUM_LAST_LIMB_BITS)))); + quotient = create_from_u512_as_witness(ctx, quotient_value, false, num_quotient_bits); 
+ remainder = create_from_u512_as_witness(ctx, remainder_value); }; unsafe_evaluate_square_add(*this, to_add, quotient, remainder); return remainder; @@ -940,13 +1000,8 @@ bigfield bigfield::madd(const bigfield& to_mul, const std::vector bigfield::mult_madd(const std::vector& mul_left, bigfield remainder; bigfield quotient; // Constrain quotient to mitigate CRT overflow attacks - quotient = bigfield(witness_t(ctx, fr(quotient_value.slice(0, NUM_LIMB_BITS * 2).lo)), - witness_t(ctx, fr(quotient_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 4).lo)), - false, - num_quotient_bits); + quotient = create_from_u512_as_witness(ctx, quotient_value, false, num_quotient_bits); + if (fix_remainder_to_zero) { remainder = zero(); // remainder needs to be defined as wire value and not selector values to satisfy // UltraPlonk's bigfield custom gates remainder.convert_constant_to_witness(ctx); } else { - remainder = bigfield( - witness_t(ctx, fr(remainder_value.slice(0, NUM_LIMB_BITS * 2).lo)), - witness_t(ctx, fr(remainder_value.slice(NUM_LIMB_BITS * 2, NUM_LIMB_BITS * 3 + NUM_LAST_LIMB_BITS).lo))); + remainder = create_from_u512_as_witness(ctx, remainder_value); } unsafe_evaluate_multiple_multiply_add(new_input_left, new_input_right, new_to_add, quotient, { remainder }); @@ -1929,7 +1980,7 @@ void bigfield::unsafe_evaluate_multiply_add(const bigfield& input_left, modulus, }; // N.B. 
this method also evaluates the prime field component of the non-native field mul - const auto [lo_idx, hi_idx] = ctx->evaluate_non_native_field_multiplication(witnesses, false, false); + const auto [lo_idx, hi_idx] = ctx->evaluate_non_native_field_multiplication(witnesses, false); barretenberg::fr neg_prime = -barretenberg::fr(uint256_t(target_basis.modulus)); field_t::evaluate_polynomial_identity(left.prime_basis_limb, @@ -1945,7 +1996,8 @@ void bigfield::unsafe_evaluate_multiply_add(const bigfield& input_left, // if both the hi and lo output limbs have less than 70 bits, we can use our custom // limb accumulation gate (accumulates 2 field elements, each composed of 5 14-bit limbs, in 3 gates) if (carry_lo_msb <= 70 && carry_hi_msb <= 70) { - ctx->range_constrain_two_limbs(hi.witness_index, lo.witness_index); + ctx->range_constrain_two_limbs( + hi.witness_index, lo.witness_index, size_t(carry_hi_msb), size_t(carry_lo_msb)); } else { ctx->decompose_into_default_range(hi.normalize().witness_index, carry_hi_msb); ctx->decompose_into_default_range(lo.normalize().witness_index, carry_lo_msb); @@ -2365,7 +2417,7 @@ void bigfield::unsafe_evaluate_multiple_multiply_add(const std::vectorevaluate_non_native_field_multiplication(witnesses, false, false); + const auto [lo_1_idx, hi_1_idx] = ctx->evaluate_non_native_field_multiplication(witnesses, false); barretenberg::fr neg_prime = -barretenberg::fr(uint256_t(target_basis.modulus)); @@ -2383,7 +2435,8 @@ void bigfield::unsafe_evaluate_multiple_multiply_add(const std::vectorrange_constrain_two_limbs(hi.witness_index, lo.witness_index); + ctx->range_constrain_two_limbs( + hi.witness_index, lo.witness_index, (size_t)carry_hi_msb, (size_t)carry_lo_msb); } else { ctx->decompose_into_default_range(hi.normalize().witness_index, carry_hi_msb); ctx->decompose_into_default_range(lo.normalize().witness_index, carry_lo_msb); diff --git a/cpp/src/aztec/stdlib/primitives/biggroup/biggroup_nafs.hpp
b/cpp/src/aztec/stdlib/primitives/biggroup/biggroup_nafs.hpp index c6c38cf045..b92f6750c5 100644 --- a/cpp/src/aztec/stdlib/primitives/biggroup/biggroup_nafs.hpp +++ b/cpp/src/aztec/stdlib/primitives/biggroup/biggroup_nafs.hpp @@ -125,143 +125,190 @@ typename element::secp256k1_wnaf_pair element::compu constexpr size_t num_bits = 129; - const auto compute_single_wnaf = - [ctx](const secp256k1::fr& k, const auto stagger, const bool is_negative, const bool is_lo = false) { - constexpr size_t num_rounds = ((num_bits + wnaf_size - 1) / wnaf_size); - const uint64_t stagger_mask = (1ULL << stagger) - 1; - const uint64_t stagger_scalar = k.data[0] & stagger_mask; - - uint64_t wnaf_values[num_rounds] = { 0 }; - bool skew_without_stagger; - uint256_t k_u256{ k.data[0], k.data[1], k.data[2], k.data[3] }; - k_u256 = k_u256 >> stagger; - if (is_lo) { - barretenberg::wnaf::fixed_wnaf( - &k_u256.data[0], &wnaf_values[0], skew_without_stagger, 0); - } else { - barretenberg::wnaf::fixed_wnaf( - &k_u256.data[0], &wnaf_values[0], skew_without_stagger, 0); - } - const size_t num_rounds_adjusted = ((num_bits + wnaf_size - 1 - stagger) / wnaf_size); - - const auto compute_staggered_wnaf_fragment = - [](const uint64_t fragment_u64, const uint64_t stagger, bool is_negative, bool wnaf_skew) { - if (stagger == 0) { - return std::make_pair((uint64_t)0, (bool)wnaf_skew); - } - int fragment = static_cast(fragment_u64); - - if (is_negative) { - fragment = -fragment; - } - if (!is_negative && wnaf_skew) { - fragment -= (1 << stagger); - } else if (is_negative && wnaf_skew) { - fragment += (1 << stagger); - } - bool output_skew = (fragment_u64 % 2) == 0; - if (!is_negative && output_skew) { - fragment += 1; - } else if (is_negative && output_skew) { - fragment -= 1; - } - - uint64_t output_fragment; - if (fragment < 0) { - output_fragment = static_cast((int)((1ULL << (wnaf_size - 1))) + (fragment / 2 - 1)); - } else { - output_fragment = static_cast((1ULL << (wnaf_size - 1)) - 1ULL + - 
(uint64_t)((uint64_t)fragment / 2 + 1)); - } - - return std::make_pair((uint64_t)output_fragment, (bool)output_skew); - }; - - const auto [first_fragment, skew] = - compute_staggered_wnaf_fragment(stagger_scalar, stagger, is_negative, skew_without_stagger); - - constexpr uint64_t wnaf_window_size = (1ULL << (wnaf_size - 1)); - const auto get_wnaf_wires = [ctx](uint64_t* wnaf_values, bool is_negative, size_t rounds) { - std::vector> wnaf_entries; - for (size_t i = 0; i < rounds; ++i) { - bool predicate = bool((wnaf_values[i] >> 31U) & 1U); - uint64_t offset_entry; - if ((!predicate && !is_negative) || (predicate && is_negative)) { - offset_entry = wnaf_window_size + (wnaf_values[i] & 0xffffff); - } else { - offset_entry = wnaf_window_size - 1 - (wnaf_values[i] & 0xffffff); - } - field_t entry(witness_t(ctx, offset_entry)); - - // TODO: Do these need to be range constrained? we use these witnesses - // to index a size-16 ROM lookup table, which performs an implicit range constraint - entry.create_range_constraint(wnaf_size); - wnaf_entries.emplace_back(entry); + /** + * @brief Compute WNAF of a single 129-bit scalar + * + * @param k Scalar + * @param stagger The number of bits that are used in "staggering" + * @param is_negative If it should be subtracted + * @param is_lo True if it's the low scalar + */ + const auto compute_single_wnaf = [ctx](const secp256k1::fr& k, + const auto stagger, + const bool is_negative, + const bool is_lo = false) { + // The number of rounds is the minimal required to cover the whole scalar with wnaf_size windows + constexpr size_t num_rounds = ((num_bits + wnaf_size - 1) / wnaf_size); + // Stagger mask is needed to retrieve the lowest bits that will not be used in montgomery ladder directly + const uint64_t stagger_mask = (1ULL << stagger) - 1; + // Stagger scalar represents the lower "staggered" bits that are not used in the ladder + const uint64_t stagger_scalar = k.data[0] & stagger_mask; + + uint64_t wnaf_values[num_rounds] = { 0 }; 
+ bool skew_without_stagger; + uint256_t k_u256{ k.data[0], k.data[1], k.data[2], k.data[3] }; + k_u256 = k_u256 >> stagger; + if (is_lo) { + barretenberg::wnaf::fixed_wnaf( + &k_u256.data[0], &wnaf_values[0], skew_without_stagger, 0); + } else { + barretenberg::wnaf::fixed_wnaf( + &k_u256.data[0], &wnaf_values[0], skew_without_stagger, 0); + } + + // Number of rounds that are needed to reconstruct the scalar without staggered bits + const size_t num_rounds_excluding_stagger_bits = ((num_bits + wnaf_size - 1 - stagger) / wnaf_size); + + /** + * @brief Compute the stagger-related part of WNAF and the final skew + * + * @param fragment_u64 Stagger-masked lower bits of the scalar + * @param stagger The number of staggering bits + * @param is_negative If the initial scalar is supposed to be subtracted + * @param wnaf_skew The skew of the stagger-right-shifted part of the scalar + * + */ + const auto compute_staggered_wnaf_fragment = + [](const uint64_t fragment_u64, const uint64_t stagger, bool is_negative, bool wnaf_skew) { + // If there is no stagger then there is no need to change anything + if (stagger == 0) { + return std::make_pair((uint64_t)0, (bool)wnaf_skew); + } + int fragment = static_cast(fragment_u64); + // Negate the fragment if it's negative + if (is_negative) { + fragment = -fragment; + } + // If the value is positive and there is a skew in wnaf, subtract 2ˢᵗᵃᵍᵍᵉʳ. 
If negative and there is + // skew, then add + if (!is_negative && wnaf_skew) { + fragment -= (1 << stagger); + } else if (is_negative && wnaf_skew) { + fragment += (1 << stagger); + } + // If the lowest bit is zero, then set final skew to 1 and add 1 to the absolute value of the fragment + bool output_skew = (fragment_u64 % 2) == 0; + if (!is_negative && output_skew) { + fragment += 1; + } else if (is_negative && output_skew) { + fragment -= 1; } - return wnaf_entries; - }; - std::vector> wnaf = get_wnaf_wires(&wnaf_values[0], is_negative, num_rounds_adjusted); - field_t negative_skew = witness_t(ctx, is_negative ? 0 : skew); - field_t positive_skew = witness_t(ctx, is_negative ? skew : 0); - negative_skew.create_range_constraint(1); - positive_skew.create_range_constraint(1); - (negative_skew + positive_skew).create_range_constraint(1); - - const auto reconstruct_bigfield_from_wnaf = [ctx](const std::vector>& wnaf, - const field_t& positive_skew, - const field_t& stagger_fragment, - const size_t stagger, - const size_t rounds) { - std::vector> accumulator; - for (size_t i = 0; i < rounds; ++i) { - field_t entry = wnaf[rounds - 1 - i]; - entry *= 2; - entry *= static_cast>(uint256_t(1) << (i * wnaf_size)); - accumulator.emplace_back(entry); + uint64_t output_fragment; + if (fragment < 0) { + output_fragment = static_cast((int)((1ULL << (wnaf_size - 1))) + (fragment / 2 - 1)); + } else { + output_fragment = static_cast((1ULL << (wnaf_size - 1)) - 1ULL + + (uint64_t)((uint64_t)fragment / 2 + 1)); } - field_t sum = field_t::accumulate(accumulator); - sum = sum * field_t(barretenberg::fr(1ULL << stagger)); - sum += (stagger_fragment * 2); - sum += positive_skew; - sum = sum.normalize(); - // TODO: improve efficiency by creating a constructor that does NOT require us to range constrain - // limbs (we already know (sum < 2^{130})) - Fr reconstructed = Fr(sum, field_t::from_witness_index(ctx, ctx->zero_idx), false); - return reconstructed; - }; - field_t stagger_fragment 
= witness_t(ctx, first_fragment); - Fr wnaf_sum = - reconstruct_bigfield_from_wnaf(wnaf, positive_skew, stagger_fragment, stagger, num_rounds_adjusted); + return std::make_pair((uint64_t)output_fragment, (bool)output_skew); + }; - uint256_t negative_constant_wnaf_offset(0); + // Compute the lowest fragment and final skew + const auto [first_fragment, skew] = + compute_staggered_wnaf_fragment(stagger_scalar, stagger, is_negative, skew_without_stagger); + + constexpr uint64_t wnaf_window_size = (1ULL << (wnaf_size - 1)); + /** + * @brief Compute wnaf values, convert them into witness field elements and range constrain them + * + */ + const auto get_wnaf_wires = [ctx](uint64_t* wnaf_values, bool is_negative, size_t rounds) { + std::vector> wnaf_entries; + for (size_t i = 0; i < rounds; ++i) { + // Predicate == sign of current wnaf value + bool predicate = bool((wnaf_values[i] >> 31U) & 1U); + uint64_t offset_entry; + // If the signs of current entry and the whole scalar are the same, then add the lowest bits of current + // wnaf value to the window size to form an entry. Otherwise, subtract the lowest bits along with 1 + if ((!predicate && !is_negative) || (predicate && is_negative)) { + // TODO: Why is this mask fixed? + offset_entry = wnaf_window_size + (wnaf_values[i] & 0xffffff); + } else { + offset_entry = wnaf_window_size - 1 - (wnaf_values[i] & 0xffffff); + } + field_t entry(witness_t(ctx, offset_entry)); - for (size_t i = 0; i < num_rounds_adjusted; ++i) { - negative_constant_wnaf_offset += - uint256_t(wnaf_window_size * 2 - 1) * (uint256_t(1) << (i * wnaf_size)); + // TODO: Do these need to be range constrained? 
we use these witnesses + // to index a size-16 ROM lookup table, which performs an implicit range constraint + entry.create_range_constraint(wnaf_size); + wnaf_entries.emplace_back(entry); } - negative_constant_wnaf_offset = negative_constant_wnaf_offset << stagger; - if (stagger > 0) { - negative_constant_wnaf_offset += ((1ULL << wnaf_size) - 1ULL); // FROM STAGGER FRAMGENT + return wnaf_entries; + }; + + // Get wnaf witnesses + std::vector> wnaf = get_wnaf_wires(&wnaf_values[0], is_negative, num_rounds_excluding_stagger_bits); + // Compute and constrain skews + field_t negative_skew = witness_t(ctx, is_negative ? 0 : skew); + field_t positive_skew = witness_t(ctx, is_negative ? skew : 0); + negative_skew.create_range_constraint(1); + positive_skew.create_range_constraint(1); + (negative_skew + positive_skew).create_range_constraint(1); + + const auto reconstruct_bigfield_from_wnaf = [ctx](const std::vector>& wnaf, + const field_t& positive_skew, + const field_t& stagger_fragment, + const size_t stagger, + const size_t rounds) { + std::vector> accumulator; + // Collect positive wnaf entries for accumulation + for (size_t i = 0; i < rounds; ++i) { + field_t entry = wnaf[rounds - 1 - i]; + entry *= static_cast>(uint256_t(1) << (i * wnaf_size)); + accumulator.emplace_back(entry); } - field_t skew_offset = - (negative_skew + field_t(barretenberg::fr(negative_constant_wnaf_offset))).normalize(); + // Accumulate entries, shift by stagger and add the stagger itself + field_t sum = field_t::accumulate(accumulator); + sum = sum * field_t(barretenberg::fr(1ULL << stagger)); + sum += (stagger_fragment); + sum = sum.normalize(); + // TODO: improve efficiency by creating a constructor that does NOT require us to range constrain + // limbs (we already know (sum < 2^{130})) + // Convert this value to bigfield element + Fr reconstructed = Fr(sum, field_t::from_witness_index(ctx, ctx->zero_idx), false); + // Double the final value and add the skew + reconstructed = (reconstructed 
+ reconstructed).add_to_lower_limb(positive_skew, uint256_t(1)); + return reconstructed; + }; - // TODO: improve efficiency by removing range constraint on lo_offset and hi_offset (we already know are - // boolean) - Fr offset = Fr(skew_offset, field_t(ctx, barretenberg::fr(0)), false); + // Initialize stagger witness + field_t stagger_fragment = witness_t(ctx, first_fragment); - Fr reconstructed = wnaf_sum - offset; + // Reconstruct bigfield x_pos + Fr wnaf_sum = reconstruct_bigfield_from_wnaf( + wnaf, positive_skew, stagger_fragment, stagger, num_rounds_excluding_stagger_bits); - secp256k1_wnaf wnaf_out{ .wnaf = wnaf, - .positive_skew = positive_skew, - .negative_skew = negative_skew, - .least_significant_wnaf_fragment = stagger_fragment, - .has_wnaf_fragment = (stagger > 0) }; + // Start reconstructing x_neg + uint256_t negative_constant_wnaf_offset(0); - return std::make_pair((Fr)reconstructed, (secp256k1_wnaf)wnaf_out); - }; + // Construct 0xF..F + for (size_t i = 0; i < num_rounds_excluding_stagger_bits; ++i) { + negative_constant_wnaf_offset += uint256_t(wnaf_window_size * 2 - 1) * (uint256_t(1) << (i * wnaf_size)); + } + // Shift by stagger + negative_constant_wnaf_offset = negative_constant_wnaf_offset << stagger; + // Add for stagger + if (stagger > 0) { + negative_constant_wnaf_offset += ((1ULL << wnaf_size) - 1ULL); // FROM STAGGER FRAGMENT + } + + // TODO: improve efficiency by removing range constraint on lo_offset and hi_offset (we already know they are + // boolean) + // Add the skew to the bigfield constant + Fr offset = Fr(nullptr, negative_constant_wnaf_offset).add_to_lower_limb(negative_skew, uint256_t(1)); + // x_pos - x_neg + Fr reconstructed = wnaf_sum - offset; + + secp256k1_wnaf wnaf_out{ .wnaf = wnaf, + .positive_skew = positive_skew, + .negative_skew = negative_skew, + .least_significant_wnaf_fragment = stagger_fragment, + .has_wnaf_fragment = (stagger > 0) }; + + return std::make_pair((Fr)reconstructed, (secp256k1_wnaf)wnaf_out); + }; 
secp256k1::fr k(scalar.get_value().lo); secp256k1::fr klo(0); @@ -364,7 +411,7 @@ std::vector> element::compute_wnaf(const Fr& scalar) // If Fr is a non-native field element, we can't just accumulate the wnaf entries into a single value, // as we could overflow the circuit modulus // - // We add the first 34 wnaf entries into a 'low' 138-bit accumulator (138 = 2 68 bit limbs) + // We add the first 34 wnaf entries into a 'low' 136-bit accumulator (136 = 2 68 bit limbs) // We add the remaining wnaf entries into a 'high' accumulator // We can then directly construct a Fr element from the accumulators. // However we cannot underflow our accumulators, and our wnafs represent negative and positive values @@ -372,8 +419,10 @@ std::vector> element::compute_wnaf(const Fr& scalar) // [-15, -13, -11, ..., 13, 15] // // To map from the raw value to the actual value, we must compute `value * 2 - 15` - // However, we do not subtract off the -15 term when constructing our low and high accumulators (instead just - // multiplying by 2) This ensures the low accumulator will not underflow + // However, we do not subtract off the -15 term when constructing our low and high accumulators. Instead of + // multiplying by two when accumulating we simply add the accumulated value to itself. This way it automatically + // updates multiplicative constants without computing new witnesses. 
This ensures the low accumulator will not + // underflow // // Once we hvae reconstructed an Fr element out of our accumulators, // we ALSO construct an Fr element from the constant offset terms we left out @@ -383,7 +432,6 @@ std::vector> element::compute_wnaf(const Fr& scalar) std::vector> half_accumulators; for (size_t i = 0; i < half_round_length; ++i) { field_t entry = wnafs[half_round_length - 1 - i]; - entry *= 2; entry *= static_cast>(uint256_t(1) << (i * 4)); half_accumulators.emplace_back(entry); } @@ -392,7 +440,6 @@ std::vector> element::compute_wnaf(const Fr& scalar) const size_t midpoint = num_rounds - (Fr::NUM_LIMB_BITS * 2) / WNAF_SIZE; auto hi_accumulators = reconstruct_half_wnaf(&wnaf_entries[0], midpoint); auto lo_accumulators = reconstruct_half_wnaf(&wnaf_entries[midpoint], num_rounds - midpoint); - uint256_t negative_lo(0); uint256_t negative_hi(0); for (size_t i = 0; i < midpoint; ++i) { @@ -401,12 +448,16 @@ std::vector> element::compute_wnaf(const Fr& scalar) for (size_t i = 0; i < (num_rounds - midpoint); ++i) { negative_lo += uint256_t(15) * (uint256_t(1) << (i * 4)); } - + ASSERT((num_rounds - midpoint) * 4 == 136); + // If skew == 1 lo_offset = 0, else = 0xf...f field_t lo_offset = - (wnaf_entries[wnaf_entries.size() - 1] + field_t(barretenberg::fr(negative_lo))).normalize(); - Fr offset = Fr(lo_offset, field_t(barretenberg::fr(negative_hi)), true); + (-field_t(barretenberg::fr(negative_lo))) + .madd(wnaf_entries[wnaf_entries.size() - 1], field_t(barretenberg::fr(negative_lo))) + .normalize(); + Fr offset = + Fr(lo_offset, field_t(barretenberg::fr(negative_hi)) + wnaf_entries[wnaf_entries.size() - 1], true); Fr reconstructed = Fr(lo_accumulators, hi_accumulators, true); - reconstructed = reconstructed - offset; + reconstructed = (reconstructed + reconstructed) - offset; reconstructed.assert_is_in_field(); reconstructed.assert_equal(scalar); }