diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/c_bind.cpp b/cpp/src/barretenberg/crypto/pedersen_commitment/c_bind.cpp index 1b58491c3c..f503481ce7 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/c_bind.cpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/c_bind.cpp @@ -30,7 +30,6 @@ WASM_EXPORT void pedersen_plookup_compress_fields(uint8_t const* left, uint8_t c barretenberg::fr::serialize_to_buffer(r, result); } - WASM_EXPORT void pedersen__compress(uint8_t const* inputs_buffer, uint8_t* output) { std::vector to_compress; diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp b/cpp/src/barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp index 10adacf4ce..9b657280ad 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp @@ -5,6 +5,16 @@ namespace crypto { namespace pedersen_commitment { +/** + * @brief Converts an input uint8_t buffer into a vector of field elements. Used to hash the Transcript in a SNARK-friendly + * manner for recursive circuits. + * + * `input` is an unstructured byte array that we want to convert into field elements + * prior to hashing. We do this by splitting the buffer into 31-byte chunks.
+ * + * @param input unstructured byte buffer to convert + * @return std::vector + */ inline std::vector convert_buffer_to_field(const std::vector& input) { const size_t num_bytes = input.size(); diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.cpp b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.cpp index 639dfa2440..8e3aa99ef1 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.cpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.cpp @@ -105,16 +105,16 @@ grumpkin::fq compress_native(const std::vector& input) +grumpkin::fq compress_native_buffer_to_field(const std::vector& input, const size_t hash_index) { const auto elements = convert_buffer_to_field(input); - grumpkin::fq result_fq = compress_native(elements); + grumpkin::fq result_fq = compress_native(elements, hash_index); return result_fq; } -grumpkin::fq compress_native(const std::vector& input) +grumpkin::fq compress_native(const std::vector& input, const size_t hash_index) { - return compress_native_buffer_to_field(input); + return compress_native_buffer_to_field(input, hash_index); } } // namespace pedersen_commitment diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.hpp b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.hpp index d7275aa6ac..0600e13b52 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.hpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.hpp @@ -22,7 +22,7 @@ template grumpkin::fq compress_native(const std::array& input); +grumpkin::fq compress_native(const std::vector& input, const size_t hash_index = 0); grumpkin::fq compress_native(const std::vector>& input_pairs); diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.cpp b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.cpp index e2333acfa6..5e6288e8df 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.cpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.cpp @@
-28,11 +28,12 @@ grumpkin::g1::element merkle_damgard_compress(const std::vector& i const size_t num_inputs = inputs.size(); grumpkin::fq result = (pedersen_iv_table[iv]).x; - for (size_t i = 0; i < num_inputs; i++) { + result = hash_pair(result, num_inputs); + for (size_t i = 0; i < num_inputs - 1; i++) { result = hash_pair(result, inputs[i]); } - return (hash_single(result, false) + hash_single(grumpkin::fq(num_inputs), true)); + return (hash_single(result, false) + hash_single(inputs[num_inputs - 1], true)); } grumpkin::g1::element merkle_damgard_compress(const std::vector& inputs, const std::vector& ivs) @@ -46,7 +47,8 @@ grumpkin::g1::element merkle_damgard_compress(const std::vector& i const size_t num_inputs = inputs.size(); grumpkin::fq result = (pedersen_iv_table[0]).x; - for (size_t i = 0; i < 2 * num_inputs; i++) { + result = hash_pair(result, num_inputs); + for (size_t i = 0; i < 2 * num_inputs - 1; i++) { if ((i & 1) == 0) { grumpkin::fq iv_result = (pedersen_iv_table[ivs[i >> 1]]).x; result = hash_pair(result, iv_result); @@ -54,8 +56,7 @@ grumpkin::g1::element merkle_damgard_compress(const std::vector& i result = hash_pair(result, inputs[i >> 1]); } } - - return (hash_single(result, false) + hash_single(grumpkin::fq(num_inputs), true)); + return (hash_single(result, false) + hash_single(inputs[num_inputs - 1], true)); } grumpkin::g1::element merkle_damgard_tree_compress(const std::vector& inputs, @@ -111,16 +112,16 @@ grumpkin::fq compress_native(const std::vector& inputs, const std: return commit_native(inputs, hash_indices).x; } -grumpkin::fq compress_native_buffer_to_field(const std::vector& input) +grumpkin::fq compress_native_buffer_to_field(const std::vector& input, const size_t hash_index) { const auto elements = convert_buffer_to_field(input); - grumpkin::fq result_fq = compress_native(elements); + grumpkin::fq result_fq = compress_native(elements, hash_index); return result_fq; } -std::vector compress_native(const std::vector& input) 
+std::vector compress_native(const std::vector& input, const size_t hash_index) { - const auto result_fq = compress_native_buffer_to_field(input); + const auto result_fq = compress_native_buffer_to_field(input, hash_index); uint256_t result_u256(result_fq); const size_t num_bytes = input.size(); diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.hpp b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.hpp index 0f99b13fbb..b77fac9688 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.hpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.hpp @@ -13,9 +13,9 @@ grumpkin::g1::element merkle_damgard_tree_compress(const std::vector& inputs, const size_t hash_index = 0); grumpkin::fq compress_native(const std::vector& inputs, const std::vector& hash_indices); -std::vector compress_native(const std::vector& input); +std::vector compress_native(const std::vector& input, const size_t hash_index = 0); -grumpkin::fq compress_native_buffer_to_field(const std::vector& input); +grumpkin::fq compress_native_buffer_to_field(const std::vector& input, const size_t hash_index = 0); template grumpkin::fq compress_native(const std::array& inputs) { diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.test.cpp b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.test.cpp index 82d0b4f7ed..a06f5cea58 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.test.cpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.test.cpp @@ -157,7 +157,9 @@ TEST(pedersen_lookup, merkle_damgard_compress) const auto result = crypto::pedersen_commitment::lookup::merkle_damgard_compress(inputs, iv); - fq intermediate = (grumpkin::g1::affine_one * fr(iv + 1)).x; + auto iv_hash = compute_expected((grumpkin::g1::affine_one * fr(iv + 1)).x, 0); + auto length = compute_expected(fq(m), (crypto::pedersen_hash::lookup::NUM_PEDERSEN_TABLES / 2)); + fq 
intermediate = affine_element(iv_hash + length).x; for (size_t i = 0; i < m; i++) { intermediate = affine_element(compute_expected(intermediate, 0) + @@ -165,10 +167,7 @@ TEST(pedersen_lookup, merkle_damgard_compress) .x; } - EXPECT_EQ(affine_element(result).x, - affine_element(compute_expected(intermediate, 0) + - compute_expected(fq(m), (crypto::pedersen_hash::lookup::NUM_PEDERSEN_TABLES / 2))) - .x); + EXPECT_EQ(affine_element(result).x, intermediate); } TEST(pedersen_lookup, merkle_damgard_compress_multiple_iv) @@ -188,7 +187,11 @@ TEST(pedersen_lookup, merkle_damgard_compress_multiple_iv) const auto result = crypto::pedersen_commitment::lookup::merkle_damgard_compress(inputs, ivs); const size_t initial_iv = 0; - fq intermediate = (grumpkin::g1::affine_one * fr(initial_iv + 1)).x; + auto iv_hash = compute_expected((grumpkin::g1::affine_one * fr(initial_iv + 1)).x, 0); + + auto length = compute_expected(fq(m), (crypto::pedersen_hash::lookup::NUM_PEDERSEN_TABLES / 2)); + fq intermediate = affine_element(iv_hash + length).x; + for (size_t i = 0; i < 2 * m; i++) { if ((i & 1) == 0) { const auto iv = (grumpkin::g1::affine_one * fr(ivs[i >> 1] + 1)).x; @@ -204,10 +207,7 @@ TEST(pedersen_lookup, merkle_damgard_compress_multiple_iv) } } - EXPECT_EQ(affine_element(result).x, - affine_element(compute_expected(intermediate, 0) + - compute_expected(fq(m), (crypto::pedersen_hash::lookup::NUM_PEDERSEN_TABLES / 2))) - .x); + EXPECT_EQ(affine_element(result).x, intermediate); } TEST(pedersen_lookup, merkle_damgard_tree_compress) diff --git a/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp b/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp index e17087aa88..1073ec7825 100644 --- a/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp +++ b/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp @@ -371,11 +371,12 @@ class UltraHonkComposer { }; // std::array decompose_non_native_field_double_width_limb( // const uint32_t limb_idx, const size_t 
num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array queue_non_native_field_multiplication( + std::array evaluate_non_native_field_multiplication( const UltraCircuitConstructor::non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true) { - return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder); + return circuit_constructor.evaluate_non_native_field_multiplication(input, + range_constrain_quotient_and_remainder); }; // std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& // input); typedef std::pair scaled_witness; typedef std::tuple concept StandardFlavor = IsAnyOf; +template +concept StandardFlavor = IsAnyOf; template class StandardProver_ { diff --git a/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp b/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp index bcf665711c..9bbe7314f2 100644 --- a/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp +++ b/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp @@ -13,7 +13,8 @@ namespace proof_system::honk { // We won't compile this class with honk::flavor::Standard, but we will like want to compile it (at least for testing) // with a flavor that uses the curve Grumpkin, or a flavor that does/does not have zk, etc. 
-template concept UltraFlavor = IsAnyOf; +template +concept UltraFlavor = IsAnyOf; template class UltraProver_ { using FF = typename Flavor::FF; diff --git a/cpp/src/barretenberg/join_split_example/proofs/join_split/join_split.test.cpp b/cpp/src/barretenberg/join_split_example/proofs/join_split/join_split.test.cpp index aa7e516609..2b81096c42 100644 --- a/cpp/src/barretenberg/join_split_example/proofs/join_split/join_split.test.cpp +++ b/cpp/src/barretenberg/join_split_example/proofs/join_split/join_split.test.cpp @@ -806,11 +806,12 @@ TEST_F(join_split_tests, test_0_input_notes_and_detect_circuit_change) // The below part detects any changes in the join-split circuit - constexpr uint32_t CIRCUIT_GATE_COUNT = 185573; + constexpr uint32_t CIRCUIT_GATE_COUNT = 183834; constexpr uint32_t GATES_NEXT_POWER_OF_TWO = 524288; - const uint256_t VK_HASH("13eb88883e80efb9bf306af2962cd1a49e9fa1b0bfb2d4b563b95217a17bcc74"); + const uint256_t VK_HASH("5c2e0fe914dbbf23d6bac6ae4db9a7e43d98c0b9d71c9200208dbce24a815c6e"); auto number_of_gates_js = result.number_of_gates; + std::cout << get_verification_key()->sha256_hash() << std::endl; auto vk_hash_js = get_verification_key()->sha256_hash(); if (!CIRCUIT_CHANGE_EXPECTED) { diff --git a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp index 53c8ff8f90..703e037e64 100644 --- a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp +++ b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp @@ -380,13 +380,14 @@ class UltraPlonkComposer { }; // std::array decompose_non_native_field_double_width_limb( // const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array queue_non_native_field_multiplication( + std::array evaluate_non_native_field_multiplication( const UltraCircuitConstructor::non_native_field_witnesses& input, const bool 
range_constrain_quotient_and_remainder = true) { - return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder); + return circuit_constructor.evaluate_non_native_field_multiplication(input, + range_constrain_quotient_and_remainder); }; - // std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& + // std::array queue_partial_non_native_field_multiplication(const non_native_field_witnesses& // input); typedef std::pair scaled_witness; typedef std::tuple add_simple; std::array evaluate_non_native_field_subtraction( // add_simple limb0, diff --git a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp index a5f6e09822..fe95dfd298 100644 --- a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp +++ b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp @@ -781,7 +781,7 @@ TEST(ultra_plonk_composer_splitting_tmp, non_native_field_multiplication) UltraCircuitConstructor::non_native_field_witnesses inputs{ a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)), }; - const auto [lo_1_idx, hi_1_idx] = composer.queue_non_native_field_multiplication(inputs); + const auto [lo_1_idx, hi_1_idx] = composer.evaluate_non_native_field_multiplication(inputs); composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70); auto prover = composer.create_prover(); diff --git a/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp b/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp index 1f3e9fb3ed..ce0f4eac7b 100644 --- a/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp +++ b/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp @@ -1138,7 +1138,30 @@ std::vector UltraComposer::decompose_into_default_range(const uint32_t const auto limb_idx = add_variable(sublimbs[i]); 
sublimb_indices.emplace_back(limb_idx); if ((i == sublimbs.size() - 1) && has_remainder_bits) { - create_new_range_constraint(limb_idx, last_limb_range); + if ((target_range_bitnum - last_limb_size) < DEFAULT_PLOOKUP_RANGE_CUTOFF_BITNUM) { + // we don't want to make a new range table. + // X = limb, L = last limb range, K = sublimb mask. L < K + // we want X <= L + // i.e. L - X >= 0 and L - X <= K + // equivalent to saying L - X <= K + // D = L - X + // D + X - L = 0 + barretenberg::fr diff = uint256_t(last_limb_range) - get_variable(limb_idx); + uint32_t diff_idx = add_variable(diff); + create_add_gate({ + .a = limb_idx, + .b = zero_idx, + .c = diff_idx, + .a_scaling = 1, + .b_scaling = 0, + .c_scaling = 1, + .const_scaling = -barretenberg::fr(last_limb_range), + }); + create_new_range_constraint(diff_idx, sublimb_mask); + create_new_range_constraint(limb_idx, sublimb_mask); + } else { + create_new_range_constraint(limb_idx, last_limb_range); + } } else { create_new_range_constraint(limb_idx, sublimb_mask); } @@ -1860,22 +1883,18 @@ std::array UltraComposer::decompose_non_native_field_double_width_l } /** - * @brief Queue up non-native field multiplication data. + * @brief Process non-native field multiplication data. * - * @details The data queued represents a non-native field multiplication identity a * b = q * p + r, + * @details The data represents a non-native field multiplication identity a * b = q * p + r, * where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables. * - * Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would - * duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove - * duplicates in the circuit finishing stage of the proving key computation.
- * * The non-native field modulus, p, is a circuit constant * * The return value are the witness indices of the two remainder limbs `lo_1, hi_2` * * N.B.: This method does NOT evaluate the prime field component of non-native field multiplications. **/ -std::array UltraComposer::queue_non_native_field_multiplication( +std::array UltraComposer::evaluate_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder) { @@ -1903,10 +1922,11 @@ std::array UltraComposer::queue_non_native_field_multiplication( get_variable(input.r[2]), get_variable(input.r[3]), }; - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << (3 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + constexpr barretenberg::fr LIMB_RSHIFT = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_RSHIFT_2 = barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); @@ -1955,68 +1975,114 @@ std::array UltraComposer::queue_non_native_field_multiplication( range_constrain_two_limbs(input.q[2], input.q[3]); } - // Add witnesses into the multiplication cache - // (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) - cached_non_native_field_multiplication cache_entry{ - .a = input.a, - .b = input.b, - .q = input.q, - .r = input.r, - .cross_terms = { lo_0_idx, lo_1_idx, hi_0_idx, hi_1_idx, hi_2_idx, hi_3_idx }, - .neg_modulus = input.neg_modulus, - }; - cached_non_native_field_multiplications.emplace_back(cache_entry); + // product gate 1 + // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 + create_big_add_gate({ input.q[0], + input.q[1], + input.r[1], + lo_1_idx, + 
input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, + input.neg_modulus[0] * LIMB_SHIFT, + -LIMB_SHIFT, + -LIMB_SHIFT.sqr(), + 0 }, + true); + + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[0]); + w_4.emplace_back(lo_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); + ++num_gates; + w_l.emplace_back(input.a[0]); + w_r.emplace_back(input.b[0]); + w_o.emplace_back(input.a[3]); + w_4.emplace_back(input.b[3]); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); + ++num_gates; + w_l.emplace_back(input.a[2]); + w_r.emplace_back(input.b[2]); + w_o.emplace_back(input.r[3]); + w_4.emplace_back(hi_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); + ++num_gates; + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[2]); + w_4.emplace_back(hi_1_idx); + apply_aux_selectors(AUX_SELECTORS::NONE); + ++num_gates; + + /** + * product gate 6 + * + * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 + * + **/ + create_big_add_gate( + { + input.q[2], + input.q[3], + lo_1_idx, + hi_1_idx, + -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], + -input.neg_modulus[0] * LIMB_SHIFT, + -1, + -1, + 0, + }, + true); + + /** + * product gate 7 + * + * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b + **/ + create_big_add_gate({ + hi_3_idx, + input.q[0], + input.q[1], + hi_2_idx, + -1, + input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, + input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, + LIMB_RSHIFT_2, + 0, + }); return std::array{ lo_1_idx, hi_3_idx }; } /** * @brief Called in `compute_proving_key` when finalizing circuit. 
- * Iterates over the cached_non_native_field_multiplication objects, + * Iterates over the cached_partial_non_native_field_multiplication objects, * removes duplicates, and instantiates the remainder as constraints` */ void UltraComposer::process_non_native_field_multiplications() { - std::sort(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); - - auto last = - std::unique(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); + for (size_t i = 0; i < cached_partial_non_native_field_multiplications.size(); ++i) { + auto& c = cached_partial_non_native_field_multiplications[i]; + for (size_t j = 0; j < 5; ++j) { + c.a[j] = real_variable_index[c.a[j]]; + c.b[j] = real_variable_index[c.b[j]]; + } + } + std::sort(cached_partial_non_native_field_multiplications.begin(), + cached_partial_non_native_field_multiplications.end()); - auto it = cached_non_native_field_multiplications.begin(); + auto last = std::unique(cached_partial_non_native_field_multiplications.begin(), + cached_partial_non_native_field_multiplications.end()); - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; - constexpr barretenberg::fr LIMB_RSHIFT = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); - constexpr barretenberg::fr LIMB_RSHIFT_2 = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); + auto it = cached_partial_non_native_field_multiplications.begin(); // iterate over the cached items and create constraints while (it != last) { const auto input = *it; - const uint32_t lo_0_idx = input.cross_terms.lo_0_idx; - const uint32_t lo_1_idx = input.cross_terms.lo_1_idx; - const uint32_t hi_0_idx = input.cross_terms.hi_0_idx; - const uint32_t hi_1_idx = input.cross_terms.hi_1_idx; - const uint32_t hi_2_idx = input.cross_terms.hi_2_idx; - const uint32_t hi_3_idx = 
input.cross_terms.hi_3_idx; - - // product gate 1 - // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 - create_big_add_gate({ input.q[0], - input.q[1], - input.r[1], - lo_1_idx, - input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, - input.neg_modulus[0] * LIMB_SHIFT, - -LIMB_SHIFT, - -LIMB_SHIFT.sqr(), - 0 }, - true); w_l.emplace_back(input.a[1]); w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[0]); - w_4.emplace_back(lo_0_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.lo_0); apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); ++num_gates; w_l.emplace_back(input.a[0]); @@ -2027,65 +2093,34 @@ void UltraComposer::process_non_native_field_multiplications() ++num_gates; w_l.emplace_back(input.a[2]); w_r.emplace_back(input.b[2]); - w_o.emplace_back(input.r[3]); - w_4.emplace_back(hi_0_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.hi_0); apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); ++num_gates; w_l.emplace_back(input.a[1]); w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[2]); - w_4.emplace_back(hi_1_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.hi_1); apply_aux_selectors(AUX_SELECTORS::NONE); ++num_gates; - - /** - * product gate 6 - * - * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 - * - **/ - create_big_add_gate( - { - input.q[2], - input.q[3], - lo_1_idx, - hi_1_idx, - -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], - -input.neg_modulus[0] * LIMB_SHIFT, - -1, - -1, - 0, - }, - true); - - /** - * product gate 7 - * - * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b - **/ - create_big_add_gate({ - hi_3_idx, - input.q[0], - input.q[1], - hi_2_idx, - -1, - input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, - input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, - LIMB_RSHIFT_2, - 0, - }); ++it; } } /** - * Compute the limb-multiplication part of a non native field mul + 
* @brief Queue the limb-multiplication part of a non native field mul * * i.e. compute the low 204 and high 204 bit components of `a * b` where `a, b` are nnf elements composed of 4 * limbs with size DEFAULT_NON_NATIVE_FIELD_LIMB_BITS * + * @details The data queued represents part of a non-native field multiplication identity a * b = q * p + r, + * where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables. + * + * Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would + * duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove + * duplicates in the circuit finishing stage of the proving key computation. **/ -std::array UltraComposer::evaluate_partial_non_native_field_multiplication( +std::array UltraComposer::queue_partial_non_native_field_multiplication( const non_native_field_witnesses& input) { @@ -2113,30 +2148,16 @@ std::array UltraComposer::evaluate_partial_non_native_field_multipl const uint32_t hi_0_idx = add_variable(hi_0); const uint32_t hi_1_idx = add_variable(hi_1); - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(lo_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); - ++num_gates; - w_l.emplace_back(input.a[0]); - w_r.emplace_back(input.b[0]); - w_o.emplace_back(input.a[3]); - w_4.emplace_back(input.b[3]); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); - ++num_gates; - w_l.emplace_back(input.a[2]); - w_r.emplace_back(input.b[2]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(hi_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); - ++num_gates; - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(hi_1_idx); - apply_aux_selectors(AUX_SELECTORS::NONE); - ++num_gates; + // Add witnesses into the multiplication cache + // 
(when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) + cached_partial_non_native_field_multiplication cache_entry{ + .a = input.a, + .b = input.b, + .lo_0 = lo_0_idx, + .hi_0 = hi_0_idx, + .hi_1 = hi_1_idx, + }; + cached_partial_non_native_field_multiplications.emplace_back(cache_entry); return std::array{ lo_0_idx, hi_1_idx }; } diff --git a/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp b/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp index 6f22b40bb0..eb0e5b0e92 100644 --- a/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp +++ b/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp @@ -29,6 +29,11 @@ class UltraComposer : public ComposerBase { // large ranges such as 2^64. For such ranges the element will be decomposed into smaller // chuncks according to the parameter below static constexpr size_t DEFAULT_PLOOKUP_RANGE_BITNUM = 14; + // (DEFAULT_PLOOKUP_RANGE_BITNUM - DEFAULT_PLOOKUP_RANGE_CUTOFF_BITNUM) = maximum size of range table that + // `decompose_into_default_range` will create in addition to the DEFAULT_PLOOKUP_RANGE_BITNUM table e.g.
we don't + // want to create a range table of size (DEFAULT_PLOOKUP_RANGE_BITNUM - 1) if it contains very few entries; each + // table has a O(1 << bitnum) constraint cost to create + static constexpr size_t DEFAULT_PLOOKUP_RANGE_CUTOFF_BITNUM = 4; static constexpr size_t DEFAULT_PLOOKUP_RANGE_STEP_SIZE = 3; static constexpr size_t DEFAULT_PLOOKUP_RANGE_SIZE = (1 << DEFAULT_PLOOKUP_RANGE_BITNUM) - 1; static constexpr size_t DEFAULT_NON_NATIVE_FIELD_LIMB_BITS = 68; @@ -36,7 +41,7 @@ class UltraComposer : public ComposerBase { static constexpr size_t NUMBER_OF_GATES_PER_RAM_ACCESS = 2; static constexpr size_t NUMBER_OF_ARITHMETIC_GATES_PER_RAM_ARRAY = 1; // number of gates created per non-native field operation in process_non_native_field_multiplications - static constexpr size_t GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC = 7; + static constexpr size_t GATES_PER_PARTIAL_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC = 4; struct non_native_field_witnesses { // first 4 array elements = limbs // 5th element = prime basis limb @@ -58,30 +63,27 @@ class UltraComposer : public ComposerBase { }; /** - * @brief Used to store instructions to create non_native_field_multiplication gates. + * @brief Used to store instructions to create partial_non_native_field_multiplication gates. * We want to cache these (and remove duplicates) as the stdlib code can end up multiplying the same inputs * repeatedly. 
*/ - struct cached_non_native_field_multiplication { + struct cached_partial_non_native_field_multiplication { std::array a; std::array b; - std::array q; - std::array r; - non_native_field_multiplication_cross_terms cross_terms; - std::array neg_modulus; + barretenberg::fr lo_0; + barretenberg::fr hi_0; + barretenberg::fr hi_1; - bool operator==(const cached_non_native_field_multiplication& other) const + bool operator==(const cached_partial_non_native_field_multiplication& other) const { bool valid = true; for (size_t i = 0; i < 5; ++i) { valid = valid && (a[i] == other.a[i]); valid = valid && (b[i] == other.b[i]); - valid = valid && (q[i] == other.q[i]); - valid = valid && (r[i] == other.r[i]); } return valid; } - bool operator<(const cached_non_native_field_multiplication& other) const + bool operator<(const cached_partial_non_native_field_multiplication& other) const { if (a < other.a) { return true; @@ -90,22 +92,13 @@ class UltraComposer : public ComposerBase { if (b < other.b) { return true; } - if (b == other.b) { - if (q < other.q) { - return true; - } - if (q == other.q) { - if (r < other.r) { - return true; - } - } - } } return false; } }; - std::vector cached_non_native_field_multiplications; + std::vector cached_partial_non_native_field_multiplications; + void process_non_native_field_multiplications(); enum AUX_SELECTORS { @@ -392,13 +385,23 @@ class UltraComposer : public ComposerBase { rangecount += ram_range_sizes[i]; } } - std::vector nnf_copy(cached_non_native_field_multiplications); + + std::vector pnnf_copy( + cached_partial_non_native_field_multiplications); + for (size_t i = 0; i < pnnf_copy.size(); ++i) { + auto& c = pnnf_copy[i]; + for (size_t j = 0; j < 5; ++j) { + c.a[j] = real_variable_index[c.a[j]]; + c.b[j] = real_variable_index[c.b[j]]; + } + } // update nnfcount - std::sort(nnf_copy.begin(), nnf_copy.end()); + std::sort(pnnf_copy.begin(), pnnf_copy.end()); + auto plast = std::unique(pnnf_copy.begin(), pnnf_copy.end()); - auto last = 
std::unique(nnf_copy.begin(), nnf_copy.end()); - const size_t num_nnf_ops = static_cast(std::distance(nnf_copy.begin(), last)); - nnfcount = num_nnf_ops * GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC; + nnfcount = static_cast(std::distance(pnnf_copy.begin(), plast)) * + GATES_PER_PARTIAL_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC; + ; } /** @@ -545,9 +548,9 @@ class UltraComposer : public ComposerBase { const size_t hi_limb_bits = DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); std::array decompose_non_native_field_double_width_limb( const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array queue_non_native_field_multiplication( + std::array evaluate_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true); - std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& input); + std::array queue_partial_non_native_field_multiplication(const non_native_field_witnesses& input); typedef std::pair scaled_witness; typedef std::tuple add_simple; std::array evaluate_non_native_field_subtraction( diff --git a/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp b/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp index 0ec324aa47..0ac6de40dc 100644 --- a/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp +++ b/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp @@ -649,7 +649,7 @@ TYPED_TEST(ultra_composer, non_native_field_multiplication) UltraComposer::non_native_field_witnesses inputs{ a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)), }; - const auto [lo_1_idx, hi_1_idx] = composer.queue_non_native_field_multiplication(inputs); + const auto [lo_1_idx, hi_1_idx] = composer.evaluate_non_native_field_multiplication(inputs); composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70); TestFixture::prove_and_verify(composer, /*expected_result=*/true); 
diff --git a/cpp/src/barretenberg/plonk/proof_system/verification_key/verification_key.cpp b/cpp/src/barretenberg/plonk/proof_system/verification_key/verification_key.cpp index f6cf92bd73..e66d3b56b8 100644 --- a/cpp/src/barretenberg/plonk/proof_system/verification_key/verification_key.cpp +++ b/cpp/src/barretenberg/plonk/proof_system/verification_key/verification_key.cpp @@ -51,43 +51,34 @@ barretenberg::fr compress_native_evaluation_domain(barretenberg::evaluation_doma */ barretenberg::fr verification_key_data::compress_native(const size_t hash_index) { - barretenberg::evaluation_domain domain = evaluation_domain(circuit_size); - barretenberg::fr compressed_domain = - compress_native_evaluation_domain(domain, proof_system::ComposerType(composer_type)); - - constexpr size_t num_limb_bits = plonk::NUM_LIMB_BITS_IN_FIELD_SIMULATION; - - const auto split_bigfield_limbs = [](const uint256_t& element) { - std::vector limbs; - limbs.push_back(element.slice(0, num_limb_bits)); - limbs.push_back(element.slice(num_limb_bits, num_limb_bits * 2)); - limbs.push_back(element.slice(num_limb_bits * 2, num_limb_bits * 3)); - limbs.push_back(element.slice(num_limb_bits * 3, num_limb_bits * 4)); - return limbs; - }; - - std::vector preimage_data; - preimage_data.emplace_back(composer_type); - preimage_data.emplace_back(compressed_domain); - preimage_data.emplace_back(num_public_inputs); + barretenberg::evaluation_domain eval_domain = evaluation_domain(circuit_size); + + std::vector preimage_data; + + preimage_data.push_back(static_cast(proof_system::ComposerType(composer_type))); + + const uint256_t domain = eval_domain.domain; + const uint256_t generator = eval_domain.generator; + const uint256_t public_inputs = num_public_inputs; + + ASSERT(domain < (uint256_t(1) << 32)); + ASSERT(generator < (uint256_t(1) << 16)); + ASSERT(public_inputs < (uint256_t(1) << 32)); + + write(preimage_data, static_cast(uint256_t(generator))); + write(preimage_data, static_cast(uint256_t(domain))); + 
write(preimage_data, static_cast(public_inputs)); for (const auto& [tag, selector] : commitments) { - const auto x_limbs = split_bigfield_limbs(selector.x); - const auto y_limbs = split_bigfield_limbs(selector.y); - - preimage_data.push_back(x_limbs[0]); - preimage_data.push_back(x_limbs[1]); - preimage_data.push_back(x_limbs[2]); - preimage_data.push_back(x_limbs[3]); - - preimage_data.push_back(y_limbs[0]); - preimage_data.push_back(y_limbs[1]); - preimage_data.push_back(y_limbs[2]); - preimage_data.push_back(y_limbs[3]); + write(preimage_data, selector.y); + write(preimage_data, selector.x); } + write(preimage_data, eval_domain.root); + barretenberg::fr compressed_key; - if (proof_system::ComposerType(composer_type) == proof_system::ComposerType::PLOOKUP) { - compressed_key = crypto::pedersen_commitment::lookup::compress_native(preimage_data, hash_index); + if (proof_system::ComposerType(composer_type) == ComposerType::PLOOKUP) { + compressed_key = from_buffer( + crypto::pedersen_commitment::lookup::compress_native(preimage_data, hash_index)); } else { compressed_key = crypto::pedersen_commitment::compress_native(preimage_data, hash_index); } diff --git a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp index 569ad0ca7f..52812f354c 100644 --- a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp +++ b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp @@ -1355,7 +1355,7 @@ std::array UltraCircuitConstructor::decompose_non_native_field_doub * @details The data queued represents a non-native field multiplication identity a * b = q * p + r, * where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables. 
* - * Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would + * Without this queue some functions, such as proof_system::plonk::stdlib::element::multiple_montgomery_ladder, would * duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove * duplicates in the circuit finishing stage of the proving key computation. * @@ -1365,7 +1365,7 @@ std::array UltraCircuitConstructor::decompose_non_native_field_doub * * N.B.: This method does NOT evaluate the prime field component of non-native field multiplications. **/ -std::array UltraCircuitConstructor::queue_non_native_field_multiplication( +std::array UltraCircuitConstructor::evaluate_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder) { @@ -1393,10 +1393,11 @@ std::array UltraCircuitConstructor::queue_non_native_field_multipli get_variable(input.r[2]), get_variable(input.r[3]), }; - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << (3 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + constexpr barretenberg::fr LIMB_RSHIFT = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_RSHIFT_2 = barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); @@ -1444,17 +1445,81 @@ std::array UltraCircuitConstructor::queue_non_native_field_multipli range_constrain_two_limbs(input.q[0], input.q[1]); range_constrain_two_limbs(input.q[2], input.q[3]); } - // Add witnesses into the multiplication cache - // (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) - cached_non_native_field_multiplication 
cache_entry{ - .a = input.a, - .b = input.b, - .q = input.q, - .r = input.r, - .cross_terms = { lo_0_idx, lo_1_idx, hi_0_idx, hi_1_idx, hi_2_idx, hi_3_idx }, - .neg_modulus = input.neg_modulus, - }; - cached_non_native_field_multiplications.emplace_back(cache_entry); + + // product gate 1 + // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 + create_big_add_gate({ input.q[0], + input.q[1], + input.r[1], + lo_1_idx, + input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, + input.neg_modulus[0] * LIMB_SHIFT, + -LIMB_SHIFT, + -LIMB_SHIFT.sqr(), + 0 }, + true); + + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[0]); + w_4.emplace_back(lo_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); + ++num_gates; + w_l.emplace_back(input.a[0]); + w_r.emplace_back(input.b[0]); + w_o.emplace_back(input.a[3]); + w_4.emplace_back(input.b[3]); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); + ++num_gates; + w_l.emplace_back(input.a[2]); + w_r.emplace_back(input.b[2]); + w_o.emplace_back(input.r[3]); + w_4.emplace_back(hi_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); + ++num_gates; + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[2]); + w_4.emplace_back(hi_1_idx); + apply_aux_selectors(AUX_SELECTORS::NONE); + ++num_gates; + + /** + * product gate 6 + * + * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 + * + **/ + create_big_add_gate( + { + input.q[2], + input.q[3], + lo_1_idx, + hi_1_idx, + -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], + -input.neg_modulus[0] * LIMB_SHIFT, + -1, + -1, + 0, + }, + true); + + /** + * product gate 7 + * + * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b + **/ + create_big_add_gate({ + hi_3_idx, + input.q[0], + input.q[1], + hi_2_idx, + -1, + input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, + input.neg_modulus[2] * LIMB_RSHIFT 
+ input.neg_modulus[1] * LIMB_RSHIFT_2, + LIMB_RSHIFT_2, + 0, + }); return std::array{ lo_1_idx, hi_3_idx }; } @@ -1466,46 +1531,29 @@ std::array UltraCircuitConstructor::queue_non_native_field_multipli */ void UltraCircuitConstructor::process_non_native_field_multiplications() { - std::sort(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); - - auto last = - std::unique(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); + for (size_t i = 0; i < cached_partial_non_native_field_multiplications.size(); ++i) { + auto& c = cached_partial_non_native_field_multiplications[i]; + for (size_t j = 0; j < 5; ++j) { + c.a[j] = real_variable_index[c.a[j]]; + c.b[j] = real_variable_index[c.b[j]]; + } + } + std::sort(cached_partial_non_native_field_multiplications.begin(), + cached_partial_non_native_field_multiplications.end()); - auto it = cached_non_native_field_multiplications.begin(); + auto last = std::unique(cached_partial_non_native_field_multiplications.begin(), + cached_partial_non_native_field_multiplications.end()); - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; - constexpr barretenberg::fr LIMB_RSHIFT = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); - constexpr barretenberg::fr LIMB_RSHIFT_2 = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); + auto it = cached_partial_non_native_field_multiplications.begin(); // iterate over the cached items and create constraints while (it != last) { const auto input = *it; - const uint32_t lo_0_idx = input.cross_terms.lo_0_idx; - const uint32_t lo_1_idx = input.cross_terms.lo_1_idx; - const uint32_t hi_0_idx = input.cross_terms.hi_0_idx; - const uint32_t hi_1_idx = input.cross_terms.hi_1_idx; - const uint32_t hi_2_idx = input.cross_terms.hi_2_idx; - const uint32_t hi_3_idx = 
input.cross_terms.hi_3_idx; - - // product gate 1 - // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 - create_big_add_gate({ input.q[0], - input.q[1], - input.r[1], - lo_1_idx, - input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, - input.neg_modulus[0] * LIMB_SHIFT, - -LIMB_SHIFT, - -LIMB_SHIFT.sqr(), - 0 }, - true); w_l.emplace_back(input.a[1]); w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[0]); - w_4.emplace_back(lo_0_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.lo_0); apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); ++num_gates; w_l.emplace_back(input.a[0]); @@ -1516,53 +1564,16 @@ void UltraCircuitConstructor::process_non_native_field_multiplications() ++num_gates; w_l.emplace_back(input.a[2]); w_r.emplace_back(input.b[2]); - w_o.emplace_back(input.r[3]); - w_4.emplace_back(hi_0_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.hi_0); apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); ++num_gates; w_l.emplace_back(input.a[1]); w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[2]); - w_4.emplace_back(hi_1_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.hi_1); apply_aux_selectors(AUX_SELECTORS::NONE); ++num_gates; - - /** - * product gate 6 - * - * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 - * - **/ - create_big_add_gate( - { - input.q[2], - input.q[3], - lo_1_idx, - hi_1_idx, - -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], - -input.neg_modulus[0] * LIMB_SHIFT, - -1, - -1, - 0, - }, - true); - - /** - * product gate 7 - * - * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b - **/ - create_big_add_gate({ - hi_3_idx, - input.q[0], - input.q[1], - hi_2_idx, - -1, - input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, - input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, - LIMB_RSHIFT_2, - 0, - }); ++it; } } @@ -1574,7 +1585,7 @@ void 
UltraCircuitConstructor::process_non_native_field_multiplications() * limbs with size DEFAULT_NON_NATIVE_FIELD_LIMB_BITS * **/ -std::array UltraCircuitConstructor::evaluate_partial_non_native_field_multiplication( +std::array UltraCircuitConstructor::queue_partial_non_native_field_multiplication( const non_native_field_witnesses& input) { @@ -1602,30 +1613,16 @@ std::array UltraCircuitConstructor::evaluate_partial_non_native_fie const uint32_t hi_0_idx = add_variable(hi_0); const uint32_t hi_1_idx = add_variable(hi_1); - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(lo_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); - ++num_gates; - w_l.emplace_back(input.a[0]); - w_r.emplace_back(input.b[0]); - w_o.emplace_back(input.a[3]); - w_4.emplace_back(input.b[3]); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); - ++num_gates; - w_l.emplace_back(input.a[2]); - w_r.emplace_back(input.b[2]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(hi_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); - ++num_gates; - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(hi_1_idx); - apply_aux_selectors(AUX_SELECTORS::NONE); - ++num_gates; + // Add witnesses into the multiplication cache + // (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) + cached_partial_non_native_field_multiplication cache_entry{ + .a = input.a, + .b = input.b, + .lo_0 = lo_0_idx, + .hi_0 = hi_0_idx, + .hi_1 = hi_1_idx, + }; + cached_partial_non_native_field_multiplications.emplace_back(cache_entry); return std::array{ lo_0_idx, hi_1_idx }; } diff --git a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp index 84b18b13bc..21bc952393 100644 --- 
a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp +++ b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp @@ -167,45 +167,29 @@ class UltraCircuitConstructor : public CircuitConstructorBase ultra_selector_names() - { - std::vector result{ "q_m", "q_c", "q_1", "q_2", "q_3", "q_4", - "q_arith", "q_sort", "q_elliptic", "q_aux", "table_type" }; - return result; - } - struct non_native_field_multiplication_cross_terms { - uint32_t lo_0_idx; - uint32_t lo_1_idx; - uint32_t hi_0_idx; - uint32_t hi_1_idx; - uint32_t hi_2_idx; - uint32_t hi_3_idx; - }; /** - * @brief Used to store instructions to create non_native_field_multiplication gates. + * @brief Used to store instructions to create partial_non_native_field_multiplication gates. * We want to cache these (and remove duplicates) as the stdlib code can end up multiplying the same inputs * repeatedly. */ - struct cached_non_native_field_multiplication { + struct cached_partial_non_native_field_multiplication { std::array a; std::array b; - std::array q; - std::array r; - non_native_field_multiplication_cross_terms cross_terms; - std::array neg_modulus; + barretenberg::fr lo_0; + barretenberg::fr hi_0; + barretenberg::fr hi_1; - bool operator==(const cached_non_native_field_multiplication& other) const + bool operator==(const cached_partial_non_native_field_multiplication& other) const { bool valid = true; for (size_t i = 0; i < 5; ++i) { valid = valid && (a[i] == other.a[i]); valid = valid && (b[i] == other.b[i]); - valid = valid && (q[i] == other.q[i]); - valid = valid && (r[i] == other.r[i]); } return valid; } - bool operator<(const cached_non_native_field_multiplication& other) const + + bool operator<(const cached_partial_non_native_field_multiplication& other) const { if (a < other.a) { return true; @@ -214,27 +198,33 @@ class UltraCircuitConstructor : public CircuitConstructorBase ultra_selector_names() + { + std::vector result{ "q_m", "q_c", 
"q_1", "q_2", "q_3", "q_4", + "q_arith", "q_sort", "q_elliptic", "q_aux", "table_type" }; + return result; + } + struct non_native_field_multiplication_cross_terms { + uint32_t lo_0_idx; + uint32_t lo_1_idx; + uint32_t hi_0_idx; + uint32_t hi_1_idx; + uint32_t hi_2_idx; + uint32_t hi_3_idx; + }; + /** - * @brief CircuitDataBackup is a structure we use to store all the information about the circuit that is needed to - * restore it back to a pre-finalized state - * @details In check_circuit method in UltraCircuitConstructor we want to check that the whole circuit works, but - * ultra circuits need to have ram, rom and range gates added in the end for the check to be complete as well as the - * set permutation check, so we finalize the circuit when we check it. This structure allows us to restore the - * circuit to the state before the finalization. + * @brief CircuitDataBackup is a structure we use to store all the information about the circuit that is needed + * to restore it back to a pre-finalized state + * @details In check_circuit method in UltraCircuitConstructor we want to check that the whole circuit works, + * but ultra circuits need to have ram, rom and range gates added in the end for the check to be complete as + * well as the set permutation check, so we finalize the circuit when we check it. This structure allows us to + * restore the circuit to the state before the finalization. 
*/ struct CircuitDataBackup { std::vector public_inputs; @@ -272,8 +262,8 @@ class UltraCircuitConstructor : public CircuitConstructorBase memory_write_records; std::map range_lists; - std::vector - cached_non_native_field_multiplications; + std::vector + cached_partial_non_native_field_multiplications; size_t num_gates; bool circuit_finalised = false; @@ -326,14 +316,14 @@ class UltraCircuitConstructor : public CircuitConstructorBaserange_lists; stored_state.circuit_finalised = circuit_constructor->circuit_finalised; stored_state.num_gates = circuit_constructor->num_gates; - stored_state.cached_non_native_field_multiplications = - circuit_constructor->cached_non_native_field_multiplications; + stored_state.cached_partial_non_native_field_multiplications = + circuit_constructor->cached_partial_non_native_field_multiplications; return stored_state; } @@ -398,7 +388,8 @@ class UltraCircuitConstructor : public CircuitConstructorBaserange_lists = range_lists; circuit_constructor->circuit_finalised = circuit_finalised; circuit_constructor->num_gates = num_gates; - circuit_constructor->cached_non_native_field_multiplications = cached_non_native_field_multiplications; + circuit_constructor->cached_partial_non_native_field_multiplications = + cached_partial_non_native_field_multiplications; circuit_constructor->w_l.resize(num_gates); circuit_constructor->w_r.resize(num_gates); circuit_constructor->w_o.resize(num_gates); @@ -511,8 +502,8 @@ class UltraCircuitConstructor : public CircuitConstructorBase ram_arrays; @@ -572,7 +564,7 @@ class UltraCircuitConstructor : public CircuitConstructorBase memory_write_records; - std::vector cached_non_native_field_multiplications; + std::vector cached_partial_non_native_field_multiplications; void process_non_native_field_multiplications(); @@ -638,11 +630,11 @@ class UltraCircuitConstructor : public CircuitConstructorBase size imbalance between sorted and non-sorted sets. 
Checking for this - * and throwing an error would require a refactor of the Composer to catelog all 'orphan' variables not - * assigned to gates. + * this range constraint will increase the size of the 'sorted set' of range-constrained integers + *by 1. The 'non-sorted set' of range-constrained integers is a subset of the wire indices of all + *arithmetic gates. No arithemtic gate => size imbalance between sorted and non-sorted sets. Checking + *for this and throwing an error would require a refactor of the Composer to catelog all 'orphan' + *variables not assigned to gates. **/ create_new_range_constraint(variable_index, 1ULL << num_bits, msg); } else { @@ -704,8 +696,8 @@ class UltraCircuitConstructor : public CircuitConstructorBase decompose_non_native_field_double_width_limb( const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array queue_non_native_field_multiplication( + std::array evaluate_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true); - std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& input); + std::array queue_partial_non_native_field_multiplication(const non_native_field_witnesses& input); typedef std::pair scaled_witness; typedef std::tuple add_simple; std::array evaluate_non_native_field_subtraction( diff --git a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.test.cpp b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.test.cpp index aea6e78bde..1b9706e756 100644 --- a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.test.cpp +++ b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.test.cpp @@ -643,7 +643,7 @@ TEST(ultra_circuit_constructor, non_native_field_multiplication) proof_system::UltraCircuitConstructor::non_native_field_witnesses inputs{ 
a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)), }; - const auto [lo_1_idx, hi_1_idx] = circuit_constructor.queue_non_native_field_multiplication(inputs); + const auto [lo_1_idx, hi_1_idx] = circuit_constructor.evaluate_non_native_field_multiplication(inputs); circuit_constructor.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70); auto saved_state = UltraCircuitConstructor::CircuitDataBackup::store_full_state(circuit_constructor); diff --git a/cpp/src/barretenberg/solidity_helpers/circuits/recursive_circuit.hpp b/cpp/src/barretenberg/solidity_helpers/circuits/recursive_circuit.hpp index a6dcc0b87c..7274600b84 100644 --- a/cpp/src/barretenberg/solidity_helpers/circuits/recursive_circuit.hpp +++ b/cpp/src/barretenberg/solidity_helpers/circuits/recursive_circuit.hpp @@ -125,7 +125,7 @@ template class RecursiveCircuit { throw_or_abort("inner proof result != 1"); } - circuit_output.aggregation_state.add_proof_outputs_as_public_inputs(); + circuit_output.aggregation_state.assign_object_to_proof_outputs(); if (outer_composer.failed()) { throw_or_abort("outer composer failed"); diff --git a/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.cpp b/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.cpp index 6dbc70e5d1..c5ba72f8d3 100644 --- a/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.cpp +++ b/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.cpp @@ -14,19 +14,34 @@ using namespace plookup; using namespace barretenberg; template -point pedersen_plookup_commitment::compress_to_point(const field_t& left, const field_t& right) +point pedersen_plookup_commitment::compress_to_point(const field_t& left, + const field_t& right, + const bool skip_rhs_range_check) { auto p2 = pedersen_plookup_hash::hash_single(left, false); - auto p1 = pedersen_plookup_hash::hash_single(right, true); + auto p1 = pedersen_plookup_hash::hash_single(right, true, skip_rhs_range_check); return 
pedersen_plookup_hash::add_points(p1, p2); } -template field_t pedersen_plookup_commitment::compress(const field_t& left, const field_t& right) +template +field_t pedersen_plookup_commitment::compress(const field_t& left, + const field_t& right, + const bool skip_rhs_range_check) { - return compress_to_point(left, right).x; + return compress_to_point(left, right, skip_rhs_range_check).x; } +/** + * @brief Compress a vector of field elements into a grumpkin point. + * This serves as the basis for a collision-resistant hash function. + * Note that this does NOT produce a hash that can be modelled as a random oracle. + * + * @tparam C + * @param inputs + * @param iv initialization vector + * @return point + */ template point pedersen_plookup_commitment::merkle_damgard_compress(const std::vector& inputs, const field_t& iv) { @@ -34,13 +49,19 @@ point pedersen_plookup_commitment::merkle_damgard_compress(const std::vect return point{ 0, 0 }; } + // The first two inputs to the Merkle-Damgard construction are the initialization vector and the number of elements + // being hashed. Including the length ensures that hashes of different lengths cannot collide. Starting the hash + // with these 2 inputs is optimal in the case that the IV is constant. i.e. the 1st 3 calls to `hash_single` are + // over constants and cost no constraints. 
r = H(iv, num_inputs) is constant and the 1st half of H(r, inputs[0]) is + // also constant auto result = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_IV, iv)[ColumnIdx::C2][0]; auto num_inputs = inputs.size(); - for (size_t i = 0; i < num_inputs; i++) { + result = compress(result, field_t(num_inputs)); + for (size_t i = 0; i < num_inputs - 1; i++) { result = compress(result, inputs[i]); } - return compress_to_point(result, field_t(num_inputs)); + return compress_to_point(result, inputs[num_inputs - 1]); } template @@ -53,7 +74,9 @@ point pedersen_plookup_commitment::merkle_damgard_compress(const std::vect } auto result = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_IV, 0)[ColumnIdx::C2][0]; - for (size_t i = 0; i < 2 * num_inputs; i++) { + result = compress(result, field_t(num_inputs)); + + for (size_t i = 0; i < 2 * num_inputs - 1; i++) { if ((i & 1) == 0) { auto iv_result = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_IV, ivs[i >> 1])[ColumnIdx::C2][0]; @@ -63,7 +86,25 @@ point pedersen_plookup_commitment::merkle_damgard_compress(const std::vect } } - return compress_to_point(result, field_t(num_inputs)); + return compress_to_point(result, inputs[num_inputs - 1]); +} + +template +point pedersen_plookup_commitment::merkle_damgard_compress_with_relaxed_range_constraints( + const std::vector& inputs, const field_t& iv) +{ + if (inputs.size() == 0) { + return point{ 0, 0 }; + } + + auto result = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_IV, iv)[ColumnIdx::C2][0]; + auto num_inputs = inputs.size(); + result = compress(result, field_t(num_inputs)); + for (size_t i = 0; i < num_inputs - 1; i++) { + result = compress(result, inputs[i], true); + } + + return compress_to_point(result, inputs[num_inputs - 1], true); } template @@ -102,6 +143,13 @@ point pedersen_plookup_commitment::commit(const std::vector& inpu return merkle_damgard_compress(inputs, field_t(hash_index)); } +template +point 
pedersen_plookup_commitment::commit_with_relaxed_range_constraints(const std::vector& inputs, + const size_t hash_index) +{ + return merkle_damgard_compress_with_relaxed_range_constraints(inputs, field_t(hash_index)); +} + template point pedersen_plookup_commitment::commit(const std::vector& inputs, const std::vector& hash_indices) @@ -114,6 +162,24 @@ point pedersen_plookup_commitment::commit(const std::vector& inpu return merkle_damgard_compress(inputs, hash_indices_); } +/** + * @brief Calls `compress` but instructs the Pedersen hash method `hash_single` + * to not apply range constraints on the input elements. + * + * Use this method when the input elements are known to be <= 2^252 + * + * @tparam C + * @param inputs + * @param hash_index + * @return field_t + */ +template +field_t pedersen_plookup_commitment::compress_with_relaxed_range_constraints(const std::vector& inputs, + const size_t hash_index) +{ + return commit_with_relaxed_range_constraints(inputs, hash_index).x; +} + template field_t pedersen_plookup_commitment::compress(const std::vector& inputs, const size_t hash_index) { diff --git a/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.hpp b/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.hpp index 42cc8252d2..90076a3035 100644 --- a/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.hpp +++ b/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.hpp @@ -17,14 +17,18 @@ template class pedersen_plookup_commitment { public: static point commit(const std::vector& inputs, const size_t hash_index = 0); static point commit(const std::vector& inputs, const std::vector& hash_indices); + static point commit_with_relaxed_range_constraints(const std::vector& inputs, const size_t hash_index = 0); - static field_t compress(const field_t& left, const field_t& right); + static field_t compress(const field_t& left, const field_t& right, const bool skip_rhs_range_check = false); static field_t compress(const 
std::vector& inputs, const size_t hash_index = 0); static field_t compress(const packed_byte_array& input) { return compress(input.get_limbs()); } static field_t compress(const std::vector& inputs, const std::vector& hash_indices); static field_t compress(const std::vector>& input_pairs); + static field_t compress_with_relaxed_range_constraints(const std::vector& inputs, + const size_t hash_index = 0); + template static field_t compress(const std::array& inputs) { std::vector in(inputs.begin(), inputs.end()); @@ -33,9 +37,12 @@ template class pedersen_plookup_commitment { static point merkle_damgard_compress(const std::vector& inputs, const field_t& iv); static point merkle_damgard_compress(const std::vector& inputs, const std::vector& ivs); + static point merkle_damgard_compress_with_relaxed_range_constraints(const std::vector& inputs, + const field_t& iv); + static point merkle_damgard_tree_compress(const std::vector& inputs, const std::vector& ivs); - static point compress_to_point(const field_t& left, const field_t& right); + static point compress_to_point(const field_t& left, const field_t& right, const bool skip_rhs_range_check = false); }; extern template class pedersen_plookup_commitment; diff --git a/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.cpp b/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.cpp index 2801648f54..9b5e84cda0 100644 --- a/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.cpp +++ b/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.cpp @@ -79,7 +79,8 @@ point pedersen_plookup_hash::add_points(const point& p1, const point& p2, /** * Hash a single field element using lookup tables. 
*/ -template point pedersen_plookup_hash::hash_single(const field_t& scalar, const bool parity) +template +point pedersen_plookup_hash::hash_single(const field_t& scalar, const bool parity, const bool skip_range_check) { if (scalar.is_constant()) { C* ctx = scalar.get_context(); @@ -93,6 +94,10 @@ template point pedersen_plookup_hash::hash_single(const field const field_t y_lo = witness_t(ctx, uint256_t(scalar.get_value()).slice(0, 126)); ReadData lookup_hi, lookup_lo; + + // If `skip_range_check = true`, this implies the input scalar is 252 bits maximum. + // i.e. we do not require a check that scalar slice sums < p . + // We can also likely use a multitable with 1 less lookup if (parity) { lookup_lo = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_RIGHT_LO, y_lo); lookup_hi = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_RIGHT_HI, y_hi); @@ -101,17 +106,35 @@ template point pedersen_plookup_hash::hash_single(const field lookup_hi = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_LEFT_HI, y_hi); } - // Check if (r_hi - y_hi) is 128 bits and if (r_hi - y_hi) == 0, then - // (r_lo - y_lo) must be 126 bits. 
- constexpr uint256_t modulus = fr::modulus; - const field_t r_lo = witness_t(ctx, modulus.slice(0, 126)); - const field_t r_hi = witness_t(ctx, modulus.slice(126, 256)); + // validate slices equal scalar + // TODO(suyash?): can remove this gate if we use a single lookup accumulator for HI + LO combined + // can recover y_hi, y_lo from Column 1 of the the lookup accumulator output + scalar.add_two(-y_hi * (uint256_t(1) << 126), -y_lo).assert_equal(0); + + // if skip_range_check = true we assume input max size is 252 bits => final lookup scalar slice value must be 0 + if (skip_range_check) { + lookup_hi[ColumnIdx::C1][lookup_hi[ColumnIdx::C1].size() - 1].assert_equal(0); + } + if (!skip_range_check) { + // Check that y_hi * 2^126 + y_lo < fr::modulus when evaluated over the integers + constexpr uint256_t modulus = fr::modulus; + const field_t r_lo = field_t(ctx, modulus.slice(0, 126)); + const field_t r_hi = field_t(ctx, modulus.slice(126, 256)); - const field_t term_hi = r_hi - y_hi; - const field_t term_lo = (r_lo - y_lo) * field_t(term_hi == field_t(0)); - term_hi.normalize().create_range_constraint(128); - term_lo.normalize().create_range_constraint(126); + bool need_borrow = (uint256_t(y_lo.get_value()) > uint256_t(r_lo.get_value())); + field_t borrow = field_t::from_witness(ctx, need_borrow); + // directly call `create_new_range_constraint` to avoid creating an arithmetic gate + scalar.get_context()->create_new_range_constraint(borrow.get_witness_index(), 1, "borrow"); + + // Hi range check = r_hi - y_hi - borrow + // Lo range check = r_lo - y_lo + borrow * 2^{126} + field_t hi = (r_hi - y_hi) - borrow; + field_t lo = (r_lo - y_lo) + (borrow * (uint256_t(1) << 126)); + + hi.create_range_constraint(128); + lo.create_range_constraint(126); + } const size_t num_lookups_lo = lookup_lo[ColumnIdx::C1].size(); const size_t num_lookups_hi = lookup_hi[ColumnIdx::C1].size(); diff --git a/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.hpp 
b/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.hpp index 5d099c5c3f..2467e3fdb5 100644 --- a/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.hpp +++ b/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.hpp @@ -23,7 +23,7 @@ template class pedersen_plookup_hash { public: static point add_points(const point& p1, const point& p2, const AddType add_type = ONE); - static point hash_single(const field_t& in, const bool parity); + static point hash_single(const field_t& in, const bool parity, const bool skip_range_check = false); static field_t hash_multiple(const std::vector& in, const size_t hash_index = 0); }; diff --git a/cpp/src/barretenberg/stdlib/primitives/bigfield/bigfield_impl.hpp b/cpp/src/barretenberg/stdlib/primitives/bigfield/bigfield_impl.hpp index d324bcc4af..96d4c022df 100644 --- a/cpp/src/barretenberg/stdlib/primitives/bigfield/bigfield_impl.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/bigfield/bigfield_impl.hpp @@ -593,18 +593,19 @@ template bigfield bigfield::operator-(const result.binary_basis_limbs[3].element = binary_basis_limbs[3].element + barretenberg::fr(to_add_3); if constexpr (C::type == ComposerType::PLOOKUP) { - if (result.prime_basis_limb.multiplicative_constant == 1 && - other.prime_basis_limb.multiplicative_constant == 1 && !result.is_constant() && !other.is_constant()) { + if (prime_basis_limb.multiplicative_constant == 1 && other.prime_basis_limb.multiplicative_constant == 1 && + !is_constant() && !other.is_constant()) { bool limbconst = result.binary_basis_limbs[0].element.is_constant(); limbconst = limbconst || result.binary_basis_limbs[1].element.is_constant(); limbconst = limbconst || result.binary_basis_limbs[2].element.is_constant(); limbconst = limbconst || result.binary_basis_limbs[3].element.is_constant(); - limbconst = limbconst || result.prime_basis_limb.is_constant(); + limbconst = limbconst || prime_basis_limb.is_constant(); limbconst = limbconst || 
other.binary_basis_limbs[0].element.is_constant(); limbconst = limbconst || other.binary_basis_limbs[1].element.is_constant(); limbconst = limbconst || other.binary_basis_limbs[2].element.is_constant(); limbconst = limbconst || other.binary_basis_limbs[3].element.is_constant(); limbconst = limbconst || other.prime_basis_limb.is_constant(); + limbconst = limbconst || (prime_basis_limb.witness_index == other.prime_basis_limb.witness_index); if (!limbconst) { std::pair x0{ result.binary_basis_limbs[0].element.witness_index, binary_basis_limbs[0].element.multiplicative_constant }; @@ -631,10 +632,11 @@ template bigfield bigfield::operator-(const barretenberg::fr c3(result.binary_basis_limbs[3].element.additive_constant - other.binary_basis_limbs[3].element.additive_constant); - uint32_t xp(result.prime_basis_limb.witness_index); + uint32_t xp(prime_basis_limb.witness_index); uint32_t yp(other.prime_basis_limb.witness_index); - barretenberg::fr cp(result.prime_basis_limb.additive_constant - - other.prime_basis_limb.additive_constant); + barretenberg::fr cp(prime_basis_limb.additive_constant - other.prime_basis_limb.additive_constant); + uint512_t constant_to_add_mod_p = (constant_to_add) % prime_basis.modulus; + cp += barretenberg::fr(constant_to_add_mod_p.lo); const auto output_witnesses = ctx->evaluate_non_native_field_subtraction( { x0, y0, c0 }, { x1, y1, c1 }, { x2, y2, c2 }, { x3, y3, c3 }, { xp, yp, cp }); @@ -1982,7 +1984,7 @@ void bigfield::unsafe_evaluate_multiply_add(const bigfield& input_left, modulus, }; // N.B. 
this method also evaluates the prime field component of the non-native field mul - const auto [lo_idx, hi_idx] = ctx->queue_non_native_field_multiplication(witnesses, false); + const auto [lo_idx, hi_idx] = ctx->evaluate_non_native_field_multiplication(witnesses, false); barretenberg::fr neg_prime = -barretenberg::fr(uint256_t(target_basis.modulus)); field_t::evaluate_polynomial_identity(left.prime_basis_limb, @@ -2267,7 +2269,7 @@ void bigfield::unsafe_evaluate_multiple_multiply_add(const std::vector> limb_0_accumulator; std::vector> limb_2_accumulator; std::vector> prime_limb_accumulator; @@ -2320,7 +2322,7 @@ void bigfield::unsafe_evaluate_multiple_multiply_add(const std::vectorevaluate_partial_non_native_field_multiplication(mul_witnesses); + const auto [lo_2_idx, hi_2_idx] = ctx->queue_partial_non_native_field_multiplication(mul_witnesses); field_t lo_2 = field_t::from_witness_index(ctx, lo_2_idx); field_t hi_2 = field_t::from_witness_index(ctx, hi_2_idx); @@ -2416,7 +2418,7 @@ void bigfield::unsafe_evaluate_multiple_multiply_add(const std::vectorqueue_non_native_field_multiplication(witnesses, false); + const auto [lo_1_idx, hi_1_idx] = ctx->evaluate_non_native_field_multiplication(witnesses, false); barretenberg::fr neg_prime = -barretenberg::fr(uint256_t(target_basis.modulus)); diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp index 29d8e472c1..4330c5b6b4 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp @@ -50,15 +50,15 @@ template class element { void validate_on_curve() const { - Fq xx = x.sqr(); - Fq rhs = y.sqr(); Fq b(get_context(), uint256_t(NativeGroup::curve_b)); - Fq lhs = xx.madd(x, { b }); - if constexpr (NativeGroup::has_a) { + if constexpr (!NativeGroup::has_a) { + // we validate y^2 = x^3 + b by setting "fix_remainder_zero = true" when calling mult_madd + 
Fq::mult_madd({ x.sqr(), y }, { x, -y }, { b }, true); + } else { Fq a(get_context(), uint256_t(NativeGroup::curve_a)); - lhs = lhs + (a * x); + // we validate y^2 = x^3 + ax + b by setting "fix_remainder_zero = true" when calling mult_madd + Fq::mult_madd({ x.sqr(), x, y }, { -x, a, y }, { b }, true); } - lhs.assert_equal(rhs); } static element one(Composer* ctx) @@ -99,6 +99,7 @@ template class element { *this = *this - other; return *this; } + std::array add_sub(const element& other) const; element operator*(const Fr& other) const; @@ -139,7 +140,7 @@ template class element { bool is_element = false; chain_add_accumulator(){}; - explicit chain_add_accumulator(element& input) + explicit chain_add_accumulator(const element& input) { x3_prev = input.x; y3_prev = input.y; @@ -161,10 +162,8 @@ template class element { element montgomery_ladder(const element& other) const; element montgomery_ladder(const chain_add_accumulator& accumulator); - element double_montgomery_ladder(const element& add1, const element& add2) const; - element double_montgomery_ladder(const chain_add_accumulator& add1, const element& add2) const; - element double_montgomery_ladder(const chain_add_accumulator& add1, const chain_add_accumulator& add2) const; - element double_into_montgomery_ladder(const element& to_add) const; + element multiple_montgomery_ladder(const std::vector& to_add) const; + element quadruple_and_add(const std::vector& to_add) const; typename NativeGroup::affine_element get_value() const { @@ -256,12 +255,13 @@ template class element { template ::value>> static std::array, 5> create_group_element_rom_tables( - const std::array& elements); + const std::array& elements, std::array& limb_max); template ::value>> static element read_group_element_rom_tables(const std::array, 5>& tables, - const field_t& index); + const field_t& index, + const std::array& limb_max); static std::pair compute_offset_generators(const size_t num_rounds); @@ -277,6 +277,7 @@ template class element { 
element operator[](const size_t idx) const { return element_table[idx]; } std::array element_table; std::array, 5> coordinates; + std::array limb_max; // tracks the maximum limb size represented in each element_table entry }; template ::value>> @@ -310,7 +311,6 @@ template class element { P1.element_table[i] = P1.element_table[i - 1] + d2; } for (size_t i = 0; i < 8; ++i) { - // TODO: DO WE NEED TO REDUCE THESE ELEMENTS???? P1.element_table[i] = (-P1.element_table[15 - i]); } for (size_t i = 0; i < 16; ++i) { @@ -322,8 +322,8 @@ template class element { endoP1.element_table[i].x = P1.element_table[i].x * beta; endoP1.element_table[15 - i].x = endoP1.element_table[i].x; } - P1.coordinates = create_group_element_rom_tables<16>(P1.element_table); - endoP1.coordinates = create_group_element_rom_tables<16>(endoP1.element_table); + P1.coordinates = create_group_element_rom_tables<16>(P1.element_table, P1.limb_max); + endoP1.coordinates = create_group_element_rom_tables<16>(endoP1.element_table, endoP1.limb_max); auto result = std::make_pair, four_bit_table_plookup<>>( (four_bit_table_plookup<>)P1, (four_bit_table_plookup<>)endoP1); return result; @@ -391,6 +391,7 @@ template class element { std::array element_table; std::array, 5> coordinates; + std::array limb_max; }; using twin_lookup_table = typename std:: @@ -418,10 +419,10 @@ template class element { endo_table.element_table[i + 8].x = base_table[7 - i].x * beta; endo_table.element_table[i + 8].y = base_table[7 - i].y; - endo_table.element_table[7 - i] = (-endo_table.element_table[i + 8]).reduce(); + endo_table.element_table[7 - i] = (-endo_table.element_table[i + 8]); } - endo_table.coordinates = create_group_element_rom_tables<16>(endo_table.element_table); + endo_table.coordinates = create_group_element_rom_tables<16>(endo_table.element_table, endo_table.limb_max); } else { std::array endo_inputs(inputs); for (auto& input : endo_inputs) { @@ -451,10 +452,10 @@ template class element { endo_table.element_table[i + 
16].x = base_table[15 - i].x * beta; endo_table.element_table[i + 16].y = base_table[15 - i].y; - endo_table.element_table[15 - i] = (-endo_table.element_table[i + 16]).reduce(); + endo_table.element_table[15 - i] = (-endo_table.element_table[i + 16]); } - endo_table.coordinates = create_group_element_rom_tables<32>(endo_table.element_table); + endo_table.coordinates = create_group_element_rom_tables<32>(endo_table.element_table, endo_table.limb_max); } return std::make_pair, lookup_table_plookup<5>>((lookup_table_plookup<5>)base_table, (lookup_table_plookup<5>)endo_table); @@ -472,11 +473,16 @@ template class element { num_points = points.size(); num_fives = num_points / 5; + // size-6 table is expensive and only benefits us if creating them reduces the number of total tables if (num_fives * 5 == (num_points - 1)) { num_fives -= 1; num_sixes = 1; - } else { - num_sixes = 0; + } else if (num_fives * 5 == (num_points - 2) && num_fives >= 2) { + num_fives -= 2; + num_sixes = 2; + } else if (num_fives * 5 == (num_points - 3) && num_fives >= 3) { + num_fives -= 3; + num_sixes = 3; } has_quad = ((num_fives * 5 + num_sixes * 6) < num_points - 3) && (num_points >= 4); @@ -490,33 +496,40 @@ template class element { has_singleton = num_points != ((num_fives * 5 + num_sixes * 6) + ((size_t)has_quad * 4) + ((size_t)has_triple * 3) + ((size_t)has_twin * 2)); + size_t offset = 0; + for (size_t i = 0; i < num_sixes; ++i) { + six_tables.push_back(lookup_table_plookup<6>({ + points[offset + 6 * i], + points[offset + 6 * i + 1], + points[offset + 6 * i + 2], + points[offset + 6 * i + 3], + points[offset + 6 * i + 4], + points[offset + 6 * i + 5], + })); + } + offset += 6 * num_sixes; for (size_t i = 0; i < num_fives; ++i) { - five_tables.push_back(lookup_table_plookup<5>( - { points[5 * i], points[5 * i + 1], points[5 * i + 2], points[5 * i + 3], points[5 * i + 4] })); - } - - if (num_sixes == 1) { - six_tables.push_back(lookup_table_plookup<6>({ points[5 * num_fives], - points[5 * 
num_fives + 1], - points[5 * num_fives + 2], - points[5 * num_fives + 3], - points[5 * num_fives + 4], - points[5 * num_fives + 5] })); + five_tables.push_back(lookup_table_plookup<5>({ + points[offset + 5 * i], + points[offset + 5 * i + 1], + points[offset + 5 * i + 2], + points[offset + 5 * i + 3], + points[offset + 5 * i + 4], + })); } + offset += 5 * num_fives; if (has_quad) { - quad_tables.push_back(quad_lookup_table({ points[5 * num_fives], - points[5 * num_fives + 1], - points[5 * num_fives + 2], - points[5 * num_fives + 3] })); + quad_tables.push_back( + quad_lookup_table({ points[offset], points[offset + 1], points[offset + 2], points[offset + 3] })); } if (has_triple) { - triple_tables.push_back(triple_lookup_table( - { points[5 * num_fives], points[5 * num_fives + 1], points[5 * num_fives + 2] })); + triple_tables.push_back( + triple_lookup_table({ points[offset], points[offset + 1], points[offset + 2] })); } if (has_twin) { - twin_tables.push_back(twin_lookup_table({ points[5 * num_fives], points[5 * num_fives + 1] })); + twin_tables.push_back(twin_lookup_table({ points[offset], points[offset + 1] })); } if (has_singleton) { @@ -587,37 +600,36 @@ template class element { element::chain_add_accumulator get_chain_add_accumulator(std::vector>& naf_entries) const { std::vector round_accumulator; + for (size_t j = 0; j < num_sixes; ++j) { + round_accumulator.push_back(six_tables[j].get({ naf_entries[6 * j], + naf_entries[6 * j + 1], + naf_entries[6 * j + 2], + naf_entries[6 * j + 3], + naf_entries[6 * j + 4], + naf_entries[6 * j + 5] })); + } + size_t offset = num_sixes * 6; for (size_t j = 0; j < num_fives; ++j) { - round_accumulator.push_back(five_tables[j].get({ naf_entries[5 * j], - naf_entries[5 * j + 1], - naf_entries[5 * j + 2], - naf_entries[5 * j + 3], - naf_entries[5 * j + 4] })); - } - - if (num_sixes == 1) { - round_accumulator.push_back(six_tables[0].get({ naf_entries[num_fives * 5], - naf_entries[num_fives * 5 + 1], - naf_entries[num_fives * 5 
+ 2], - naf_entries[num_fives * 5 + 3], - naf_entries[num_fives * 5 + 4], - naf_entries[num_fives * 5 + 5] })); + round_accumulator.push_back(five_tables[j].get({ naf_entries[offset + j * 5], + naf_entries[offset + j * 5 + 1], + naf_entries[offset + j * 5 + 2], + naf_entries[offset + j * 5 + 3], + naf_entries[offset + j * 5 + 4] })); } - + offset += num_fives * 5; if (has_quad) { - round_accumulator.push_back(quad_tables[0].get({ naf_entries[num_fives * 5], - naf_entries[num_fives * 5 + 1], - naf_entries[num_fives * 5 + 2], - naf_entries[num_fives * 5 + 3] })); + round_accumulator.push_back(quad_tables[0].get({ naf_entries[offset], + naf_entries[offset + 1], + naf_entries[offset + 2], + naf_entries[offset + 3] })); } if (has_triple) { - round_accumulator.push_back(triple_tables[0].get( - { naf_entries[num_fives * 5], naf_entries[num_fives * 5 + 1], naf_entries[num_fives * 5 + 2] })); + round_accumulator.push_back( + triple_tables[0].get({ naf_entries[offset], naf_entries[offset + 1], naf_entries[offset + 2] })); } if (has_twin) { - round_accumulator.push_back( - twin_tables[0].get({ naf_entries[num_fives * 5], naf_entries[num_fives * 5 + 1] })); + round_accumulator.push_back(twin_tables[0].get({ naf_entries[offset], naf_entries[offset + 1] })); } if (has_singleton) { round_accumulator.push_back(singletons[0].conditional_negate(naf_entries[num_points - 1])); @@ -640,37 +652,37 @@ template class element { element get(std::vector>& naf_entries) const { std::vector round_accumulator; - for (size_t j = 0; j < num_fives; ++j) { - round_accumulator.push_back(five_tables[j].get({ naf_entries[5 * j], - naf_entries[5 * j + 1], - naf_entries[5 * j + 2], - naf_entries[5 * j + 3], - naf_entries[5 * j + 4] })); + for (size_t j = 0; j < num_sixes; ++j) { + round_accumulator.push_back(six_tables[j].get({ naf_entries[6 * j], + naf_entries[6 * j + 1], + naf_entries[6 * j + 2], + naf_entries[6 * j + 3], + naf_entries[6 * j + 4], + naf_entries[6 * j + 5] })); } + size_t offset = 
num_sixes * 6; - if (num_sixes == 1) { - round_accumulator.push_back(six_tables[0].get({ naf_entries[num_fives * 5], - naf_entries[num_fives * 5 + 1], - naf_entries[num_fives * 5 + 2], - naf_entries[num_fives * 5 + 3], - naf_entries[num_fives * 5 + 4], - naf_entries[num_fives * 5 + 5] })); + for (size_t j = 0; j < num_fives; ++j) { + round_accumulator.push_back(five_tables[j].get({ naf_entries[offset + 5 * j], + naf_entries[offset + 5 * j + 1], + naf_entries[offset + 5 * j + 2], + naf_entries[offset + 5 * j + 3], + naf_entries[offset + 5 * j + 4] })); } + offset += num_fives * 5; + if (has_quad) { - round_accumulator.push_back(quad_tables[0].get(naf_entries[num_fives * 5], - naf_entries[num_fives * 5 + 1], - naf_entries[num_fives * 5 + 2], - naf_entries[num_fives * 5 + 3])); + round_accumulator.push_back(quad_tables[0].get( + naf_entries[offset], naf_entries[offset + 1], naf_entries[offset + 2], naf_entries[offset + 3])); } if (has_triple) { - round_accumulator.push_back(triple_tables[0].get( - naf_entries[num_fives * 5], naf_entries[num_fives * 5 + 1], naf_entries[num_fives * 5 + 2])); + round_accumulator.push_back( + triple_tables[0].get(naf_entries[offset], naf_entries[offset + 1], naf_entries[offset + 2])); } if (has_twin) { - round_accumulator.push_back( - twin_tables[0].get(naf_entries[num_fives * 5], naf_entries[num_fives * 5 + 1])); + round_accumulator.push_back(twin_tables[0].get(naf_entries[offset], naf_entries[offset + 1])); } if (has_singleton) { round_accumulator.push_back(singletons[0].conditional_negate(naf_entries[num_points - 1])); @@ -862,67 +874,6 @@ template class element { return element::chain_add_end(accumulator); } - // chain_add_accumulator get_chain_initial_entry() const - // { - // std::vector add_accumulator; - // for (size_t i = 0; i < num_quads; ++i) { - // add_accumulator.push_back(quad_tables[i][0]); - // } - // if (has_twin) { - // add_accumulator.push_back(twin_tables[0][0]); - // } - // if (has_triple) { - // 
add_accumulator.push_back(triple_tables[0][0]); - // } - // if (has_singleton) { - // add_accumulator.push_back(singletons[0]); - // } - // if (add_accumulator.size() >= 2) { - // chain_add_accumulator output = element::chain_add_start(add_accumulator[0], add_accumulator[1]); - // for (size_t i = 2; i < add_accumulator.size(); ++i) { - // output = element::chain_add(add_accumulator[i], output); - // } - // return output; - // } - // return chain_add_accumulator(add_accumulator[0]); - // } - - // element::chain_add_accumulator get_chain_add_accumulator(std::vector>& naf_entries) const - // { - // std::vector round_accumulator; - // for (size_t j = 0; j < num_quads; ++j) { - // round_accumulator.push_back(quad_tables[j].get( - // naf_entries[4 * j], naf_entries[4 * j + 1], naf_entries[4 * j + 2], naf_entries[4 * j + 3])); - // } - - // if (has_triple) { - // round_accumulator.push_back(triple_tables[0].get( - // naf_entries[num_quads * 4], naf_entries[num_quads * 4 + 1], naf_entries[num_quads * 4 + 2])); - // } - // if (has_twin) { - // round_accumulator.push_back( - // twin_tables[0].get(naf_entries[num_quads * 4], naf_entries[num_quads * 4 + 1])); - // } - // if (has_singleton) { - // round_accumulator.push_back(singletons[0].conditional_negate(naf_entries[num_points - 1])); - // } - - // element::chain_add_accumulator accumulator; - // if (round_accumulator.size() == 1) { - // accumulator.x3_prev = round_accumulator[0].x; - // accumulator.y3_prev = round_accumulator[0].y; - // accumulator.is_element = true; - // return accumulator; - // } else if (round_accumulator.size() == 2) { - // return element::chain_add_start(round_accumulator[0], round_accumulator[1]); - // } else { - // accumulator = element::chain_add_start(round_accumulator[0], round_accumulator[1]); - // for (size_t j = 2; j < round_accumulator.size(); ++j) { - // accumulator = element::chain_add(round_accumulator[j], accumulator); - // } - // } - // return (accumulator); - // } std::vector 
quad_tables; std::vector triple_tables; std::vector twin_tables; diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.test.cpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.test.cpp index e3b6576e9f..85b4e2325c 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.test.cpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.test.cpp @@ -415,7 +415,7 @@ template class stdlib_biggroup : public testing::Test { EXPECT_VERIFICATION(composer); } - static void test_double_montgomery_ladder() + static void test_multiple_montgomery_ladder() { Composer composer = Composer(); size_t num_repetitions = 10; @@ -423,19 +423,17 @@ template class stdlib_biggroup : public testing::Test { affine_element acc_small(element::random_element()); element_ct acc_big = element_ct::from_witness(&composer, acc_small); - affine_element add_1_small_0(element::random_element()); - element_ct add_1_big_0 = element_ct::from_witness(&composer, add_1_small_0); - affine_element add_2_small_0(element::random_element()); - element_ct add_2_big_0 = element_ct::from_witness(&composer, add_2_small_0); - - affine_element add_1_small_1(element::random_element()); - element_ct add_1_big_1 = element_ct::from_witness(&composer, add_1_small_1); - affine_element add_2_small_1(element::random_element()); - element_ct add_2_big_1 = element_ct::from_witness(&composer, add_2_small_1); - - typename element_ct::chain_add_accumulator add_1 = element_ct::chain_add_start(add_1_big_0, add_1_big_1); - typename element_ct::chain_add_accumulator add_2 = element_ct::chain_add_start(add_2_big_0, add_2_big_1); - acc_big.double_montgomery_ladder(add_1, add_2); + std::vector to_add; + for (size_t j = 0; j < i; ++j) { + affine_element add_1_small_0(element::random_element()); + element_ct add_1_big_0 = element_ct::from_witness(&composer, add_1_small_0); + affine_element add_2_small_0(element::random_element()); + element_ct add_2_big_0 = element_ct::from_witness(&composer, 
add_2_small_0); + typename element_ct::chain_add_accumulator add_1 = + element_ct::chain_add_start(add_1_big_0, add_2_big_0); + to_add.emplace_back(add_1); + } + acc_big.multiple_montgomery_ladder(to_add); } EXPECT_VERIFICATION(composer); @@ -890,10 +888,10 @@ HEAVY_TYPED_TEST(stdlib_biggroup, chain_add) TestFixture::test_chain_add(); } -HEAVY_TYPED_TEST(stdlib_biggroup, double_montgomery_ladder) +HEAVY_TYPED_TEST(stdlib_biggroup, multiple_montgomery_ladder) { - TestFixture::test_double_montgomery_ladder(); + TestFixture::test_multiple_montgomery_ladder(); } HEAVY_TYPED_TEST(stdlib_biggroup, compute_naf) diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_batch_mul.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_batch_mul.hpp index c237d0dfbb..ebeb0aee5b 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_batch_mul.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_batch_mul.hpp @@ -41,14 +41,11 @@ element element::wnaf_batch_mul(const std::vector to_add; + for (size_t j = 0; j < points.size(); ++j) { + to_add.emplace_back(point_tables[j][wnaf_entries[j][i]]); } - // accumulator = accumulator.dbl(); - // accumulator = accumulator.montgomery_ladder(to_add); - accumulator = accumulator.double_into_montgomery_ladder(to_add); + accumulator = accumulator.quadruple_and_add(to_add); } for (size_t i = 0; i < points.size(); ++i) { diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_bn254.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_bn254.hpp index 834a1b01ef..924cd0d7ec 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_bn254.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_bn254.hpp @@ -117,34 +117,45 @@ element element::bn254_endo_batch_mul_with_generator return to_add; }; - for (size_t i = 1; i < num_rounds / 2; ++i) { + // Perform multiple rounds of the montgomery ladder algoritm per "iteration" of our main loop. 
+ // This is in order to reduce the number of field reductions required when calling `multiple_montgomery_ladder` + constexpr size_t num_rounds_per_iteration = 4; - auto add_1 = get_point_to_add(i * 2 - 1); - auto add_2 = get_point_to_add(i * 2); + // we require that we perform max of one generator per iteration + static_assert(num_rounds_per_iteration < 8); - // TODO update this to work if num_bits is odd - if ((i * 2) % 8 == 0) { - add_1 = element::chain_add(generator_table[generator_wnaf[(i * 2 - 8) / 8]], add_1); - add_1 = element::chain_add(generator_endo_table[generator_endo_wnaf[(i * 2 - 8) / 8]], add_1); - } - if (!add_1.is_element) { - accumulator = accumulator.double_montgomery_ladder(add_1, add_2); - } else { - accumulator = accumulator.double_montgomery_ladder(element(add_1.x3_prev, add_1.y3_prev), - element(add_2.x3_prev, add_2.y3_prev)); + size_t num_iterations = num_rounds / num_rounds_per_iteration; + num_iterations += ((num_iterations * num_rounds_per_iteration) == num_rounds) ? 0 : 1; + const size_t num_rounds_per_final_iteration = + (num_rounds - 1) - ((num_iterations - 1) * num_rounds_per_iteration); + + size_t generator_idx = 0; + for (size_t i = 0; i < num_iterations; ++i) { + + const size_t inner_num_rounds = + (i != num_iterations - 1) ? 
num_rounds_per_iteration : num_rounds_per_final_iteration; + std::vector to_add; + + for (size_t j = 0; j < inner_num_rounds; ++j) { + to_add.emplace_back(get_point_to_add(i * num_rounds_per_iteration + j + 1)); } - } - if ((num_rounds & 0x01ULL) == 0x00ULL) { - auto add_1 = get_point_to_add(num_rounds - 1); - add_1 = element::chain_add(generator_table[generator_wnaf[generator_wnaf.size() - 2]], add_1); - add_1 = element::chain_add(generator_endo_table[generator_endo_wnaf[generator_wnaf.size() - 2]], add_1); - if (add_1.is_element) { - element temp(add_1.x3_prev, add_1.y3_prev); - accumulator = accumulator.montgomery_ladder(temp); - } else { - accumulator = accumulator.montgomery_ladder(add_1); + bool add_generator_this_round = false; + size_t add_idx = 0; + for (size_t j = 0; j < inner_num_rounds; ++j) { + add_generator_this_round = ((i * num_rounds_per_iteration + j) % 8) == 6; + if (add_generator_this_round) { + add_idx = j; + break; + } + } + if (add_generator_this_round) { + to_add[add_idx] = element::chain_add(generator_table[generator_wnaf[generator_idx]], to_add[add_idx]); + to_add[add_idx] = + element::chain_add(generator_endo_table[generator_endo_wnaf[generator_idx]], to_add[add_idx]); + generator_idx++; } + accumulator = accumulator.multiple_montgomery_ladder(to_add); } for (size_t i = 0; i < small_points.size(); ++i) { @@ -333,12 +344,12 @@ element element::bn254_endo_batch_mul(const std::vec * 1. Extract NAF value for bit `2*i - 1` for each scalar multiplier and store in `nafs` vector. * 2. Use `nafs` vector to derive the point that we need (`add_1`) to add into our accumulator. * 3. Repeat the above 2 steps but for bit `2 * i` (`add_2`) - * 4. Compute `accumulator = 4 * accumulator + 2 * add_1 + add_2` using `double_montgomery_ladder` method + * 4. 
Compute `accumulator = 4 * accumulator + 2 * add_1 + add_2` using `multiple_montgomery_ladder` method * * The purpose of the above is to minimize the number of required range checks (vs a simple double and add algo). * - * When computing two iterations of the montgomery ladder algorithm, we can neglect computing the y-coordinate of - *the 1st ladder output. See `double_montgomery_ladder` for more details. + * When computing repeated iterations of the montgomery ladder algorithm, we can neglect computing the y-coordinate + *of each ladder output. See `multiple_montgomery_ladder` for more details. **/ for (size_t i = 1; i < num_rounds / 2; ++i) { // `nafs` tracks the naf value for each point for the current round @@ -365,14 +376,8 @@ element element::bn254_endo_batch_mul(const std::vec } element::chain_add_accumulator add_2 = point_table.get_chain_add_accumulator(nafs); - // Perform the double montgomery ladder. We need to convert our chain_add_accumulator types into regular - // elements if the accumuator does not contain a y-coordinate - if (!add_1.is_element) { - accumulator = accumulator.double_montgomery_ladder(add_1, add_2); - } else { - accumulator = accumulator.double_montgomery_ladder(element(add_1.x3_prev, add_1.y3_prev), - element(add_2.x3_prev, add_2.y3_prev)); - } + // Perform the double montgomery ladder. 
+ accumulator = accumulator.multiple_montgomery_ladder({ add_1, add_2 }); } // we need to iterate 1 more time if the number of rounds is even @@ -382,12 +387,7 @@ element element::bn254_endo_batch_mul(const std::vec nafs.emplace_back(naf_entries[j][num_rounds - 1]); } element::chain_add_accumulator add_1 = point_table.get_chain_add_accumulator(nafs); - if (add_1.is_element) { - element temp(add_1.x3_prev, add_1.y3_prev); - accumulator = accumulator.montgomery_ladder(temp); - } else { - accumulator = accumulator.montgomery_ladder(add_1); - } + accumulator = accumulator.multiple_montgomery_ladder({ add_1 }); } /** diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp index 78d6f37855..bff1805db3 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp @@ -80,6 +80,39 @@ element element::operator-(const element& other) con return element(x_3, y_3); } + +/** + * @brief Compute (*this) + other AND (*this) - other as a size-2 array + * + * @details We require this operation when computing biggroup lookup tables for + * multi-scalar-multiplication. 
This combined method reduces the number of + * field additions, field subtractions required (as well as 1 less assert_is_not_equal check) + * + * @tparam C + * @tparam Fq + * @tparam Fr + * @tparam G + * @param other + * @return std::array, 2> + */ +template +std::array, 2> element::add_sub(const element& other) const +{ + other.x.assert_is_not_equal(x); + + const Fq denominator = other.x - x; + const Fq x2x1 = -(other.x + x); + + const Fq lambda1 = Fq::div_without_denominator_check({ other.y, -y }, denominator); + const Fq x_3 = lambda1.sqradd({ x2x1 }); + const Fq y_3 = lambda1.madd(x - x_3, { -y }); + const Fq lambda2 = Fq::div_without_denominator_check({ -other.y, -y }, denominator); + const Fq x_4 = lambda2.sqradd({ x2x1 }); + const Fq y_4 = lambda2.madd(x - x_4, { -y }); + + return { element(x_3, y_3), element(x_4, y_4) }; +} + template element element::dbl() const { Fq two_x = x + x; @@ -294,200 +327,217 @@ element element::montgomery_ladder(const chain_add_a } /** - * Compute (4 * (*this)) + (2 * add1) + add2 - * If we chain two iterations of the montgomery ladder together, we can squeeze out a non-native field reduction. + * @brief Compute 4.P + to_add[0] + ... + to_add[to_add.size() - 1] * - * Total number of field reductions = 9 + * @details Used in wnaf_batch_mul method. Combining operations requires fewer bigfield reductions. * - * Two calls to mont ladder woud require 10 + * Method computes R[i] = (2P + A[0]) + (2P + A[1]) + A[2] + ... + A[n-1] * - * Using doublings and additions would require 12! 
- **/ + * @tparam C + * @tparam Fq + * @tparam Fr + * @tparam G + * @param to_add + * @return element + */ template -element element::double_montgomery_ladder(const element& add1, const element& add2) const +element element::quadruple_and_add(const std::vector& to_add) const { - add1.x.assert_is_not_equal(x); - const Fq lambda_1 = Fq::div_without_denominator_check({ add1.y, -y }, (add1.x - x)); - - const Fq x_3 = lambda_1.sqradd({ -add1.x, -x }); - - const Fq minus_lambda_2 = - lambda_1 + Fq::div_without_denominator_check({ y + y }, (x_3 - x)); // (y + y) / (x_3 - x); - - const Fq x_4 = minus_lambda_2.sqradd({ -x, -x_3 }); - - // We can avoid computing y_4, instead substituting the expression `minus_lambda_2 * (x_4 - x) - y` where needed. - // This is cheaper, because we can evaluate two field multiplications (or a field multiplication + a field division) - // with only one non-native field reduction. - // E.g. evaluating (a * b) + (c * d) = e mod p only requires 1 quotient and remainder. - // Defining the quotient and remainder elements is the major cost of a non-native field multiplication - // because each requires ~256 bits of range checks - const Fq x_sub_x4 = x - x_4; - - const Fq x4_sub_add2x = x_4 - add2.x; + const Fq two_x = x + x; + Fq x_1; + Fq minus_lambda_dbl; + if constexpr (G::has_a) { + Fq a(get_context(), uint256_t(G::curve_a)); + minus_lambda_dbl = Fq::msub_div({ x }, { (two_x + x) }, (y + y), { a }); + x_1 = minus_lambda_dbl.sqradd({ -(two_x) }); + } else { + minus_lambda_dbl = Fq::msub_div({ x }, { (two_x + x) }, (y + y), {}); + x_1 = minus_lambda_dbl.sqradd({ -(two_x) }); + } - // msub_div; 'compute a multiplication and a division and multiply the two together. 
Requires only 1 non native - // field reduction` - const Fq lambda_3 = Fq::msub_div({ minus_lambda_2 }, { (x_sub_x4) }, (x4_sub_add2x), { y, add2.y }); + ASSERT(to_add.size() > 0); + to_add[0].x.assert_is_not_equal(x_1); - // validate we can use incomplete addition formulae - x_4.assert_is_not_equal(add2.x); + const Fq x_minus_x_1 = x - x_1; - const Fq x_5 = lambda_3.sqradd({ -x_4, -add2.x }); - const Fq x5_sub_x4 = x_5 - x_4; + const Fq lambda_1 = Fq::msub_div({ minus_lambda_dbl }, { x_minus_x_1 }, (x_1 - to_add[0].x), { to_add[0].y, y }); - const Fq half_minus_lambda_4_minus_lambda_3 = Fq::msub_div({ minus_lambda_2 }, { x_sub_x4 }, (x5_sub_x4), { y }); + const Fq x_3 = lambda_1.sqradd({ -to_add[0].x, -x_1 }); - const Fq minus_lambda_4_minus_lambda_3 = half_minus_lambda_4_minus_lambda_3 + half_minus_lambda_4_minus_lambda_3; - const Fq minus_lambda_4 = minus_lambda_4_minus_lambda_3 + lambda_3; - const Fq x_6 = minus_lambda_4.sqradd({ -x_4, -x_5 }); + const Fq half_minus_lambda_2_minus_lambda_1 = + Fq::msub_div({ minus_lambda_dbl }, { x_minus_x_1 }, (x_3 - x_1), { y }); - const Fq x6_sub_x4 = x_6 - x_4; + const Fq minus_lambda_2_minus_lambda_1 = half_minus_lambda_2_minus_lambda_1 + half_minus_lambda_2_minus_lambda_1; + const Fq minus_lambda_2 = minus_lambda_2_minus_lambda_1 + lambda_1; - // y_6 = -L_4 * (x_6 - x_4) - L_2 * (x - x_4) + y - const Fq y_6 = Fq::dual_madd(minus_lambda_4, (x6_sub_x4), minus_lambda_2, x_sub_x4, { y }); + const Fq x_4 = minus_lambda_2.sqradd({ -x_1, -x_3 }); - return element(x_6, y_6); -} + const Fq x_4_sub_x_1 = x_4 - x_1; -/** - * If we chain two iterations of the montgomery ladder together, we can squeeze out a non-native field reduction - * - **/ -template -element element::double_montgomery_ladder(const chain_add_accumulator& add1, - const element& add2) const -{ - if (add1.is_element) { - throw_or_abort("An accumulator expected"); + if (to_add.size() == 1) { + const Fq y_4 = Fq::dual_madd(minus_lambda_2, x_4_sub_x_1, minus_lambda_dbl, 
x_minus_x_1, { y }); + return element(x_4, y_4); } - add1.x3_prev.assert_is_not_equal(x); - Fq lambda_1 = Fq::msub_div( - { add1.lambda_prev }, { (add1.x1_prev - add1.x3_prev) }, (x - add1.x3_prev), { -add1.y1_prev, -y }); - - const Fq x_3 = lambda_1.sqradd({ -add1.x3_prev, -x }); - - const Fq minus_lambda_2 = - lambda_1 + Fq::div_without_denominator_check({ y + y }, (x_3 - x)); // (y + y) / (x_3 - x); - - const Fq x_4 = minus_lambda_2.sqradd({ -x, -x_3 }); - - // We can avoid computing y_4, instead substituting the expression `minus_lambda_2 * (x_4 - x) - y` where needed. - // This is cheaper, because we can evaluate two field multiplications (or a field multiplication + a field division) - // with only one non-native field reduction. - // E.g. evaluating (a * b) + (c * d) = e mod p only requires 1 quotient and remainder, which is the major cost - // of a non-native field multiplication - const Fq x_sub_x4 = x - x_4; + to_add[1].x.assert_is_not_equal(to_add[0].x); - const Fq x4_sub_add2x = x_4 - add2.x; - const Fq lambda_3 = Fq::msub_div({ minus_lambda_2 }, { (x_sub_x4) }, (x4_sub_add2x), { y, add2.y }); + Fq minus_lambda_3 = Fq::msub_div( + { minus_lambda_dbl, minus_lambda_2 }, { x_minus_x_1, x_4_sub_x_1 }, (x_4 - to_add[1].x), { y, -(to_add[1].y) }); - x_4.assert_is_not_equal(add2.x); + // X5 = L3.L3 - X4 - XB + const Fq x_5 = minus_lambda_3.sqradd({ -x_4, -to_add[1].x }); - const Fq x_5 = lambda_3.sqradd({ -x_4, -add2.x }); - const Fq x5_sub_x4 = x_5 - x_4; + if (to_add.size() == 2) { + // Y5 = L3.(XB - X5) - YB + const Fq y_5 = minus_lambda_3.madd(x_5 - to_add[1].x, { -to_add[1].y }); + return element(x_5, y_5); + } - const Fq half_minus_lambda_4_minus_lambda_3 = Fq::msub_div({ minus_lambda_2 }, { x_sub_x4 }, (x5_sub_x4), { y }); + Fq x_prev = x_5; + Fq minus_lambda_prev = minus_lambda_3; - const Fq minus_lambda_4_minus_lambda_3 = half_minus_lambda_4_minus_lambda_3 + half_minus_lambda_4_minus_lambda_3; - const Fq minus_lambda_4 = minus_lambda_4_minus_lambda_3 
+ lambda_3; - const Fq x_6 = minus_lambda_4.sqradd({ -x_4, -x_5 }); + for (size_t i = 2; i < to_add.size(); ++i) { - const Fq x6_sub_x4 = x_6 - x_4; + to_add[i].x.assert_is_not_equal(to_add[i - 1].x); + // Lambda = Yprev - Yadd[i] / Xprev - Xadd[i] + // = -Lprev.(Xprev - Xadd[i-1]) - Yadd[i - 1] - Yadd[i] / Xprev - Xadd[i] + const Fq minus_lambda = Fq::msub_div({ minus_lambda_prev }, + { to_add[i - 1].x - x_prev }, + (to_add[i].x - x_prev), + { to_add[i - 1].y, to_add[i].y }); + // X = Lambda * Lambda - Xprev - Xadd[i] + const Fq x_out = minus_lambda.sqradd({ -x_prev, -to_add[i].x }); - const Fq y_6 = Fq::dual_madd(minus_lambda_4, (x6_sub_x4), minus_lambda_2, x_sub_x4, { y }); + x_prev = x_out; + minus_lambda_prev = minus_lambda; + } + const Fq y_out = minus_lambda_prev.madd(x_prev - to_add[to_add.size() - 1].x, { -to_add[to_add.size() - 1].y }); - return element(x_6, y_6); + return element(x_prev, y_out); } /** - * If we chain two iterations of the montgomery ladder together, we can squeeze out a non-native field reduction + * @brief Perform repeated iterations of the montgomery ladder algorithm. * - **/ + * For points P, Q, montgomery ladder computes R = (P + Q) + P + * i.e. it's "double-and-add" without explicit doublings. + * + * This method can apply repeated iterations of the montgomery ladder. + * Each iteration reduces the number of field multiplications by 1, at the cost of more additions. + * (i.e. we don't compute intermediate y-coordinates). + * + * The number of additions scales with the size of the input vector. The optimal input size appears to be 4. 
+ * + * @tparam C + * @tparam Fq + * @tparam Fr + * @tparam G + * @param add + * @return element + */ template -element element::double_montgomery_ladder(const chain_add_accumulator& add1, - const chain_add_accumulator& add2) const +element element::multiple_montgomery_ladder( + const std::vector& add) const { - if ((add1.is_element) || (add2.is_element)) { - throw_or_abort("An accumulator expected"); - } - add1.x3_prev.assert_is_not_equal(x); - // add1.y = lambda_prev * (x1_prev - x3_prev) - y1_prev - Fq lambda_1 = Fq::msub_div( - { add1.lambda_prev }, { (add1.x1_prev - add1.x3_prev) }, (x - add1.x3_prev), { -add1.y1_prev, -y }); - - const Fq x_3 = lambda_1.sqradd({ -add1.x3_prev, -x }); - - const Fq minus_lambda_2 = - lambda_1 + Fq::div_without_denominator_check({ y + y }, (x_3 - x)); // (y + y) / (x_3 - x); - - const Fq x_4 = minus_lambda_2.sqradd({ -x, -x_3 }); - - // We can avoid computing y_4, instead substituting the expression `minus_lambda_2 * (x_4 - x) - y` where needed. - // This is cheaper, because we can evaluate two field multiplications (or a field multiplication + a field division) - // with only one non-native field reduction. - // E.g. 
evaluating (a * b) + (c * d) = e mod p only requires 1 quotient and remainder, which is the major cost - // of a non-native field multiplication - const Fq x_sub_x4 = x - x_4; - - const Fq x4_sub_add2x = x_4 - add2.x3_prev; - - const Fq lambda_3 = Fq::msub_div({ minus_lambda_2, add2.lambda_prev }, - { (x_sub_x4), (add2.x1_prev - add2.x3_prev) }, - (x4_sub_add2x), - { y, -add2.y1_prev }); - - x_4.assert_is_not_equal(add2.x3_prev); - - const Fq x_5 = lambda_3.sqradd({ -x_4, -add2.x3_prev }); - const Fq x5_sub_x4 = x_5 - x_4; - - const Fq half_minus_lambda_4_minus_lambda_3 = Fq::msub_div({ minus_lambda_2 }, { x_sub_x4 }, (x5_sub_x4), { y }); + struct composite_y { + std::vector mul_left; + std::vector mul_right; + std::vector add; + bool is_negative = false; + }; + + Fq previous_x = x; + composite_y previous_y{ std::vector(), std::vector(), std::vector(), false }; + for (size_t i = 0; i < add.size(); ++i) { + previous_x.assert_is_not_equal(add[i].x3_prev); + + // composite_y add_y; + bool negate_add_y = (i > 0) && !previous_y.is_negative; + std::vector lambda1_left; + std::vector lambda1_right; + std::vector lambda1_add; + + if (i == 0) { + lambda1_add.emplace_back(-y); + } else { + lambda1_left = previous_y.mul_left; + lambda1_right = previous_y.mul_right; + lambda1_add = previous_y.add; + } - const Fq minus_lambda_4_minus_lambda_3 = half_minus_lambda_4_minus_lambda_3 + half_minus_lambda_4_minus_lambda_3; - const Fq minus_lambda_4 = minus_lambda_4_minus_lambda_3 + lambda_3; - const Fq x_6 = minus_lambda_4.sqradd({ -x_4, -x_5 }); + if (!add[i].is_element) { + lambda1_left.emplace_back(add[i].lambda_prev); + lambda1_right.emplace_back(negate_add_y ? add[i].x3_prev - add[i].x1_prev + : add[i].x1_prev - add[i].x3_prev); + lambda1_add.emplace_back(negate_add_y ? add[i].y1_prev : -add[i].y1_prev); + } else if (i > 0) { + lambda1_add.emplace_back(negate_add_y ? 
-add[i].y3_prev : add[i].y3_prev); + } + // if previous_y is negated then add stays positive + // if previous_y is positive then add stays negated + // | add.y is negated | previous_y is negated | output of msub_div is -lambda | + // | --- | --- | --- | + // | no | yes | yes | + // | yes | no | no | + + Fq lambda1; + if (!add[i].is_element || i > 0) { + bool flip_lambda1_denominator = !negate_add_y; + Fq denominator = flip_lambda1_denominator ? previous_x - add[i].x3_prev : add[i].x3_prev - previous_x; + lambda1 = Fq::msub_div(lambda1_left, lambda1_right, denominator, lambda1_add); + } else { + lambda1 = Fq::div_without_denominator_check({ add[i].y3_prev - y }, (add[i].x3_prev - x)); + } - const Fq x6_sub_x4 = x_6 - x_4; + Fq x_3 = lambda1.madd(lambda1, { -add[i].x3_prev, -previous_x }); - const Fq y_6 = Fq::dual_madd(minus_lambda_4, (x6_sub_x4), minus_lambda_2, x_sub_x4, { y }); + // We can avoid computing y_4, instead substituting the expression `minus_lambda_2 * (x_4 - x) - y` where + // needed. This is cheaper, because we can evaluate two field multiplications (or a field multiplication + a + // field division) with only one non-native field reduction. E.g. evaluating (a * b) + (c * d) = e mod p only + // requires 1 quotient and remainder, which is the major cost of a non-native field multiplication + Fq lambda2; + if (i == 0) { + lambda2 = Fq::div_without_denominator_check({ y + y }, (previous_x - x_3)) - lambda1; + } else { + Fq l2_denominator = previous_y.is_negative ? 
previous_x - x_3 : x_3 - previous_x; + Fq partial_lambda2 = + Fq::msub_div(previous_y.mul_left, previous_y.mul_right, l2_denominator, previous_y.add); + partial_lambda2 = partial_lambda2 + partial_lambda2; + lambda2 = partial_lambda2 - lambda1; + } - return element(x_6, y_6); -} -/** - * If we chain two iterations of the montgomery ladder together, we can squeeze out a non-native field reduction - **/ -template -element element::double_into_montgomery_ladder(const element& add1) const -{ - const Fq two_x = x + x; - Fq x_1; - Fq minus_lambda_dbl; - if constexpr (G::has_a) { - Fq a(get_context(), uint256_t(G::curve_a)); - minus_lambda_dbl = Fq::msub_div({ x }, { (two_x + x) }, (y + y), { a }); - x_1 = minus_lambda_dbl.sqradd({ -(two_x) }); - } else { - minus_lambda_dbl = Fq::msub_div({ x }, { (two_x + x) }, (y + y), {}); - x_1 = minus_lambda_dbl.sqradd({ -(two_x) }); + Fq x_4 = lambda2.sqradd({ -x_3, -previous_x }); + composite_y y_4; + if (i == 0) { + // We want to make sure that at the final iteration, `y_previous.is_negative = false` + // Each iteration flips the sign of y_previous.is_negative. + // i.e. whether we store y_4 or -y_4 depends on the number of points we have + bool num_points_even = ((add.size() & 0x01UL) == 0); + y_4.add.emplace_back(num_points_even ? y : -y); + y_4.mul_left.emplace_back(lambda2); + y_4.mul_right.emplace_back(num_points_even ? x_4 - previous_x : previous_x - x_4); + y_4.is_negative = num_points_even; + } else { + y_4.is_negative = !previous_y.is_negative; + y_4.mul_left.emplace_back(lambda2); + y_4.mul_right.emplace_back(previous_y.is_negative ? previous_x - x_4 : x_4 - previous_x); + // append terms in previous_y to y_4. We want to make sure the terms above are added into the start of y_4. + // This is to ensure they are cached correctly when + // `composer::evaluate_partial_non_native_field_multiplication` is called. 
+ // (the 1st mul_left, mul_right elements will trigger composer::evaluate_non_native_field_multiplication + // when Fq::mult_madd is called - this term cannot be cached so we want to make sure it is unique) + std::copy(previous_y.mul_left.begin(), previous_y.mul_left.end(), std::back_inserter(y_4.mul_left)); + std::copy(previous_y.mul_right.begin(), previous_y.mul_right.end(), std::back_inserter(y_4.mul_right)); + std::copy(previous_y.add.begin(), previous_y.add.end(), std::back_inserter(y_4.add)); + } + previous_x = x_4; + previous_y = y_4; } + Fq x_out = previous_x; - add1.x.assert_is_not_equal(x_1); - - const Fq x_minus_x_1 = x - x_1; - const Fq lambda_1 = Fq::msub_div({ minus_lambda_dbl }, { x_minus_x_1 }, (x_1 - add1.x), { add1.y, y }); - - const Fq x_3 = lambda_1.sqradd({ -add1.x, -x_1 }); - const Fq half_minus_lambda_2_minus_lambda_1 = - Fq::msub_div({ minus_lambda_dbl }, { x_minus_x_1 }, (x_3 - x_1), { y }); - const Fq minus_lambda_2_minus_lambda_1 = half_minus_lambda_2_minus_lambda_1 + half_minus_lambda_2_minus_lambda_1; - const Fq minus_lambda_2 = minus_lambda_2_minus_lambda_1 + lambda_1; - - const Fq x_4 = minus_lambda_2.sqradd({ -x_1, -x_3 }); - - const Fq y_4 = Fq::dual_madd(minus_lambda_2, (x_4 - x_1), minus_lambda_dbl, x_minus_x_1, { y }); + ASSERT(!previous_y.is_negative); - return element(x_4, y_4); + Fq y_out = Fq::mult_madd(previous_y.mul_left, previous_y.mul_right, previous_y.add); + return element(x_out, y_out); } /** @@ -551,6 +601,7 @@ element element::batch_mul(const std::vector& scalars, const size_t max_num_bits) { + const size_t num_points = points.size(); ASSERT(scalars.size() == num_points); batch_lookup_table point_table(points); @@ -563,38 +614,25 @@ element element::batch_mul(const std::vector> nafs; - for (size_t j = 0; j < num_points; ++j) { - nafs.emplace_back(naf_entries[j][i * 2 - 1]); - } - element::chain_add_accumulator add_1 = point_table.get_chain_add_accumulator(nafs); - for (size_t j = 0; j < num_points; ++j) { - nafs[j] 
= (naf_entries[j][i * 2]); - } - element::chain_add_accumulator add_2 = point_table.get_chain_add_accumulator(nafs); - if (!add_1.is_element) { - accumulator = accumulator.double_montgomery_ladder(add_1, add_2); - } else { - accumulator = accumulator.double_montgomery_ladder(element(add_1.x3_prev, add_1.y3_prev), - element(add_2.x3_prev, add_2.y3_prev)); + constexpr size_t num_rounds_per_iteration = 4; + size_t num_iterations = num_rounds / num_rounds_per_iteration; + num_iterations += ((num_iterations * num_rounds_per_iteration) == num_rounds) ? 0 : 1; + const size_t num_rounds_per_final_iteration = (num_rounds - 1) - ((num_iterations - 1) * num_rounds_per_iteration); + for (size_t i = 0; i < num_iterations; ++i) { + + std::vector> nafs(num_points); + std::vector to_add; + const size_t inner_num_rounds = + (i != num_iterations - 1) ? num_rounds_per_iteration : num_rounds_per_final_iteration; + for (size_t j = 0; j < inner_num_rounds; ++j) { + for (size_t k = 0; k < num_points; ++k) { + nafs[k] = (naf_entries[k][i * num_rounds_per_iteration + j + 1]); + } + to_add.emplace_back(point_table.get_chain_add_accumulator(nafs)); } + accumulator = accumulator.multiple_montgomery_ladder(to_add); } - if ((num_rounds & 0x01ULL) == 0x00ULL) { - std::vector> nafs; - for (size_t j = 0; j < points.size(); ++j) { - nafs.emplace_back(naf_entries[j][num_rounds - 1]); - } - element::chain_add_accumulator add_1 = point_table.get_chain_add_accumulator(nafs); - if (add_1.is_element) { - element temp(add_1.x3_prev, add_1.y3_prev); - accumulator = accumulator.montgomery_ladder(temp); - } else { - accumulator = accumulator.montgomery_ladder(add_1); - } - } - for (size_t i = 0; i < num_points; ++i) { element skew = accumulator - points[i]; Fq out_x = accumulator.x.conditional_select(skew.x, naf_entries[i][num_rounds]); diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_nafs.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_nafs.hpp index 
63257d5687..49e43ba6e9 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_nafs.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_nafs.hpp @@ -243,9 +243,15 @@ typename element::secp256k1_wnaf_pair element::compu // Compute and constrain skews field_t negative_skew = witness_t(ctx, is_negative ? 0 : skew); field_t positive_skew = witness_t(ctx, is_negative ? skew : 0); - negative_skew.create_range_constraint(1); - positive_skew.create_range_constraint(1); - (negative_skew + positive_skew).create_range_constraint(1); + if constexpr (C::type == ComposerType::PLOOKUP) { + ctx->create_new_range_constraint(negative_skew.witness_index, 1, "biggroup_nafs"); + ctx->create_new_range_constraint(positive_skew.witness_index, 1, "biggroup_nafs"); + ctx->create_new_range_constraint((negative_skew + positive_skew).witness_index, 1, "biggroup_nafs"); + } else { + ctx->create_range_constraint(negative_skew.witness_index, 1, "biggroup_nafs"); + ctx->create_range_constraint(positive_skew.witness_index, 1, "biggroup_nafs"); + ctx->create_range_constraint((negative_skew + positive_skew).witness_index, 1, "biggroup_nafs"); + } const auto reconstruct_bigfield_from_wnaf = [ctx](const std::vector>& wnaf, const field_t& positive_skew, @@ -378,14 +384,21 @@ std::vector> element::compute_wnaf(const Fr& scalar) offset_entry = (1ULL << (WNAF_SIZE - 1)) - 1 - (wnaf_values[i] & 0xffffff); } field_t entry(witness_t(ctx, offset_entry)); - - entry.create_range_constraint(WNAF_SIZE); + if constexpr (C::type == ComposerType::PLOOKUP) { + ctx->create_new_range_constraint(entry.witness_index, 1ULL << (WNAF_SIZE), "biggroup_nafs"); + } else { + ctx->create_range_constraint(entry.witness_index, WNAF_SIZE, "biggroup_nafs"); + } wnaf_entries.emplace_back(entry); } // add skew wnaf_entries.emplace_back(witness_t(ctx, skew)); - wnaf_entries[wnaf_entries.size() - 1].create_range_constraint(1); + if constexpr (C::type == ComposerType::PLOOKUP) { + 
ctx->create_new_range_constraint(wnaf_entries[wnaf_entries.size() - 1].witness_index, 1, "biggroup_nafs"); + } else { + ctx->create_range_constraint(wnaf_entries[wnaf_entries.size() - 1].witness_index, 1, "biggroup_nafs"); + } // TODO: VALIDATE SUM DOES NOT OVERFLOW P @@ -494,15 +507,25 @@ std::vector> element::compute_naf(const Fr& scalar, cons bit.context = ctx; bit.witness_index = witness_t(ctx, true).witness_index; // flip sign bit.witness_bool = true; - ctx->create_range_constraint( - bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in non-next_entry case"); + if constexpr (C::type == ComposerType::PLOOKUP) { + ctx->create_new_range_constraint( + bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in non-next_entry case"); + } else { + ctx->create_range_constraint( + bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in non-next_entry case"); + } naf_entries[num_rounds - i - 1] = bit; } else { bool_t bit(ctx, false); bit.witness_index = witness_t(ctx, false).witness_index; // don't flip sign bit.witness_bool = false; - ctx->create_range_constraint( - bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in next_entry case"); + if constexpr (C::type == ComposerType::PLOOKUP) { + ctx->create_new_range_constraint( + bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in next_entry case"); + } else { + ctx->create_range_constraint( + bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in next_entry case"); + } naf_entries[num_rounds - i - 1] = bit; } } diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_secp256k1.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_secp256k1.hpp index 7d510e2794..756f955a7a 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_secp256k1.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_secp256k1.hpp @@ -86,15 +86,18 @@ element 
element::secp256k1_ecdsa_mul(const element& // See `stdlib/memory/rom_table.hpp` for how indirect array accesses are implemented in UltraPlonk const auto& add_1 = endoP2_table[u2_hi_wnaf.wnaf[2 * i]]; const auto& add_2 = P2_table[u2_lo_wnaf.wnaf[2 * i + 1]]; - accumulator = accumulator.double_montgomery_ladder(add_1, add_2); - const auto& add_3 = endoP1_table[u1_hi_wnaf.wnaf[i]]; const auto& add_4 = P1_table[u1_lo_wnaf.wnaf[i]]; - accumulator = accumulator.double_montgomery_ladder(add_3, add_4); - const auto& add_5 = endoP2_table[u2_hi_wnaf.wnaf[2 * i + 1]]; const auto& add_6 = P2_table[u2_lo_wnaf.wnaf[2 * i + 2]]; - accumulator = accumulator.double_montgomery_ladder(add_5, add_6); + + accumulator = accumulator.multiple_montgomery_ladder({ element::chain_add_accumulator(add_1), + element::chain_add_accumulator(add_2), + element::chain_add_accumulator(add_3) }); + + accumulator = accumulator.multiple_montgomery_ladder({ element::chain_add_accumulator(add_4), + element::chain_add_accumulator(add_5), + element::chain_add_accumulator(add_6) }); } /** diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp index 9098799a7b..7f31017f67 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp @@ -4,10 +4,27 @@ namespace proof_system::plonk { namespace stdlib { using plookup::MultiTableId; + +/** + * @brief Constructs a ROM table to look up linear combinations of group elements + * + * @tparam C + * @tparam Fq + * @tparam Fr + * @tparam G + * @tparam num_elements + * @tparam typename + * @param rom_data the ROM table we are writing into + * @param limb_max the maximum size of each limb in the ROM table. + * + * @details When reading a group element *out* of the ROM table, we must know the maximum value of each coordinate's + * limbs. 
We take this value to be the maximum of the maximum values of the input limbs into the table! + * @return std::array, 5> + */ template template std::array, 5> element::create_group_element_rom_tables( - const std::array& rom_data) + const std::array& rom_data, std::array& limb_max) { std::vector, 2>> x_lo_limbs; std::vector, 2>> x_hi_limbs; @@ -16,6 +33,15 @@ std::array, 5> element::create_group_element_rom std::vector, 2>> prime_limbs; for (size_t i = 0; i < num_elements; ++i) { + limb_max[0] = std::max(limb_max[0], rom_data[i].x.binary_basis_limbs[0].maximum_value); + limb_max[1] = std::max(limb_max[1], rom_data[i].x.binary_basis_limbs[1].maximum_value); + limb_max[2] = std::max(limb_max[2], rom_data[i].x.binary_basis_limbs[2].maximum_value); + limb_max[3] = std::max(limb_max[3], rom_data[i].x.binary_basis_limbs[3].maximum_value); + limb_max[4] = std::max(limb_max[4], rom_data[i].y.binary_basis_limbs[0].maximum_value); + limb_max[5] = std::max(limb_max[5], rom_data[i].y.binary_basis_limbs[1].maximum_value); + limb_max[6] = std::max(limb_max[6], rom_data[i].y.binary_basis_limbs[2].maximum_value); + limb_max[7] = std::max(limb_max[7], rom_data[i].y.binary_basis_limbs[3].maximum_value); + x_lo_limbs.emplace_back(std::array, 2>{ rom_data[i].x.binary_basis_limbs[0].element, rom_data[i].x.binary_basis_limbs[1].element }); x_hi_limbs.emplace_back(std::array, 2>{ rom_data[i].x.binary_basis_limbs[2].element, @@ -39,7 +65,7 @@ std::array, 5> element::create_group_element_rom template template element element::read_group_element_rom_tables( - const std::array, 5>& tables, const field_t& index) + const std::array, 5>& tables, const field_t& index, const std::array& limb_max) { const auto xlo = tables[0][index]; const auto xhi = tables[1][index]; @@ -49,6 +75,15 @@ element element::read_group_element_rom_tables( Fq x_fq(xlo[0], xlo[1], xhi[0], xhi[1], xyprime[0]); Fq y_fq(ylo[0], ylo[1], yhi[0], yhi[1], xyprime[1]); + x_fq.binary_basis_limbs[0].maximum_value = limb_max[0]; + 
x_fq.binary_basis_limbs[1].maximum_value = limb_max[1]; + x_fq.binary_basis_limbs[2].maximum_value = limb_max[2]; + x_fq.binary_basis_limbs[3].maximum_value = limb_max[3]; + y_fq.binary_basis_limbs[0].maximum_value = limb_max[4]; + y_fq.binary_basis_limbs[1].maximum_value = limb_max[5]; + y_fq.binary_basis_limbs[2].maximum_value = limb_max[6]; + y_fq.binary_basis_limbs[3].maximum_value = limb_max[7]; + const auto output = element(x_fq, y_fq); return output; } @@ -64,17 +99,17 @@ element::four_bit_table_plookup::four_bit_table_plookup(const e element_table[i] = element_table[i - 1] + d2; } for (size_t i = 0; i < 8; ++i) { - element_table[i] = (-element_table[15 - i]).reduce(); + element_table[i] = (-element_table[15 - i]); } - coordinates = create_group_element_rom_tables<16>(element_table); + coordinates = create_group_element_rom_tables<16>(element_table, limb_max); } template template element element::four_bit_table_plookup::operator[](const field_t& index) const { - return read_group_element_rom_tables<16>(coordinates, index); + return read_group_element_rom_tables<16>(coordinates, index, limb_max); } template @@ -146,109 +181,134 @@ template element::lookup_table_plookup::lookup_table_plookup(const std::array& inputs) { if constexpr (length == 2) { - element_table[0] = inputs[1] + inputs[0]; - element_table[1] = inputs[1] - inputs[0]; + auto [A0, A1] = inputs[1].add_sub(inputs[0]); + element_table[0] = A0; + element_table[1] = A1; } else if constexpr (length == 3) { - element R0 = inputs[1] + inputs[0]; - element R1 = inputs[1] - inputs[0]; - element_table[0] = inputs[2] + R0; // C + B + A - element_table[1] = inputs[2] + R1; // C + B - A - element_table[2] = inputs[2] - R1; // C - B + A - element_table[3] = inputs[2] - R0; // C - B - A - } else if constexpr (length == 4) { - element T0 = inputs[1] + inputs[0]; - element T1 = inputs[1] - inputs[0]; - element T2 = inputs[3] + inputs[2]; - element T3 = inputs[3] - inputs[2]; + auto [R0, R1] = 
inputs[1].add_sub(inputs[0]); // B ± A - element_table[0] = T2 + T0; // D + C + B + A - element_table[1] = T2 + T1; // D + C + B - A - element_table[2] = T2 - T1; // D + C - B + A - element_table[3] = T2 - T0; // D + C - B - A - element_table[4] = T3 + T0; // D - C + B + A - element_table[5] = T3 + T1; // D - C + B - A - element_table[6] = T3 - T1; // D - C - B + A - element_table[7] = T3 - T0; // D - C - B - A - } else if constexpr (length == 5) { - element A0 = inputs[1] + inputs[0]; // B + A - element A1 = inputs[1] - inputs[0]; // B - A + auto [T0, T1] = inputs[2].add_sub(R0); // C ± (B + A) + auto [T2, T3] = inputs[2].add_sub(R1); // C ± (B - A) - element T2 = inputs[3] + inputs[2]; // D + C - element T3 = inputs[3] - inputs[2]; // D - C - - element E0 = inputs[4] + T2; // E + D + C // 0 0 0 - element E1 = inputs[4] + T3; // E + D - C // 0 0 1 - element E2 = inputs[4] - T3; // E - D + C // 0 1 0 - element E3 = inputs[4] - T2; // E - D - C // 0 1 1 - - element_table[0] = E0 + A0; // E + D + C + B + A // 0 0 0 0 0 - element_table[1] = E0 + A1; // E + D + C + B - A // 0 0 0 0 1 - element_table[2] = E0 - A1; // E + D + C - B + A // 0 0 0 1 0 - element_table[3] = E0 - A0; // E + D + C - B - A // 0 0 0 1 1 - element_table[4] = E1 + A0; // E + D - C + B + A // 0 0 1 0 0 - element_table[5] = E1 + A1; // E + D - C + B - A // 0 0 1 0 1 - element_table[6] = E1 - A1; // E + D - C - B + A // 0 0 1 1 0 - element_table[7] = E1 - A0; // E + D - C - B - A // 0 0 1 1 1 - element_table[8] = E2 + A0; // E - D + C + B + A // 0 1 0 0 0 - element_table[9] = E2 + A1; // E - D + C + B - A // 0 1 0 0 1 - element_table[10] = E2 - A1; // E - D + C - B + A // 0 1 0 1 0 - element_table[11] = E2 - A0; // E - D - C - B - A // 0 1 0 1 1 - element_table[12] = E3 + A0; // E - D - C + B + A // 0 1 1 0 0 - element_table[13] = E3 + A1; // E - D - C + B - A // 0 1 1 0 1 - element_table[14] = E3 - A1; // E - D - C - B + A // 0 1 1 1 0 - element_table[15] = E3 - A0; // E - D - C - B - A // 0 1 1 1 1 
+ element_table[0] = T0; + element_table[1] = T2; + element_table[2] = T3; + element_table[3] = T1; + } else if constexpr (length == 4) { + auto [T0, T1] = inputs[1].add_sub(inputs[0]); // B ± A + auto [T2, T3] = inputs[3].add_sub(inputs[2]); // D ± C + + auto [F0, F3] = T2.add_sub(T0); // (D + C) ± (B + A) + auto [F1, F2] = T2.add_sub(T1); // (D + C) ± (B - A) + auto [F4, F7] = T3.add_sub(T0); // (D - C) ± (B + A) + auto [F5, F6] = T3.add_sub(T1); // (D - C) ± (B - A) + + element_table[0] = F0; + element_table[1] = F1; + element_table[2] = F2; + element_table[3] = F3; + element_table[4] = F4; + element_table[5] = F5; + element_table[6] = F6; + element_table[7] = F7; + } else if constexpr (length == 5) { + auto [A0, A1] = inputs[1].add_sub(inputs[0]); // B ± A + auto [T2, T3] = inputs[3].add_sub(inputs[2]); // D ± C + + auto [E0, E3] = inputs[4].add_sub(T2); // E ± (D + C) + auto [E1, E2] = inputs[4].add_sub(T3); // E ± (D - C) + + auto [F0, F3] = E0.add_sub(A0); + auto [F1, F2] = E0.add_sub(A1); + auto [F4, F7] = E1.add_sub(A0); + auto [F5, F6] = E1.add_sub(A1); + auto [F8, F11] = E2.add_sub(A0); + auto [F9, F10] = E2.add_sub(A1); + auto [F12, F15] = E3.add_sub(A0); + auto [F13, F14] = E3.add_sub(A1); + + element_table[0] = F0; + element_table[1] = F1; + element_table[2] = F2; + element_table[3] = F3; + element_table[4] = F4; + element_table[5] = F5; + element_table[6] = F6; + element_table[7] = F7; + element_table[8] = F8; + element_table[9] = F9; + element_table[10] = F10; + element_table[11] = F11; + element_table[12] = F12; + element_table[13] = F13; + element_table[14] = F14; + element_table[15] = F15; } else if constexpr (length == 6) { // 44 adds! 
Only use this if it saves us adding another table to a multi-scalar-multiplication - element A0 = inputs[1] + inputs[0]; // B + A - element A1 = inputs[1] - inputs[0]; // B - A - element E0 = inputs[4] + inputs[3]; // E + D - element E1 = inputs[4] - inputs[3]; // E - D - - element C0 = inputs[2] + A0; // C + B + A - element C1 = inputs[2] + A1; // C + B - A - element C2 = inputs[2] - A1; // C - B + A - element C3 = inputs[2] - A0; // C - B - A - - element F0 = inputs[5] + E0; // F + E + D - element F1 = inputs[5] + E1; // F + E - D - element F2 = inputs[5] - E1; // F - E + D - element F3 = inputs[5] - E0; // F - E - E - - element_table[0] = F0 + C0; - element_table[1] = F0 + C1; - element_table[2] = F0 + C2; - element_table[3] = F0 + C3; - element_table[4] = F0 - C3; - element_table[5] = F0 - C2; - element_table[6] = F0 - C1; - element_table[7] = F0 - C0; - - element_table[8] = F1 + C0; - element_table[9] = F1 + C1; - element_table[10] = F1 + C2; - element_table[11] = F1 + C3; - element_table[12] = F1 - C3; - element_table[13] = F1 - C2; - element_table[14] = F1 - C1; - element_table[15] = F1 - C0; - - element_table[16] = F2 + C0; - element_table[17] = F2 + C1; - element_table[18] = F2 + C2; - element_table[19] = F2 + C3; - element_table[20] = F2 - C3; - element_table[21] = F2 - C2; - element_table[22] = F2 - C1; - element_table[23] = F2 - C0; - - element_table[24] = F3 + C0; - element_table[25] = F3 + C1; - element_table[26] = F3 + C2; - element_table[27] = F3 + C3; - element_table[28] = F3 - C3; - element_table[29] = F3 - C2; - element_table[30] = F3 - C1; - element_table[31] = F3 - C0; + + auto [A0, A1] = inputs[1].add_sub(inputs[0]); + auto [E0, E1] = inputs[4].add_sub(inputs[3]); + auto [C0, C3] = inputs[2].add_sub(A0); + auto [C1, C2] = inputs[2].add_sub(A1); + + auto [F0, F3] = inputs[5].add_sub(E0); + auto [F1, F2] = inputs[5].add_sub(E1); + + auto [R0, R7] = F0.add_sub(C0); + auto [R1, R6] = F0.add_sub(C1); + auto [R2, R5] = F0.add_sub(C2); + auto [R3, 
R4] = F0.add_sub(C3); + + auto [S0, S7] = F1.add_sub(C0); + auto [S1, S6] = F1.add_sub(C1); + auto [S2, S5] = F1.add_sub(C2); + auto [S3, S4] = F1.add_sub(C3); + + auto [U0, U7] = F2.add_sub(C0); + auto [U1, U6] = F2.add_sub(C1); + auto [U2, U5] = F2.add_sub(C2); + auto [U3, U4] = F2.add_sub(C3); + + auto [W0, W7] = F3.add_sub(C0); + auto [W1, W6] = F3.add_sub(C1); + auto [W2, W5] = F3.add_sub(C2); + auto [W3, W4] = F3.add_sub(C3); + + element_table[0] = R0; + element_table[1] = R1; + element_table[2] = R2; + element_table[3] = R3; + element_table[4] = R4; + element_table[5] = R5; + element_table[6] = R6; + element_table[7] = R7; + + element_table[8] = S0; + element_table[9] = S1; + element_table[10] = S2; + element_table[11] = S3; + element_table[12] = S4; + element_table[13] = S5; + element_table[14] = S6; + element_table[15] = S7; + + element_table[16] = U0; + element_table[17] = U1; + element_table[18] = U2; + element_table[19] = U3; + element_table[20] = U4; + element_table[21] = U5; + element_table[22] = U6; + element_table[23] = U7; + + element_table[24] = W0; + element_table[25] = W1; + element_table[26] = W2; + element_table[27] = W3; + element_table[28] = W4; + element_table[29] = W5; + element_table[30] = W6; + element_table[31] = W7; } else if constexpr (length == 7) { // 82 adds! This one is not worth using... 
@@ -341,9 +401,9 @@ element::lookup_table_plookup::lookup_table_plookup(con element_table[63] = G3 - E0; } for (size_t i = 0; i < table_size / 2; ++i) { - element_table[i + table_size / 2] = (-element_table[table_size / 2 - 1 - i]).reduce(); + element_table[i + table_size / 2] = (-element_table[table_size / 2 - 1 - i]); } - coordinates = create_group_element_rom_tables(element_table); + coordinates = create_group_element_rom_tables(element_table, limb_max); } template @@ -356,7 +416,7 @@ element element::lookup_table_plookup::ge accumulators.emplace_back(field_t(bits[i]) * (1ULL << i)); } field_t index = field_t::accumulate(accumulators); - return read_group_element_rom_tables(coordinates, index); + return read_group_element_rom_tables(coordinates, index, limb_max); } /** diff --git a/cpp/src/barretenberg/stdlib/recursion/aggregation_state/aggregation_state.hpp b/cpp/src/barretenberg/stdlib/recursion/aggregation_state/aggregation_state.hpp index 64d7e69fc8..005ce5aec2 100644 --- a/cpp/src/barretenberg/stdlib/recursion/aggregation_state/aggregation_state.hpp +++ b/cpp/src/barretenberg/stdlib/recursion/aggregation_state/aggregation_state.hpp @@ -28,9 +28,45 @@ template struct aggregation_state { // has_data == other.has_data; can't compare as native }; + /** + * @brief TODO(@dbanks12 please migrate A3 circuits to using `assign_object_to_proof_outputs`. Much safer to not + * independently track `proof_witness_indices` and whether object has been assigned to public inputs) + * + */ void add_proof_outputs_as_public_inputs() { - ASSERT(proof_witness_indices.size() > 0); + auto* context = P0.get_context(); + context->add_recursive_proof(proof_witness_indices); + } + + void assign_object_to_proof_outputs() + { + if (proof_witness_indices.size() == 0) { + std::cerr << "warning. 
calling `add_proof_outputs_as_public_inputs`, but aggregation object already has " + "assigned proof outputs to public inputs."; + return; + } + + P0 = P0.reduce(); + P1 = P1.reduce(); + proof_witness_indices = { + P0.x.binary_basis_limbs[0].element.normalize().witness_index, + P0.x.binary_basis_limbs[1].element.normalize().witness_index, + P0.x.binary_basis_limbs[2].element.normalize().witness_index, + P0.x.binary_basis_limbs[3].element.normalize().witness_index, + P0.y.binary_basis_limbs[0].element.normalize().witness_index, + P0.y.binary_basis_limbs[1].element.normalize().witness_index, + P0.y.binary_basis_limbs[2].element.normalize().witness_index, + P0.y.binary_basis_limbs[3].element.normalize().witness_index, + P1.x.binary_basis_limbs[0].element.normalize().witness_index, + P1.x.binary_basis_limbs[1].element.normalize().witness_index, + P1.x.binary_basis_limbs[2].element.normalize().witness_index, + P1.x.binary_basis_limbs[3].element.normalize().witness_index, + P1.y.binary_basis_limbs[0].element.normalize().witness_index, + P1.y.binary_basis_limbs[1].element.normalize().witness_index, + P1.y.binary_basis_limbs[2].element.normalize().witness_index, + P1.y.binary_basis_limbs[3].element.normalize().witness_index, + }; auto* context = P0.get_context(); diff --git a/cpp/src/barretenberg/stdlib/recursion/transcript/transcript.hpp b/cpp/src/barretenberg/stdlib/recursion/transcript/transcript.hpp index 5ca0439d9c..9db2f12dc9 100644 --- a/cpp/src/barretenberg/stdlib/recursion/transcript/transcript.hpp +++ b/cpp/src/barretenberg/stdlib/recursion/transcript/transcript.hpp @@ -12,10 +12,11 @@ #include "../../commitment/pedersen/pedersen_plookup.hpp" #include "../../primitives/bigfield/bigfield.hpp" #include "../../primitives/biggroup/biggroup.hpp" -#include "../../primitives/bool/bool.hpp" #include "../../primitives/field/field.hpp" #include "../../primitives/witness/witness.hpp" +#include "../../primitives/bool/bool.hpp" +#include 
"../verification_key//verification_key.hpp" namespace proof_system::plonk { namespace stdlib { namespace recursion { @@ -133,196 +134,140 @@ template class Transcript { ++current_round; return; } - const size_t bytes_per_element = 31; - - // split element into 2 limbs and insert into element_buffer - // each entry in element_buffer is 31 bytes - const auto split = [&](field_pt& work_element, - std::vector& element_buffer, - const field_pt& element, - size_t& current_byte_counter, - const size_t num_bytes) { - uint256_t element_u256(element.get_value()); - size_t hi_bytes = bytes_per_element - current_byte_counter; - if (hi_bytes >= num_bytes) { - // hmm - size_t new_byte_counter = current_byte_counter + num_bytes; - field_pt hi = element; - const size_t leftovers = bytes_per_element - new_byte_counter; - field_pt buffer_shift = - field_pt(context, barretenberg::fr(uint256_t(1) << ((uint64_t)leftovers * 8ULL))); - work_element = work_element + (hi * buffer_shift); - work_element = work_element.normalize(); - current_byte_counter = new_byte_counter; - if (current_byte_counter == bytes_per_element) { - current_byte_counter = 0; - element_buffer.push_back(work_element); - work_element = field_pt(context, barretenberg::fr(0)); - } - return; - } - const size_t lo_bytes = num_bytes - hi_bytes; - field_pt lo = witness_t(context, barretenberg::fr(element_u256.slice(0, lo_bytes * 8))); - field_pt hi = witness_t(context, barretenberg::fr(element_u256.slice(lo_bytes * 8, 256))); - lo.create_range_constraint(lo_bytes * 8); - hi.create_range_constraint(hi_bytes * 8); - field_pt shift(context, barretenberg::fr(uint256_t(1ULL) << (uint64_t)lo_bytes * 8ULL)); - field_pt sum = lo + (hi * shift); - if (!element.is_constant() || !sum.is_constant()) { - sum.assert_equal(element); - } - current_byte_counter = (current_byte_counter + num_bytes) % bytes_per_element; - - // if current_byte_counter == 0 we've rolled over - if (current_byte_counter == 0) { - 
element_buffer.push_back(work_element + hi); - element_buffer.push_back(lo); - work_element = field_pt(context, 0); - } else { - work_element = work_element + hi; - - element_buffer.push_back(work_element); - - field_t lo_shift( - context, barretenberg::fr(uint256_t(1ULL) << ((31ULL - (uint64_t)current_byte_counter) * 8ULL))); - work_element = (lo * lo_shift); - work_element = work_element.normalize(); - } - }; - - std::vector compression_buffer; field_pt working_element(context); - size_t byte_counter = 0; + // maximum number of bytes we can store in a field element w/o wrapping modulus is 31. + // while we could store more *bits*, we want `preimage_buffer` to mirror how data is formatted + // when we serialize field/group elements natively (i.e. a byte array) + static constexpr size_t NUM_BITS_PER_PREIMAGE_ELEMENT = 31UL * 8UL; + PedersenPreimageBuilder preimage_buffer(context); if (current_round > 0) { - split(working_element, compression_buffer, field_pt(current_challenge), byte_counter, 32); + preimage_buffer.add_element(current_challenge); } for (auto manifest_element : get_manifest().get_round_manifest(current_round).elements) { if (manifest_element.num_bytes == 32 && manifest_element.name != "public_inputs") { - split(working_element, - compression_buffer, - get_field_element(manifest_element.name), - byte_counter, - manifest_element.num_bytes); + preimage_buffer.add_element(get_field_element(manifest_element.name)); } else if (manifest_element.num_bytes == 64 && manifest_element.name != "public_inputs") { group_pt point = get_circuit_group_element(manifest_element.name); - field_pt y_hi = - point.y.binary_basis_limbs[2].element + (point.y.binary_basis_limbs[3].element * fq_pt::shift_1); - field_pt y_lo = - point.y.binary_basis_limbs[0].element + (point.y.binary_basis_limbs[1].element * fq_pt::shift_1); - field_pt x_hi = - point.x.binary_basis_limbs[2].element + (point.x.binary_basis_limbs[3].element * fq_pt::shift_1); - field_pt x_lo = - 
point.x.binary_basis_limbs[0].element + (point.x.binary_basis_limbs[1].element * fq_pt::shift_1); - const size_t lo_bytes = fq_pt::NUM_LIMB_BITS / 4; - const size_t hi_bytes = 32 - lo_bytes; - - split(working_element, compression_buffer, y_hi, byte_counter, hi_bytes); - split(working_element, compression_buffer, y_lo, byte_counter, lo_bytes); - split(working_element, compression_buffer, x_hi, byte_counter, hi_bytes); - split(working_element, compression_buffer, x_lo, byte_counter, lo_bytes); + // In our buffer, we want to represent each field element as occupying 256 bits of data (to match what + // the native transcript does) + const auto& x = point.x; + const auto& y = point.y; + constexpr size_t last_limb_bits = 256 - (fq_pt::NUM_LIMB_BITS * 3); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[3].element, + last_limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[2].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[1].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[0].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[3].element, + last_limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[2].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[1].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[0].element, + fq_pt::NUM_LIMB_BITS); + } else if (manifest_element.name == "public_inputs") { std::vector field_array = get_field_element_vector(manifest_element.name); for (size_t i = 0; i < field_array.size(); ++i) { - split(working_element, compression_buffer, field_array[i], byte_counter, 32); + 
preimage_buffer.add_element(field_array[i]); } } else if (manifest_element.num_bytes < 32 && manifest_element.name != "public_inputs") { - split(working_element, - compression_buffer, - get_field_element(manifest_element.name), - byte_counter, - manifest_element.num_bytes); + // TODO(zac): init round data is being grabbed out of the manifest and not the vkey + preimage_buffer.add_element_with_existing_range_constraint(get_field_element(manifest_element.name), + manifest_element.num_bytes * 8); } } - std::vector> round_challenges; + std::vector round_challenges_new; - if (byte_counter != 0) { - const uint256_t down_shift = uint256_t(1) << uint256_t((bytes_per_element - byte_counter) * 8); - working_element = working_element / barretenberg::fr(down_shift); - working_element = working_element.normalize(); + field_pt T0; + T0 = preimage_buffer.compress(0); - compression_buffer.push_back(working_element); - } + // helper method to slice a challenge into 128-bit slices + const auto slice_into_halves = [&](const field_pt& in, const size_t low_bits = 128) { + uint256_t v = in.get_value(); + uint256_t lo = v.slice(0, low_bits); + uint256_t hi = v.slice(low_bits, 256); - field_pt T0; - if constexpr (Composer::type == ComposerType::PLOOKUP) { - T0 = stdlib::pedersen_plookup_commitment::compress(compression_buffer); - } else { - T0 = stdlib::pedersen_commitment::compress(compression_buffer); - } - byte_array compressed_buffer(T0); + field_pt y_lo = field_pt::from_witness(context, lo); + field_pt y_hi = field_pt::from_witness(context, hi); - // TODO(@zac-williamson) make this a Poseidon hash - byte_array base_hash; - if constexpr (Composer::type == ComposerType::PLOOKUP) { - std::vector compression_buffer; - field_pt working_element(context); - size_t byte_counter = 0; - split(working_element, compression_buffer, field_pt(compressed_buffer), byte_counter, 32); - if (byte_counter != 0) { - const uint256_t down_shift = uint256_t(1) << uint256_t((bytes_per_element - byte_counter) 
* 8); - working_element = working_element / barretenberg::fr(down_shift); - working_element = working_element.normalize(); - compression_buffer.push_back(working_element); + y_lo.create_range_constraint(low_bits); + y_hi.create_range_constraint(254 - low_bits); + + in.add_two(-y_lo, -y_hi * (uint256_t(1) << low_bits)).assert_equal(0); + + // Validate the sum of our two halves does not exceed the circuit modulus over the integers + constexpr uint256_t modulus = fr::modulus; + const field_pt r_lo = field_pt(context, modulus.slice(0, low_bits)); + const field_pt r_hi = field_pt(context, modulus.slice(low_bits, 256)); + + bool need_borrow = (uint256_t(y_lo.get_value()) > uint256_t(r_lo.get_value())); + field_pt borrow = field_pt::from_witness(context, need_borrow); + + // directly call `create_new_range_constraint` to avoid creating an arithmetic gate + if constexpr (Composer::type == ComposerType::PLOOKUP) { + context->create_new_range_constraint(borrow.get_witness_index(), 1, "borrow"); + } else { + context->create_range_constraint(borrow.get_witness_index(), 1, "borrow"); } - base_hash = stdlib::pedersen_plookup_commitment::compress(compression_buffer); + + // Hi range check = r_hi - y_hi - borrow + // Lo range check = r_lo - y_lo + borrow * 2^{126} + field_pt res_hi = (r_hi - y_hi) - borrow; + field_pt res_lo = (r_lo - y_lo) + (borrow * (uint256_t(1) << low_bits)); + + res_hi.create_range_constraint(modulus.get_msb() + 1 - low_bits); + res_lo.create_range_constraint(low_bits); + + return std::array{ y_lo, y_hi }; + }; + + field_pt base_hash; + if constexpr (Composer::type == ComposerType::PLOOKUP) { + base_hash = stdlib::pedersen_plookup_commitment::compress(std::vector{ T0 }, 0); } else { - base_hash = stdlib::blake3s(compressed_buffer); + base_hash = stdlib::pedersen_commitment::compress(std::vector{ T0 }, 0); } - byte_array first(field_pt(0), 16); - first.write(base_hash.slice(0, 16)); - round_challenges.push_back(first); + auto hash_halves = 
slice_into_halves(base_hash); + round_challenges_new.push_back(hash_halves[1]); if (num_challenges > 1) { - byte_array second(field_pt(0), 16); - second.write(base_hash.slice(16, 16)); - round_challenges.push_back(second); + round_challenges_new.push_back(hash_halves[0]); } + base_hash = (slice_into_halves(base_hash, 8)[1] * 256).normalize(); - // This block of code only executes for num_challenges > 2, which (currently) only happens in the nu round when - // we need to generate short scalars. In this case, we generate 32-byte challenges and split them in half to get - // the relevant challenges. + // This block of code only executes for num_challenges > 2, which (currently) only happens in the nu round + // when we need to generate short scalars. In this case, we generate 32-byte challenges and split them in + // half to get the relevant challenges. for (size_t i = 2; i < num_challenges; i += 2) { - byte_array rolling_buffer = base_hash; - byte_array hash_output; + // TODO(@zac-williamson) make this a Poseidon hash not a Pedersen hash + field_pt hash_output; if constexpr (Composer::type == ComposerType::PLOOKUP) { - // TODO(@zac-williamson) make this a Poseidon hash not a Pedersen hash - std::vector compression_buffer; - field_pt working_element(context); - size_t byte_counter = 0; - split(working_element, compression_buffer, field_pt(rolling_buffer), byte_counter, 32); - split(working_element, compression_buffer, field_pt(field_pt(i / 2)), byte_counter, 1); - if (byte_counter != 0) { - const uint256_t down_shift = uint256_t(1) << uint256_t((bytes_per_element - byte_counter) * 8); - working_element = working_element / barretenberg::fr(down_shift); - working_element = working_element.normalize(); - compression_buffer.push_back(working_element); - } - hash_output = stdlib::pedersen_plookup_commitment::compress(compression_buffer); + hash_output = stdlib::pedersen_plookup_commitment::compress( + std::vector{ (base_hash + field_pt(i / 2)).normalize() }, 0); } else { - 
rolling_buffer.write(byte_array(field_pt(i / 2), 1)); - hash_output = stdlib::blake3s(rolling_buffer); + hash_output = stdlib::pedersen_commitment::compress( + std::vector{ (base_hash + field_pt(i / 2)).normalize() }, 0); } - byte_array hi(field_pt(0), 16); - hi.write(hash_output.slice(0, 16)); - round_challenges.push_back(hi); - + auto hash_halves = slice_into_halves(hash_output); + round_challenges_new.push_back(hash_halves[1]); if (i + 1 < num_challenges) { - byte_array lo(field_pt(0), 16); - lo.write(hash_output.slice(16, 16)); - round_challenges.push_back(lo); + round_challenges_new.push_back(hash_halves[0]); } } - - current_challenge = round_challenges[round_challenges.size() - 1]; + current_challenge = round_challenges_new[round_challenges_new.size() - 1]; ++current_round; - challenge_keys.push_back(challenge_name); std::vector challenge_elements; - for (const auto& challenge : round_challenges) { - challenge_elements.push_back(static_cast(challenge)); + for (const auto& challenge : round_challenges_new) { + challenge_elements.push_back(challenge); } challenge_values.push_back(challenge_elements); } @@ -420,7 +365,7 @@ template class Transcript { private: transcript::Transcript transcript_base; - byte_array current_challenge; + field_pt current_challenge; mutable std::vector field_vector_keys; mutable std::vector> field_vector_values; diff --git a/cpp/src/barretenberg/stdlib/recursion/verification_key/verification_key.hpp b/cpp/src/barretenberg/stdlib/recursion/verification_key/verification_key.hpp index 57b8a39207..915f63e5a3 100644 --- a/cpp/src/barretenberg/stdlib/recursion/verification_key/verification_key.hpp +++ b/cpp/src/barretenberg/stdlib/recursion/verification_key/verification_key.hpp @@ -20,10 +20,163 @@ #include "../../commitment/pedersen/pedersen_plookup.hpp" #include "../../primitives/curves/bn254.hpp" +#include "barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp" + namespace proof_system::plonk { namespace stdlib { namespace 
recursion { +/** + * @brief Constructs a packed buffer of field elements to be fed into a Pedersen compress function + * Goal is to concatenate multiple inputs together into a single field element if the inputs are known to be + * small. Produces a vector of field elements where the maximum number of bits per element is `bits_per_element`. + * + * @details When calling `pedersen::compress` on the final buffer, we can skip the range checks normally performed in + * the compress method, because we know the sums of the scalar slices cannot exceed the field modulus. This requires + * `bits_per_element < modulus bits` + * @tparam Composer + * @tparam bits_per_element + */ +template struct PedersenPreimageBuilder { + using field_pt = field_t; + using witness_pt = witness_t; + + Composer* context; + + PedersenPreimageBuilder(Composer* ctx = nullptr) + : context(ctx){}; + + field_pt compress(const size_t hash_index) + { + // we can only use relaxed range checks in pedersen::compress iff bits_per_element < modulus bits + static_assert(bits_per_element < uint256_t(barretenberg::fr::modulus).get_msb()); + + if (current_bit_counter != 0) { + const uint256_t down_shift = uint256_t(1) << uint256_t((bits_per_element - current_bit_counter)); + for (auto& x : work_element) { + x = x / barretenberg::fr(down_shift); + } + preimage_data.push_back(field_pt::accumulate(work_element)); + } + if constexpr (Composer::type == ComposerType::PLOOKUP) { + return pedersen_plookup_commitment::compress_with_relaxed_range_constraints(preimage_data, + hash_index); + } else { + return pedersen_commitment::compress(preimage_data, hash_index); + } + } + + /** + * @brief preimage_data is a bit-array where `bits_per_element` number of bits are packed into a single field + * element + */ + std::vector preimage_data; + + /** + * @brief work_element represents the leading element to be added into `preimage_data`. 
+ * Vector is composed of field elements that represent bit chunks of a known length, + * such that the sum of the bit chunks < bits_per_element + */ + std::vector work_element; + + size_t current_bit_counter = 0; + + void add_element(const field_pt& element) { slice_element(element, 256); } + + void add_element_with_existing_range_constraint(const field_pt& element, const size_t num_bits) + { + slice_element(element, num_bits); + } + + /** + * @brief Populate `preimage_data` with element whose size is known to be `num_bits`. + * `preimage_data` is treated as a bit-array where `bits_per_element` number of bits are packed into a single field + * element. `slice_element` will: + * + * 1. determine how many bits are remaining in work_element + * 2. if remaining bits > num_bits, slice `element` into 2 chunks hi/lo + * 3. fill work_element with `hi` chunk (or the full element if possible) + * 4. (if work_element is full) combine work_element chunks into a field element and push onto `preimage_data` + * 4. 
(if required) create a new work_element and populate with `lo` + * + * @param element + * @param num_bits + */ + void slice_element(const field_pt& element, const size_t num_bits) + { + ASSERT(context != nullptr); + uint256_t element_u256(element.get_value()); + size_t hi_bits = bits_per_element - current_bit_counter; + if (hi_bits >= num_bits) { + // hmm + size_t new_bit_counter = current_bit_counter + num_bits; + field_pt hi = element; + const size_t leftovers = bits_per_element - new_bit_counter; + field_pt buffer_shift = field_pt(context, barretenberg::fr(uint256_t(1) << ((uint64_t)leftovers))); + work_element.emplace_back(hi * buffer_shift); + current_bit_counter = new_bit_counter; + if (current_bit_counter == bits_per_element) { + current_bit_counter = 0; + preimage_data.push_back(field_pt::accumulate(work_element)); + + work_element = std::vector(); + } + return; + } + const size_t lo_bits = num_bits - hi_bits; + field_pt lo = witness_t(context, barretenberg::fr(element_u256.slice(0, lo_bits))); + field_pt hi = witness_t(context, barretenberg::fr(element_u256.slice(lo_bits, 256))); + lo.create_range_constraint(lo_bits); + hi.create_range_constraint(hi_bits); + field_pt shift(context, barretenberg::fr(uint256_t(1ULL) << (uint64_t)lo_bits)); + if (!element.is_constant() || !lo.is_constant() || !hi.is_constant()) { + lo.add_two(hi * shift, -element).assert_equal(0); + } + + constexpr uint256_t modulus = barretenberg::fr::modulus; + constexpr size_t modulus_bits = modulus.get_msb(); + + // If our input is a full field element we must validate the sum of our slices is < p + if (num_bits >= modulus_bits) { + const field_pt r_lo = field_pt(context, modulus.slice(0, lo_bits)); + const field_pt r_hi = field_pt(context, modulus.slice(lo_bits, num_bits)); + + bool need_borrow = (uint256_t(lo.get_value()) > uint256_t(r_lo.get_value())); + field_pt borrow = field_pt::from_witness(context, need_borrow); + + // directly call `create_new_range_constraint` to avoid creating 
an arithmetic gate + if constexpr (Composer::type == ComposerType::PLOOKUP) { + context->create_new_range_constraint(borrow.get_witness_index(), 1, "borrow"); + } else { + context->create_range_constraint(borrow.get_witness_index(), 1, "borrow"); + } + // Hi range check = r_hi - y_hi - borrow + // Lo range check = r_lo - y_lo + borrow * 2^{126} + field_t res_hi = (r_hi - hi) - borrow; + field_t res_lo = (r_lo - lo) + (borrow * (uint256_t(1) << lo_bits)); + + res_hi.create_range_constraint(modulus_bits + 1 - lo_bits); + res_lo.create_range_constraint(lo_bits); + } + current_bit_counter = (current_bit_counter + num_bits) % bits_per_element; + + // if current_bit_counter == 0 we've rolled over + if (current_bit_counter == 0) { + work_element.emplace_back(hi); + preimage_data.push_back(field_pt::accumulate(work_element)); + preimage_data.push_back(lo); + work_element = std::vector(); + } else { + work_element.emplace_back(hi); + preimage_data.push_back(field_pt::accumulate(work_element)); + field_t lo_shift(context, + barretenberg::fr(uint256_t(1ULL) << ((bits_per_element - (uint64_t)current_bit_counter)))); + work_element = std::vector(); + work_element.emplace_back(lo * lo_shift); + } + }; +}; + template struct evaluation_domain { static evaluation_domain from_witness(Composer* ctx, const barretenberg::evaluation_domain& input) { @@ -51,44 +204,6 @@ template struct evaluation_domain { return domain; } - field_t compress() const - { - if constexpr (Composer::type == ComposerType::PLOOKUP) { - field_t out = pedersen_plookup_commitment::compress({ - root, - domain, - generator, - }); - return out; - } else { - field_t out = pedersen_commitment::compress({ - root, - domain, - generator, - }); - return out; - } - } - - static barretenberg::fr compress_native(const barretenberg::evaluation_domain& input) - { - barretenberg::fr out; - if constexpr (Composer::type == ComposerType::PLOOKUP) { - out = crypto::pedersen_commitment::lookup::compress_native({ - input.root, - 
input.domain, - input.generator, - }); - } else { - out = crypto::pedersen_commitment::compress_native({ - input.root, - input.domain, - input.generator, - }); - } - return out; - } - field_t root; field_t root_inverse; field_t domain; @@ -120,9 +235,15 @@ template struct verification_key { key->num_public_inputs = witness_t(ctx, input_key->num_public_inputs); key->domain = evaluation_domain::from_witness(ctx, input_key->domain); key->contains_recursive_proof = witness_t(ctx, input_key->contains_recursive_proof); - for (const auto& [tag, value] : input_key->commitments) { - key->commitments.insert({ tag, Curve::g1_ct::from_witness(ctx, value) }); + // We do not perform on_curve() circuit checks when constructing the Curve::g1_ct element. + // The assumption is that the circuit creator is honest and that the verification key hash (or some other + // method) will be used to ensure the provided key matches the key produced by the circuit creator. + // If the circuit creator is not honest, the entire set of circuit constraints being proved over cannot be + // trusted! 
+ const typename Curve::fq_ct x = Curve::fq_ct::from_witness(ctx, value.x); + const typename Curve::fq_ct y = Curve::fq_ct::from_witness(ctx, value.y); + key->commitments.insert({ tag, typename Curve::g1_ct(x, y) }); } return key; @@ -189,71 +310,65 @@ template struct verification_key { public: field_t compress(size_t const hash_index = 0) { - field_t compressed_domain = domain.compress(); - - std::vector> preimage_data; - preimage_data.push_back(Composer::type); - preimage_data.push_back(compressed_domain); - preimage_data.push_back(num_public_inputs); + PedersenPreimageBuilder preimage_buffer(context); + + field_t composer_type = witness_t::create_constant_witness(context, Composer::type); + domain.generator.create_range_constraint(16, "domain.generator"); + domain.domain.create_range_constraint(32, "domain.generator"); + num_public_inputs.create_range_constraint(32, "num_public_inputs"); + preimage_buffer.add_element_with_existing_range_constraint(composer_type, 8); + preimage_buffer.add_element_with_existing_range_constraint(domain.generator, 16); // coset generator is small + preimage_buffer.add_element_with_existing_range_constraint(domain.domain, 32); + preimage_buffer.add_element_with_existing_range_constraint(num_public_inputs, 32); + constexpr size_t limb_bits = Curve::fq_ct::NUM_LIMB_BITS; + constexpr size_t last_limb_bits = 256 - (limb_bits * 3); for (const auto& [tag, selector] : commitments) { - preimage_data.push_back(selector.x.binary_basis_limbs[0].element); - preimage_data.push_back(selector.x.binary_basis_limbs[1].element); - preimage_data.push_back(selector.x.binary_basis_limbs[2].element); - preimage_data.push_back(selector.x.binary_basis_limbs[3].element); - preimage_data.push_back(selector.y.binary_basis_limbs[0].element); - preimage_data.push_back(selector.y.binary_basis_limbs[1].element); - preimage_data.push_back(selector.y.binary_basis_limbs[2].element); - preimage_data.push_back(selector.y.binary_basis_limbs[3].element); - } - - field_t 
compressed_key; - if constexpr (Composer::type == ComposerType::PLOOKUP) { - compressed_key = pedersen_plookup_commitment::compress(preimage_data, hash_index); - } else { - compressed_key = pedersen_commitment::compress(preimage_data, hash_index); + const auto& x = selector.x; + const auto& y = selector.y; + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[3].element, last_limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[2].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[1].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[0].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[3].element, last_limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[2].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[1].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[0].element, limb_bits); } + preimage_buffer.add_element(domain.root); + field_t compressed_key = preimage_buffer.compress(hash_index); return compressed_key; } - static barretenberg::fr compress_native(const std::shared_ptr& key, - const size_t hash_index = 0) + static barretenberg::fr compress_native(const std::shared_ptr& key, const size_t = 0) { - barretenberg::fr compressed_domain = evaluation_domain::compress_native(key->domain); - - constexpr size_t num_limb_bits = bn254::fq_ct::NUM_LIMB_BITS; - const auto split_bigfield_limbs = [](const uint256_t& element) { - std::vector limbs; - limbs.push_back(element.slice(0, num_limb_bits)); - limbs.push_back(element.slice(num_limb_bits, num_limb_bits * 2)); - limbs.push_back(element.slice(num_limb_bits * 2, num_limb_bits * 3)); - limbs.push_back(element.slice(num_limb_bits * 3, num_limb_bits * 4)); 
- return limbs; - }; - - std::vector preimage_data; - preimage_data.push_back(Composer::type); - preimage_data.push_back(compressed_domain); - preimage_data.push_back(key->num_public_inputs); + std::vector preimage_data; + + preimage_data.push_back(static_cast(Composer::type)); + + const uint256_t domain = key->domain.domain; + const uint256_t generator = key->domain.generator; + const uint256_t num_public_inputs = key->num_public_inputs; + + ASSERT(domain < (uint256_t(1) << 32)); + ASSERT(generator < (uint256_t(1) << 16)); + ASSERT(num_public_inputs < (uint256_t(1) << 32)); + + write(preimage_data, static_cast(uint256_t(key->domain.generator))); + write(preimage_data, static_cast(uint256_t(key->domain.domain))); + write(preimage_data, static_cast(key->num_public_inputs)); for (const auto& [tag, selector] : key->commitments) { - const auto x_limbs = split_bigfield_limbs(selector.x); - const auto y_limbs = split_bigfield_limbs(selector.y); - - preimage_data.push_back(x_limbs[0]); - preimage_data.push_back(x_limbs[1]); - preimage_data.push_back(x_limbs[2]); - preimage_data.push_back(x_limbs[3]); - - preimage_data.push_back(y_limbs[0]); - preimage_data.push_back(y_limbs[1]); - preimage_data.push_back(y_limbs[2]); - preimage_data.push_back(y_limbs[3]); + write(preimage_data, selector.y); + write(preimage_data, selector.x); } + write(preimage_data, key->domain.root); + barretenberg::fr compressed_key; if constexpr (Composer::type == ComposerType::PLOOKUP) { - compressed_key = crypto::pedersen_commitment::lookup::compress_native(preimage_data, hash_index); + compressed_key = + from_buffer(crypto::pedersen_commitment::lookup::compress_native(preimage_data)); } else { - compressed_key = crypto::pedersen_commitment::compress_native(preimage_data, hash_index); + compressed_key = crypto::pedersen_commitment::compress_native(preimage_data); } return compressed_key; } diff --git a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.hpp 
b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.hpp index aa185e34d7..009e732b20 100644 --- a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.hpp +++ b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.hpp @@ -37,7 +37,6 @@ void populate_kate_element_map(typename Curve::Composer* ctx, typename Curve::fr_ct& batch_opening_scalar) { using fr_ct = typename Curve::fr_ct; - using g1_ct = typename Curve::g1_ct; const auto& polynomial_manifest = key->polynomial_manifest; for (size_t i = 0; i < key->polynomial_manifest.size(); ++i) { const auto& item = polynomial_manifest[i]; @@ -45,14 +44,14 @@ void populate_kate_element_map(typename Curve::Composer* ctx, const std::string poly_label(item.polynomial_label); switch (item.source) { case PolynomialSource::WITNESS: { - const auto element = transcript.get_group_element(label); - ASSERT(element.on_curve()); - if (element.is_point_at_infinity()) { + // get_circuit_group_element validates that the point produced lies on the curve + const auto element = transcript.get_circuit_group_element(label); + ASSERT(element.get_value().on_curve()); + if (element.get_value().is_point_at_infinity()) { std::cerr << label << " witness is point at infinity! Error!" 
<< std::endl; ctx->failure("witness " + label + " is point at infinity"); } - // g1_ct::from_witness validates that the point produced lies on the curve - kate_g1_elements.insert({ label, g1_ct::from_witness(ctx, element) }); + kate_g1_elements.insert({ label, element }); break; } case PolynomialSource::SELECTOR: @@ -89,15 +88,15 @@ void populate_kate_element_map(typename Curve::Composer* ctx, fr_ct z_power = 1; for (size_t i = 0; i < program_settings::program_width; ++i) { std::string quotient_label = "T_" + std::to_string(i + 1); - const auto element = transcript.get_group_element(quotient_label); + const auto element = transcript.get_circuit_group_element(quotient_label); - kate_g1_elements.insert({ quotient_label, g1_ct::from_witness(ctx, element) }); + kate_g1_elements.insert({ quotient_label, element }); kate_fr_elements_at_zeta_large.insert({ quotient_label, quotient_nu * z_power }); z_power *= key->z_pow_n; } - const auto PI_Z = transcript.get_group_element("PI_Z"); - const auto PI_Z_OMEGA = transcript.get_group_element("PI_Z_OMEGA"); + const auto PI_Z = transcript.get_circuit_group_element("PI_Z"); + const auto PI_Z_OMEGA = transcript.get_circuit_group_element("PI_Z_OMEGA"); fr_ct u = transcript.get_challenge_field_element("separator", 0); @@ -105,10 +104,10 @@ void populate_kate_element_map(typename Curve::Composer* ctx, proof_system::plonk::compute_kate_batch_evaluation(key, transcript); batch_opening_scalar = -batch_evaluation; - kate_g1_elements.insert({ "PI_Z_OMEGA", g1_ct::from_witness(ctx, PI_Z_OMEGA) }); + kate_g1_elements.insert({ "PI_Z_OMEGA", PI_Z_OMEGA }); kate_fr_elements_at_zeta_large.insert({ "PI_Z_OMEGA", zeta * key->domain.root * u }); - kate_g1_elements.insert({ "PI_Z", g1_ct::from_witness(ctx, PI_Z) }); + kate_g1_elements.insert({ "PI_Z", PI_Z }); kate_fr_elements_at_zeta.insert({ "PI_Z", zeta }); } @@ -287,18 +286,6 @@ aggregation_state verify_proof(typename Curve::Composer* context, for (const auto& [label, fr_value] : 
kate_fr_elements_at_zeta_omega) { const auto& g1_value = kate_g1_elements[label]; - // if (fr_value.get_value() == 0 && fr_value.witness_index != IS_CONSTANT ) - // { - // std::cerr << "bad scalar zero at " << label << std::endl; - // } - // if (fr_value.get_value() == 0 && fr_value.witness_index == IS_CONSTANT) { - // std::cerr << "scalar zero at " << label << std::endl; - // continue; - // } - - // if (fr_value.get_value() == 0 && fr_value.witness_index == IS_CONSTANT) { - // continue; - // } double_opening_scalars.emplace_back(fr_value); double_opening_elements.emplace_back(g1_value); } @@ -320,8 +307,7 @@ aggregation_state verify_proof(typename Curve::Composer* context, opening_elements.push_back(previous_output.P0); opening_scalars.push_back(random_separator); - rhs_elements.push_back( - (-(previous_output.P1)).reduce()); // TODO: use .normalize() instead? (As per defi bridge project) + rhs_elements.push_back((-(previous_output.P1))); rhs_scalars.push_back(random_separator); } @@ -344,6 +330,10 @@ aggregation_state verify_proof(typename Curve::Composer* context, const fr_ct l1 = public_inputs[idx1]; const fr_ct l2 = public_inputs[idx2]; const fr_ct l3 = public_inputs[idx3]; + l0.create_range_constraint(fq_ct::NUM_LIMB_BITS, "l0"); + l1.create_range_constraint(fq_ct::NUM_LIMB_BITS, "l1"); + l2.create_range_constraint(fq_ct::NUM_LIMB_BITS, "l2"); + l3.create_range_constraint(fq_ct::NUM_LAST_LIMB_BITS, "l3"); return fq_ct(l0, l1, l2, l3, false); }; @@ -369,7 +359,7 @@ aggregation_state verify_proof(typename Curve::Composer* context, opening_elements.push_back(g1_ct(x0, y0)); opening_scalars.push_back(recursion_separator_challenge); - rhs_elements.push_back((-g1_ct(x1, y1)).normalize()); + rhs_elements.push_back((-g1_ct(x1, y1))); rhs_scalars.push_back(recursion_separator_challenge); } @@ -380,13 +370,13 @@ aggregation_state verify_proof(typename Curve::Composer* context, for (const auto& to_add : elements_to_add) { opening_result = opening_result + to_add; } - 
opening_result = opening_result.normalize(); g1_ct rhs = g1_ct::template wnaf_batch_mul<128>(rhs_elements, rhs_scalars); - rhs = rhs + PI_Z; - rhs = (-rhs).normalize(); - std::vector proof_witness_indices{ + rhs = (-rhs) - PI_Z; + + // TODO(zac: remove this once a3-packages has migrated to calling `assign_object_to_proof_outputs`) + std::vector proof_witness_indices = { opening_result.x.binary_basis_limbs[0].element.normalize().witness_index, opening_result.x.binary_basis_limbs[1].element.normalize().witness_index, opening_result.x.binary_basis_limbs[2].element.normalize().witness_index, @@ -404,10 +394,10 @@ aggregation_state verify_proof(typename Curve::Composer* context, rhs.y.binary_basis_limbs[2].element.normalize().witness_index, rhs.y.binary_basis_limbs[3].element.normalize().witness_index, }; - - return aggregation_state{ - opening_result, rhs, transcript.get_field_element_vector("public_inputs"), proof_witness_indices, true, + auto result = aggregation_state{ + opening_result, rhs, transcript.get_field_element_vector("public_inputs"), proof_witness_indices, true }; + return result; } } // namespace recursion diff --git a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.test.cpp b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.test.cpp index 82e8042b02..db3dfc2cc1 100644 --- a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.test.cpp +++ b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.test.cpp @@ -212,6 +212,8 @@ template class stdlib_verifier : public testing::Test { stdlib::recursion::verify_proof( &outer_composer, verification_key_b, recursive_manifest, recursive_proof_b, previous_output); + verification_key_b->compress(); + verification_key->compress(); return { output, verification_key }; } @@ -301,7 +303,7 @@ template class stdlib_verifier : public testing::Test { EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); - circuit_output.aggregation_state.add_proof_outputs_as_public_inputs(); + 
circuit_output.aggregation_state.assign_object_to_proof_outputs(); EXPECT_EQ(outer_composer.failed(), false); @@ -346,7 +348,7 @@ template class stdlib_verifier : public testing::Test { EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); - circuit_output.aggregation_state.add_proof_outputs_as_public_inputs(); + circuit_output.aggregation_state.assign_object_to_proof_outputs(); EXPECT_EQ(outer_composer.failed(), false); @@ -379,6 +381,9 @@ template class stdlib_verifier : public testing::Test { InnerComposer inner_composer_a = InnerComposer("../srs_db/ignition"); InnerComposer inner_composer_b = InnerComposer("../srs_db/ignition"); + OuterComposer mid_composer_a = OuterComposer("../srs_db/ignition"); + OuterComposer mid_composer_b = OuterComposer("../srs_db/ignition"); + OuterComposer outer_composer = OuterComposer("../srs_db/ignition"); std::vector inner_inputs{ barretenberg::fr::random_element(), @@ -388,7 +393,27 @@ template class stdlib_verifier : public testing::Test { create_inner_circuit(inner_composer_a, inner_inputs); create_inner_circuit(inner_composer_b, inner_inputs); - auto circuit_output = create_double_outer_circuit(inner_composer_a, inner_composer_b, outer_composer); + auto circuit_output_a = create_outer_circuit(inner_composer_a, mid_composer_a); + + uint256_t a0 = circuit_output_a.aggregation_state.P0.x.binary_basis_limbs[1].element.get_value(); + uint256_t a1 = circuit_output_a.aggregation_state.P0.y.binary_basis_limbs[1].element.get_value(); + uint256_t a2 = circuit_output_a.aggregation_state.P1.x.binary_basis_limbs[1].element.get_value(); + uint256_t a3 = circuit_output_a.aggregation_state.P1.y.binary_basis_limbs[1].element.get_value(); + + ASSERT(a0.get_msb() <= 68); + ASSERT(a1.get_msb() <= 68); + ASSERT(a2.get_msb() <= 68); + ASSERT(a3.get_msb() <= 68); + + circuit_output_a.aggregation_state.assign_object_to_proof_outputs(); + + auto circuit_output_b = create_outer_circuit(inner_composer_b, mid_composer_b); + + 
circuit_output_b.aggregation_state.assign_object_to_proof_outputs(); + + auto circuit_output = create_double_outer_circuit(mid_composer_a, mid_composer_b, outer_composer); + + circuit_output.aggregation_state.assign_object_to_proof_outputs(); g1::affine_element P[2]; P[0].x = barretenberg::fq(circuit_output.aggregation_state.P0.x.get_value().lo); @@ -398,8 +423,8 @@ template class stdlib_verifier : public testing::Test { barretenberg::fq12 inner_proof_result = barretenberg::pairing::reduced_ate_pairing_batch_precomputed( P, circuit_output.verification_key->reference_string->get_precomputed_g2_lines(), 2); - EXPECT_EQ(circuit_output.aggregation_state.public_inputs[0].get_value(), inner_inputs[0]); - EXPECT_EQ(circuit_output.aggregation_state.public_inputs[1].get_value(), inner_inputs[1]); + EXPECT_EQ(circuit_output_a.aggregation_state.public_inputs[0].get_value(), inner_inputs[0]); + EXPECT_EQ(circuit_output_a.aggregation_state.public_inputs[1].get_value(), inner_inputs[1]); EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); @@ -456,7 +481,6 @@ template class stdlib_verifier : public testing::Test { EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); printf("composer gates = %zu\n", outer_composer.get_num_gates()); - auto prover = outer_composer.create_prover(); auto verifier = outer_composer.create_verifier(); @@ -638,14 +662,23 @@ HEAVY_TYPED_TEST(stdlib_verifier, recursive_proof_composition) HEAVY_TYPED_TEST(stdlib_verifier, recursive_proof_composition_ultra_no_tables) { - TestFixture::test_recursive_proof_composition_ultra_no_tables(); + if constexpr (TypeParam::type == ComposerType::PLOOKUP) { + TestFixture::test_recursive_proof_composition_ultra_no_tables(); + } else { + // no point running this if we're not in UltraPlonk + GTEST_SKIP(); + } }; -// CircleCI can't cope with this. 
-// HEAVY_TYPED_TEST(stdlib_verifier, double_verification) -// { -// TestFixture::test_double_verification(); -// }; +HEAVY_TYPED_TEST(stdlib_verifier, double_verification) +{ + if constexpr (TypeParam::type == ComposerType::PLOOKUP) { + TestFixture::test_double_verification(); + } else { + // CircleCI can't cope with non-ultraplonk version. + GTEST_SKIP(); + } +}; HEAVY_TYPED_TEST(stdlib_verifier, recursive_proof_composition_with_variable_verification_key_a) { diff --git a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier_turbo.test.cpp b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier_turbo.test.cpp index 1f4410148b..36c128f3ea 100644 --- a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier_turbo.test.cpp +++ b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier_turbo.test.cpp @@ -188,7 +188,7 @@ template class stdlib_verifier_turbo : public testing:: EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); - circuit_output.aggregation_state.add_proof_outputs_as_public_inputs(); + circuit_output.aggregation_state.assign_object_to_proof_outputs(); EXPECT_EQ(outer_composer.failed(), false); std::cout << "creating prover" << std::endl; diff --git a/cpp/src/barretenberg/transcript/transcript.cpp b/cpp/src/barretenberg/transcript/transcript.cpp index 9c448949c9..8f597c454b 100644 --- a/cpp/src/barretenberg/transcript/transcript.cpp +++ b/cpp/src/barretenberg/transcript/transcript.cpp @@ -46,10 +46,12 @@ std::array Keccak256Hasher::hash(std std::array Blake3sHasher::hash(std::vector const& buffer) { - std::vector hash_result = blake3::blake3s(buffer); + grumpkin::fq input = grumpkin::fq::serialize_from_buffer(&buffer[0]); + grumpkin::fq compressed = crypto::pedersen_commitment::compress_native({ input }); + std::vector res = to_buffer(compressed); std::array result; for (size_t i = 0; i < PRNG_OUTPUT_SIZE; ++i) { - result[i] = hash_result[i]; + result[i] = res[i]; } return result; } @@ -59,10 +61,12 @@ std::array Blake3sHasher::hash_plookup // 
TODO(@zac-williamson) Change to call a Poseidon hash and create a PoseidonHasher // (not making the name change right now as it will break concurrent work w. getting recursion working in Noir) // We also need to implement a Poseidon gadget - std::vector compressed_buffer = crypto::pedersen_commitment::lookup::compress_native(buffer); + grumpkin::fq input = grumpkin::fq::serialize_from_buffer(&buffer[0]); + grumpkin::fq compressed = crypto::pedersen_commitment::lookup::compress_native({ input }); + std::vector res = to_buffer(compressed); std::array result; for (size_t i = 0; i < PRNG_OUTPUT_SIZE; ++i) { - result[i] = compressed_buffer[i]; + result[i] = res[i]; } return result; } @@ -259,7 +263,11 @@ void Transcript::apply_fiat_shamir(const std::string& challenge_name /*, const b } std::vector rolling_buffer(base_hash.begin(), base_hash.end()); - rolling_buffer.push_back(0); + if (hasher == HashType::Keccak256) { + rolling_buffer.push_back(0); + } else { + rolling_buffer[31] = (0); + } // Compute how many hashes we need so that we have enough distinct chunks of 'random' bytes to distribute // across the num_challenges. 
diff --git a/sol/src/ultra/keys/RecursiveUltraVerificationKey.sol b/sol/src/ultra/keys/RecursiveUltraVerificationKey.sol index 4b118cdef6..748d20fcb9 100644 --- a/sol/src/ultra/keys/RecursiveUltraVerificationKey.sol +++ b/sol/src/ultra/keys/RecursiveUltraVerificationKey.sol @@ -1,72 +1,72 @@ -// Verification Key Hash: 507de35addf16b79526d713259492d5d1764fdb6ce55ff4ccb03c147b72f381a +// Verification Key Hash: b665bc769f274feb94ea7f9997fa684b414aa8b9b9bac0227c7ce2e1cbd3d115 // SPDX-License-Identifier: Apache-2.0 // Copyright 2022 Aztec pragma solidity >=0.8.4; library RecursiveUltraVerificationKey { function verificationKeyHash() internal pure returns (bytes32) { - return 0x507de35addf16b79526d713259492d5d1764fdb6ce55ff4ccb03c147b72f381a; + return 0xb665bc769f274feb94ea7f9997fa684b414aa8b9b9bac0227c7ce2e1cbd3d115; } function loadVerificationKey(uint256 _vk, uint256 _omegaInverseLoc) internal pure { assembly { - mstore(add(_vk, 0x00), 0x0000000000000000000000000000000000000000000000000000000000080000) // vk.circuit_size + mstore(add(_vk, 0x00), 0x0000000000000000000000000000000000000000000000000000000000040000) // vk.circuit_size mstore(add(_vk, 0x20), 0x0000000000000000000000000000000000000000000000000000000000000010) // vk.num_inputs - mstore(add(_vk, 0x40), 0x2260e724844bca5251829353968e4915305258418357473a5c1d597f613f6cbd) // vk.work_root - mstore(add(_vk, 0x60), 0x3064486657634403844b0eac78ca882cfd284341fcb0615a15cfcd17b14d8201) // vk.domain_inverse - mstore(add(_vk, 0x80), 0x18fe72968b540c1dad6c7648fcb3407edfc489d8dcf3fdce314c1f0e72684c43) // vk.Q1.x - mstore(add(_vk, 0xa0), 0x16f49263ee016852edfed2e84bf44c22b31064b9034b62059329b2af2f349c37) // vk.Q1.y - mstore(add(_vk, 0xc0), 0x1c382676d0f8e5691def3a60d533850f573c36aa200ab364c091acc4a7eb094f) // vk.Q2.x - mstore(add(_vk, 0xe0), 0x17c05ca7ea679681a3cf772fabcf2c1a988e39910f1ba8de3d1f68ffb0effda1) // vk.Q2.y - mstore(add(_vk, 0x100), 0x257d75dead2d8cbb2f63b3592a762a2c2dbe0195a533736fd01982370e768676) // vk.Q3.x - 
mstore(add(_vk, 0x120), 0x258b6d74446f5e532bce6e1a62372a82986eac9801c13a8553f373c30398a47c) // vk.Q3.y - mstore(add(_vk, 0x140), 0x290ff6a808f6abe7508a8c884ea0fc2f819e23a5b6d7c2dd1105da2a3f0637e0) // vk.Q4.x - mstore(add(_vk, 0x160), 0x2e6c3c419be44ed56b61069a06e980360f58830ad52b38bb69de92c456ebf0ca) // vk.Q4.y - mstore(add(_vk, 0x180), 0x282e6e14bbedfc7ef013feb4877ce9098389abfd3ad8899c957be4fdb20d0454) // vk.Q_M.x - mstore(add(_vk, 0x1a0), 0x2483d06975c3965d3f2d205ddeff196b90ca5883878bffc0bd190a357fee947e) // vk.Q_M.y - mstore(add(_vk, 0x1c0), 0x09af8fed71838d47b0052d8e3fdda11f55c62a6f2cb9aab24edd90b5e9640e9c) // vk.Q_C.x - mstore(add(_vk, 0x1e0), 0x2bdf7549fa146188dd750d032d9dec911c5799ca99f72405c4ac49f3f9e3a51a) // vk.Q_C.y - mstore(add(_vk, 0x200), 0x1479a535c87c413301d82c5ae1598b46c03117a57b878416d1143bb48f1df8bf) // vk.Q_ARITHMETIC.x - mstore(add(_vk, 0x220), 0x03203e3c02cc68282d93507d0ad9d56304d5a4b2908233bcb6f8682f8b264532) // vk.Q_ARITHMETIC.y - mstore(add(_vk, 0x240), 0x0cccd1de3f4ef2a2bfffbb7a91f8be2c49e9dc9b565ba4312015a88558f40d20) // vk.QSORT.x - mstore(add(_vk, 0x260), 0x092c5bd4edb996d6c1189a2682f6e93ede4b9aff7f07823605c894f833316718) // vk.QSORT.y - mstore(add(_vk, 0x280), 0x20089848d81ee4e8d7700679e7b5ed017916e2ee28bf76c0e0f4862274637bb8) // vk.Q_ELLIPTIC.x - mstore(add(_vk, 0x2a0), 0x0faae100924d24a70708e49a08ba2ba9df261088bf04e7b4c3f811cc0d8995fe) // vk.Q_ELLIPTIC.y - mstore(add(_vk, 0x2c0), 0x2de71f46452329536fe14dfff808692c405b9ef1ae47c451be8383ded868af5c) // vk.Q_AUX.x - mstore(add(_vk, 0x2e0), 0x0a520e2f877f19cc69aad2396bf741e6864a9f0b657887e80165b794f7612e71) // vk.Q_AUX.y - mstore(add(_vk, 0x300), 0x2779b1b7b8433eeee7333a1372feb4587da74e2c93cc54917e201748ed847204) // vk.SIGMA1.x - mstore(add(_vk, 0x320), 0x2198823f66ad59612f6cb77aff9437388abdbcc4d8f6eac792d8bca7d1b341d9) // vk.SIGMA1.y - mstore(add(_vk, 0x340), 0x1f6732b9d128931b2e32b2cae73b029720cca3cef23fee25363d520ed0ba3f92) // vk.SIGMA2.x - mstore(add(_vk, 0x360), 
0x15fb336844e68b08361c10b83e7d6ea0f011958774e58e5f7c43e6606e989ecc) // vk.SIGMA2.y - mstore(add(_vk, 0x380), 0x0984b1b6c723afb4713656abf30b06e2ad04c054dd3acf016a6db1ee7111ca11) // vk.SIGMA3.x - mstore(add(_vk, 0x3a0), 0x03421d01f19c6b91e477648819f57d888b3b23b67599266293bddf91a2636ff1) // vk.SIGMA3.y - mstore(add(_vk, 0x3c0), 0x2f77cda90d366b151b17c5667f10526ab0fe144aecb307e00ede6039365bcfa0) // vk.SIGMA4.x - mstore(add(_vk, 0x3e0), 0x0d1e8f758babcbbf134dfe341c262ee25d0254cba8f5487ad5bddd190f27a9e8) // vk.SIGMA4.y - mstore(add(_vk, 0x400), 0x2f61a890b9f1dff4ef5c8b0eafe9b71c7a23dc4c8a6791d9c01418310f4a7b2e) // vk.TABLE1.x - mstore(add(_vk, 0x420), 0x07c8a51d1881fcdfe1cb7dcefc48a44047c7f5386797d5f8553ce2e12e8daba0) // vk.TABLE1.y - mstore(add(_vk, 0x440), 0x1adf56913dea23b7b14c952933b0b40fc476dc2697a758ec9df73802b0596c2f) // vk.TABLE2.x - mstore(add(_vk, 0x460), 0x212a1759e19285a35a70a245cca6477f89b6f156e4425cf52cfccb4594f59152) // vk.TABLE2.y - mstore(add(_vk, 0x480), 0x1527f8c19085ac209ebddbccae4dd0ca58b078e56fd20d651ce3a3194697b191) // vk.TABLE3.x - mstore(add(_vk, 0x4a0), 0x02247dca9c3cb09318aa6100a2a7c628281c69bc41cfda34aa72c263b69344b4) // vk.TABLE3.y - mstore(add(_vk, 0x4c0), 0x12eea56d2ada3befa5db215ea5ebbd37b5ce95fcd1cf7adb94d5a1784876b4f7) // vk.TABLE4.x - mstore(add(_vk, 0x4e0), 0x190df1146fbdd5cc79e8817ebcd6311e35cf5cc38795cee26371a707d685e05a) // vk.TABLE4.y - mstore(add(_vk, 0x500), 0x019b3a1970f9f77b13538cd8071ea3ee7c556fd98009e2a04be044ead0a94623) // vk.TABLE_TYPE.x - mstore(add(_vk, 0x520), 0x159cbdae3e194fe45524a171befdcb98b55c8d495fc463c98ac690eee947119f) // vk.TABLE_TYPE.y - mstore(add(_vk, 0x540), 0x16b2f7fa29f578aae3d4c0b8220101570adfcc9e8aa8a148267208540de189f1) // vk.ID1.x - mstore(add(_vk, 0x560), 0x2344a211fbbacc281de980197e4f12155d90d55a67f4ad08398bac665f813953) // vk.ID1.y - mstore(add(_vk, 0x580), 0x1af709df675db1688b95927324e71c5e551436ba7cb32478570a9cfaebf90614) // vk.ID2.x - mstore(add(_vk, 0x5a0), 
0x2b83e76f61aa5cd70218c38e693ae0a99e9a2f4a192af5c77dbd27fa605fdae4) // vk.ID2.y - mstore(add(_vk, 0x5c0), 0x038c89635a8b6ec9766d5f98d13c16f8c312088f830610de72c00edf8c3b7800) // vk.ID3.x - mstore(add(_vk, 0x5e0), 0x1863d9217ba6c6764fa02298efe25fabfbe454a27431b970a6afff5d1986fadb) // vk.ID3.y - mstore(add(_vk, 0x600), 0x259a5dd47d44d6240407c26718201a122fb4b6b38d838f6e24d1c75515016761) // vk.ID4.x - mstore(add(_vk, 0x620), 0x14db344b735ffe084107e5cea07b00e4c41a82f0073f76e0536cd7118d78866f) // vk.ID4.y + mstore(add(_vk, 0x40), 0x19ddbcaf3a8d46c15c0176fbb5b95e4dc57088ff13f4d1bd84c6bfa57dcdc0e0) // vk.work_root + mstore(add(_vk, 0x60), 0x30644259cd94e7dd5045d7a27013b7fcd21c9e3b7fa75222e7bda49b729b0401) // vk.domain_inverse + mstore(add(_vk, 0x80), 0x16f7fc6133c8fb2dab06c57392df697a53357ecd918d749d1c981dcd0ee6d849) // vk.Q1.x + mstore(add(_vk, 0xa0), 0x2ba047103f9f86b84058d718a082e2faa53e50109e7cb880d2cbb7a1bf98da89) // vk.Q1.y + mstore(add(_vk, 0xc0), 0x1b9d146737dbb7759e0cad93ad4a7669880a062aceb7b46b8485327976d7285c) // vk.Q2.x + mstore(add(_vk, 0xe0), 0x11de7c3d638acc90e7f844c08658d0588da864268e00576d26aaca3cf49af350) // vk.Q2.y + mstore(add(_vk, 0x100), 0x1466840d8ad2dfde3a55d4c98412a05807bbe8aac33c27ba100c1e621fbebba0) // vk.Q3.x + mstore(add(_vk, 0x120), 0x2198ce44955b8ac6e21ddcbb66acd9df7596ad9e5fcf22f2227e8bbb51fe44ee) // vk.Q3.y + mstore(add(_vk, 0x140), 0x18b96a49db3644e2986f811b8c104e8eb88aa5eb9aec0ca109322a64885688bd) // vk.Q4.x + mstore(add(_vk, 0x160), 0x2ffec963826849cabd279a2b9f9a26f81518eb65d882f47a32470fc52f53def0) // vk.Q4.y + mstore(add(_vk, 0x180), 0x09dd725897471fddc177b241d7abc402705acfa452707388fa62666ad454598c) // vk.Q_M.x + mstore(add(_vk, 0x1a0), 0x03a46eb7ed69136e109e2761fb707da7cee18b3d05e581f24d77853b3b03581e) // vk.Q_M.y + mstore(add(_vk, 0x1c0), 0x304db51670cb2c59e3088431803e82bce8c81b38eefa267871ae2103ca7842ca) // vk.Q_C.x + mstore(add(_vk, 0x1e0), 0x1d7ec7d8d4a74e337de26b7adaecb8beb03d8cd647aa180bc08de840038710d5) // vk.Q_C.y + 
mstore(add(_vk, 0x200), 0x1db65122bf0f0a58fe07bd7342d3e26b07923041cb7d2158d13fb7b5328da40e) // vk.Q_ARITHMETIC.x + mstore(add(_vk, 0x220), 0x1691db1eeedbcb4f7646959cf363c00b7e26812a225edf5a6972d815270770f5) // vk.Q_ARITHMETIC.y + mstore(add(_vk, 0x240), 0x2a63b6a306e30d87f4b8597cbd1dcecff5fc7cacb774247fca6531e3d347ada4) // vk.QSORT.x + mstore(add(_vk, 0x260), 0x2849d2901fcd1f048924fb77e9451ad45d80f9f842418146b1fde0a7c752fc5f) // vk.QSORT.y + mstore(add(_vk, 0x280), 0x0e42866979ddac27ac729352dd0f844da4fb5a1c3e2480b5b940acd12304c700) // vk.Q_ELLIPTIC.x + mstore(add(_vk, 0x2a0), 0x017ac9a40547e866bdb914dc2b73661c0ec8aa67956c8c9bf406795f75e15c53) // vk.Q_ELLIPTIC.y + mstore(add(_vk, 0x2c0), 0x1ad08199bf79952adff0aa3a9c04a26f18ad7deed1fbed0548f2c83ddf913ef9) // vk.Q_AUX.x + mstore(add(_vk, 0x2e0), 0x151df9277b110c615c058f7f783105d03cab938f23884afed1897d0049715d21) // vk.Q_AUX.y + mstore(add(_vk, 0x300), 0x0bd26d62138b721fdc08fd7d52cd3dfaa37399eb416af0ec6237f9ec1a63a5c0) // vk.SIGMA1.x + mstore(add(_vk, 0x320), 0x103282cd2ef4210ac390d70a1cba58c6792a5d872ae0337615f8ac9997d300ef) // vk.SIGMA1.y + mstore(add(_vk, 0x340), 0x08abaa91c69ffa73d80d9a9562020c2a104771f07cf4099cbbe9a0071befb1cc) // vk.SIGMA2.x + mstore(add(_vk, 0x360), 0x1a82e5cd4a2c3de77afb2ca76c89b54991a4db3939a5c24806af01a0f69a2366) // vk.SIGMA2.y + mstore(add(_vk, 0x380), 0x26d50e2d19c429d1a2987d5249b88e388f93339fc05f52939fa2e1f4be653918) // vk.SIGMA3.x + mstore(add(_vk, 0x3a0), 0x0a49cd57e79633ea43cc3172e819327ce260682d8b571d0964678a153c17e959) // vk.SIGMA3.y + mstore(add(_vk, 0x3c0), 0x1c82f3e7c57b08ef90fda6fe39427b815a835c8559b64eac0a4b213998f6802c) // vk.SIGMA4.x + mstore(add(_vk, 0x3e0), 0x098bad014a270b6f5e4c90cbd299c15c5fd190457f0e78a5f849243e86688868) // vk.SIGMA4.y + mstore(add(_vk, 0x400), 0x215a055ec0bf7d7ab5e005b4260258aaadfd8ae9005a09060fdd0cee02dc3fea) // vk.TABLE1.x + mstore(add(_vk, 0x420), 0x1841eba177a34b1eb908727fe2e54bf33fc82b6e58dfd044acd4ba05ca80c837) // vk.TABLE1.y + mstore(add(_vk, 
0x440), 0x018eb037682044ebf9cad76f777bf379b94c4d31d4351ce9677ff146a744555c) // vk.TABLE2.x + mstore(add(_vk, 0x460), 0x2bf87d72f0aef257c728503c900516f9274ab06eb54804651218438e40f06c25) // vk.TABLE2.y + mstore(add(_vk, 0x480), 0x13b003b384fb50e00994bf62a0057f44344be47383d59a7e9f1319d710ab5263) // vk.TABLE3.x + mstore(add(_vk, 0x4a0), 0x1a5f338a3d05fb46ea46855e6c36dbdb23c5f20a56acc795324fe2958189ec39) // vk.TABLE3.y + mstore(add(_vk, 0x4c0), 0x1365fd683dbad2c4c55b02dd33c4b96fde00e5bb3f52be20ead95484e130aee1) // vk.TABLE4.x + mstore(add(_vk, 0x4e0), 0x2da2ba1d27548e452cc863758acf156eb268f577b7d08ba58e7bbf2d28f6f23c) // vk.TABLE4.y + mstore(add(_vk, 0x500), 0x0ef908712f03ce2e4db3ef557abbde7c584d8c831165ba40ab43124526c53cc1) // vk.TABLE_TYPE.x + mstore(add(_vk, 0x520), 0x009dd642bc5eb1869048b59d2052645208cc5a14537814568d9c985c93319e55) // vk.TABLE_TYPE.y + mstore(add(_vk, 0x540), 0x0f973c9af1150675ae6dac1ea8ea366e5b8db13bb9c2237ab11c40dfb644ebf5) // vk.ID1.x + mstore(add(_vk, 0x560), 0x06b0c966f9edab490ac15a176d35d56996cc66854268197989a53ab0d1368188) // vk.ID1.y + mstore(add(_vk, 0x580), 0x09e719130bb46416efa070d08d82cc07fe0ed3bd8685616b92b4b9619e0807b2) // vk.ID2.x + mstore(add(_vk, 0x5a0), 0x18f35ee01438dda2443da27299404d09ccfff098a0ceac2e9a10bf2a96bc11ac) // vk.ID2.y + mstore(add(_vk, 0x5c0), 0x0cb835c737d324b9ff5bba45988dc4921104803b7e37649f8c628f0de26361ac) // vk.ID3.x + mstore(add(_vk, 0x5e0), 0x18ca0ac87859387aa32c6939f7a4a0d322879a3fdb1ef85d06addcddc13acea5) // vk.ID3.y + mstore(add(_vk, 0x600), 0x0047304b09efd9315a96d9e802c9a50c1964076026e5f17aff825d6cfc38d823) // vk.ID4.x + mstore(add(_vk, 0x620), 0x21c9f3aa4cbe8ee21422052f7c22d3d8a5a9a89c262a5a5cb52d8802f6106c49) // vk.ID4.y mstore(add(_vk, 0x640), 0x01) // vk.contains_recursive_proof mstore(add(_vk, 0x660), 0) // vk.recursive_proof_public_input_indices mstore(add(_vk, 0x680), 0x260e01b251f6f1c7e7ff4e580791dee8ea51d87a358e038b4efe30fac09383c1) // vk.g2_x.X.c1 mstore(add(_vk, 0x6a0), 
0x0118c4d5b837bcc2bc89b5b398b5974e9f5944073b32078b7e231fec938883b0) // vk.g2_x.X.c0 mstore(add(_vk, 0x6c0), 0x04fc6369f7110fe3d25156c1bb9a72859cf2a04641f99ba4ee413c80da6a5fe4) // vk.g2_x.Y.c1 mstore(add(_vk, 0x6e0), 0x22febda3c0c0632a56475b4214e5615e11e6dd3f96e6cea2854a87d4dacc5e55) // vk.g2_x.Y.c0 - mstore(_omegaInverseLoc, 0x06e402c0a314fb67a15cf806664ae1b722dbc0efe66e6c81d98f9924ca535321) // vk.work_root_inverse + mstore(_omegaInverseLoc, 0x036853f083780e87f8d7c71d111119c57dbe118c22d5ad707a82317466c5174c) // vk.work_root_inverse } } }