diff --git a/barretenberg/cpp/scripts/benchmark_remote.sh b/barretenberg/cpp/scripts/benchmark_remote.sh
index e1d5907eab2d..05a61392584b 100755
--- a/barretenberg/cpp/scripts/benchmark_remote.sh
+++ b/barretenberg/cpp/scripts/benchmark_remote.sh
@@ -9,20 +9,19 @@ set -eu
 
 BENCHMARK=${1:-goblin_bench}
 COMMAND=${2:-./$BENCHMARK}
+PRESET=${3:-clang16}
+BUILD_DIR=${4:-build}
 
 # Move above script dir.
 cd $(dirname $0)/..
 
-# Create lock file
-ssh $BB_SSH_KEY $BB_SSH_INSTANCE "touch ~/BENCHMARKING_IN_PROGRESS"
-
 # Configure and build.
-cmake --preset clang16
-cmake --build --preset clang16 --target $BENCHMARK
+cmake --preset $PRESET
+cmake --build --preset $PRESET --target $BENCHMARK
 
 source scripts/_benchmark_remote_lock.sh
 
-cd build
+cd $BUILD_DIR
 scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build
 ssh $BB_SSH_KEY $BB_SSH_INSTANCE \
   "cd $BB_SSH_CPP_PATH/build ; $COMMAND"
diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp
index af92a45ae168..3e7f433be6e0 100644
--- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp
+++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp
@@ -326,11 +326,34 @@ template <class Params_> struct alignas(32) field {
      **/
     static void split_into_endomorphism_scalars(const field& k, field& k1, field& k2)
     {
-        // if the modulus is a 256-bit integer, we need to use a basis where g1, g2 have been shifted by 2^384
+        // if the modulus is a >= 255-bit integer, we need to use a basis where g1, g2 have been shifted by 2^384
         if constexpr (Params::modulus_3 >= 0x4000000000000000ULL) {
             split_into_endomorphism_scalars_384(k, k1, k2);
-            return;
+        } else {
+            std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>> ret = split_into_endomorphism_scalars(k);
+            k1.data[0] = ret.first[0];
+            k1.data[1] = ret.first[1];
+
+            // TODO(https://github.com/AztecProtocol/barretenberg/issues/851): We should move away from this hack by
+            // returning pair of uint64_t[2] instead of a half-set field
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
+            k2.data[0] = ret.second[0]; // NOLINT
+            k2.data[1] = ret.second[1];
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
         }
+    }
+
+    // NOTE: this form is only usable if the modulus is 254 bits or less, otherwise see
+    // split_into_endomorphism_scalars_384.
+    // TODO(https://github.com/AztecProtocol/barretenberg/issues/851): Unify these APIs.
+    static std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>> split_into_endomorphism_scalars(const field& k)
+    {
+        static_assert(Params::modulus_3 < 0x4000000000000000ULL);
         field input = k.reduce_once();
 
         constexpr field endo_g1 = { Params::endo_g1_lo, Params::endo_g1_mid, Params::endo_g1_hi, 0 };
@@ -369,15 +392,14 @@ template <class Params_> struct alignas(32) field {
         field t1 = (q2_lo - q1_lo).reduce_once();
         field beta = cube_root_of_unity();
         field t2 = (t1 * beta + input).reduce_once();
-        k2.data[0] = t1.data[0];
-        k2.data[1] = t1.data[1];
-        k1.data[0] = t2.data[0];
-        k1.data[1] = t2.data[1];
+        return {
+            { t2.data[0], t2.data[1] },
+            { t1.data[0], t1.data[1] },
+        };
     }
 
     static void split_into_endomorphism_scalars_384(const field& input, field& k1_out, field& k2_out)
     {
-
         constexpr field minus_b1f{
             Params::endo_minus_b1_lo,
             Params::endo_minus_b1_mid,
diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp
index 4dd06af4c38f..8dc10905f5a6 100644
--- a/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp
+++ b/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp
@@ -4,6 +4,7 @@
 #include "barretenberg/ecc/curves/grumpkin/grumpkin.hpp"
 #include "barretenberg/ecc/curves/secp256k1/secp256k1.hpp"
 #include "barretenberg/ecc/curves/secp256r1/secp256r1.hpp"
+#include "barretenberg/ecc/groups/element.hpp"
 #include "barretenberg/serialize/test_helper.hpp"
 #include <fstream>
 
@@ -129,3 +130,80 @@ TEST(AffineElement, Msgpack)
     auto [actual, expected] = msgpack_roundtrip(secp256k1::g1::affine_element{ 1, 1 });
     EXPECT_EQ(actual, expected);
 }
+
+namespace bb::group_elements {
+// mul_with_endomorphism and mul_without_endomorphism are private in affine_element.
+// We could make those public to test or create other public utilities, but to keep the API intact we
+// instead mark TestElementPrivate as a friend class so that our test functions can have access.
+class TestElementPrivate {
+  public:
+    template <typename Element, typename Scalar>
+    static Element mul_without_endomorphism(const Element& element, const Scalar& scalar)
+    {
+        return element.mul_without_endomorphism(scalar);
+    }
+    template <typename Element, typename Scalar>
+    static Element mul_with_endomorphism(const Element& element, const Scalar& scalar)
+    {
+        return element.mul_with_endomorphism(scalar);
+    }
+};
+} // namespace bb::group_elements
+
+// Our endomorphism-specialized multiplication should match our generic multiplication
+TEST(AffineElement, MulWithEndomorphismMatchesMulWithoutEndomorphism)
+{
+    for (int i = 0; i < 100; i++) {
+        auto x1 = bb::group_elements::element(grumpkin::g1::affine_element::random_element());
+        auto f1 = grumpkin::fr::random_element();
+        auto r1 = bb::group_elements::TestElementPrivate::mul_without_endomorphism(x1, f1);
+        auto r2 = bb::group_elements::TestElementPrivate::mul_with_endomorphism(x1, f1);
+        EXPECT_EQ(r1, r2);
+    }
+}
+
+// Multiplication of a point at infinity by a scalar should be a point at infinity
+TEST(AffineElement, InfinityMulByScalarIsInfinity)
+{
+    auto result = grumpkin::g1::affine_element::infinity() * grumpkin::fr::random_element();
+    EXPECT_TRUE(result.is_point_at_infinity());
+}
+
+// Batched multiplication of points should match
+TEST(AffineElement, BatchMulMatchesNonBatchMul)
+{
+    constexpr size_t num_points = 512;
+    std::vector<grumpkin::g1::affine_element> affine_points;
+    for (size_t i = 0; i < num_points - 1; ++i) {
+        affine_points.emplace_back(grumpkin::g1::affine_element::random_element());
+    }
+    // Include a point at infinity to test the mixed infinity + non-infinity case
+    affine_points.emplace_back(grumpkin::g1::affine_element::infinity());
+    grumpkin::fr exponent = grumpkin::fr::random_element();
+    std::vector<grumpkin::g1::affine_element> result =
+        grumpkin::g1::element::batch_mul_with_endomorphism(affine_points, exponent);
+    size_t i = 0;
+    for (grumpkin::g1::affine_element& el : result) {
+        EXPECT_EQ(el, affine_points[i] * exponent);
+        i++;
+    }
+}
+
+// Batched multiplication of a point at infinity by a scalar should result in points at infinity
+TEST(AffineElement, InfinityBatchMulByScalarIsInfinity)
+{
+    constexpr size_t num_points = 1024;
+    std::vector<grumpkin::g1::affine_element> affine_points;
+    for (size_t i = 0; i < num_points; ++i) {
+        affine_points.emplace_back(grumpkin::g1::affine_element::infinity());
+    }
+    grumpkin::fr exponent = grumpkin::fr::random_element();
+    std::vector<grumpkin::g1::affine_element> result =
+        grumpkin::g1::element::batch_mul_with_endomorphism(affine_points, exponent);
+    for (grumpkin::g1::affine_element& el : result) {
+        EXPECT_TRUE(el.is_point_at_infinity());
+        if (!el.is_point_at_infinity()) {
+            break; // dont spam with errors
+        }
+    }
+}
\ No newline at end of file
diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp
index 0c34effbfaa0..f73f7ab5fe0f 100644
--- a/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp
+++ b/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp
@@ -95,15 +95,17 @@ template <class Fq, class Fr, class Params> class alignas(32) element {
                                  const std::span<affine_element<Fq, Fr, Params>>& second_group,
                                  const std::span<affine_element<Fq, Fr, Params>>& results) noexcept;
     static std::vector<affine_element<Fq, Fr, Params>> batch_mul_with_endomorphism(
-        const std::span<affine_element<Fq, Fr, Params>>& points, const Fr& exponent) noexcept;
+        const std::span<affine_element<Fq, Fr, Params>>& points, const Fr& scalar) noexcept;
 
     Fq x;
     Fq y;
     Fq z;
 
   private:
-    element mul_without_endomorphism(const Fr& exponent) const noexcept;
-    element mul_with_endomorphism(const Fr& exponent) const noexcept;
+    // For test access to mul_without_endomorphism
+    friend class TestElementPrivate;
+    element mul_without_endomorphism(const Fr& scalar) const noexcept;
+    element mul_with_endomorphism(const Fr& scalar) const noexcept;
 
     template <typename = typename std::enable_if<Params::can_hash_to_curve>>
     static element random_coordinates_on_curve(numeric::RNG* engine = nullptr) noexcept;
diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp
index e97884a66bea..24e45600ae46 100644
--- a/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp
@@ -3,6 +3,7 @@
 #include "barretenberg/common/thread.hpp"
 #include "barretenberg/ecc/groups/element.hpp"
 #include "element.hpp"
+#include <cstdint>
 
 // NOLINTBEGIN(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays)
 namespace bb::group_elements {
@@ -594,101 +595,115 @@ element<Fq, Fr, T> element<Fq, Fr, T>::random_element(numeric::RNG* engine) noex
 }
 
 template <class Fq, class Fr, class T>
-element<Fq, Fr, T> element<Fq, Fr, T>::mul_without_endomorphism(const Fr& exponent) const noexcept
+element<Fq, Fr, T> element<Fq, Fr, T>::mul_without_endomorphism(const Fr& scalar) const noexcept
 {
-    const uint256_t converted_scalar(exponent);
+    const uint256_t converted_scalar(scalar);
 
     if (converted_scalar == 0) {
-        element result{ Fq::zero(), Fq::zero(), Fq::zero() };
-        result.self_set_infinity();
-        return result;
+        return element::infinity();
     }
 
-    element work_element(*this);
+    element accumulator(*this);
     const uint64_t maximum_set_bit = converted_scalar.get_msb();
     // This is simpler and doublings of infinity should be fast. We should think if we want to defend against the
     // timing leak here (if used with ECDSA it can sometimes lead to private key compromise)
     for (uint64_t i = maximum_set_bit - 1; i < maximum_set_bit; --i) {
-        work_element.self_dbl();
+        accumulator.self_dbl();
         if (converted_scalar.get_bit(i)) {
-            work_element += *this;
+            accumulator += *this;
         }
     }
-    return work_element;
+    return accumulator;
 }
 
+namespace detail {
+// Represents the result of
+using EndoScalars = std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>>;
+
+/**
+ * @brief Handles the WNAF computation for scalars that are split using an endomorphism,
+ * achieved through `split_into_endomorphism_scalars`. It facilitates efficient computation of elliptic curve
+ * point multiplication by optimizing the representation of these scalars.
+ *
+ * @tparam Element The data type of elements in the elliptic curve.
+ * @tparam NUM_ROUNDS The number of computation rounds for WNAF.
+ */
+template <typename Element, std::size_t NUM_ROUNDS> struct EndomorphismWnaf {
+    // NUM_WNAF_BITS: Number of bits per window in the WNAF representation.
+    static constexpr size_t NUM_WNAF_BITS = 4;
+    // table: Stores the WNAF representation of the scalars.
+    std::array<uint64_t, NUM_ROUNDS * 2> table;
+    // skew and endo_skew: Indicate if our original scalar is even or odd.
+    bool skew = false;
+    bool endo_skew = false;
+
+    /**
+     * @param scalars A pair of 128-bit scalars (as two uint64_t arrays), split using an endomorphism.
+     */
+    EndomorphismWnaf(const EndoScalars& scalars)
+    {
+        wnaf::fixed_wnaf(&scalars.first[0], &table[0], skew, 0, 2, NUM_WNAF_BITS);
+        wnaf::fixed_wnaf(&scalars.second[0], &table[1], endo_skew, 0, 2, NUM_WNAF_BITS);
+    }
+};
+
+} // namespace detail
+
 template <class Fq, class Fr, class T>
-element<Fq, Fr, T> element<Fq, Fr, T>::mul_with_endomorphism(const Fr& exponent) const noexcept
+element<Fq, Fr, T> element<Fq, Fr, T>::mul_with_endomorphism(const Fr& scalar) const noexcept
 {
-    const Fr converted_scalar = exponent.from_montgomery_form();
+    // Consider the infinity flag, return infinity if set
+    if (is_point_at_infinity()) {
+        return element::infinity();
+    }
+    constexpr size_t NUM_ROUNDS = 32;
+    const Fr converted_scalar = scalar.from_montgomery_form();
 
     if (converted_scalar.is_zero()) {
-        element result{ Fq::zero(), Fq::zero(), Fq::zero() };
-        result.self_set_infinity();
-        return result;
+        return element::infinity();
     }
+    static constexpr size_t LOOKUP_SIZE = 8;
+    std::array<element, LOOKUP_SIZE> lookup_table;
 
-    constexpr size_t lookup_size = 8;
-    constexpr size_t num_rounds = 32;
-    constexpr size_t num_wnaf_bits = 4;
-    std::array<element, lookup_size> lookup_table;
-
-    element d2 = element(*this);
-    d2.self_dbl();
+    element d2 = dbl();
     lookup_table[0] = element(*this);
-    for (size_t i = 1; i < lookup_size; ++i) {
+    for (size_t i = 1; i < LOOKUP_SIZE; ++i) {
         lookup_table[i] = lookup_table[i - 1] + d2;
     }
 
-    uint64_t wnaf_table[num_rounds * 2];
-    Fr endo_scalar;
-    Fr::split_into_endomorphism_scalars(converted_scalar, endo_scalar, *(Fr*)&endo_scalar.data[2]); // NOLINT
-
-    bool skew = false;
-    bool endo_skew = false;
-
-    wnaf::fixed_wnaf(&endo_scalar.data[0], &wnaf_table[0], skew, 0, 2, num_wnaf_bits);
-    wnaf::fixed_wnaf(&endo_scalar.data[2], &wnaf_table[1], endo_skew, 0, 2, num_wnaf_bits);
-
-    element work_element{ T::one_x, T::one_y, Fq::one() };
-    work_element.self_set_infinity();
-
-    uint64_t wnaf_entry = 0;
-    uint64_t index = 0;
-    bool sign = false;
+    detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars(converted_scalar);
+    detail::EndomorphismWnaf<element, NUM_ROUNDS> wnaf{ endo_scalars };
+    element accumulator{ T::one_x, T::one_y, Fq::one() };
+    accumulator.self_set_infinity();
     Fq beta = Fq::cube_root_of_unity();
 
-    for (size_t i = 0; i < num_rounds * 2; ++i) {
-        wnaf_entry = wnaf_table[i];
-        index = wnaf_entry & 0x0fffffffU;
-        sign = static_cast<bool>((wnaf_entry >> 31) & 1);
+    for (size_t i = 0; i < NUM_ROUNDS * 2; ++i) {
+        uint64_t wnaf_entry = wnaf.table[i];
+        uint64_t index = wnaf_entry & 0x0fffffffU;
+        bool sign = static_cast<bool>((wnaf_entry >> 31) & 1);
         const bool is_odd = ((i & 1) == 1);
         auto to_add = lookup_table[static_cast<size_t>(index)];
         to_add.y.self_conditional_negate(sign ^ is_odd);
         if (is_odd) {
             to_add.x *= beta;
         }
-        work_element += to_add;
+        accumulator += to_add;
 
-        if (i != ((2 * num_rounds) - 1) && is_odd) {
+        if (i != ((2 * NUM_ROUNDS) - 1) && is_odd) {
             for (size_t j = 0; j < 4; ++j) {
-                work_element.self_dbl();
+                accumulator.self_dbl();
             }
         }
     }
 
-    auto temporary = -lookup_table[0];
-    if (skew) {
-        work_element += temporary;
+    if (wnaf.skew) {
+        accumulator += -lookup_table[0];
     }
-
-    temporary = { lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z };
-
-    if (endo_skew) {
-        work_element += temporary;
+    if (wnaf.endo_skew) {
+        accumulator += element{ lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z };
     }
 
-    return work_element;
+    return accumulator;
 }
 
 /**
@@ -775,18 +790,18 @@ void element<Fq, Fr, T>::batch_affine_add(const std::span<affine_element<Fq, Fr,
 }
 
 /**
- * @brief Multiply each point by the same exponent
+ * @brief Multiply each point by the same scalar
  *
- * @details We use the fact that all points are being multiplied by the same exponent to batch the operations (perform
+ * @details We use the fact that all points are being multiplied by the same scalar to batch the operations (perform
  * batch affine additions and doublings with batch inversion trick)
  *
  * @param points The span of individual points that need to be scaled
- * @param exponent The scalar we multiply all the points by
+ * @param scalar The scalar we multiply all the points by
  * @return std::vector<affine_element<Fq, Fr, T>> Vector of new points where each point is exponent⋅points[i]
  */
 template <class Fq, class Fr, class T>
 std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomorphism(
-    const std::span<affine_element<Fq, Fr, T>>& points, const Fr& exponent) noexcept
+    const std::span<affine_element<Fq, Fr, T>>& points, const Fr& scalar) noexcept
 {
     BB_OP_COUNT_TIME();
     typedef affine_element<Fq, Fr, T> affine_element;
@@ -887,7 +902,7 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
             /*finite_field_multiplications_per_iteration=*/6);
     };
     // Compute wnaf for scalar
-    const Fr converted_scalar = exponent.from_montgomery_form();
+    const Fr converted_scalar = scalar.from_montgomery_form();
 
     // If the scalar is zero, just set results to the point at infinity
     if (converted_scalar.is_zero()) {
@@ -911,10 +926,9 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
         return results;
     }
 
-    constexpr size_t lookup_size = 8;
-    constexpr size_t num_rounds = 32;
-    constexpr size_t num_wnaf_bits = 4;
-    std::array<std::vector<affine_element>, lookup_size> lookup_table;
+    constexpr size_t LOOKUP_SIZE = 8;
+    constexpr size_t NUM_ROUNDS = 32;
+    std::array<std::vector<affine_element>, LOOKUP_SIZE> lookup_table;
     for (auto& table : lookup_table) {
         table.resize(num_points);
     }
@@ -924,8 +938,10 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
         num_points,
         [&temp_point_vector, &lookup_table, &points](size_t start, size_t end) {
             for (size_t i = start; i < end; ++i) {
-                temp_point_vector[i] = points[i];
-                lookup_table[0][i] = points[i];
+                // If the point is at infinity we fix-up the result later
+                // To avoid 'trying to invert zero in the field' we set the point to 'one' here
+                temp_point_vector[i] = points[i].is_point_at_infinity() ? affine_element::one() : points[i];
+                lookup_table[0][i] = points[i].is_point_at_infinity() ? affine_element::one() : points[i];
             }
         },
         /*finite_field_additions_per_iteration=*/0,
@@ -938,7 +954,7 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
 
     // Construct lookup table
     batch_affine_double(&temp_point_vector[0]);
-    for (size_t j = 1; j < lookup_size; ++j) {
+    for (size_t j = 1; j < LOOKUP_SIZE; ++j) {
         run_loop_in_parallel_if_effective(
             num_points,
             [j, &lookup_table](size_t start, size_t end) {
@@ -956,18 +972,8 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
         batch_affine_add_internal(&temp_point_vector[0], &lookup_table[j][0]);
     }
 
-    uint64_t wnaf_table[num_rounds * 2];
-    Fr endo_scalar;
-
-    // Split single scalar into 2 scalar in endo form
-    Fr::split_into_endomorphism_scalars(converted_scalar, endo_scalar, *(Fr*)&endo_scalar.data[2]); // NOLINT
-
-    bool skew = false;
-    bool endo_skew = false;
-
-    // Construct wnaf forms from scalars
-    wnaf::fixed_wnaf(&endo_scalar.data[0], &wnaf_table[0], skew, 0, 2, num_wnaf_bits);
-    wnaf::fixed_wnaf(&endo_scalar.data[2], &wnaf_table[1], endo_skew, 0, 2, num_wnaf_bits);
+    detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars(converted_scalar);
+    detail::EndomorphismWnaf<element, NUM_ROUNDS> wnaf{ endo_scalars };
 
     std::vector<affine_element> work_elements(num_points);
 
@@ -977,7 +983,7 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
     bool sign = 0;
     // Prepare elements for the first batch addition
     for (size_t j = 0; j < 2; ++j) {
-        wnaf_entry = wnaf_table[j];
+        wnaf_entry = wnaf.table[j];
         index = wnaf_entry & 0x0fffffffU;
         sign = static_cast<bool>((wnaf_entry >> 31) & 1);
         const bool is_odd = ((j & 1) == 1);
@@ -1010,8 +1016,8 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
     // First cycle of addition
     batch_affine_add_internal(&temp_point_vector[0], &work_elements[0]);
     // Run through SM logic in wnaf form (excluding the skew)
-    for (size_t j = 2; j < num_rounds * 2; ++j) {
-        wnaf_entry = wnaf_table[j];
+    for (size_t j = 2; j < NUM_ROUNDS * 2; ++j) {
+        wnaf_entry = wnaf.table[j];
         index = wnaf_entry & 0x0fffffffU;
         sign = static_cast<bool>((wnaf_entry >> 31) & 1);
         const bool is_odd = ((j & 1) == 1);
@@ -1044,7 +1050,7 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
     }
 
     // Apply skew for the first endo scalar
-    if (skew) {
+    if (wnaf.skew) {
         run_loop_in_parallel_if_effective(
             num_points,
             [&lookup_table, &temp_point_vector](size_t start, size_t end) {
@@ -1063,12 +1069,11 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
         batch_affine_add_internal(&temp_point_vector[0], &work_elements[0]);
     }
     // Apply skew for the second endo scalar
-    if (endo_skew) {
+    if (wnaf.endo_skew) {
         run_loop_in_parallel_if_effective(
             num_points,
             [beta, &lookup_table, &temp_point_vector](size_t start, size_t end) {
                 for (size_t i = start; i < end; ++i) {
-
                     temp_point_vector[i] = lookup_table[0][i];
                     temp_point_vector[i].x *= beta;
                 }
@@ -1082,6 +1087,22 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
             /*sequential_copy_ops_per_iteration=*/1);
         batch_affine_add_internal(&temp_point_vector[0], &work_elements[0]);
     }
+    // handle points at infinity explicitly
+    run_loop_in_parallel_if_effective(
+        num_points,
+        [&](size_t start, size_t end) {
+            for (size_t i = start; i < end; ++i) {
+                work_elements[i] =
+                    points[i].is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i];
+            }
+        },
+        /*finite_field_additions_per_iteration=*/0,
+        /*finite_field_multiplications_per_iteration=*/1,
+        /*finite_field_inversions_per_iteration=*/0,
+        /*group_element_additions_per_iteration=*/0,
+        /*group_element_doublings_per_iteration=*/0,
+        /*scalar_multiplications_per_iteration=*/0,
+        /*sequential_copy_ops_per_iteration=*/1);
 
     return work_elements;
 }
diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/wnaf.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/wnaf.hpp
index e7da2ad084aa..c022809208aa 100644
--- a/barretenberg/cpp/src/barretenberg/ecc/groups/wnaf.hpp
+++ b/barretenberg/cpp/src/barretenberg/ecc/groups/wnaf.hpp
@@ -156,6 +156,19 @@ inline void fixed_wnaf_packed(
     wnaf[0] = ((slice + predicate) >> 1UL) | (point_index);
 }
 
+/**
+ * @brief Performs fixed-window non-adjacent form (WNAF) computation for scalar multiplication.
+ *
+ * WNAF is a method for representing integers which optimizes the number of non-zero terms, which in turn optimizes
+ * the number of point doublings in scalar multiplication, in turn aiding efficiency.
+ *
+ * @param scalar Pointer to 128-bit scalar for which WNAF is to be computed.
+ * @param wnaf Pointer to num_points+1 size array where the computed WNAF will be stored.
+ * @param skew_map Reference to a boolean variable which will be set based on the least significant bit of the scalar.
+ * @param point_index The index of the point being computed in the context of multiple point multiplication.
+ * @param num_points The number of points being computed in parallel.
+ * @param wnaf_bits The number of bits to use in each window of the WNAF representation.
+ */
 inline void fixed_wnaf(const uint64_t* scalar,
                        uint64_t* wnaf,
                        bool& skew_map,