From 1bb446c9ef39cc3284ba2346e1f420d4904e0d81 Mon Sep 17 00:00:00 2001
From: Michael Tautschnig <tautschn@amazon.com>
Date: Mon, 27 Apr 2026 19:12:07 +0000
Subject: [PATCH 1/2] Fix subnormal division precision loss in float_utilst and
 float_bvt

When the dividend of a floating-point division is subnormal, its unpacked
fraction has leading zeros (the hidden bit is 0).  The rounder's
normalization step then left-shifts the quotient by a correspondingly
larger amount to bring the leading 1 to the top of the buffer, which moves
the `have_remainder` sticky bit up into the round position with all-zero
bits below it.  A genuine round-up (round=1, sticky=1) is thereby mistaken
for a tie (round=1, sticky=0) and broken to even, yielding a 1-ULP error.

This only surfaces when the quotient lands back in the normal range and so
needs rounding -- in practice subnormal/subnormal division.  For example
0x1p-149f / 0x3p-149f (mathematically 1/3) returned 0x1.555554p-2f instead
of the correct round-to-nearest-even 0x1.555556p-2f.  A subnormal divided
by a normal yields an (exact) subnormal or zero and does not trigger it.

Fix: widen div_width by spec.f extra bits (the worst-case number of leading
zeros in a subnormal fraction) so the sticky bit has room below the round
position and survives the normalization shift.  The same change is applied
to both float_utilst::div and float_bvt::div.

Add a regression test (regression/cbmc/Float-div-subnormal) covering the
subnormal/subnormal case on both the bit-flattening and the SMT (z3) back
ends; it fails on the unpatched solver and passes with this change.

This widening is paid by every float division: div_width grows from 49 to
72 bits for float and from 105 to 157 for double.  Measured end-to-end with
the default flattening back end:

  - single double division:        ~0.10s -> ~0.13s
  - four chained double divisions:  ~0.39s -> ~6.5s

A follow-up commit reduces this cost by pre-normalizing the (subnormal)
dividend instead of widening the divider unconditionally.

Co-authored-by: Kiro <kiro-agent@users.noreply.github.com>
---
 regression/cbmc/Float-div-subnormal/main.c    | 47 +++++++++++++++++++
 regression/cbmc/Float-div-subnormal/test.desc | 10 ++++
 .../cbmc/Float-div-subnormal/test_smt.desc    | 10 ++++
 src/solvers/floatbv/float_bv.cpp              | 12 ++++-
 src/solvers/floatbv/float_utils.cpp           |  9 +++-
 5 files changed, 86 insertions(+), 2 deletions(-)
 create mode 100644 regression/cbmc/Float-div-subnormal/main.c
 create mode 100644 regression/cbmc/Float-div-subnormal/test.desc
 create mode 100644 regression/cbmc/Float-div-subnormal/test_smt.desc

diff --git a/regression/cbmc/Float-div-subnormal/main.c b/regression/cbmc/Float-div-subnormal/main.c
new file mode 100644
index 00000000000..b33709fb944
--- /dev/null
+++ b/regression/cbmc/Float-div-subnormal/main.c
@@ -0,0 +1,47 @@
+// Test division with a subnormal dividend.
+//
+// Bug: float_utilst and float_bvt used an insufficient division width,
+// causing 1-ULP errors when the unpacked dividend had many leading zeros
+// (which happens for subnormal operands).  After the rounder's
+// normalization shift left-shifts the quotient to place the leading 1 at
+// the top of the buffer, the bits below the round position were all zero,
+// so the `have_remainder` sticky bit was effectively lost: a true
+// round-up (round=1, sticky=1) was mistaken for a tie (round=1, sticky=0)
+// and broken to even.  The error only surfaces when the quotient lands
+// back in the normal range and needs rounding, i.e. for subnormal/subnormal
+// division; a subnormal divided by a normal yields an (exact) subnormal or
+// zero and does not trigger it.
+
+#include <assert.h>
+#include <float.h>
+
+int main()
+{
+  // Smallest positive subnormal divided by three times the smallest
+  // subnormal: mathematically 1/3, whose IEEE-754 round-to-nearest-even
+  // value is 0x1.555556p-2.  The buggy build returned 0x1.555554p-2
+  // (1 ULP low) because the sticky bit was lost.
+  float a, b;
+  __CPROVER_assume(a == 0x1p-149f);
+  __CPROVER_assume(b == 0x3p-149f);
+  float r = a / b;
+  assert(r == 0x1.555556p-2f);
+
+  // A second subnormal/subnormal case, 7/4 = 1.75 exactly.
+  float c, d;
+  __CPROVER_assume(c == 0x7p-149f);
+  __CPROVER_assume(d == 0x4p-149f);
+  float r2 = c / d;
+  assert(r2 == 0x1.cp+0f);
+
+  // Exact subnormal/normal sanity check: FLT_MIN is the smallest positive
+  // normal float; FLT_MIN * 0.5f is subnormal, and dividing it by 2 stays
+  // exactly representable.
+  float e, f;
+  __CPROVER_assume(e == FLT_MIN * 0.5f);
+  __CPROVER_assume(f == 2.0f);
+  float r3 = e / f;
+  assert(r3 == FLT_MIN * 0.25f);
+
+  return 0;
+}
diff --git a/regression/cbmc/Float-div-subnormal/test.desc b/regression/cbmc/Float-div-subnormal/test.desc
new file mode 100644
index 00000000000..07192cbf394
--- /dev/null
+++ b/regression/cbmc/Float-div-subnormal/test.desc
@@ -0,0 +1,10 @@
+CORE no-new-smt
+main.c
+
+^EXIT=0$
+^SIGNAL=0$
+^VERIFICATION SUCCESSFUL$
+--
+^warning: ignoring
+--
+Division with subnormal dividend.
diff --git a/regression/cbmc/Float-div-subnormal/test_smt.desc b/regression/cbmc/Float-div-subnormal/test_smt.desc
new file mode 100644
index 00000000000..7e43bb5601d
--- /dev/null
+++ b/regression/cbmc/Float-div-subnormal/test_smt.desc
@@ -0,0 +1,10 @@
+CORE no-new-smt
+main.c
+--smt2 --z3
+^EXIT=0$
+^SIGNAL=0$
+^VERIFICATION SUCCESSFUL$
+--
+^warning: ignoring
+--
+Division with subnormal dividend (SMT path).
diff --git a/src/solvers/floatbv/float_bv.cpp b/src/solvers/floatbv/float_bv.cpp
index ee657069fe6..c52c5de0fb3 100644
--- a/src/solvers/floatbv/float_bv.cpp
+++ b/src/solvers/floatbv/float_bv.cpp
@@ -757,7 +757,17 @@ exprt float_bvt::div(
 
   std::size_t fraction_width=
     to_unsignedbv_type(unpacked1.fraction.type()).get_width();
-  std::size_t div_width=fraction_width*2+1;
+  // Division width: we need enough bits below the round position so that
+  // `have_remainder` (used as a single sticky bit) survives the left-shift
+  // performed by the rounder's normalization step.  When the unpacked
+  // dividend is subnormal it has up to spec.f leading zeros, so the
+  // normalization shift is correspondingly larger and would otherwise move
+  // the sticky into the round position with all-zero bits below it; the
+  // extra bits give the sticky room to survive.  fraction_width == spec.f + 1
+  // by construction (unpack concatenates the hidden bit onto the fraction),
+  // so derive the extra width from it as a single source of truth.
+  const std::size_t extra_div_bits = fraction_width - 1; // == spec.f
+  std::size_t div_width = fraction_width * 2 + 1 + extra_div_bits;
 
   // pad fraction1 with zeros
   const concatenation_exprt fraction1(
diff --git a/src/solvers/floatbv/float_utils.cpp b/src/solvers/floatbv/float_utils.cpp
index ba72bb09b21..7e649bfbb8e 100644
--- a/src/solvers/floatbv/float_utils.cpp
+++ b/src/solvers/floatbv/float_utils.cpp
@@ -615,7 +615,14 @@ bvt float_utilst::div(const bvt &src1, const bvt &src2)
   const unbiased_floatt unpacked1=unpack(src1);
   const unbiased_floatt unpacked2=unpack(src2);
 
-  std::size_t div_width=unpacked1.fraction.size()*2+1;
+  // Division width: we need enough bits below the round position so that
+  // `have_remainder` (used as a single sticky bit) survives the left-shift
+  // performed by the rounder's normalization step.  When the unpacked
+  // dividend is subnormal it has up to spec.f leading zeros, so the
+  // normalization shift is correspondingly larger and would otherwise move
+  // the sticky into the round position with all-zero bits below it; the
+  // extra spec.f bits give the sticky room to survive.
+  std::size_t div_width = unpacked1.fraction.size() * 2 + 1 + spec.f;
 
   // pad fraction1 with zeros
   bvt fraction1=unpacked1.fraction;

From 7104852cc00aec7a3e9d3decc2e9de8a855db5e0 Mon Sep 17 00:00:00 2001
From: Michael Tautschnig <tautschn@amazon.com>
Date: Thu, 11 Jun 2026 06:38:53 +0000
Subject: [PATCH 2/2] float division: pre-normalize the dividend instead of
 widening the divider

The previous commit fixes the subnormal-dividend precision loss by widening
div_width by spec.f bits.  That widening is paid by every float division,
including normal/normal, and roughly squares the bit-blasted divider's cost
(measured ~17x on a chained double division).

Instead, normalize the dividend before dividing: left-align its fraction and
decrease the exponent by the same amount (value-preserving) using the
existing normalization_shift helper.  A normalized dividend has no leading
zeros, so the original minimal div_width (fraction*2+1) is sufficient and
the quotient keeps full precision.  The exponent arithmetic is widened to
accommodate the more negative exponent of a freshly normalized (formerly
subnormal) dividend.

Applied to both float_utilst::div and float_bvt::div.

The regression test from the previous commit still fails on the unpatched
solver and passes here, on both the flattening and SMT (z3) back ends; the
regression/cbmc/Float* suite (CORE and THOROUGH) passes unchanged.

Performance, measured end-to-end with the default flattening back end
(base = unpatched develop, (a) = previous commit's widening, (b) = this):

  - single double division:        base ~0.10s   (a) ~0.13s   (b) ~0.10s
  - four chained double divisions:  base ~0.39s   (a) ~6.5s    (b) ~3.2s

Pre-normalization removes the regression for isolated divisions and roughly
halves it for chained divisions; the residual cost is the per-division
normalization shifter, which is far cheaper than the wider divider.

Co-authored-by: Kiro <kiro-agent@users.noreply.github.com>
---
 src/solvers/floatbv/float_bv.cpp    | 37 ++++++++++++++++-------------
 src/solvers/floatbv/float_utils.cpp | 33 +++++++++++++++----------
 2 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/src/solvers/floatbv/float_bv.cpp b/src/solvers/floatbv/float_bv.cpp
index c52c5de0fb3..9e057e7846e 100644
--- a/src/solvers/floatbv/float_bv.cpp
+++ b/src/solvers/floatbv/float_bv.cpp
@@ -757,21 +757,24 @@ exprt float_bvt::div(
 
   std::size_t fraction_width=
     to_unsignedbv_type(unpacked1.fraction.type()).get_width();
-  // Division width: we need enough bits below the round position so that
-  // `have_remainder` (used as a single sticky bit) survives the left-shift
-  // performed by the rounder's normalization step.  When the unpacked
-  // dividend is subnormal it has up to spec.f leading zeros, so the
-  // normalization shift is correspondingly larger and would otherwise move
-  // the sticky into the round position with all-zero bits below it; the
-  // extra bits give the sticky room to survive.  fraction_width == spec.f + 1
-  // by construction (unpack concatenates the hidden bit onto the fraction),
-  // so derive the extra width from it as a single source of truth.
-  const std::size_t extra_div_bits = fraction_width - 1; // == spec.f
-  std::size_t div_width = fraction_width * 2 + 1 + extra_div_bits;
+
+  // Pre-normalize the dividend.  A subnormal dividend has leading zeros in
+  // its fraction (the hidden bit is 0); dividing it directly would give a
+  // quotient with too few significant bits, so the `have_remainder` sticky
+  // bit is lost during the rounder's normalization shift and ties round the
+  // wrong way (a 1-ULP error).  Left-aligning the fraction and decreasing the
+  // exponent by the same amount preserves the value but guarantees a
+  // normalized dividend, so the minimal division width suffices and
+  // normal/normal division pays no extra cost.
+  exprt dividend_fraction = unpacked1.fraction;
+  exprt dividend_exponent = unpacked1.exponent;
+  normalization_shift(dividend_fraction, dividend_exponent);
+
+  std::size_t div_width = fraction_width * 2 + 1;
 
   // pad fraction1 with zeros
   const concatenation_exprt fraction1(
-    unpacked1.fraction,
+    dividend_fraction,
     from_integer(0, unsignedbv_typet(div_width - fraction_width)),
     unsignedbv_typet(div_width));
 
@@ -795,12 +798,12 @@ exprt float_bvt::div(
     concatenation_exprt(
       result.fraction, have_remainder, unsignedbv_typet(div_width+1));
 
-  // We will subtract the exponents;
-  // to account for overflow, we add a bit.
-  const typecast_exprt exponent1(
-    unpacked1.exponent, signedbv_typet(spec.e + 1));
+  // We will subtract the exponents; allow two extra bits so the more
+  // negative exponent of a freshly normalized (formerly subnormal) dividend
+  // cannot overflow the subtraction.
+  const typecast_exprt exponent1(dividend_exponent, signedbv_typet(spec.e + 2));
   const typecast_exprt exponent2(
-    unpacked2.exponent, signedbv_typet(spec.e + 1));
+    unpacked2.exponent, signedbv_typet(spec.e + 2));
 
   // subtract exponents
   const minus_exprt added_exponent(exponent1, exponent2);
diff --git a/src/solvers/floatbv/float_utils.cpp b/src/solvers/floatbv/float_utils.cpp
index 7e649bfbb8e..db09b384a27 100644
--- a/src/solvers/floatbv/float_utils.cpp
+++ b/src/solvers/floatbv/float_utils.cpp
@@ -615,17 +615,22 @@ bvt float_utilst::div(const bvt &src1, const bvt &src2)
   const unbiased_floatt unpacked1=unpack(src1);
   const unbiased_floatt unpacked2=unpack(src2);
 
-  // Division width: we need enough bits below the round position so that
-  // `have_remainder` (used as a single sticky bit) survives the left-shift
-  // performed by the rounder's normalization step.  When the unpacked
-  // dividend is subnormal it has up to spec.f leading zeros, so the
-  // normalization shift is correspondingly larger and would otherwise move
-  // the sticky into the round position with all-zero bits below it; the
-  // extra spec.f bits give the sticky room to survive.
-  std::size_t div_width = unpacked1.fraction.size() * 2 + 1 + spec.f;
+  // Pre-normalize the dividend.  A subnormal dividend has leading zeros in
+  // its fraction (the hidden bit is 0); dividing it directly would give a
+  // quotient with too few significant bits, so the `have_remainder` sticky
+  // bit is lost during the rounder's normalization shift and ties round the
+  // wrong way (a 1-ULP error).  Left-aligning the fraction and decreasing the
+  // exponent by the same amount preserves the value but guarantees a
+  // normalized dividend, so the minimal division width suffices and
+  // normal/normal division pays no extra cost.
+  bvt dividend_fraction = unpacked1.fraction;
+  bvt dividend_exponent = unpacked1.exponent;
+  normalization_shift(dividend_fraction, dividend_exponent);
+
+  std::size_t div_width = dividend_fraction.size() * 2 + 1;
 
   // pad fraction1 with zeros
-  bvt fraction1=unpacked1.fraction;
+  bvt fraction1 = dividend_fraction;
   fraction1.reserve(div_width);
   while(fraction1.size()<div_width)
     fraction1.insert(fraction1.begin(), const_literal(false));
@@ -650,10 +655,12 @@ bvt float_utilst::div(const bvt &src1, const bvt &src2)
   // We will subtract the exponents;
   // to account for overflow, we add a bit.
   // we add a second bit for the adjust by extra fraction bits
-  const bvt exponent1=
-    bv_utils.sign_extension(unpacked1.exponent, unpacked1.exponent.size()+2);
-  const bvt exponent2=
-    bv_utils.sign_extension(unpacked2.exponent, unpacked2.exponent.size()+2);
+  // The dividend exponent may have grown during normalization; extend both
+  // exponents to a common width.
+  const std::size_t exp_width =
+    std::max(dividend_exponent.size(), unpacked2.exponent.size()) + 2;
+  const bvt exponent1 = bv_utils.sign_extension(dividend_exponent, exp_width);
+  const bvt exponent2 = bv_utils.sign_extension(unpacked2.exponent, exp_width);
 
   // subtract exponents
   bvt added_exponent=bv_utils.sub(exponent1, exponent2);