From 0a8ce5b45e21fa88e4e9832bc6ada38b3ba68eeb Mon Sep 17 00:00:00 2001
From: Adam Getchell <acgetchell@ucdavis.edu>
Date: Fri, 10 Apr 2026 16:42:55 -0700
Subject: [PATCH] =?UTF-8?q?perf:=20custom=20f64=20=E2=86=92=20BigRational?=
 =?UTF-8?q?=20via=20IEEE=20754=20bit=20decomposition=20(#63)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace `BigRational::from_float(x)` in `f64_to_bigrational` with manual
IEEE 754 binary64 bit decomposition and `BigRational::new_raw`, bypassing
the unnecessary GCD normalization that `from_float` performs internally.

- Decompose f64 into sign, biased exponent, and significand fields
- Strip trailing zeros from the significand so the fraction is already
  in lowest terms (odd numerator over power-of-two denominator)
- Handle zero (±0.0), subnormals, and normal values; panic on NaN/Inf
- Add 15 dedicated unit tests: ±zero, integers, fractions, powers of
  two, subnormals, round-trip fidelity, lowest-terms, and panic cases
- Add IEEE 754-2019 [9] and Goldberg [10] references to REFERENCES.md
- Add module-level and function-level doc citations
---
 REFERENCES.md |  22 ++++++
 src/exact.rs  | 190 ++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 208 insertions(+), 4 deletions(-)
diff --git a/REFERENCES.md b/REFERENCES.md
index 992a51f..31ef24d 100644
--- a/REFERENCES.md
+++ b/REFERENCES.md
@@ -70,3 +70,25 @@ elimination [7] in `BigRational`. See `src/exact.rs` for the full architecture d
    [DOI](https://doi.org/10.1007/PL00009321) ·
    [PDF](https://people.eecs.berkeley.edu/~jrs/papers/robustr.pdf)
    Also: Technical Report CMU-CS-96-140, Carnegie Mellon University, May 1996.
+
+### f64 → BigRational conversion (`f64_to_bigrational`)
+
+`f64_to_bigrational` converts an f64 to an exact `BigRational` by decomposing the IEEE 754
+binary64 bit representation into its sign, exponent, and significand fields.  Because every
+finite f64 is exactly `±m × 2^e` (where `m` is an integer), the rational can be constructed
+directly via `BigRational::new_raw` without GCD normalization — trailing zeros in the
+significand are stripped first so the fraction is already in lowest terms.
+
+See references [9-10] below.
+
+9. IEEE Computer Society. "IEEE Standard for Floating-Point Arithmetic." *IEEE Std 754-2019*
+   (Revision of IEEE 754-2008), 2019.
+   [DOI](https://doi.org/10.1109/IEEESTD.2019.8766229)
+   Section 3.4 (binary64 format): 1 sign bit, 11 exponent bits (bias 1023), 52 trailing
+   significand bits; subnormals have biased exponent 0 with implicit leading 0.
+10. Goldberg, David. "What Every Computer Scientist Should Know About Floating-Point
+    Arithmetic." *ACM Computing Surveys* 23.1 (1991): 5–48.
+    [DOI](https://doi.org/10.1145/103162.103163) ·
+    [PDF](https://www.validlab.com/goldberg/paper.pdf)
+    Comprehensive survey of IEEE 754 representation, rounding, and exact rational
+    reconstruction of floating-point values.
diff --git a/src/exact.rs b/src/exact.rs
index ce164db..4b5944b 100644
--- a/src/exact.rs
+++ b/src/exact.rs
@@ -27,6 +27,15 @@
 //! Since all arithmetic is exact, any non-zero pivot gives the correct result
 //! (there is no numerical stability concern).  Every finite `f64` is exactly
 //! representable as a rational, so the result is provably correct.
+//!
+//! ## f64 → `BigRational` conversion
+//!
+//! All entry conversions use `f64_to_bigrational`, which decomposes the
+//! IEEE 754 binary64 bit representation (\[9\]) into sign, exponent, and
+//! significand and constructs a `BigRational` directly — avoiding the GCD
+//! normalization that `BigRational::from_float` performs.  See Goldberg
+//! \[10\] for background on floating-point representation and exact
+//! rational reconstruction.  Reference numbers refer to `REFERENCES.md`.
 
 use num_bigint::{BigInt, Sign};
 use num_rational::BigRational;
@@ -67,15 +76,62 @@ fn validate_finite_vec<const D: usize>(v: &Vector<D>) -> Result<(), LaError> {
     Ok(())
 }
 
-/// Convert an `f64` to an exact `BigRational`.
+/// Convert an `f64` to an exact `BigRational` via IEEE 754 bit decomposition.
 ///
-/// Every finite `f64` is exactly representable as a rational number (`m × 2^e`),
-/// so this conversion is lossless.
+/// Every finite `f64` is exactly representable as `±m × 2^e` where `m` is a
+/// non-negative integer and `e` is an integer.  This function extracts `(m, e)`
+/// directly from the IEEE 754 binary64 bit layout \[9\], strips trailing zeros
+/// from `m` so the resulting fraction is already in lowest terms, then
+/// constructs a `BigRational` via `new_raw` — bypassing the GCD reduction
+/// that `BigRational::from_float` performs internally.
+///
+/// See `REFERENCES.md` \[9-10\] for the IEEE 754 standard and Goldberg's
+/// survey of floating-point representation.
 ///
 /// # Panics
 /// Panics if `x` is NaN or infinite.
 fn f64_to_bigrational(x: f64) -> BigRational {
-    BigRational::from_float(x).expect("non-finite matrix entry in exact determinant")
+    let bits = x.to_bits();
+    let biased_exp = ((bits >> 52) & 0x7FF) as i32;
+    let fraction = bits & 0x000F_FFFF_FFFF_FFFF;
+
+    // ±0.0
+    if biased_exp == 0 && fraction == 0 {
+        return BigRational::from_integer(BigInt::from(0));
+    }
+
+    // NaN / Inf — callers must validate finiteness before reaching here.
+    assert!(biased_exp != 0x7FF, "non-finite f64 in exact conversion");
+
+    let (mantissa, raw_exp) = if biased_exp == 0 {
+        // Subnormal: (-1)^s × 0.fraction × 2^(-1022)
+        //          = (-1)^s × fraction × 2^(-1074)
+        (fraction, -1074_i32)
+    } else {
+        // Normal: (-1)^s × 1.fraction × 2^(biased_exp - 1023)
+        //       = (-1)^s × (2^52 | fraction) × 2^(biased_exp - 1075)
+        ((1u64 << 52) | fraction, biased_exp - 1075)
+    };
+
+    // Strip trailing zeros so the fraction is already in lowest terms:
+    // after stripping, mantissa is odd and the denominator (if any) is a
+    // power of 2, so gcd(mantissa, denom) = 1.
+    let tz = mantissa.trailing_zeros();
+    let mantissa = mantissa >> tz;
+    let exponent = raw_exp + tz.cast_signed();
+
+    let is_negative = bits >> 63 != 0;
+    let numer = if is_negative {
+        -BigInt::from(mantissa)
+    } else {
+        BigInt::from(mantissa)
+    };
+
+    if exponent >= 0 {
+        BigRational::new_raw(numer << exponent.cast_unsigned(), BigInt::from(1u32))
+    } else {
+        BigRational::new_raw(numer, BigInt::from(1u32) << (-exponent).cast_unsigned())
+    }
 }
 
 /// Compute the exact determinant of a `D×D` matrix using the Bareiss algorithm
@@ -1158,6 +1214,132 @@ mod tests {
         assert_eq!(gauss_solve(&a, &b), Err(LaError::Singular { pivot_col: 1 }));
     }
 
+    // -----------------------------------------------------------------------
+    // f64_to_bigrational tests
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn f64_to_bigrational_positive_zero() {
+        let r = f64_to_bigrational(0.0);
+        assert_eq!(r, BigRational::from_integer(BigInt::from(0)));
+    }
+
+    #[test]
+    fn f64_to_bigrational_negative_zero() {
+        let r = f64_to_bigrational(-0.0);
+        assert_eq!(r, BigRational::from_integer(BigInt::from(0)));
+    }
+
+    #[test]
+    fn f64_to_bigrational_one() {
+        let r = f64_to_bigrational(1.0);
+        assert_eq!(r, BigRational::from_integer(BigInt::from(1)));
+    }
+
+    #[test]
+    fn f64_to_bigrational_negative_one() {
+        let r = f64_to_bigrational(-1.0);
+        assert_eq!(r, BigRational::from_integer(BigInt::from(-1)));
+    }
+
+    #[test]
+    fn f64_to_bigrational_half() {
+        let r = f64_to_bigrational(0.5);
+        assert_eq!(r, BigRational::new(BigInt::from(1), BigInt::from(2)));
+    }
+
+    #[test]
+    fn f64_to_bigrational_quarter() {
+        let r = f64_to_bigrational(0.25);
+        assert_eq!(r, BigRational::new(BigInt::from(1), BigInt::from(4)));
+    }
+
+    #[test]
+    fn f64_to_bigrational_negative_three_and_a_half() {
+        // -3.5 = -7/2
+        let r = f64_to_bigrational(-3.5);
+        assert_eq!(r, BigRational::new(BigInt::from(-7), BigInt::from(2)));
+    }
+
+    #[test]
+    fn f64_to_bigrational_integer() {
+        let r = f64_to_bigrational(42.0);
+        assert_eq!(r, BigRational::from_integer(BigInt::from(42)));
+    }
+
+    #[test]
+    fn f64_to_bigrational_power_of_two() {
+        let r = f64_to_bigrational(1024.0);
+        assert_eq!(r, BigRational::from_integer(BigInt::from(1024)));
+    }
+
+    #[test]
+    fn f64_to_bigrational_subnormal() {
+        let tiny = 5e-324_f64; // smallest positive subnormal
+        assert!(tiny.is_subnormal());
+        let r = f64_to_bigrational(tiny);
+        // 5e-324 = 1 × 2^(-1074)
+        assert_eq!(
+            r,
+            BigRational::new(BigInt::from(1), BigInt::from(1u32) << 1074u32)
+        );
+    }
+
+    #[test]
+    fn f64_to_bigrational_already_lowest_terms() {
+        // 0.5 should produce numer=1, denom=2 (already reduced).
+        let r = f64_to_bigrational(0.5);
+        assert_eq!(*r.numer(), BigInt::from(1));
+        assert_eq!(*r.denom(), BigInt::from(2));
+    }
+
+    #[test]
+    fn f64_to_bigrational_round_trip() {
+        // -0.0 is excluded: it maps to BigRational(0) which round-trips
+        // to +0.0 (correct; tested separately in f64_to_bigrational_negative_zero).
+        let values = [
+            0.0,
+            1.0,
+            -1.0,
+            0.5,
+            0.25,
+            0.1,
+            42.0,
+            -3.5,
+            1e10,
+            1e-10,
+            f64::MAX / 2.0,
+            f64::MIN_POSITIVE,
+            5e-324,
+        ];
+        for &v in &values {
+            let r = f64_to_bigrational(v);
+            let back = r.to_f64().expect("round-trip to_f64 failed");
+            assert!(
+                v.to_bits() == back.to_bits(),
+                "round-trip failed for {v}: got {back}"
+            );
+        }
+    }
+
+    #[test]
+    #[should_panic(expected = "non-finite f64 in exact conversion")]
+    fn f64_to_bigrational_panics_on_nan() {
+        f64_to_bigrational(f64::NAN);
+    }
+
+    #[test]
+    #[should_panic(expected = "non-finite f64 in exact conversion")]
+    fn f64_to_bigrational_panics_on_inf() {
+        f64_to_bigrational(f64::INFINITY);
+    }
+
+    #[test]
+    #[should_panic(expected = "non-finite f64 in exact conversion")]
+    fn f64_to_bigrational_panics_on_neg_inf() {
+        f64_to_bigrational(f64::NEG_INFINITY);
+    }
+
     // -----------------------------------------------------------------------
     // validate_finite_vec tests
     // -----------------------------------------------------------------------