Skip to content

Commit 8ae9de6

Browse files
bytegrrrlheinezen
authored andcommitted
Add pure FixedPoint sqrt implementation and tests
1 parent f227563 commit 8ae9de6

2 files changed

Lines changed: 69 additions & 1 deletion

File tree

libopenage/util/fixed_point.h

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#pragma once
44

55
#include <algorithm>
6+
#include <bit>
67
#include <climits>
78
#include <cmath>
89
#include <iomanip>
@@ -446,8 +447,37 @@ class FixedPoint {
446447
return is;
447448
}
448449

450+
/**
451+
* Pure FixedPoint sqrt implementation using Heron's Algorithm.
452+
*
453+
* Note that this function is undefined for negative values.
454+
*/
449455
constexpr double sqrt() {
450-
return std::sqrt(this->to_double());
456+
// Zero can cause issues later, so deal with now.
457+
if (this->raw_value == 0) {
458+
return 0.0;
459+
}
460+
461+
// A greater shift = more precision, but can overflow the intermediate type if too large.
462+
size_t max_shift = std::countl_zero((unsigned_intermediate_type)this->raw_value) - 1;
463+
size_t shift = max_shift > fractional_bits ? fractional_bits : max_shift;
464+
shift &= ~1;
465+
466+
// We can't use the safe shift since the shift value is unknown at compile time.
467+
intermediate_type n = (intermediate_type)this->raw_value << shift;
468+
intermediate_type guess = (intermediate_type)1 << fractional_bits;
469+
470+
for (size_t _i = 0; _i < fractional_bits; _i++) {
471+
intermediate_type prev = guess;
472+
guess = (guess + n / guess) / 2;
473+
if (guess == prev)
474+
break;
475+
}
476+
477+
// The sqrt operation halves the number of bits, so we'll we'll have to calculate a shift back
478+
size_t unshift = fractional_bits - (shift + fractional_bits) / 2;
479+
480+
return from_raw_value(guess << unshift).to_double();
451481
}
452482

453483
constexpr double atan2(const FixedPoint &n) {

libopenage/util/fixed_point_test.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,44 @@ void fixed_point() {
156156
TESTEQUALS_FLOAT((c/b).to_double(), -4.75/3.5, 0.1);
157157
}
158158

159+
// Pure FixedPoint sqrt tests
160+
{
161+
using T = FixedPoint<int64_t, 32, int64_t>;
162+
TESTEQUALS_FLOAT(T(41231.131).sqrt(), 203.0545025356, 1e-7);
163+
TESTEQUALS_FLOAT(T(547965.116).sqrt(), 740.2466588915, 1e-7);
164+
165+
TESTEQUALS_FLOAT(T(2).sqrt(), T::sqrt_2(), 1e-9);
166+
TESTEQUALS_FLOAT(2 / T::pi().sqrt(), T::inv2_sqrt_pi(), 1e-9);
167+
168+
// Powers of two (anything over 2^15 will overflow (2^16)^2 = 2^32 >).
169+
for (size_t i = 0; i < 15; i++) {
170+
int64_t value = 1 << i;
171+
TESTEQUALS_FLOAT(T(value * value).sqrt(), value, 1e-7);
172+
}
173+
174+
for (size_t i = 0; i < 100; i++) {
175+
double value = 14.25 * i;
176+
TESTEQUALS_FLOAT(T(value * value).sqrt(), value, 1e-7);
177+
}
178+
179+
// This one can go up to 2^63, but that would take years.
180+
for (uint32_t i = 0; i < (1u << 16); i++) {
181+
T value = T::from_raw_value(i * i);
182+
TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-7);
183+
}
184+
185+
// We lose some precision when raw_type == intermediate_type
186+
for (uint64_t i = 1; i < (1ul << 63); i = (i << 1) ^ i) {
187+
T value = T::from_raw_value(i * i);
188+
TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-4);
189+
}
190+
191+
using FP16_16 = FixedPoint<uint32_t, 16, uint64_t>;
192+
for (uint32_t i = 0; i < (1u << 16); i++) {
193+
FP16_16 value = FP16_16::from_raw_value(i);
194+
TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-4);
195+
}
196+
}
159197
}
160198

161199
}}} // openage::util::tests

0 commit comments

Comments
 (0)