Skip to content

Commit 49f1562

Browse files
committed
Optimize memory usage of norm candidates
This uses a sync.Pool and reference counting on the Candidates which is complicated but necessary in the highly concurret code.
1 parent 00c771d commit 49f1562

5 files changed

Lines changed: 400 additions & 116 deletions

File tree

arithmetic.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package main
2+
3+
import "math/bits"
4+
5+
// Low level arithmetic routines for 128 bit numbers
6+
7+
// Mul128full multiplies two 128-bit values (a1<<64|a0) and (b1<<64|b0),
8+
// returning the 256-bit product as little-endian words r0…r3.
9+
func Mul128full(a1, a0, b1, b0 uint64) (r3, r2, r1, r0 uint64) {
10+
r1, r0 = bits.Mul64(a0, b0)
11+
r2a, r1a := bits.Mul64(a0, b1)
12+
r2b, r1b := bits.Mul64(a1, b0)
13+
r3, r2 = bits.Mul64(a1, b1)
14+
15+
r1, c := bits.Add64(r1, r1a, 0)
16+
r2, c = bits.Add64(r2, r2a, c)
17+
r3, _ = bits.Add64(r3, 0, c)
18+
19+
r1, c = bits.Add64(r1, r1b, 0)
20+
r2, c = bits.Add64(r2, r2b, c)
21+
r3, _ = bits.Add64(r3, 0, c)
22+
23+
return
24+
}
25+
26+
// Mul128 takes two 128-bit numbers, represented by their high (a1, b1) and
27+
// low (a0, b0) 64-bit parts, and multiplies them.
28+
// It returns the 128-bit result as its high (r1) and low (r0) parts,
29+
// and a boolean indicating if an overflow occurred (result > 2^128 - 1).
30+
// If overflow is true, r1 and r0 will be zero.
31+
func Mul128(a1, a0, b1, b0 uint64) (r1, r0 uint64, overflow bool) {
32+
33+
// Case 1: If both high parts (a1, b1) are non-zero, it must overflow.
34+
if a1 != 0 && b1 != 0 {
35+
return 0, 0, true // 0 Mul64 calls
36+
}
37+
38+
// Calculate a0 * b0. This gives the low 64 bits (ll_l) and the
39+
// start of the high 64 bits (ll_h).
40+
ll_h, ll_l := bits.Mul64(a0, b0) // 1st Mul64
41+
42+
var mid_h, mid_l uint64
43+
44+
// Case 2: a1 is non-zero (so b1 must be 0). Calculate a1 * b0.
45+
if a1 != 0 {
46+
mid_h, mid_l = bits.Mul64(a1, b0) // 2nd Mul64
47+
} else if b1 != 0 {
48+
// Case 3: b1 is non-zero (so a1 must be 0). Calculate a0 * b1.
49+
mid_h, mid_l = bits.Mul64(a0, b1) // 2nd Mul64
50+
} else {
51+
// Case 4: Both a1 and b1 are 0. The result is simply a0 * b0.
52+
// No overflow possible. mid_h and mid_l remain 0.
53+
return ll_h, ll_l, false // 1 Mul64 call total
54+
}
55+
56+
// Check if the middle multiplication overflowed (mid_h != 0).
57+
// This would mean the result is >= 2^128.
58+
if mid_h != 0 {
59+
return 0, 0, true
60+
}
61+
62+
// Add the high part of a0*b0 (ll_h) to the low part of the
63+
// middle multiplication (mid_l). This forms the high part (r1)
64+
// of the final result.
65+
r1, carry := bits.Add64(ll_h, mid_l, 0)
66+
67+
// If the addition produced a carry, it means the result overflowed.
68+
if carry != 0 {
69+
return 0, 0, true
70+
}
71+
72+
// No overflow. Assign the low part (r0) and return.
73+
r0 = ll_l
74+
return r1, r0, false // 2 Mul64 calls total
75+
}
76+
77+
// Mul128x64 multiplies one 128-bit value (a1<<64|a0) and one 64 bit value b
78+
// returning the 192-bit product as little-endian words r0…r2.
79+
func Mul128x64(a1, a0, b uint64) (r2, r1, r0 uint64) {
80+
r1, r0 = bits.Mul64(a0, b)
81+
r2, r1a := bits.Mul64(a1, b)
82+
83+
r1, c := bits.Add64(r1, r1a, 0)
84+
r2, _ = bits.Add64(r2, 0, c)
85+
86+
return
87+
}

arithmetic_test.go

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
package main
2+
3+
import (
4+
"math/big"
5+
"math/rand/v2"
6+
"testing"
7+
)
8+
9+
// helper to combine hi/lo into big.Int
10+
func u128(hi, lo uint64) *big.Int {
11+
i := new(big.Int).Lsh(new(big.Int).SetUint64(hi), 64)
12+
return i.Or(i, new(big.Int).SetUint64(lo))
13+
}
14+
15+
func TestMul128full(t *testing.T) {
16+
cases := []struct {
17+
a1, a0, b1, b0 uint64
18+
exp *big.Int
19+
}{
20+
{0, 0, 0, 0, big.NewInt(0)},
21+
{0, 1, 0, 2, big.NewInt(2)},
22+
{0, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF, new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 128), big.NewInt(1))},
23+
{1, 0, 0, 1, new(big.Int).Lsh(big.NewInt(1), 64)},
24+
{1, 1, 1, 1, new(big.Int).Mul(u128(1, 1), u128(1, 1))},
25+
}
26+
for _, c := range cases {
27+
r3, r2, r1, r0 := Mul128full(c.a1, c.a0, c.b1, c.b0)
28+
prod := new(big.Int).Mul(u128(c.a1, c.a0), u128(c.b1, c.b0))
29+
// extract words
30+
mask := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 64), big.NewInt(1))
31+
e0 := new(big.Int).And(prod, mask).Uint64()
32+
e1 := new(big.Int).And(new(big.Int).Rsh(prod, 64), mask).Uint64()
33+
e2 := new(big.Int).And(new(big.Int).Rsh(prod, 128), mask).Uint64()
34+
e3 := new(big.Int).And(new(big.Int).Rsh(prod, 192), mask).Uint64()
35+
if e0 != r0 || e1 != r1 || e2 != r2 || e3 != r3 {
36+
t.Errorf("Mul128full(%#x,%#x,%#x,%#x) = (%#x,%#x,%#x,%#x), want (%#x,%#x,%#x,%#x)",
37+
c.a1, c.a0, c.b1, c.b0, r3, r2, r1, r0, e3, e2, e1, e0)
38+
}
39+
}
40+
}
41+
42+
func TestMul128(t *testing.T) {
43+
max128 := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 128), big.NewInt(1))
44+
cases := []struct {
45+
a1, a0, b1, b0 uint64
46+
overflow bool
47+
}{
48+
{0, 0, 0, 0, false},
49+
{0, 1, 0, 2, false},
50+
{0, 0xFFFFFFFFFFFFFFFF, 0, 0xFFFFFFFFFFFFFFFF, false},
51+
{1, 0, 1, 0, true},
52+
}
53+
for _, c := range cases {
54+
r1, r0, ov := Mul128(c.a1, c.a0, c.b1, c.b0)
55+
expVal := new(big.Int).Mul(u128(c.a1, c.a0), u128(c.b1, c.b0))
56+
if expVal.Cmp(max128) > 0 {
57+
if !ov {
58+
t.Errorf("Mul128 expected overflow for inputs (%#x,%#x,%#x,%#x)", c.a1, c.a0, c.b1, c.b0)
59+
}
60+
continue
61+
}
62+
if ov {
63+
t.Errorf("Unexpected overflow for inputs (%#x,%#x,%#x,%#x)", c.a1, c.a0, c.b1, c.b0)
64+
continue
65+
}
66+
if u128(r1, r0).Cmp(expVal) != 0 {
67+
t.Errorf("Mul128(%#x,%#x,%#x,%#x) = (%#x,%#x), want %v", c.a1, c.a0, c.b1, c.b0, r1, r0, expVal)
68+
}
69+
}
70+
}
71+
72+
func TestMul128x64(t *testing.T) {
73+
cases := []struct {
74+
a1, a0, b uint64
75+
exp *big.Int
76+
}{
77+
{0, 0, 0, big.NewInt(0)},
78+
{0, 1, 2, big.NewInt(2)},
79+
{1, 0, 1, new(big.Int).Lsh(big.NewInt(1), 64)},
80+
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
81+
new(big.Int).Mul(u128(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), new(big.Int).SetUint64(0xFFFFFFFFFFFFFFFF))},
82+
}
83+
for _, c := range cases {
84+
r2, r1, r0 := Mul128x64(c.a1, c.a0, c.b)
85+
prod := new(big.Int).Mul(u128(c.a1, c.a0), new(big.Int).SetUint64(c.b))
86+
mask := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 64), big.NewInt(1))
87+
e0 := new(big.Int).And(prod, mask).Uint64()
88+
e1 := new(big.Int).And(new(big.Int).Rsh(prod, 64), mask).Uint64()
89+
e2 := new(big.Int).And(new(big.Int).Rsh(prod, 128), mask).Uint64()
90+
if e0 != r0 || e1 != r1 || e2 != r2 {
91+
t.Errorf("Mul128x64(%#x,%#x,%#x) = (%#x,%#x,%#x), want (%#x,%#x,%#x)",
92+
c.a1, c.a0, c.b, r2, r1, r0, e2, e1, e0)
93+
}
94+
}
95+
}
96+
97+
// Randomized tests
98+
func TestMul128fullRandom(t *testing.T) {
99+
rng := rand.New(rand.NewPCG(42, 43))
100+
mask := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 64), big.NewInt(1))
101+
for i := 0; i < 100_000; i++ {
102+
a1, a0, b1, b0 := rng.Uint64(), rng.Uint64(), rng.Uint64(), rng.Uint64()
103+
exp := new(big.Int).Mul(u128(a1, a0), u128(b1, b0))
104+
r3, r2, r1, r0 := Mul128full(a1, a0, b1, b0)
105+
e0 := new(big.Int).And(exp, mask).Uint64()
106+
e1 := new(big.Int).And(new(big.Int).Rsh(exp, 64), mask).Uint64()
107+
e2 := new(big.Int).And(new(big.Int).Rsh(exp, 128), mask).Uint64()
108+
e3 := new(big.Int).And(new(big.Int).Rsh(exp, 192), mask).Uint64()
109+
if e0 != r0 || e1 != r1 || e2 != r2 || e3 != r3 {
110+
t.Errorf("Random Mul128full mismatch at iteration %d", i)
111+
return
112+
}
113+
}
114+
}
115+
116+
func TestMul128Random(t *testing.T) {
117+
rng := rand.New(rand.NewPCG(42, 43))
118+
max128 := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 128), big.NewInt(1))
119+
for i := 0; i < 100_000; i++ {
120+
a1, a0, b1, b0 := rng.Uint64(), rng.Uint64(), rng.Uint64(), rng.Uint64()
121+
exp := new(big.Int).Mul(u128(a1, a0), u128(b1, b0))
122+
r1, r0, ov := Mul128(a1, a0, b1, b0)
123+
if exp.Cmp(max128) > 0 {
124+
if !ov {
125+
t.Errorf("Random Mul128 expected overflow at iteration %d", i)
126+
return
127+
}
128+
} else {
129+
if ov {
130+
t.Errorf("Random Mul128 unexpected overflow at iteration %d", i)
131+
return
132+
}
133+
if u128(r1, r0).Cmp(exp) != 0 {
134+
t.Errorf("Random Mul128 mismatch at iteration %d", i)
135+
return
136+
}
137+
}
138+
}
139+
}
140+
141+
func TestMul128x64Random(t *testing.T) {
142+
rng := rand.New(rand.NewPCG(42, 43))
143+
mask := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 64), big.NewInt(1))
144+
for i := 0; i < 100_000; i++ {
145+
a1, a0, b := rng.Uint64(), rng.Uint64(), rng.Uint64()
146+
exp := new(big.Int).Mul(u128(a1, a0), new(big.Int).SetUint64(b))
147+
r2, r1, r0 := Mul128x64(a1, a0, b)
148+
e0 := new(big.Int).And(exp, mask).Uint64()
149+
e1 := new(big.Int).And(new(big.Int).Rsh(exp, 64), mask).Uint64()
150+
e2 := new(big.Int).And(new(big.Int).Rsh(exp, 128), mask).Uint64()
151+
if e0 != r0 || e1 != r1 || e2 != r2 {
152+
t.Errorf("Random Mul128x64 mismatch at iteration %d", i)
153+
return
154+
}
155+
}
156+
}

find_arctan_formulae.go

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ var (
4747
maxGroupLength int // only find groups with this many members, max
4848
arctanCacheSize int // cache this many arctan values
4949
groupCacheSize int // cache this many group values
50-
normCacheSize int // cache this many norm values
50+
normCacheSize int // cache this many candidate norm lists
5151
pslqMaxFails int // stop PSLQ with this many failures in a row
5252
pslqMaxNoNew int // stop PSLQ if no new formulae for this long
5353
pslqTrimStart bool // if set, trim PSLQ norms from the start (larger fractions)
@@ -101,7 +101,7 @@ func init() {
101101
flag.IntVar(&maxGroupLength, "max-group-length", 10, "Maximum number of members for prime groups")
102102
flag.IntVar(&arctanCacheSize, "arctan-cache-size", 10_000, "Cache this many arctan values")
103103
flag.IntVar(&groupCacheSize, "group-cache-size", 10_000, "Cache this many group values")
104-
flag.IntVar(&normCacheSize, "norm-cache-size", 5*runtime.NumCPU(), "Cache this many norm values")
104+
flag.IntVar(&normCacheSize, "norm-cache-size", 10*runtime.NumCPU(), "Cache this many candidate norm lists")
105105
flag.IntVar(&pslqMaxFails, "max-fails", 1000, "Stop PSLQ with this many failures in a row")
106106
flag.IntVar(&pslqMaxNoNew, "max-no-new", 10000, "Stop PSLQ if no new results for this many iterations")
107107
flag.BoolVar(&pslqTrimStart, "trim-start", false, "If set, trim PSLQ list from start if too big")
@@ -819,10 +819,7 @@ func main() {
819819
log.Fatal(err)
820820
}
821821

822-
normCache, err = lru.New[string, []Candidate](normCacheSize)
823-
if err != nil {
824-
log.Fatal(err)
825-
}
822+
initCandidatesCache(normCacheSize)
826823

827824
// Initialise PSLQ engine from command line arguments
828825

0 commit comments

Comments
 (0)