Skip to content

Commit 0244baf

Browse files
committed
perf: use plain maps in giga cachekv store
1 parent ed499eb commit 0244baf

File tree

2 files changed

+130
-33
lines changed

2 files changed

+130
-33
lines changed

.agents/skills/optimize.md

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
---
2+
name: optimize
3+
description: Run a profiling-driven optimization loop for a specific function
4+
argument-hint: "<function-name> e.g. executeEVMTxWithGigaExecutor"
5+
allowed-tools:
6+
- Read
7+
- Write
8+
- Edit
9+
- Glob
10+
- Grep
11+
- Bash
12+
- Task
13+
- AskUserQuestion
14+
---
15+
16+
# Optimization Loop for: $ARGUMENTS
17+
18+
You are running a profiling-driven optimization loop focused on the function `$ARGUMENTS`.
19+
20+
## References
21+
22+
Read `benchmark/CLAUDE.md` for benchmark commands, environment variables, profiling, and the full optimization loop steps.
23+
24+
## Workflow
25+
26+
Execute the optimization loop from benchmark/CLAUDE.md section "Optimization loop", but focused on `$ARGUMENTS`:
27+
28+
### Phase 1: Understand the target function
29+
30+
1. Find the function `$ARGUMENTS` in the codebase using Grep
31+
2. Read the function and its callers/callees to understand the hot path
32+
3. Identify what packages, types, and helpers it uses
33+
34+
### Phase 2: Profile
35+
36+
4. Run the benchmark: `GIGA_EXECUTOR=true GIGA_OCC=true benchmark/benchmark.sh`
37+
5. Wait for it to complete (default DURATION=120s)
38+
39+
### Phase 3: Analyze (focused on target function)
40+
41+
6. Run pprof analysis focused on `$ARGUMENTS` and its call tree. Run these in parallel:
42+
- CPU: `go tool pprof -top -cum -nodecount=40 /tmp/sei-bench/pprof/cpu.pb.gz 2>&1 | head -60`
43+
- fgprof: `go tool pprof -top -cum -nodecount=40 /tmp/sei-bench/pprof/fgprof.pb.gz 2>&1 | head -60`
44+
- Heap (alloc_space): `go tool pprof -alloc_space -top -cum -nodecount=40 /tmp/sei-bench/pprof/heap.pb.gz 2>&1 | head -60`
45+
- Heap (alloc_objects): `go tool pprof -alloc_objects -top -cum -nodecount=40 /tmp/sei-bench/pprof/heap.pb.gz 2>&1 | head -60`
46+
- Block: `go tool pprof -top -cum -nodecount=40 /tmp/sei-bench/pprof/block.pb.gz 2>&1 | head -60`
47+
- Mutex: `go tool pprof -top -cum -nodecount=40 /tmp/sei-bench/pprof/mutex.pb.gz 2>&1 | head -60`
48+
7. Use `go tool pprof -text -focus='$ARGUMENTS' /tmp/sei-bench/pprof/cpu.pb.gz` to get function-focused breakdown
49+
8. Open flamegraphs on separate ports for the user to inspect:
50+
- `go tool pprof -http=:8080 /tmp/sei-bench/pprof/cpu.pb.gz &`
51+
- `go tool pprof -http=:8081 /tmp/sei-bench/pprof/fgprof.pb.gz &`
52+
- `go tool pprof -http=:8082 -alloc_space /tmp/sei-bench/pprof/heap.pb.gz &`
53+
54+
### Phase 4: Summarize and discuss
55+
56+
9. Present findings to the user:
57+
- TPS from the benchmark run (extract from `/tmp/sei-bench/tps.txt`)
58+
- Where `$ARGUMENTS` and its callees spend the most time (CPU, wall-clock)
59+
- Biggest allocation hotspots within the function's call tree
60+
- Any contention (block/mutex) in the function's path
61+
- Top 2-3 candidate optimizations with expected impact and trade-offs
62+
10. Ask the user which optimization direction to pursue. Do NOT write any code until the user picks.
63+
64+
### Phase 5: Implement
65+
66+
11. Implement the chosen optimization
67+
12. Run `gofmt -s -w` on all modified `.go` files
68+
13. Commit the change
69+
70+
### Phase 6: Compare
71+
72+
14. Record the commit hash before and after the optimization
73+
15. Run comparison: `benchmark/benchmark-compare.sh baseline=<before-commit> candidate=<after-commit>`
74+
16. Open diff flamegraphs for the user:
75+
- `go tool pprof -http=:8083 -diff_base /tmp/sei-bench/baseline/pprof/cpu.pb.gz /tmp/sei-bench/candidate/pprof/cpu.pb.gz &`
76+
- `go tool pprof -http=:8084 -diff_base /tmp/sei-bench/baseline/pprof/fgprof.pb.gz /tmp/sei-bench/candidate/pprof/fgprof.pb.gz &`
77+
- `go tool pprof -http=:8085 -diff_base /tmp/sei-bench/baseline/pprof/heap.pb.gz /tmp/sei-bench/candidate/pprof/heap.pb.gz &`
78+
79+
### Phase 7: Validate
80+
81+
17. Present results:
82+
- TPS delta (baseline vs candidate)
83+
- CPU diff: `go tool pprof -top -diff_base /tmp/sei-bench/baseline/pprof/cpu.pb.gz /tmp/sei-bench/candidate/pprof/cpu.pb.gz`
84+
- Heap diff: `go tool pprof -alloc_space -top -diff_base /tmp/sei-bench/baseline/pprof/heap.pb.gz /tmp/sei-bench/candidate/pprof/heap.pb.gz`
85+
18. Ask the user: keep, iterate, or revert?
86+
19. If the user chooses to keep the change, ask whether to open a PR
87+
88+
## Important rules
89+
90+
- ALWAYS ask the user before writing any optimization code (step 10)
91+
- ALWAYS ask the user before opening a PR (step 19)
92+
- Cross-session benchmark numbers are NOT comparable. Only compare within the same `benchmark-compare.sh` run.
93+
- Run `gofmt -s -w` on all modified Go files before committing
94+
- If `$ARGUMENTS` is empty or not found, ask the user to provide the function name

giga/deps/store/cachekv.go

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ import (
1313
// Store wraps an in-memory cache around an underlying types.KVStore.
1414
type Store struct {
1515
mtx sync.RWMutex
16-
cache *sync.Map
17-
deleted *sync.Map
16+
cache map[string]*types.CValue
17+
deleted map[string]struct{}
1818
parent types.KVStore
1919
storeKey types.StoreKey
2020
cacheSize int
@@ -25,8 +25,8 @@ var _ types.CacheKVStore = (*Store)(nil)
2525
// NewStore creates a new Store object
2626
func NewStore(parent types.KVStore, storeKey types.StoreKey, cacheSize int) *Store {
2727
return &Store{
28-
cache: &sync.Map{},
29-
deleted: &sync.Map{},
28+
cache: make(map[string]*types.CValue),
29+
deleted: make(map[string]struct{}),
3030
parent: parent,
3131
storeKey: storeKey,
3232
cacheSize: cacheSize,
@@ -44,8 +44,11 @@ func (store *Store) GetStoreType() types.StoreType {
4444

4545
// getFromCache queries the write-through cache for a value by key.
4646
func (store *Store) getFromCache(key []byte) []byte {
47-
if cv, ok := store.cache.Load(UnsafeBytesToStr(key)); ok {
48-
return cv.(*types.CValue).Value()
47+
store.mtx.RLock()
48+
cv, ok := store.cache[UnsafeBytesToStr(key)]
49+
store.mtx.RUnlock()
50+
if ok {
51+
return cv.Value()
4952
}
5053
return store.parent.Get(key)
5154
}
@@ -84,12 +87,11 @@ func (store *Store) Write() {
8487
// Not the best, but probably not a bottleneck depending on usage patterns.
8588
keys := []string{}
8689

87-
store.cache.Range(func(key, value any) bool {
88-
if value.(*types.CValue).Dirty() {
89-
keys = append(keys, key.(string))
90+
for key, value := range store.cache {
91+
if value.Dirty() {
92+
keys = append(keys, key)
9093
}
91-
return true
92-
})
94+
}
9395
sort.Strings(keys)
9496
// TODO: Consider allowing usage of Batch, which would allow the write to
9597
// at least happen atomically.
@@ -103,10 +105,10 @@ func (store *Store) Write() {
103105
continue
104106
}
105107

106-
cacheValue, ok := store.cache.Load(key)
107-
if ok && cacheValue.(*types.CValue).Value() != nil {
108+
cacheValue, ok := store.cache[key]
109+
if ok && cacheValue.Value() != nil {
108110
// The cached value is non-nil, so write it through to the parent.
109-
store.parent.Set([]byte(key), cacheValue.(*types.CValue).Value())
111+
store.parent.Set([]byte(key), cacheValue.Value())
110112
}
111113
}
112114

@@ -115,14 +117,11 @@ func (store *Store) Write() {
115117
// writes immediately visible until Commit(). By keeping the cache populated
116118
// with clean entries, subsequent reads will still hit the cache instead of
117119
// falling through to the parent which can't read uncommitted data.
118-
store.cache.Range(func(key, value any) bool {
119-
cv := value.(*types.CValue)
120-
// Replace with a clean (non-dirty) version of the same value
121-
store.cache.Store(key, types.NewCValue(cv.Value(), false))
122-
return true
123-
})
120+
for key, cv := range store.cache {
121+
store.cache[key] = types.NewCValue(cv.Value(), false)
122+
}
124123
// Clear the deleted map since those deletes have been sent to parent
125-
store.deleted = &sync.Map{}
124+
store.deleted = make(map[string]struct{})
126125
}
127126

128127
// CacheWrap implements CacheWrapper.
@@ -142,18 +141,20 @@ func (store *Store) VersionExists(version int64) bool {
142141
// Only entrypoint to mutate store.cache.
143142
func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) {
144143
types.AssertValidKey(key)
144+
store.mtx.Lock()
145+
defer store.mtx.Unlock()
145146

146147
keyStr := UnsafeBytesToStr(key)
147-
store.cache.Store(keyStr, types.NewCValue(value, dirty))
148+
store.cache[keyStr] = types.NewCValue(value, dirty)
148149
if deleted {
149-
store.deleted.Store(keyStr, struct{}{})
150+
store.deleted[keyStr] = struct{}{}
150151
} else {
151-
store.deleted.Delete(keyStr)
152+
delete(store.deleted, keyStr)
152153
}
153154
}
154155

155156
func (store *Store) isDeleted(key string) bool {
156-
_, ok := store.deleted.Load(key)
157+
_, ok := store.deleted[key]
157158
return ok
158159
}
159160

@@ -169,24 +170,26 @@ func (store *Store) DeleteAll(start, end []byte) error {
169170
}
170171

171172
func (store *Store) GetAllKeyStrsInRange(start, end []byte) (res []string) {
173+
store.mtx.RLock()
174+
defer store.mtx.RUnlock()
175+
172176
keyStrs := map[string]struct{}{}
173177
for _, pk := range store.parent.GetAllKeyStrsInRange(start, end) {
174178
keyStrs[pk] = struct{}{}
175179
}
176-
store.cache.Range(func(key, value any) bool {
177-
kbz := []byte(key.(string))
180+
for key, value := range store.cache {
181+
kbz := []byte(key)
178182
if bytes.Compare(kbz, start) < 0 || bytes.Compare(kbz, end) >= 0 {
179183
// we don't want to break out of the iteration since cache isn't sorted
180-
return true
184+
continue
181185
}
182-
cv := value.(*types.CValue)
186+
cv := value
183187
if cv.Value() == nil {
184-
delete(keyStrs, key.(string))
188+
delete(keyStrs, key)
185189
} else {
186-
keyStrs[key.(string)] = struct{}{}
190+
keyStrs[key] = struct{}{}
187191
}
188-
return true
189-
})
192+
}
190193
for k := range keyStrs {
191194
res = append(res, k)
192195
}

0 commit comments

Comments
 (0)