Skip to content

Commit 0244baf

Browse files
committed
perf: use plain maps in giga cachekv store
1 parent ed499eb commit 0244baf

File tree

2 files changed

+130
-33
lines changed

2 files changed

+130
-33
lines changed

.agents/skills/optimize.md

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
---
2+
name: optimize
3+
description: Run a profiling-driven optimization loop for a specific function
4+
argument-hint: "<function-name> e.g. executeEVMTxWithGigaExecutor"
5+
allowed-tools:
6+
- Read
7+
- Write
8+
- Edit
9+
- Glob
10+
- Grep
11+
- Bash
12+
- Task
13+
- AskUserQuestion
14+
---
15+
16+
# Optimization Loop for: $ARGUMENTS
17+
18+
You are running a profiling-driven optimization loop focused on the function `$ARGUMENTS`.
19+
20+
## References
21+
22+
Read `benchmark/CLAUDE.md` for benchmark commands, environment variables, profiling, and the full optimization loop steps.
23+
24+
## Workflow
25+
26+
Execute the optimization loop from benchmark/CLAUDE.md section "Optimization loop", but focused on `$ARGUMENTS`:
27+
28+
### Phase 1: Understand the target function
29+
30+
1. Find the function `$ARGUMENTS` in the codebase using Grep
31+
2. Read the function and its callers/callees to understand the hot path
32+
3. Identify what packages, types, and helpers it uses
33+
34+
### Phase 2: Profile
35+
36+
4. Run the benchmark: `GIGA_EXECUTOR=true GIGA_OCC=true benchmark/benchmark.sh`
37+
5. Wait for it to complete (default DURATION=120s)
38+
39+
### Phase 3: Analyze (focused on target function)
40+
41+
6. Run pprof analysis focused on `$ARGUMENTS` and its call tree. Run these in parallel:
42+
- CPU: `go tool pprof -top -cum -nodecount=40 /tmp/sei-bench/pprof/cpu.pb.gz 2>&1 | head -60`
43+
- fgprof: `go tool pprof -top -cum -nodecount=40 /tmp/sei-bench/pprof/fgprof.pb.gz 2>&1 | head -60`
44+
- Heap (alloc_space): `go tool pprof -alloc_space -top -cum -nodecount=40 /tmp/sei-bench/pprof/heap.pb.gz 2>&1 | head -60`
45+
- Heap (alloc_objects): `go tool pprof -alloc_objects -top -cum -nodecount=40 /tmp/sei-bench/pprof/heap.pb.gz 2>&1 | head -60`
46+
- Block: `go tool pprof -top -cum -nodecount=40 /tmp/sei-bench/pprof/block.pb.gz 2>&1 | head -60`
47+
- Mutex: `go tool pprof -top -cum -nodecount=40 /tmp/sei-bench/pprof/mutex.pb.gz 2>&1 | head -60`
48+
7. Use `go tool pprof -text -focus='$ARGUMENTS' /tmp/sei-bench/pprof/cpu.pb.gz` to get function-focused breakdown
49+
8. Open flamegraphs on separate ports for the user to inspect:
50+
- `go tool pprof -http=:8080 /tmp/sei-bench/pprof/cpu.pb.gz &`
51+
- `go tool pprof -http=:8081 /tmp/sei-bench/pprof/fgprof.pb.gz &`
52+
- `go tool pprof -http=:8082 -alloc_space /tmp/sei-bench/pprof/heap.pb.gz &`
53+
54+
### Phase 4: Summarize and discuss
55+
56+
9. Present findings to the user:
57+
- TPS from the benchmark run (extract from `/tmp/sei-bench/tps.txt`)
58+
- Where `$ARGUMENTS` and its callees spend the most time (CPU, wall-clock)
59+
- Biggest allocation hotspots within the function's call tree
60+
- Any contention (block/mutex) in the function's path
61+
- Top 2-3 candidate optimizations with expected impact and trade-offs
62+
10. Ask the user which optimization direction to pursue. Do NOT write any code until the user picks.
63+
64+
### Phase 5: Implement
65+
66+
11. Implement the chosen optimization
67+
12. Run `gofmt -s -w` on all modified `.go` files
68+
13. Commit the change
69+
70+
### Phase 6: Compare
71+
72+
14. Record the commit hash before and after the optimization
73+
15. Run comparison: `benchmark/benchmark-compare.sh baseline=<before-commit> candidate=<after-commit>`
74+
16. Open diff flamegraphs for the user:
75+
- `go tool pprof -http=:8083 -diff_base /tmp/sei-bench/baseline/pprof/cpu.pb.gz /tmp/sei-bench/candidate/pprof/cpu.pb.gz &`
76+
- `go tool pprof -http=:8084 -diff_base /tmp/sei-bench/baseline/pprof/fgprof.pb.gz /tmp/sei-bench/candidate/pprof/fgprof.pb.gz &`
77+
- `go tool pprof -http=:8085 -diff_base /tmp/sei-bench/baseline/pprof/heap.pb.gz /tmp/sei-bench/candidate/pprof/heap.pb.gz &`
78+
79+
### Phase 7: Validate
80+
81+
17. Present results:
82+
- TPS delta (baseline vs candidate)
83+
- CPU diff: `go tool pprof -top -diff_base /tmp/sei-bench/baseline/pprof/cpu.pb.gz /tmp/sei-bench/candidate/pprof/cpu.pb.gz`
84+
- Heap diff: `go tool pprof -alloc_space -top -diff_base /tmp/sei-bench/baseline/pprof/heap.pb.gz /tmp/sei-bench/candidate/pprof/heap.pb.gz`
85+
18. Ask the user: keep, iterate, or revert?
86+
19. If the user chooses to keep the change, ask whether to open a PR
87+
88+
## Important rules
89+
90+
- ALWAYS ask the user before writing any optimization code (step 10)
91+
- ALWAYS ask the user before opening a PR (step 19)
92+
- Cross-session benchmark numbers are NOT comparable. Only compare within the same `benchmark-compare.sh` run.
93+
- Run `gofmt -s -w` on all modified Go files before committing
94+
- If `$ARGUMENTS` is empty or not found, ask the user to provide the function name

giga/deps/store/cachekv.go

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ import (
1313
// Store wraps an in-memory cache around an underlying types.KVStore.
1414
type Store struct {
1515
mtx sync.RWMutex
16-
cache *sync.Map
17-
deleted *sync.Map
16+
cache map[string]*types.CValue
17+
deleted map[string]struct{}
1818
parent types.KVStore
1919
storeKey types.StoreKey
2020
cacheSize int
@@ -25,8 +25,8 @@ var _ types.CacheKVStore = (*Store)(nil)
2525
// NewStore creates a new Store object
2626
func NewStore(parent types.KVStore, storeKey types.StoreKey, cacheSize int) *Store {
2727
return &Store{
28-
cache: &sync.Map{},
29-
deleted: &sync.Map{},
28+
cache: make(map[string]*types.CValue),
29+
deleted: make(map[string]struct{}),
3030
parent: parent,
3131
storeKey: storeKey,
3232
cacheSize: cacheSize,
@@ -44,8 +44,11 @@ func (store *Store) GetStoreType() types.StoreType {
4444

4545
// getFromCache queries the write-through cache for a value by key.
4646
func (store *Store) getFromCache(key []byte) []byte {
47-
if cv, ok := store.cache.Load(UnsafeBytesToStr(key)); ok {
48-
return cv.(*types.CValue).Value()
47+
store.mtx.RLock()
48+
cv, ok := store.cache[UnsafeBytesToStr(key)]
49+
store.mtx.RUnlock()
50+
if ok {
51+
return cv.Value()
4952
}
5053
return store.parent.Get(key)
5154
}
@@ -84,12 +87,11 @@ func (store *Store) Write() {
8487
// Not the best, but probably not a bottleneck depending on usage patterns.
8588
keys := []string{}
8689

87-
store.cache.Range(func(key, value any) bool {
88-
if value.(*types.CValue).Dirty() {
89-
keys = append(keys, key.(string))
90+
for key, value := range store.cache {
91+
if value.Dirty() {
92+
keys = append(keys, key)
9093
}
91-
return true
92-
})
94+
}
9395
sort.Strings(keys)
9496
// TODO: Consider allowing usage of Batch, which would allow the write to
9597
// at least happen atomically.
@@ -103,10 +105,10 @@ func (store *Store) Write() {
103105
continue
104106
}
105107

106-
cacheValue, ok := store.cache.Load(key)
107-
if ok && cacheValue.(*types.CValue).Value() != nil {
108+
cacheValue, ok := store.cache[key]
109+
if ok && cacheValue.Value() != nil {
108110
// The cached value is non-nil, so write it through to the parent.
109-
store.parent.Set([]byte(key), cacheValue.(*types.CValue).Value())
111+
store.parent.Set([]byte(key), cacheValue.Value())
110112
}
111113
}
112114

@@ -115,14 +117,11 @@ func (store *Store) Write() {
115117
// writes immediately visible until Commit(). By keeping the cache populated
116118
// with clean entries, subsequent reads will still hit the cache instead of
117119
// falling through to the parent which can't read uncommitted data.
118-
store.cache.Range(func(key, value any) bool {
119-
cv := value.(*types.CValue)
120-
// Replace with a clean (non-dirty) version of the same value
121-
store.cache.Store(key, types.NewCValue(cv.Value(), false))
122-
return true
123-
})
120+
for key, cv := range store.cache {
121+
store.cache[key] = types.NewCValue(cv.Value(), false)
122+
}
124123
// Clear the deleted map since those deletes have been sent to parent
125-
store.deleted = &sync.Map{}
124+
store.deleted = make(map[string]struct{})
126125
}
127126

128127
// CacheWrap implements CacheWrapper.
@@ -142,18 +141,20 @@ func (store *Store) VersionExists(version int64) bool {
142141
// Only entrypoint to mutate store.cache.
143142
func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) {
144143
types.AssertValidKey(key)
144+
store.mtx.Lock()
145+
defer store.mtx.Unlock()
145146

146147
keyStr := UnsafeBytesToStr(key)
147-
store.cache.Store(keyStr, types.NewCValue(value, dirty))
148+
store.cache[keyStr] = types.NewCValue(value, dirty)
148149
if deleted {
149-
store.deleted.Store(keyStr, struct{}{})
150+
store.deleted[keyStr] = struct{}{}
150151
} else {
151-
store.deleted.Delete(keyStr)
152+
delete(store.deleted, keyStr)
152153
}
153154
}
154155

155156
func (store *Store) isDeleted(key string) bool {
156-
_, ok := store.deleted.Load(key)
157+
_, ok := store.deleted[key]
157158
return ok
158159
}
159160

@@ -169,24 +170,26 @@ func (store *Store) DeleteAll(start, end []byte) error {
169170
}
170171

171172
func (store *Store) GetAllKeyStrsInRange(start, end []byte) (res []string) {
173+
store.mtx.RLock()
174+
defer store.mtx.RUnlock()
175+
172176
keyStrs := map[string]struct{}{}
173177
for _, pk := range store.parent.GetAllKeyStrsInRange(start, end) {
174178
keyStrs[pk] = struct{}{}
175179
}
176-
store.cache.Range(func(key, value any) bool {
177-
kbz := []byte(key.(string))
180+
for key, value := range store.cache {
181+
kbz := []byte(key)
178182
if bytes.Compare(kbz, start) < 0 || bytes.Compare(kbz, end) >= 0 {
179183
// we don't want to break out of the iteration since cache isn't sorted
180-
return true
184+
continue
181185
}
182-
cv := value.(*types.CValue)
186+
cv := value
183187
if cv.Value() == nil {
184-
delete(keyStrs, key.(string))
188+
delete(keyStrs, key)
185189
} else {
186-
keyStrs[key.(string)] = struct{}{}
190+
keyStrs[key] = struct{}{}
187191
}
188-
return true
189-
})
192+
}
190193
for k := range keyStrs {
191194
res = append(res, k)
192195
}

0 commit comments

Comments
 (0)