Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 88 additions & 11 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ permissions: {}
push:
branches:
- main
pull_request:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK for testing the branch but let's discuss if we want that for every PR.

Copy link
Copy Markdown
Contributor Author

@chatton chatton Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you mean if we want to run them on every PR? Actually, what we can do is just run them on PRs if the files have changed. Maybe we can move things to a testing/**/benchmarking directory — WDYT?

It will still only push results once merged to main.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will merge as is for now, and then we can adjust in a follow-up if needed

branches:
- main
workflow_dispatch:

jobs:
evm-benchmark:
name: EVM Contract Benchmark
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: write
issues: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Go
Expand All @@ -29,29 +29,106 @@ jobs:
run: |
cd test/e2e && go test -tags evm -bench=. -benchmem -run='^$' \
-timeout=10m --evm-binary=../../build/evm | tee output.txt
- name: Store benchmark result
- name: Run Block Executor benchmarks
run: |
go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \
./block/internal/executing/... > block_executor_output.txt
- name: Upload benchmark results
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: evm-benchmark-results
path: |
test/e2e/output.txt
block_executor_output.txt

spamoor-benchmark:
name: Spamoor Trace Benchmark
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Go
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version-file: ./go.mod
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build binaries
run: make build-evm build-da
- name: Run Spamoor smoke test
run: |
cd test/e2e && BENCH_JSON_OUTPUT=spamoor_bench.json go test -tags evm \
-run='^TestSpamoorSmoke$' -v -timeout=15m --evm-binary=../../build/evm
- name: Upload benchmark results
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: spamoor-benchmark-results
path: test/e2e/spamoor_bench.json

# single job to push all results to gh-pages sequentially, avoiding race conditions
publish-benchmarks:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Makes sense!

name: Publish Benchmark Results
needs: [evm-benchmark, spamoor-benchmark]
runs-on: ubuntu-latest
permissions:
contents: write
issues: write
pull-requests: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Download EVM benchmark results
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: evm-benchmark-results
- name: Download Spamoor benchmark results
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: spamoor-benchmark-results
path: test/e2e/

# only update the benchmark baseline on push/dispatch, not on PRs
- name: Store EVM Contract Roundtrip result
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: EVM Contract Roundtrip
tool: 'go'
output-file-path: test/e2e/output.txt
auto-push: true
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
alert-threshold: '150%'
fail-on-alert: true
comment-on-alert: true

- name: Run Block Executor benchmarks
run: |
go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \
./block/internal/executing/... > block_executor_output.txt
- name: Store Block Executor benchmark result
# delete local gh-pages so the next benchmark action step fetches fresh from remote
- name: Reset local gh-pages branch
run: git branch -D gh-pages || true

- name: Store Block Executor result
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because there are now multiple jobs pushing benchmarks, in order to avoid race conditions we gather up all the results and then push them sequentially, one by one.

uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: Block Executor Benchmark
tool: 'go'
output-file-path: block_executor_output.txt
auto-push: true
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
alert-threshold: '150%'
fail-on-alert: true
comment-on-alert: true

# delete local gh-pages so the next benchmark action step fetches fresh from remote
- name: Reset local gh-pages branch
run: git branch -D gh-pages || true

- name: Store Spamoor Trace result
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: Spamoor Trace Benchmarks
tool: 'customSmallerIsBetter'
output-file-path: test/e2e/spamoor_bench.json
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
alert-threshold: '150%'
fail-on-alert: true
Expand Down
6 changes: 6 additions & 0 deletions test/e2e/evm_spamoor_smoke_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"fmt"
"net/http"
"os"
"path/filepath"
"testing"
"time"
Expand Down Expand Up @@ -164,6 +165,11 @@ func TestSpamoorSmoke(t *testing.T) {
evRethSpans := extractSpansFromTraces(evRethTraces)
printTraceReport(t, "ev-reth", toTraceSpans(evRethSpans))

// write benchmark JSON for ev-node spans when output path is configured
if outputPath := os.Getenv("BENCH_JSON_OUTPUT"); outputPath != "" {
writeTraceBenchmarkJSON(t, "SpamoorSmoke", toTraceSpans(evNodeSpans), outputPath)
}

// assert expected ev-node span names are present.
// these spans reliably appear during block production with transactions flowing.
expectedSpans := []string{
Expand Down
87 changes: 72 additions & 15 deletions test/e2e/evm_test_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package e2e

import (
"context"
"encoding/json"
"flag"
"fmt"
"math/big"
Expand Down Expand Up @@ -855,21 +856,17 @@ type traceSpan interface {
SpanDuration() time.Duration
}

// printTraceReport aggregates spans by operation name and prints a timing breakdown.
func printTraceReport(t testing.TB, label string, spans []traceSpan) {
t.Helper()
if len(spans) == 0 {
t.Logf("WARNING: no spans found for %s", label)
return
}
// spanStats holds aggregated timing statistics for a single span operation.
type spanStats struct {
count int
total time.Duration
min time.Duration
max time.Duration
}

type stats struct {
count int
total time.Duration
min time.Duration
max time.Duration
}
m := make(map[string]*stats)
// aggregateSpanStats groups spans by operation name and computes count, total, min, max.
func aggregateSpanStats(spans []traceSpan) map[string]*spanStats {
m := make(map[string]*spanStats)
for _, span := range spans {
d := span.SpanDuration()
if d <= 0 {
Expand All @@ -878,7 +875,7 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
name := span.SpanName()
s, ok := m[name]
if !ok {
s = &stats{min: d, max: d}
s = &spanStats{min: d, max: d}
m[name] = s
}
s.count++
Expand All @@ -890,6 +887,18 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
s.max = d
}
}
return m
}

// printTraceReport aggregates spans by operation name and prints a timing breakdown.
func printTraceReport(t testing.TB, label string, spans []traceSpan) {
t.Helper()
if len(spans) == 0 {
t.Logf("WARNING: no spans found for %s", label)
return
}

m := aggregateSpanStats(spans)

names := make([]string, 0, len(m))
for name := range m {
Expand Down Expand Up @@ -924,3 +933,51 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
t.Logf("%-40s %5.1f%% %s", name, pct, bar)
}
}

// benchmarkEntry matches the customSmallerIsBetter format for github-action-benchmark.
// Each entry is a single data point on the published benchmark chart; with this
// tool type, smaller values are treated as better by the action.
type benchmarkEntry struct {
	Name string `json:"name"` // metric display name, e.g. "<label> - <span> (avg)" as built by writeTraceBenchmarkJSON
	Unit string `json:"unit"` // unit of Value, e.g. "us" (microseconds)
	Value float64 `json:"value"` // measured value; smaller is better
}

// writeTraceBenchmarkJSON aggregates spans and writes a customSmallerIsBetter JSON file.
//
// For each distinct span name it emits three entries — average, minimum, and
// maximum duration in microseconds — sorted by span name so the output is
// stable across runs. If outputPath is empty, the function is a no-op; if
// there are no usable span stats it logs a warning and writes nothing.
// Marshal or write failures fail the test via t.Fatalf.
func writeTraceBenchmarkJSON(t testing.TB, label string, spans []traceSpan, outputPath string) {
	t.Helper()
	if outputPath == "" {
		return
	}
	m := aggregateSpanStats(spans)
	if len(m) == 0 {
		t.Logf("WARNING: no span stats to write for %s", label)
		return
	}

	// sort by name for stable output
	names := make([]string, 0, len(m))
	for name := range m {
		names = append(names, name)
	}
	sort.Strings(names)

	// three entries (avg/min/max) per span name, so pre-size exactly
	entries := make([]benchmarkEntry, 0, 3*len(names))
	for _, name := range names {
		s := m[name]
		// divide in nanoseconds before converting to microseconds, so the
		// average is not truncated to whole microseconds up front
		avg := float64(s.total) / float64(s.count) / float64(time.Microsecond)
		entries = append(entries,
			benchmarkEntry{Name: fmt.Sprintf("%s - %s (avg)", label, name), Unit: "us", Value: avg},
			benchmarkEntry{Name: fmt.Sprintf("%s - %s (min)", label, name), Unit: "us", Value: float64(s.min.Microseconds())},
			benchmarkEntry{Name: fmt.Sprintf("%s - %s (max)", label, name), Unit: "us", Value: float64(s.max.Microseconds())},
		)
	}

	data, err := json.MarshalIndent(entries, "", " ")
	if err != nil {
		t.Fatalf("failed to marshal benchmark JSON: %v", err)
	}
	if err := os.WriteFile(outputPath, data, 0o644); err != nil {
		t.Fatalf("failed to write benchmark JSON to %s: %v", outputPath, err)
	}
	t.Logf("wrote %d benchmark entries to %s", len(entries), outputPath)
}
Loading