Sandbox Benchmark #18
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sandbox benchmark workflow: runs on pull requests that touch the benchmark
# harness, on a daily schedule, and on manual dispatch.
name: Sandbox Benchmark

on:
  # Only PRs that modify the harness or its dependencies trigger a run.
  pull_request:
    paths:
      - 'src/sandbox/**'
      - 'src/util/**'
      - 'src/run.ts'
      - 'src/merge-results.ts'
      - 'package.json'

  schedule:
    - cron: '0 0 * * *' # Daily at midnight UTC

  # Manual runs may override the sample size, the fan-out, and the test mode.
  workflow_dispatch:
    inputs:
      iterations:
        description: 'Iterations per provider'
        required: false
        default: '100'
      concurrency:
        description: 'Concurrent sandboxes for burst/staggered tests'
        required: false
        default: '100'
      mode:
        description: 'Test mode (leave empty to run all)'
        required: false
        default: ''
        type: choice
        options:
          - ''
          - sequential
          - staggered
          - burst

# contents: write lets the workflow push commits to the repository;
# pull-requests: write lets it create and edit comments on pull requests.
permissions:
  contents: write
  pull-requests: write
jobs:
  # Runs the benchmark harness once per matrix provider and uploads whatever
  # ended up in results/ as a per-provider artifact.
  bench:
    name: Bench ${{ matrix.provider }}
    runs-on:
      - arc-runner-set
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        # fcspawn only. Other providers intentionally removed — this
        # fork tracks only our numbers on the same harness. Add a
        # provider back to this list if you want a cross-check run.
        provider:
          - fcspawn
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 24
          cache: 'npm'
      - name: Install dependencies
        # Scheduled runs refresh dependencies; every other trigger installs
        # exactly what the lockfile pins.
        run: |
          if [ "${{ github.event_name }}" = "schedule" ]; then
            npm update
          else
            npm ci
          fi
      - name: Clear stale results from checkout
        run: rm -rf results/
      - name: Run benchmark
        env:
          FCSPAWN_URL: ${{ secrets.FCSPAWN_URL }}
          FCSPAWN_TOKEN: ${{ secrets.FCSPAWN_TOKEN }}
          # Dispatch inputs are free-form text. They are handed to the shell
          # through the environment (never spliced into the script source) so
          # a crafted value cannot inject commands or split into extra words.
          # PR runs default to 10 iterations / 10 concurrent; others to 100.
          BENCH_ARG_PROVIDER: ${{ matrix.provider }}
          BENCH_ARG_MODE: ${{ github.event.inputs.mode }}
          BENCH_ARG_ITERATIONS: ${{ github.event.inputs.iterations || (github.event_name == 'pull_request' && '10') || '100' }}
          BENCH_ARG_CONCURRENCY: ${{ github.event.inputs.concurrency || (github.event_name == 'pull_request' && '10') || '100' }}
        run: |
          set -- \
            --provider "$BENCH_ARG_PROVIDER" \
            --iterations "$BENCH_ARG_ITERATIONS" \
            --concurrency "$BENCH_ARG_CONCURRENCY"
          # An empty mode means "run all modes": omit the flag entirely.
          if [ -n "$BENCH_ARG_MODE" ]; then
            set -- "$@" --mode "$BENCH_ARG_MODE"
          fi
          npm run bench -- "$@"
      - name: Upload results
        # Upload even when the benchmark step failed so partial results survive.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ matrix.provider }}
          path: results/
          if-no-files-found: ignore
          retention-days: 7

  # Merges the per-provider artifacts, renders SVGs, then either comments on
  # the PR (pull_request runs) or commits the outputs (all other triggers).
  collect:
    name: Collect Results
    runs-on:
      - arc-runner-set
    needs: bench
    # Run even if a bench matrix entry failed.
    if: always()
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 24
          cache: 'npm'
      - name: Install dependencies
        run: |
          if [ "${{ github.event_name }}" = "schedule" ]; then
            npm update
          else
            npm ci
          fi
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts/
          pattern: results-*
      - name: Merge results
        run: npx tsx src/merge-results.ts --input artifacts
      - run: npm run generate-svg
      - run: npm run generate-pricing-svg
      - name: Upload SVGs as artifacts
        if: github.event_name == 'pull_request'
        uses: actions/upload-artifact@v4
        with:
          name: sandbox-benchmark-svgs
          path: |
            results.svg
            *_tti.svg
            pricing.svg
          if-no-files-found: ignore
          retention-days: 7
      - name: Post results to PR
        if: github.event_name == 'pull_request'
        # Commenting is best-effort; a failure here must not fail the job.
        continue-on-error: true
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const path = require('path');

            // Heading that opens the comment; also used to find a comment
            // posted by a previous run so it is updated instead of duplicated.
            const marker = '## Sandbox Benchmark Results';
            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
            const modes = [
              { key: 'sequential_tti', label: 'Sequential' },
              { key: 'staggered_tti', label: 'Staggered' },
              { key: 'burst_tti', label: 'Burst' },
            ];

            // Render milliseconds as seconds; '--' when the stat is missing.
            const formatSeconds = (ms) =>
              Number.isFinite(ms) ? `${(ms / 1000).toFixed(2)}s` : '--';

            let body = `${marker}\n\n`;
            let hasResults = false;
            for (const mode of modes) {
              const latestPath = path.join('results', mode.key, 'latest.json');
              if (!fs.existsSync(latestPath)) continue;
              const data = JSON.parse(fs.readFileSync(latestPath, 'utf-8'));
              // Highest composite score first; entries without a score sort as 0.
              const results = (data.results ?? [])
                .filter((r) => !r.skipped)
                .sort((a, b) => (b.compositeScore || 0) - (a.compositeScore || 0));
              if (results.length === 0) continue;
              hasResults = true;
              body += `### ${mode.label}\n\n`;
              body += '| # | Provider | Score | Median TTI | P95 | P99 | Status |\n';
              body += '|---|----------|-------|------------|-----|-----|--------|\n';
              results.forEach((r, i) => {
                // A null/absent score must not throw on .toFixed.
                const score = typeof r.compositeScore === 'number' ? r.compositeScore.toFixed(1) : '--';
                const tti = r.summary?.ttiMs ?? {};
                const iterations = r.iterations ?? [];
                const ok = iterations.filter((it) => !it.error).length;
                body += `| ${i + 1} | ${r.provider} | ${score} | ${formatSeconds(tti.median)} | ${formatSeconds(tti.p95)} | ${formatSeconds(tti.p99)} | ${ok}/${iterations.length} |\n`;
              });
              body += '\n';
            }
            if (!hasResults) {
              body += '> No sandbox benchmark results were generated.\n\n';
            }
            body += `---\n*[View full run](${runUrl}) · SVGs available as [build artifacts](${runUrl}#artifacts)*`;

            // Find and update existing comment or create new one. Paginate so
            // the earlier comment is still found on PRs with many comments
            // (a single listComments call returns only the first page).
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              per_page: 100,
            });
            const existing = comments.find((c) => c.body?.startsWith(marker));
            if (existing) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: existing.id,
                body,
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body,
              });
            }
      - name: Commit and push
        if: github.event_name != 'pull_request'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add package.json package-lock.json results.svg *_tti.svg pricing.svg results/
          # Nothing staged means nothing changed; finish successfully.
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi
          git commit -m "chore: update benchmark results [skip ci]"
          git push