Sandbox Benchmark #18

Workflow file for this run

.github/workflows/sandbox-benchmarks.yml at cd578e6

	# Workflow display name.
	name: Sandbox Benchmark

	# Three triggers:
	#   * pull requests that touch the benchmark harness or its dependencies,
	#   * a daily cron run,
	#   * manual dispatch with optional tuning inputs.
	on:
	  pull_request:
	    # Only run for changes under these paths.
	    paths:
	      - 'src/sandbox/**'
	      - 'src/util/**'
	      - 'src/run.ts'
	      - 'src/merge-results.ts'
	      - 'package.json'
	  schedule:
	    # Daily at midnight UTC
	    - cron: '0 0 * * *'
	  workflow_dispatch:
	    inputs:
	      # All inputs are optional; defaults are strings because dispatch
	      # inputs are delivered to the workflow as strings.
	      iterations:
	        description: 'Iterations per provider'
	        required: false
	        default: '100'
	      concurrency:
	        description: 'Concurrent sandboxes for burst/staggered tests'
	        required: false
	        default: '100'
	      mode:
	        description: 'Test mode (leave empty to run all)'
	        required: false
	        default: ''
	        type: choice
	        # The empty option means "no mode selected".
	        options:
	          - ''
	          - sequential
	          - staggered
	          - burst

	# Workflow-level default scopes for GITHUB_TOKEN. A job may narrow these
	# with its own `permissions` key.
	#   contents: write       - the "Commit and push" step runs `git push`.
	#   pull-requests: write  - the "Post results to PR" step creates or
	#                           updates a comment on the pull request.
	permissions:
	  contents: write
	  pull-requests: write

	# Two-stage pipeline:
	#   bench   - runs the benchmark once per matrix provider and uploads the
	#             `results/` directory as a `results-<provider>` artifact.
	#   collect - downloads those artifacts, merges them, renders SVGs, then
	#             either comments on the PR or commits the results.
	jobs:
	  bench:
	    name: Bench ${{ matrix.provider }}
	    runs-on:
	      - arc-runner-set
	    timeout-minutes: 60
	    # This job only reads the repository, so narrow the workflow-level
	    # write scopes: benchmark code should not run with a push-capable token.
	    permissions:
	      contents: read
	    strategy:
	      # Let every provider finish even if another one fails.
	      fail-fast: false
	      matrix:
	        # fcspawn only. Other providers intentionally removed — this
	        # fork tracks only our numbers on the same harness. Add a
	        # provider back to this list if you want a cross-check run.
	        provider:
	          - fcspawn
	    steps:
	      - uses: actions/checkout@v4
	      - uses: actions/setup-node@v4
	        with:
	          node-version: 24
	          cache: 'npm'
	      - name: Install dependencies
	        # Scheduled runs use `npm update`; every other event installs
	        # from the lockfile with `npm ci`.
	        run: |
	          if [ "${{ github.event_name }}" = "schedule" ]; then
	            npm update
	          else
	            npm ci
	          fi
	      - name: Clear stale results from checkout
	        run: rm -rf results/
	      - name: Run benchmark
	        env:
	          FCSPAWN_URL: ${{ secrets.FCSPAWN_URL }}
	          FCSPAWN_TOKEN: ${{ secrets.FCSPAWN_TOKEN }}
	          # Dispatch inputs are user-supplied text. Hand them to the shell
	          # as environment variables instead of splicing them into the
	          # script, so their contents can never be parsed as shell code.
	          INPUT_MODE: ${{ github.event.inputs.mode }}
	          # Explicit input wins; otherwise 10 on pull requests, 100 elsewhere.
	          INPUT_ITERATIONS: ${{ github.event.inputs.iterations || (github.event_name == 'pull_request' && '10') || '100' }}
	          INPUT_CONCURRENCY: ${{ github.event.inputs.concurrency || (github.event_name == 'pull_request' && '10') || '100' }}
	        run: |
	          MODE_FLAG=""
	          if [ -n "$INPUT_MODE" ]; then
	            MODE_FLAG="--mode $INPUT_MODE"
	          fi
	          # $MODE_FLAG is intentionally unquoted: it must expand to zero
	          # arguments when empty and to two arguments when set.
	          npm run bench -- \
	            --provider ${{ matrix.provider }} \
	            --iterations "$INPUT_ITERATIONS" \
	            --concurrency "$INPUT_CONCURRENCY" \
	            $MODE_FLAG
	      - name: Upload results
	        # Upload whatever was produced even when the benchmark step failed.
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: results-${{ matrix.provider }}
	          path: results/
	          if-no-files-found: ignore
	          retention-days: 7

	  collect:
	    name: Collect Results
	    runs-on:
	      - arc-runner-set
	    needs: bench
	    # Run regardless of the outcome of `bench`.
	    if: always()
	    steps:
	      - uses: actions/checkout@v4
	      - uses: actions/setup-node@v4
	        with:
	          node-version: 24
	          cache: 'npm'
	      - name: Install dependencies
	        # Same install policy as the bench job.
	        run: |
	          if [ "${{ github.event_name }}" = "schedule" ]; then
	            npm update
	          else
	            npm ci
	          fi
	      - name: Download all artifacts
	        uses: actions/download-artifact@v4
	        with:
	          path: artifacts/
	          pattern: results-*
	      - name: Merge results
	        run: npx tsx src/merge-results.ts --input artifacts
	      - run: npm run generate-svg
	      - run: npm run generate-pricing-svg
	      - name: Upload SVGs as artifacts
	        if: github.event_name == 'pull_request'
	        uses: actions/upload-artifact@v4
	        with:
	          name: sandbox-benchmark-svgs
	          path: |
	            results.svg
	            *_tti.svg
	            pricing.svg
	          if-no-files-found: ignore
	          retention-days: 7
	      - name: Post results to PR
	        if: github.event_name == 'pull_request'
	        # Commenting is best-effort; a failure here must not fail the job.
	        continue-on-error: true
	        uses: actions/github-script@v7
	        with:
	          script: |
	            const fs = require('fs');
	            const path = require('path');

	            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
	            const modes = [
	              { key: 'sequential_tti', label: 'Sequential' },
	              { key: 'staggered_tti', label: 'Staggered' },
	              { key: 'burst_tti', label: 'Burst' },
	            ];
	            let body = '## Sandbox Benchmark Results\n\n';
	            let hasResults = false;

	            // Build one markdown table per mode that has a latest.json.
	            for (const mode of modes) {
	              const latestPath = path.join('results', mode.key, 'latest.json');
	              if (!fs.existsSync(latestPath)) continue;

	              const data = JSON.parse(fs.readFileSync(latestPath, 'utf-8'));
	              // Drop skipped entries; highest composite score first.
	              const results = data.results
	                .filter(r => !r.skipped)
	                .sort((a, b) => (b.compositeScore || 0) - (a.compositeScore || 0));

	              if (results.length === 0) continue;
	              hasResults = true;

	              body += `### ${mode.label}\n\n`;
	              body += '| # | Provider | Score | Median TTI | P95 | P99 | Status |\n';
	              body += '|---|----------|-------|------------|-----|-----|--------|\n';

	              results.forEach((r, i) => {
	                // A missing or null score renders as a placeholder instead of throwing.
	                const score = typeof r.compositeScore === 'number' ? r.compositeScore.toFixed(1) : '--';
	                // TTI values are divided by 1000 and shown with an "s" suffix.
	                const median = (r.summary.ttiMs.median / 1000).toFixed(2) + 's';
	                const p95 = (r.summary.ttiMs.p95 / 1000).toFixed(2) + 's';
	                const p99 = (r.summary.ttiMs.p99 / 1000).toFixed(2) + 's';
	                const ok = r.iterations.filter(it => !it.error).length;
	                const total = r.iterations.length;
	                body += `| ${i + 1} | ${r.provider} | ${score} | ${median} | ${p95} | ${p99} | ${ok}/${total} |\n`;
	              });

	              body += '\n';
	            }

	            if (!hasResults) {
	              body += '> No sandbox benchmark results were generated.\n\n';
	            }

	            body += `---\n[View full run](${runUrl}) · SVGs available as [build artifacts](${runUrl}#artifacts)`;

	            // Find and update existing comment or create new one.
	            // Paginate: a single listComments call returns only one page, so
	            // on a busy PR the earlier marker comment would be missed and a
	            // duplicate posted on every run.
	            const marker = '## Sandbox Benchmark Results';
	            const comments = await github.paginate(github.rest.issues.listComments, {
	              owner: context.repo.owner,
	              repo: context.repo.repo,
	              issue_number: context.issue.number,
	              per_page: 100,
	            });

	            const existing = comments.find(c => c.body && c.body.startsWith(marker));

	            if (existing) {
	              await github.rest.issues.updateComment({
	                owner: context.repo.owner,
	                repo: context.repo.repo,
	                comment_id: existing.id,
	                body,
	              });
	            } else {
	              await github.rest.issues.createComment({
	                owner: context.repo.owner,
	                repo: context.repo.repo,
	                issue_number: context.issue.number,
	                body,
	              });
	            }
	      - name: Commit and push
	        # Non-PR runs (schedule / manual dispatch) commit the refreshed results.
	        if: github.event_name != 'pull_request'
	        run: |
	          git config user.name "github-actions[bot]"
	          git config user.email "github-actions[bot]@users.noreply.github.com"
	          git add package.json package-lock.json results.svg *_tti.svg pricing.svg results/
	          # `git diff --cached --quiet` exits 0 when nothing is staged.
	          if git diff --cached --quiet; then
	            echo "No changes to commit"
	            exit 0
	          fi
	          git commit -m "chore: update benchmark results [skip ci]"
	          git push

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Sandbox Benchmark #18

Workflow file

Sandbox Benchmark #18

Uh oh!

Workflow file for this run