diff --git a/.github/workflows/bulk-generate.yml b/.github/workflows/bulk-generate.yml index 8a78c8c885..8283efae26 100644 --- a/.github/workflows/bulk-generate.yml +++ b/.github/workflows/bulk-generate.yml @@ -45,6 +45,11 @@ on: - haiku - sonnet - opus + change_requests: + description: "JSON object {library: one-sentence-hint} from daily-regen similarity audit. Empty = no clusters." + required: false + type: string + default: '{}' env: ALL_LIBRARIES: "matplotlib seaborn plotly bokeh altair plotnine pygal highcharts letsplot" @@ -178,13 +183,22 @@ jobs: MATRIX: ${{ needs.build-matrix.outputs.matrix }} PACE_SECONDS: ${{ inputs.pace_seconds || '120' }} MODEL: ${{ inputs.model || 'sonnet' }} + CHANGE_REQUESTS: ${{ inputs.change_requests || '{}' }} run: | set -u pace="${PACE_SECONDS}" pairs=$(echo "$MATRIX" | jq -r '.include[] | "\(.specification_id) \(.library)"') total=$(echo "$pairs" | wc -l | tr -d ' ') - echo "::notice::Dispatching $total item(s) with ${pace}s pacing between each (model=${MODEL})" + + # Validate change_requests is a JSON object early — bad JSON would + # silently produce empty hints later and we'd never know. + if ! echo "$CHANGE_REQUESTS" | jq -e 'type == "object"' >/dev/null 2>&1; then + echo "::warning::change_requests input is not a valid JSON object; ignoring (got: ${CHANGE_REQUESTS})" + CHANGE_REQUESTS='{}' + fi + flagged_count=$(echo "$CHANGE_REQUESTS" | jq 'length') + echo "::notice::Dispatching $total item(s) with ${pace}s pacing between each (model=${MODEL}, change_requests for ${flagged_count} libs)" i=0 failed=0 @@ -199,12 +213,19 @@ jobs: [ "$ISSUE" = "null" ] && ISSUE="" fi + # Per-library divergence hint (empty if not flagged). + HINT=$(echo "$CHANGE_REQUESTS" | jq -r --arg lib "$LIBRARY" '.[$lib] // ""') + # Best-effort pending label so the issue shows the in-flight lib. 
if [ -n "$ISSUE" ]; then gh issue edit "$ISSUE" --add-label "impl:${LIBRARY}:pending" 2>/dev/null || true fi - echo "::notice::[$i/$total] $(date -u +%H:%M:%SZ) dispatching impl-generate for ${SPEC_ID}/${LIBRARY} (issue: ${ISSUE:-none})" + if [ -n "$HINT" ]; then + echo "::notice::[$i/$total] $(date -u +%H:%M:%SZ) dispatching impl-generate for ${SPEC_ID}/${LIBRARY} (issue: ${ISSUE:-none}, change_request: ${HINT})" + else + echo "::notice::[$i/$total] $(date -u +%H:%M:%SZ) dispatching impl-generate for ${SPEC_ID}/${LIBRARY} (issue: ${ISSUE:-none})" + fi # Retry dispatch up to 3× with linear backoff. dispatched=0 @@ -214,12 +235,14 @@ jobs: -f specification_id="${SPEC_ID}" \ -f library="${LIBRARY}" \ -f issue_number="${ISSUE}" \ - -f model="${MODEL}" && dispatched=1 && break + -f model="${MODEL}" \ + -f change_request="${HINT}" && dispatched=1 && break else gh workflow run impl-generate.yml --repo "${{ github.repository }}" \ -f specification_id="${SPEC_ID}" \ -f library="${LIBRARY}" \ - -f model="${MODEL}" && dispatched=1 && break + -f model="${MODEL}" \ + -f change_request="${HINT}" && dispatched=1 && break fi echo "::warning::Dispatch attempt $attempt failed for ${SPEC_ID}/${LIBRARY}, retrying in 10s" sleep 10 diff --git a/.github/workflows/daily-regen.yml b/.github/workflows/daily-regen.yml index c069a5c161..904af820c0 100644 --- a/.github/workflows/daily-regen.yml +++ b/.github/workflows/daily-regen.yml @@ -61,6 +61,7 @@ jobs: runs-on: ubuntu-latest outputs: specs: ${{ steps.pick.outputs.specs }} + specs_json: ${{ steps.pick.outputs.specs_json }} count: ${{ steps.pick.outputs.count }} steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -81,6 +82,7 @@ jobs: SPEC_OVERRIDE: ${{ inputs.specification_id }} run: | python3 <<'PY' + import json import os import sys from datetime import datetime, timedelta, timezone @@ -106,6 +108,7 @@ jobs: github_output = os.environ["GITHUB_OUTPUT"] with open(github_output, "a", encoding="utf-8") as f: 
f.write(f"specs={OVERRIDE}\n") + f.write(f"specs_json={json.dumps([OVERRIDE])}\n") # override run: single-spec list, mirrors specs=/count=1 f.write(f"count=1\n") sys.exit(0) @@ -158,29 +161,137 @@ jobs: github_output = os.environ["GITHUB_OUTPUT"] with open(github_output, "a", encoding="utf-8") as f: f.write(f"specs={' '.join(picks)}\n") + f.write(f"specs_json={json.dumps(picks)}\n") f.write(f"count={len(picks)}\n") PY - dispatch: + # ============================================================================ + # Pre-flight: per spec, run autonomous spec polish + cross-library similarity + # audit, then dispatch bulk-generate with the resulting change_requests. + # + # Each matrix entry is one spec from the pick job. We do polish + audit + + # dispatch in the same job so we don't have to aggregate matrix outputs back + # into a separate dispatch job (which is awkward in GitHub Actions). + # + # The two pre-flight LLM steps are HARDCODED to Haiku regardless of + # `inputs.model` — they're narrow, cheap audits. The user-selected model is + # passed through to bulk-generate (and from there to impl-generate / review / + # repair) unchanged. + # ============================================================================ + preflight-dispatch: needs: pick - if: ${{ needs.pick.outputs.count != '0' && !inputs.dry_run }} + if: ${{ needs.pick.outputs.count != '0' }} runs-on: ubuntu-latest permissions: - actions: write + contents: write # spec polish: branch + commit + pull-requests: write # spec polish: open PR + add label + actions: write # dispatch bulk-generate + id-token: write + strategy: + matrix: + spec_id: ${{ fromJson(needs.pick.outputs.specs_json) }} + fail-fast: false + max-parallel: 1 # serialize so polish PRs and dispatches don't race + # Note on dry_run: the JOB always runs when there's a spec to process, so + # operators can exercise skip-gate + similarity-audit + collect on demand. + # Side-effect steps (polish, dispatch) are individually gated on + # `!inputs.dry_run` below.
steps: - - name: Trigger bulk-generate for each picked spec + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + fetch-depth: 0 + + - name: Skip-gate — open PRs touching this spec? + id: gate + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SPEC_ID: ${{ matrix.spec_id }} + run: | + # If any open PR touches plots/{spec}/, skip the polish step to avoid + # racing against human edits or stacking auto-polish PRs. PR search has no + # changed-file qualifier, so match on each PR's file list. Similarity audit still runs — it's read-only. + OPEN=$(gh pr list \ + --repo "${{ github.repository }}" \ + --state open --limit 100 --json files 2>/dev/null \ + | jq --arg dir "plots/${SPEC_ID}/" '[.[] | select(any(.files[]?; .path | startswith($dir)))] | length' || echo 0) + if [ "${OPEN:-0}" -gt 0 ]; then + echo "::notice::Open PR(s) touch plots/${SPEC_ID}/ — skipping spec polish" + echo "skip_polish=1" >> "$GITHUB_OUTPUT" + else + echo "skip_polish=0" >> "$GITHUB_OUTPUT" + fi + + - name: Spec polish (autonomous, opens PR — no auto-merge) + if: ${{ steps.gate.outputs.skip_polish == '0' && !inputs.dry_run }} + # Optional quality pass: a transient action failure here must not + # block the main regeneration pipeline. Skip cleanly and continue. + continue-on-error: true + timeout-minutes: 15 + uses: anthropics/claude-code-action@2cc1ac1331eac7a6a96d716dd204dd2888d0fcd2 # v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + claude_args: '--model haiku' + allowed_bots: '*' + prompt: | + Read `prompts/workflow-prompts/spec-polish-claude.md` and follow those instructions. + + Variables for this run: + - SPEC_ID: ${{ matrix.spec_id }} + + - name: Cross-library similarity audit + # Read-only audit; if it fails, fall back to empty change_requests + # rather than aborting the dispatch.
+ continue-on-error: true + timeout-minutes: 15 + uses: anthropics/claude-code-action@2cc1ac1331eac7a6a96d716dd204dd2888d0fcd2 # v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + claude_args: '--model haiku' + allowed_bots: '*' + prompt: | + Read `prompts/workflow-prompts/impl-similarity-claude.md` and follow those instructions. + + Variables for this run: + - SPEC_ID: ${{ matrix.spec_id }} + + - name: Collect change_requests + id: collect + run: | + # Default to empty object if the audit never wrote a file (e.g. the + # audit step failed or timed out; it runs with continue-on-error). + if [ -f /tmp/change-requests.json ]; then + CR=$(cat /tmp/change-requests.json) + # Validate it's a JSON object; fall back to empty otherwise. + if ! echo "$CR" | jq -e 'type == "object"' >/dev/null 2>&1; then + echo "::warning::/tmp/change-requests.json is not a valid JSON object; using {} (got: $(printf '%s' "$CR" | tr '\n' ' ' | cut -c1-300))" + CR='{}' + fi + else + CR='{}' + fi + # Compact + escape newlines so it survives as a single GitHub Actions output line. + CR_COMPACT=$(echo "$CR" | jq -c '.') + echo "change_requests=${CR_COMPACT}" >> "$GITHUB_OUTPUT" + flagged=$(echo "$CR_COMPACT" | jq 'length') + echo "::notice::change_requests for ${{ matrix.spec_id }}: ${flagged} lib(s) flagged — ${CR_COMPACT}" + + - name: Dispatch bulk-generate with change_requests + if: ${{ !inputs.dry_run }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SPECS: ${{ needs.pick.outputs.specs }} + SPEC_ID: ${{ matrix.spec_id }} MODEL: ${{ inputs.model || 'haiku' }} + CHANGE_REQUESTS: ${{ steps.collect.outputs.change_requests }} run: | - for spec in $SPECS; do - echo "::notice::Dispatching bulk-generate for $spec (all 9 libs, model=$MODEL)" - gh workflow run bulk-generate.yml \ - --repo "${{ github.repository }}" \ - -f specification_id="$spec" \ - -f library=all \ - -f model="$MODEL" - # Small pause between dispatches so GitHub's webhook processing has a moment.
- sleep 5 - done + echo "::notice::Dispatching bulk-generate for ${SPEC_ID} (all 9 libs, model=${MODEL})" + gh workflow run bulk-generate.yml \ + --repo "${{ github.repository }}" \ + -f specification_id="${SPEC_ID}" \ + -f library=all \ + -f model="${MODEL}" \ + -f change_requests="${CHANGE_REQUESTS}" + # Small pause so GitHub's webhook processing has a moment before + # the next matrix entry's dispatch (matrix is serialized via + # max-parallel: 1, so this is between specs). + sleep 5 diff --git a/.github/workflows/impl-generate.yml b/.github/workflows/impl-generate.yml index 0575eed71d..f6cc19d254 100644 --- a/.github/workflows/impl-generate.yml +++ b/.github/workflows/impl-generate.yml @@ -42,6 +42,11 @@ on: - haiku - sonnet - opus + change_request: + description: "One-sentence cross-library divergence hint from daily-regen pre-flight similarity audit (empty = none)" + required: false + type: string + default: '' # Global concurrency: max 3 concurrent implementation workflows concurrency: @@ -318,6 +323,16 @@ jobs: mkdir -p "plots/${SPEC_ID}/metadata/${LANGUAGE}" echo "::notice::Ensured implementation + metadata directories exist for language '${LANGUAGE}'" + - name: Stage change_request hint (cross-library divergence) + if: ${{ inputs.change_request != '' }} + env: + CHANGE_REQUEST: ${{ inputs.change_request }} + run: | + # Written to a file so the prompt template stays variable-free; impl-generate-claude.md + # checks for the file's existence and reads it if present. 
+ printf '%s\n' "$CHANGE_REQUEST" > /tmp/anyplot-change-request.txt + echo "::notice::Change request staged: ${CHANGE_REQUEST}" + - name: Run Claude Code to generate implementation id: claude continue-on-error: true diff --git a/prompts/workflow-prompts/impl-generate-claude.md b/prompts/workflow-prompts/impl-generate-claude.md index ff10ed9747..68243066ed 100644 --- a/prompts/workflow-prompts/impl-generate-claude.md +++ b/prompts/workflow-prompts/impl-generate-claude.md @@ -52,6 +52,36 @@ and your own idiomatic API. The shared anchors are only the spec, the library prompt, and the base style guide. See `prompts/plot-generator.md` → "Library Independence" for the full rule. +### Change Request — cross-library divergence hint + +If the file `/tmp/anyplot-change-request.txt` exists, read it. Its content is a +**hard requirement** of this regen: the cross-library similarity audit (in +`daily-regen` pre-flight) flagged this library as too close to a sibling on a +dimension the spec didn't dictate, and produced a one-sentence direction hint +to break the convergence. + +When a change_request is present: + +- **Apply it.** This is the only cross-library context permitted in this run; + treat it as binding. +- **Do NOT open sibling-library files** even to "verify" the request. The hint + contains everything you need; the Library Independence rule above still + binds. +- The "no changes for the sake of changes" exception (default regen mindset + prefers incremental improvement) does **NOT** apply when a change_request is + present — you must implement the requested change. +- **Preserve `review.strengths`** while applying the new direction. Override + "Respect the spec variant" (below) only insofar as the change_request + explicitly permits — the spec-variant rule still binds the rest of the + implementation. +- The hint is short by design (~1 sentence). It will name the sibling and the + shared signal, then suggest 2–3 alternative directions along that dimension. 
+ Pick one of the suggested alternatives, or another that fits the same + dimension; do not invent a tangential change. + +If `/tmp/anyplot-change-request.txt` does not exist, ignore this section +entirely — there is nothing to apply. + ### Feasibility Check (Static Libraries Only) If LIBRARY is **matplotlib**, **seaborn**, or **plotnine**, AND the specification mentions interactive features (hover, zoom, click, brush, animation, streaming): diff --git a/prompts/workflow-prompts/impl-similarity-claude.md b/prompts/workflow-prompts/impl-similarity-claude.md new file mode 100644 index 0000000000..d6f2f837dd --- /dev/null +++ b/prompts/workflow-prompts/impl-similarity-claude.md @@ -0,0 +1,90 @@ +# Cross-library Implementation Similarity Audit + +**YOUR TASK: detect when 2+ libraries' implementations have converged on the same data scenario / domain / visual variant beyond what the spec dictated, and emit one-sentence divergence hints for whoever should change.** + +This audit runs in the `daily-regen` pre-flight, before regeneration. The hints you produce are passed to the impl-generate jobs as `change_request` inputs. Library independence is a hard catalog rule (see `prompts/plot-generator.md` → "Library Independence"); convergence on dimensions the spec did NOT name is a defect. + +The output is a JSON file at `/tmp/change-requests.json`. Empty object means "no clusters detected." + +--- + +**Variables:** +- SPEC_ID: {SPEC_ID} + +## Step 1: Read spec context + +Read: + +1. `plots/{SPEC_ID}/specification.md` +2. `plots/{SPEC_ID}/specification.yaml` + +If the spec **explicitly names** a scenario / domain / sample data / shape, all impls sharing it is correct — that's the spec dictating, not the libs copying. **Only flag convergence on dimensions the spec is silent on.** + +## Step 2: Read all image descriptions + +For each `plots/{SPEC_ID}/metadata/python/*.yaml`: + +- Read the `review.image_description` field. 
The previous review cycle already wrote a plain-English description of the rendered chart there — this is your primary signal. +- The yaml stem is the library name (`bokeh.yaml` → `bokeh`). + +If fewer than 2 metadata files exist (or fewer than 2 have an `image_description`): write `{}` to `/tmp/change-requests.json` and stop. Print `SIMILARITY_DONE` and exit. There is nothing to compare. + +## Step 3: Cluster + +Look for groups of 2+ libraries where the descriptions reveal the same: + +- data formula / random seed / sample size +- example domain (web traffic vs stock prices vs weather is a real, distinguishing choice) +- visual variant when the spec listed multiple (e.g. plain line vs filled-area vs min/max-highlighted) +- chrome / annotation choices beyond the mandated Okabe-Ito + theme palette + +### What does NOT count as copying — these are project-mandated + +- **Okabe-Ito palette positions 1–7.** The data colors are fixed by the style guide; identical colors there are required, not copied. +- **Plot size and aspect ratio.** Fixed by `prompts/default-style-guide.md` and the per-library prompts. Identical aspect ratios across all 9 libs are correct, expected behavior — never propose "different aspect ratios" as divergence advice. +- **Theme chrome.** Page background `#FAF8F1` (light) / `#1A1A17` (dark), text inks, etc. flip identically across libs by design. + +If a candidate cluster's identical signal is *only* one of the mandated items above, it is not a cluster. Skip it. + +## Step 4: Inspect ambiguous clusters (optional) + +If the `image_description` blobs for a candidate cluster don't conclusively show copying — e.g. you can't tell whether two libraries used the same random seed, or whether their domain is genuinely the same — you MAY use the Read tool on `plots/{SPEC_ID}/implementations/python/{library}.py` for **only those libraries inside the candidate cluster** to verify. 
+ +**Do not read .py files for libraries that are not in a candidate cluster.** That wastes tokens and is not what this audit is for. + +## Step 5: Build the hint per cluster + +For each confirmed cluster: + +- **Flag exactly ONE library**, not all of them. Pick the alphabetically later library, or the one with the shorter review history. Switching just one breaks the cluster identity cleanly; flagging multiple risks them re-converging on the same new direction. + +For the flagged library, write a **one-sentence** `change_request` that: + +1. **States concretely what's identical** — name the sibling and the specific shared signal (random seed, sample size, formula structure, example domain, visual variant, annotation choice, etc.). Be specific. +2. **Adds a brief direction hint** with 2–3 alternative examples along the *same* dimension that's currently shared. If the issue is domain, list a couple of different domains. If the issue is the data formula, suggest different shapes (step function, exponential decay, sinusoidal). If the issue is a visual variant, suggest one of the other variants the spec allows. +3. **Stays at ~1 sentence.** Do NOT pitch library-specific features or APIs (the regenerator already reads `prompts/library/{LIBRARY}.md` and chooses idiomatically). Do NOT suggest different aspect ratios or plot sizes — those are project-mandated and identical across libs by design. + +Example: + +> `"Spec is vague on data; current series matches plotly exactly (same seed, same sine+noise formula). Pick a different example domain (sensor temperatures, population growth, or daily revenue) or change the data shape to a step function."` + +## Step 6: Emit the JSON + +Write the JSON object to `/tmp/change-requests.json`. **Do not print the JSON to stdout — write it to the file only.** + +Shape: + + {} # no clusters detected + {"<library>": "<one-sentence change_request>", ...} + +The keys must be library names that exist as `plots/{SPEC_ID}/metadata/python/<library>.yaml`.
The values are single-sentence English strings. + +After writing the file, print exactly `SIMILARITY_DONE` to stdout and stop. + +## What you must NOT do + +- Do not edit any files under `plots/{SPEC_ID}/`. This audit is read-only. +- Do not flag every library in a cluster — exactly one per cluster. +- Do not propose aspect-ratio or plot-size changes — those are mandated. +- Do not pitch library-specific APIs or visual features in the hint. +- Do not print `NOOP` or exit without writing `/tmp/change-requests.json` — this audit has no no-op exit. The only valid exit paths are: empty `{}` written + `SIMILARITY_DONE`, or populated JSON written + `SIMILARITY_DONE`. diff --git a/prompts/workflow-prompts/spec-polish-claude.md b/prompts/workflow-prompts/spec-polish-claude.md new file mode 100644 index 0000000000..9c41037ea3 --- /dev/null +++ b/prompts/workflow-prompts/spec-polish-claude.md @@ -0,0 +1,106 @@ +# Polish Specification + +**YOUR TASK: audit one anyplot specification and either improve it or report NOOP.** + +You are running autonomously inside the `daily-regen` pre-flight job. There is no human in the loop during this run — the user will review your output later as a pull request. + +The rule is simple: **make the spec better, or do nothing.** Never change for the sake of changing. If the spec is already clean, print `NOOP` and stop. + +--- + +**Variables:** +- SPEC_ID: {SPEC_ID} + +## Step 1: Read context + +Read these files: + +1. `plots/{SPEC_ID}/specification.md` — the spec under audit +2. `plots/{SPEC_ID}/specification.yaml` — its tags and metadata +3. `prompts/templates/specification.md` — canonical structure all specs should follow +4. `prompts/templates/specification.yaml` — canonical YAML shape +5. `prompts/spec-tags-generator.md` — canonical tag vocabulary and naming rules + +## Step 2: Audit five dimensions + +For each, decide if the spec needs work: + +1. **Wording** — descriptions concise and unambiguous? applications realistic? data fields include types/sizes?
notes actionable? +2. **Missing sections** — every section from `specification.md` template present? +3. **Tag completeness** — all 4 dimensions (`plot_type`, `data_type`, `domain`, `features`) have ≥1 value? +4. **Tag quality** — naming conventions enforced (lowercase, hyphens, no underscores)? values from `spec-tags-generator.md` vocabulary? +5. **Tag accuracy** — do tags actually match the spec's content? + +## Step 3: Decide + +- **Nothing needs changing:** print exactly `NOOP` to stdout and stop. Do NOT edit any files. Do NOT create a branch. Do NOT open a PR. +- **One or more dimensions need work:** edit `plots/{SPEC_ID}/specification.md` and/or `plots/{SPEC_ID}/specification.yaml` in place. + +## Hard rules — do not break + +- Do NOT change `id`, `issue`, `created` fields in `specification.yaml`. +- Do NOT change semantic content. Data shape, plot type, and core requirements must stay identical. You are polishing wording, structure, and tags only — not redesigning the spec. +- After any edit, set `updated:` in `specification.yaml` to the current UTC ISO 8601 timestamp (e.g. `2026-05-05T18:30:00Z`). Use `date -u +"%Y-%m-%dT%H:%M:%SZ"` to generate it. + +## Step 4: Commit and open a PR + +If — and only if — you edited something, create a feature branch, commit, push, and open a PR. + +**Do NOT push to `main` directly. Every change goes through a PR.** + +Run these commands: + + TS=$(date -u +"%Y%m%d-%H%M%S") + BRANCH="auto-polish/{SPEC_ID}/$TS" + + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git checkout -b "$BRANCH" + git add plots/{SPEC_ID}/specification.md plots/{SPEC_ID}/specification.yaml + git commit -m "chore(spec): auto-polish {SPEC_ID} + + <one-sentence summary of what changed> + + Co-Authored-By: Claude <noreply@anthropic.com>" + git push -u origin "$BRANCH" + +Then open the PR.
Use a HEREDOC for the body so multi-line markdown survives: + + gh pr create \ + --title "chore(spec): auto-polish {SPEC_ID}" \ + --label "auto-polish" \ + --body "$(cat <<'EOF' + Automated spec polish from `daily-regen` pre-flight. + + **Spec:** `{SPEC_ID}` + + ## What changed + - <one bullet per change> + + ## Why + <one or two sentences on why the change improves the spec> + + ## Hard guarantees from the prompt + - `id`, `issue`, `created` unchanged + - No semantic changes (data shape, plot type, requirements identical) + - `updated` bumped to current UTC + + Awaiting human review. The skip-gate in `daily-regen` will prevent + additional auto-polish PRs for this spec while this one is open. + EOF + )" + +Substitute the literal `{SPEC_ID}` with the actual spec id, and every `<...>` placeholder with real content, when running the commands. The block above is illustrative; the bash you actually execute should have the values already filled in. + +## Step 5: Report and stop + +- Polish + PR opened: print `POLISHED <pr-url>` and stop. +- Push or `gh pr create` failed: print `PR_CREATE_FAILED` and stop. Do NOT retry — the next daily-regen cycle (in 2h) will try again. The skip-gate prevents duplicates. +- Nothing to polish: you already printed `NOOP` in step 3 and stopped — do not get here. + +## What you must NOT do + +- Do not auto-merge the PR. Do not add the `approved` label. +- Do not push to `main` directly under any circumstances, even if the polish is "trivial". +- Do not edit any spec other than `{SPEC_ID}`. Do not touch implementations under `plots/{SPEC_ID}/implementations/` — your job is the spec only. +- Do not regenerate or re-run anything in `plots/{SPEC_ID}/metadata/`. Implementation metadata is owned by the impl-* workflows.