From bcc774ac16406a3f7d8e1550cbf4d6cc0b9c68ae Mon Sep 17 00:00:00 2001 From: Khaliq Date: Sat, 9 May 2026 11:11:51 +0200 Subject: [PATCH 1/4] fix(verify-sdk): wait for matching broker tarball before install The post-publish SDK verification on darwin-arm64 in run 25589146566 failed because the registry-wait step only blocked on the SDK + internal runtime deps, not on the platform-specific broker package. The broker-darwin-arm64 publish completed at 02:36:42 and verify ran at 02:38:01, but the tarball had not propagated to the npm CDN edge serving the macos-14 runner. npm silently skips optionalDependencies that 404, so the install "succeeded" without the broker, and the next assertion failed. Add the matching broker package to the wait list, and additionally probe its tarball URL via HEAD because `npm view` and the tarball CDN can hit different caches. Bump the retry budget by one iteration. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/verify-publish-sdk.yml | 30 +++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/workflows/verify-publish-sdk.yml b/.github/workflows/verify-publish-sdk.yml index d78ee790d..b46a3533b 100644 --- a/.github/workflows/verify-publish-sdk.yml +++ b/.github/workflows/verify-publish-sdk.yml @@ -85,12 +85,20 @@ jobs: # The SDK pins internal runtime deps to the exact same release version; # wait for those too so npm/Bun installs cannot resolve a half-published # SDK that later fails at runtime inside @agent-relay/sdk/workflows. - - name: Wait for SDK and internal deps on registry + # + # The matching broker package is included because npm silently skips + # optionalDependencies that 404 — if the broker tarball hasn't reached + # the CDN edge serving this runner yet, install would "succeed" with + # no broker installed and the next step would fail confusingly. 
+ # `npm view <pkg> version` and the actual tarball fetch can hit + # different caches, so additionally probe the tarball URL directly. + - name: Wait for SDK, internal deps, and matching broker on registry shell: bash run: | set -euo pipefail SPEC="${{ steps.spec.outputs.spec }}" VERSION="${{ steps.spec.outputs.version }}" + EXPECTED_BROKER="${{ matrix.expected_pkg }}" if [ "$VERSION" = "latest" ]; then echo "version=latest — no wait needed" exit 0 @@ -102,9 +110,10 @@ jobs: "@agent-relay/github-primitive@$VERSION" "@agent-relay/slack-primitive@$VERSION" "@agent-relay/workflow-types@$VERSION" + "${EXPECTED_BROKER}@$VERSION" ) - for i in 1 2 3 4 5 6; do + for i in 1 2 3 4 5 6 7; do missing=() for pkg in "${PACKAGES[@]}"; do if ! npm view "$pkg" version >/dev/null 2>&1; then @@ -112,8 +121,23 @@ jobs: fi done + # Even when `npm view` succeeds, the tarball may still 404 from + # the CDN edge that npm install hits. Probe the tarball URL of + # the matching broker directly. if [ "${#missing[@]}" -eq 0 ]; then - echo "registry has SDK and internal deps for $VERSION" + TARBALL_URL=$(npm view "${EXPECTED_BROKER}@$VERSION" dist.tarball 2>/dev/null || true) + if [ -z "$TARBALL_URL" ]; then + missing+=("${EXPECTED_BROKER}@$VERSION (no tarball url)") + else + STATUS=$(curl -s -o /dev/null -w "%{http_code}" -I "$TARBALL_URL" || true) + if [ "$STATUS" != "200" ]; then + missing+=("${EXPECTED_BROKER}@$VERSION (tarball HTTP $STATUS)") + fi + fi + fi + + if [ "${#missing[@]}" -eq 0 ]; then + echo "registry has SDK, internal deps, and ${EXPECTED_BROKER} for $VERSION" exit 0 fi From 7b3388937b004c04346fc2cb4d13f5a7e0239de1 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Sat, 9 May 2026 11:17:04 +0200 Subject: [PATCH 2/4] bump skills and packages --- .../writing-agent-relay-workflows/SKILL.md | 378 +++++++++--- .../writing-agent-relay-workflows/SKILL.md | 561 ++++++++++++++---- .gitignore | 1 + package-lock.json | 8 +- package.json | 8 +- prpm.lock | 14 +- 6 files changed, 761
insertions(+), 209 deletions(-) diff --git a/.agents/skills/writing-agent-relay-workflows/SKILL.md b/.agents/skills/writing-agent-relay-workflows/SKILL.md index 7787cfc67..ce25a5e88 100644 --- a/.agents/skills/writing-agent-relay-workflows/SKILL.md +++ b/.agents/skills/writing-agent-relay-workflows/SKILL.md @@ -1,6 +1,6 @@ --- name: writing-agent-relay-workflows -description: Use when building multi-agent workflows with the relay broker-sdk - covers the WorkflowBuilder API, DAG step dependencies, agent definitions, step output chaining via {{steps.X.output}}, verification gates, evidence-based completion, owner decisions, dedicated channels, dynamic channel management (subscribe/unsubscribe/mute/unmute), swarm patterns, error handling, event listeners, step sizing rules, authoring best practices, and the lead+workers team pattern for complex steps +description: Use when building multi-agent workflows with the relay broker-sdk - covers conversation-shape vs pipeline-shape coordination, the WorkflowBuilder API, DAG step dependencies, agent definitions, step output chaining via {{steps.X.output}}, verification gates, evidence-based completion, owner decisions, dedicated channels, dynamic channel management (subscribe/unsubscribe/mute/unmute), swarm patterns, chat-native coordination recipes (Q/A, broadcast-ack, peer review, standup, hand-off), error handling, event listeners, step sizing rules, authoring best practices, and the lead+workers team pattern for complex steps --- ### Overview @@ -19,9 +19,24 @@ The relay broker-sdk workflow system orchestrates multiple AI agents (Claude, Co - Needing verification gates, retries, or step output chaining - Dynamic channel management: agents joining/leaving/muting channels mid-workflow -### Quick Reference +### Choose Your Coordination Style — Conversation vs Pipeline -#### > **Note:** this Quick Reference assumes an **ESM** workflow file (the host `package.json` has `"type": "module"`). 
For CJS repos, see rule #1 in **Critical TypeScript rules** below — convert `import { workflow } from '@agent-relay/sdk/workflows'` to `const { workflow } = require('@agent-relay/sdk/workflows')` and wrap the workflow in `async function main() { ... } main().catch(console.error)` since CJS does not support top-level `await`. **Always check `package.json` before copy-pasting the snippet.** +Before writing the workflow, decide *how the agents will coordinate*. The relay primitive supports two very different shapes, and picking the wrong one wastes the most valuable thing the SDK gives you. + +| Shape | What it is | Use when | +|---|---|---| +| **Conversation** (chat-native) | Interactive agents share a channel; messages, `@-mentions`, and ambient awareness drive coordination. Lead and workers spawn in parallel and self-organize. The relay is the coordination layer, not just transport. | Multi-file work, peer review loops, cross-agent feedback, dynamic re-planning, multi-PR coordination, anything with a human-in-the-loop escape, swarms where workers pick up each other's output. | +| **Pipeline** (one-shot DAG) | Each step runs as a one-shot subprocess (`claude -p`, `codex exec`); steps hand off via `{{steps.X.output}}` text injection. No agents are alive at the same time; no chat happens. | Linear, well-specified transformations; deterministic data passing; no review loop expected; the work could be expressed as a `bash \| bash \| bash` pipe. | + +**Default to Conversation for any non-trivial work.** Pipeline DAGs are simpler to reason about but they do not exercise the relay primitive — they are a Unix pipe with extra steps. If you would happily write the same task as a single shell pipeline, pipeline-shape is fine. Otherwise, you almost certainly want a Conversation shape. + +The two shapes can mix within one workflow: pipeline-style deterministic preflight → conversation in the middle → pipeline-style commit-and-PR at the end.
See **Quick Reference (Conversation shape)** below and **[Common Patterns → Interactive Team](#interactive-team-lead--workers-on-shared-channel)** for the canonical recipe. + +> **A blunt rule of thumb:** if your workflow only uses `agent` steps with `preset: 'worker'` chained by `{{steps.X.output}}`, you are not using the relay — you are using `claude -p | codex exec`. That may still be the right answer; just make it a deliberate choice. + +### Quick Reference (Pipeline shape) + +#### > Use this when steps are linear, well-specified, and need no agent-to-agent feedback. For anything with iteration, review, or coordination, jump to **Quick Reference (Conversation shape)** below. ```typescript import { workflow } from '@agent-relay/sdk/workflows'; @@ -52,9 +67,87 @@ const result = await workflow('my-workflow') .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) .run({ cwd: process.cwd() }); -console.log('Result:', result.status); + console.log('Result:', result.status); +``` + + +### Quick Reference (Conversation shape) + +#### > Use this for any non-trivial work — peer review, multi-file edits, cross-agent feedback, dynamic re-planning. Lead and workers spawn **in parallel** on a shared channel and self-organize via messages. The relay primitive does the coordinating; verification gates downstream of the lead close the workflow. + +```typescript +import { workflow } from '@agent-relay/sdk/workflows'; +import { ClaudeModels, CodexModels } from '@agent-relay/config'; + +const result = await workflow('my-workflow') + .description('Multi-file change with peer review') + .pattern('dag') + .channel('wf-my-feature') // dedicated channel — agents share it + .maxConcurrency(4) + .timeout(3_600_000) + + // Interactive agents — no preset, they live on the channel + .agent('lead', { + cli: 'claude', + model: ClaudeModels.OPUS, + role: 'Architect + reviewer.
Plans, assigns, reviews, posts feedback.', + retries: 1, + }) + .agent('impl-a', { + cli: 'codex', + model: CodexModels.GPT_5_4, + role: 'Implementer. Listens on channel for assignments and feedback.', + retries: 2, + }) + .agent('impl-b', { + cli: 'codex', + model: CodexModels.GPT_5_4, + role: 'Implementer. Listens on channel for assignments and feedback.', + retries: 2, + }) + + // Deterministic context — pre-reads files once, posts to the channel for everyone + .step('context', { + type: 'deterministic', + command: 'git ls-files src/', + captureOutput: true, + }) + + // Lead and workers all depend on `context` — they start CONCURRENTLY. + // They coordinate over #wf-my-feature, not via {{steps.X.output}}. + .step('lead-coordinate', { + agent: 'lead', + dependsOn: ['context'], + task: `You are the lead on #wf-my-feature. Workers: impl-a, impl-b. +Post the plan. Assign files. Review their PRs/diffs. Post feedback in-channel. +Workers iterate based on your feedback. Exit when both files pass review.`, + }) + .step('impl-a-work', { + agent: 'impl-a', + dependsOn: ['context'], // SAME dep as lead → starts in parallel, no deadlock + task: `You are impl-a on #wf-my-feature. Wait for the lead's plan. +Implement your assigned file. Post a completion message. Address feedback.`, + }) + .step('impl-b-work', { + agent: 'impl-b', + dependsOn: ['context'], // SAME dep as lead + task: `You are impl-b on #wf-my-feature. Wait for the lead's plan. +Implement your assigned file. Post a completion message. Address feedback.`, + }) + + // Downstream gates on the lead — lead exits when satisfied. 
+ .step('verify', { + type: 'deterministic', + dependsOn: ['lead-coordinate'], + command: 'npm run typecheck && npm test', + failOnError: true, + }) + + .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) + .run({ cwd: process.cwd() }); ``` + ### ⚡ Parallelism — Design for Speed #### Cross-Workflow Parallelism: Wave Planning @@ -87,9 +180,9 @@ git add -A && git commit -m "Wave 2" ```typescript workflow('48-comparison-mode') - .packages(['web', 'core']) // monorepo packages touched - .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize - .requiresBefore(['46-admin-dashboard']); // explicit ordering constraint + .packages(['web', 'core']) // monorepo packages touched + .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize + .requiresBefore(['46-admin-dashboard']) // explicit ordering constraint ``` #### Within-Workflow Parallelism @@ -105,6 +198,7 @@ workflow('48-comparison-mode') .step('verify-all', { agent: 'reviewer', dependsOn: ['fix-component-a', 'fix-component-b'] }) ``` + ### Failure Prevention #### 1. 
Do not use raw top-level `await` @@ -193,6 +287,7 @@ export function applyCloudRepoSetup(wf: T, opts: CloudRepoSetupOptions): T { } ``` + ### End-to-End Bug Fix Workflows - **Capture the original failure** @@ -214,6 +309,10 @@ export function applyCloudRepoSetup(wf: T, opts: CloudRepoSetupOptions): T { - Show that the original failure no longer occurs - **Record residual risks** - Call out what was not covered +- **Ship the result as a PR** +- Open the pull request from the workflow itself with `createGitHubStep` +- See [Shipping the Result — Open a PR via `createGitHubStep`](#shipping-the-result--open-a-pr-via-creategithubstep) below +- A workflow that fixes a bug and stops short of the PR has only done half the loop - disposable sandbox / cloud workspace - Docker / containerized environment - fresh local shell with isolated paths @@ -222,6 +321,64 @@ export function applyCloudRepoSetup(wf: T, opts: CloudRepoSetupOptions): T { - chooses the best swarm pattern - then authors the final fix/validation workflow +### Shipping the Result — Open a PR via `createGitHubStep` + +#### The minimal "open a PR" recipe + +```typescript +import { workflow } from '@agent-relay/sdk/workflows'; +import { createGitHubStep } from '@agent-relay/sdk/github'; + +const REPO = 'AgentWorkforce/cloud'; +const BRANCH = `agent-relay/run-${Date.now()}`; + +await workflow('feature-x') + // ... your real steps that produce code changes ... + .step('write-marker', { + type: 'deterministic', + command: `echo "fix landed at $(date -u)" >> CHANGELOG.md`, + }) + + // Branch off main on the remote. + .step('create-branch', createGitHubStep({ + dependsOn: ['write-marker'], + action: 'createBranch', + repo: REPO, + params: { branch: BRANCH, source: 'main' }, + })) + + // Commit the change to the branch via Contents API. 
+ .step('commit-change', createGitHubStep({ + dependsOn: ['create-branch'], + action: 'createFile', + repo: REPO, + params: { + path: 'CHANGELOG.md', + branch: BRANCH, + content: '', + message: 'chore: changelog entry', + }, + })) + + // Open the PR. This is the load-bearing step. + .step('open-pr', createGitHubStep({ + dependsOn: ['commit-change'], + action: 'createPR', + repo: REPO, + params: { + title: 'feat: ship feature X', + head: BRANCH, + base: 'main', + body: '## Summary\n\n- ...\n\n## Test plan\n\n- [x] ...', + draft: false, + }, + output: { mode: 'data', format: 'json', path: 'html_url' }, + })) + + .run({ cwd: process.cwd() }); +``` + + ### Key Concepts #### Verification Gates @@ -259,20 +416,13 @@ relay.unmute({ agent: 'security-auditor', channel: 'review-pr-123' }); #### Events ```typescript -relay.onChannelSubscribed = (agent, channels) => { - /* ... */ -}; -relay.onChannelUnsubscribed = (agent, channels) => { - /* ... */ -}; -relay.onChannelMuted = (agent, channel) => { - /* ... */ -}; -relay.onChannelUnmuted = (agent, channel) => { - /* ... */ -}; +relay.onChannelSubscribed = (agent, channels) => { /* ... */ }; +relay.onChannelUnsubscribed = (agent, channels) => { /* ... */ }; +relay.onChannelMuted = (agent, channel) => { /* ... */ }; +relay.onChannelUnmuted = (agent, channel) => { /* ... */ }; ``` + ### Agent Definition #### ```typescript @@ -298,6 +448,7 @@ import { ClaudeModels, CodexModels, GeminiModels } from '@agent-relay/config'; .agent('coder', { cli: 'codex', model: CodexModels.GPT_5_4 }) // not 'gpt-5.4' ``` + ### Step Definition #### Agent Steps @@ -324,6 +475,7 @@ import { ClaudeModels, CodexModels, GeminiModels } from '@agent-relay/config'; }) ``` + ### Common Patterns #### Interactive Team (lead + workers on shared channel) @@ -376,6 +528,78 @@ Edit files as assigned. Report completion. Fix issues from feedback.`, .step('verify', { type: 'deterministic', dependsOn: ['lead-coordinate'], ... }) ``` +#### 1. 
Question / Answer (blocking ask) + +```typescript +.step('integrate', { + agent: 'integrator', + dependsOn: ['context'], + task: `You are the integrator on #wf-feature. +Before writing code, post a direct question to @schema-owner asking which +table owns the new field. Do NOT proceed until @schema-owner replies in +channel. If no reply arrives in 5 minutes, @-mention the lead.`, +}) +``` + +#### 2. Broadcast / Ack + +```typescript +.step('lead-coordinate', { + agent: 'lead', + dependsOn: ['context'], + task: `Post the plan to #wf-feature, then @impl-a @impl-b @impl-c. +Wait for each to reply with "ACK " before issuing assignments. +If any worker hasn't acked in 3 minutes, re-post and ping again. +Only after all three have acked, post per-worker assignments.`, +}) +``` + +#### 3. Peer Review Handoff + +```typescript +.step('impl-a-work', { + agent: 'impl-a', + dependsOn: ['context'], + task: `Implement src/foo.ts per the lead's assignment. +When done, post to #wf-feature: "@reviewer ready: src/foo.ts" — include the +commit SHA. Then wait for @reviewer's verdict in channel. +- If "APPROVED", you're done. +- If "CHANGES_REQUESTED ", apply the notes and re-post. +- If no verdict in 5 min, @-mention the lead.`, +}) +``` + +#### 4. Standup / Status Probe + +```typescript +.step('lead-coordinate', { + agent: 'lead', + task: `... coordinate the team ... + +Every 10 minutes, post a status probe: "@impl-a @impl-b status?" +Each worker should reply with one of: + - "RUNNING " (still working) + - "BLOCKED " (@-mention the lead with the blocker) + - "DONE " (ready for review) + +If a worker is silent for two probes in a row, mark them stalled and +reassign their work to a peer.`, +}) +``` + +#### 5. Hand-Off with Context + +```typescript +.step('impl-a-work', { + agent: 'impl-a', + task: `... finish your part ... + +When done, post a handoff to #wf-feature targeting the next worker: +"@impl-b HANDOFF: src/foo.ts ready. Touched: . Open question: . +Tests: . 
Commit: ."`, +}) +``` + #### Pipeline (sequential handoff) ```typescript @@ -393,6 +617,7 @@ Edit files as assigned. Report completion. Fix issues from feedback.`, .onError('retry', { maxRetries: 3, retryDelayMs: 5000 }) ``` + ### Multi-File Edit Pattern #### When a workflow needs to modify multiple existing files, **use one agent step per file** with a deterministic verify gate after each. Agents reliably edit 1-2 files per step but fail on 4+. @@ -452,6 +677,7 @@ steps: failOnError: true ``` + ### File Materialization: Verify Before Proceeding #### After any step that creates files, add a deterministic `file_exists` check before proceeding. Non-interactive agents may exit 0 without writing anything (wrong cwd, stdout instead of disk). @@ -470,6 +696,7 @@ steps: failOnError: true ``` + ### DAG Deadlock Anti-Pattern #### ```yaml @@ -494,6 +721,7 @@ steps: dependsOn: [work-a, coordinate] ``` + ### Step Sizing #### **One agent, one deliverable.** A step's task prompt should be 10-20 lines max. @@ -510,7 +738,7 @@ steps: - name: track-worker-1-impl agent: track-worker-1 - dependsOn: [prior-step] # same dep as lead — starts concurrently + dependsOn: [prior-step] # same dep as lead — starts concurrently task: | Join #my-track. track-lead will post your assignment. Implement the file as directed. @@ -518,76 +746,79 @@ steps: type: exit_code - name: next-step - dependsOn: [track-lead-coord] # downstream depends on lead, not workers + dependsOn: [track-lead-coord] # downstream depends on lead, not workers ``` + ### Supervisor Pattern When you set `.pattern('supervisor')` (or `hub-spoke`, `fan-out`), the runner auto-assigns a supervisor agent as owner for worker steps. The supervisor monitors progress, nudges idle workers, and issues `OWNER_DECISION`. **Auto-hardening only activates for hub patterns** — not `pipeline` or `dag`. 
-| Use case | Pattern | Why | -| ------------------------- | ------------------- | -------------------------------- | -| Sequential, no monitoring | `pipeline` | Simple, no overhead | -| Workers need oversight | `supervisor` | Auto-owner monitors | -| Local/small models | `supervisor` | Supervisor catches stuck workers | -| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | +| Use case | Pattern | Why | +|----------|---------|-----| +| Sequential, no monitoring | `pipeline` | Simple, no overhead | +| Workers need oversight | `supervisor` | Auto-owner monitors | +| Local/small models | `supervisor` | Supervisor catches stuck workers | +| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | ### Concurrency **Cap `maxConcurrency` at 4-6.** Spawning 10+ agents simultaneously causes broker timeouts. | Parallel agents | `maxConcurrency` | -| --------------- | ---------------- | -| 2-4 | 4 (default safe) | -| 5-10 | 5 | -| 10+ | 6-8 max | +|-----------------|-------------------| +| 2-4 | 4 (default safe) | +| 5-10 | 5 | +| 10+ | 6-8 max | ### Common Mistakes -| Mistake | Fix | -| --------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | -| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | -| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | -| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | -| Adding exit instructions to tasks | Runner handles self-termination automatically | -| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | -| Using `general` 
channel | Set `.channel('wf-name')` for isolation | -| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | -| Requiring exact sentinel as only completion gate | Use `exit_code` or `file_exists` verification | -| Writing 100-line task prompts | Split into lead + workers on a channel | -| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | -| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | -| Workers depending on lead step (deadlock) | Both depend on shared context step | -| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | -| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. Use `.pattern('supervisor')` | -| Workers without `preset: 'worker'` in one-shot DAG lead+worker flows | Add preset for clean stdout when chaining `{{steps.X.output}}` (not needed for interactive team patterns) | -| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | -| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | -| Using `require()` in ESM projects | Check `package.json` for `"type": "module"` — use `import` if ESM | -| Wrapping in `async function main()` in ESM | ESM supports top-level `await` — no wrapper needed | -| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | -| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` — the file must call `.run()`, not just export config | -| Relative import `'../workflows/builder.js'` | Use `import { workflow } from '@agent-relay/sdk/workflows'` | -| Hardcoded model strings (`model: 'opus'`) | Use constants: `import { ClaudeModels } from '@agent-relay/config'` → `model: ClaudeModels.OPUS` | -| Thinking `agent-relay run` inspects exports | It executes the file as a subprocess. 
Only `.run()` invocations trigger steps | -| `pattern('single')` on cloud runner | Not supported — use `dag` | -| `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag` | -| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | -| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | -| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | -| Relying on agents to `git commit` | Agents emit markers without running git. Use deterministic commit step | -| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | -| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | -| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | -| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | -| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | -| Using `preset: 'worker'` for Codex in _interactive team_ patterns when coordination is needed | Codex interactive mode works fine with PTY channel injection. 
Drop the preset for interactive team patterns (keep it for one-shot DAG workers where clean stdout matters) | -| Separate reviewer agent from lead in interactive team | Merge lead + reviewer into one interactive Claude agent — reviews between rounds, fewer agents | -| Not printing PR URL after `gh pr create` | Add a final deterministic step: `echo "PR: $(cat pr-url.txt)"` or capture in the `gh pr create` command | -| Workflow ending without worktree + PR for cross-repo changes | Add `setup-worktree` at start and `push-and-pr` + `cleanup-worktree` at end | +| Mistake | Fix | +|---------|-----| +| Treating relay as transport, not as a coordination layer (every step is `preset: 'worker'`, every handoff is `{{steps.X.output}}`) | Default to **Conversation shape** for non-trivial work — interactive agents on a shared channel. Pipeline-shape is only correct when the work could be expressed as a `bash \| bash \| bash` pipe. | +| Interactive agents on a channel whose task strings don't tell them to talk to each other | Pick a [Chat-Native Coordination Recipe](#chat-native-coordination-recipes) (Q/A, Broadcast/Ack, Peer Review, Standup, Hand-Off) and bake it into the task prompt — otherwise you're paying for a chat substrate you're not using | +| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | +| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | +| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | +| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | +| Adding exit instructions to tasks | Runner handles self-termination automatically | +| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | +| Using `general` channel | Set `.channel('wf-name')` for isolation | +| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | +| Requiring exact
sentinel as only completion gate | Use `exit_code` or `file_exists` verification | +| Writing 100-line task prompts | Split into lead + workers on a channel | +| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | +| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | +| Workers depending on lead step (deadlock) | Both depend on shared context step | +| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | +| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. Use `.pattern('supervisor')` | +| Workers without `preset: 'worker'` in one-shot DAG lead+worker flows | Add preset for clean stdout when chaining `{{steps.X.output}}` (not needed for interactive team patterns) | +| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | +| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | +| Using `require()` in ESM projects | Check `package.json` for `"type": "module"` — use `import` if ESM | +| Wrapping in `async function main()` in ESM | ESM supports top-level `await` — no wrapper needed | +| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | +| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` — the file must call `.run()`, not just export config | +| Relative import `'../workflows/builder.js'` | Use `import { workflow } from '@agent-relay/sdk/workflows'` | +| Hardcoded model strings (`model: 'opus'`) | Use constants: `import { ClaudeModels } from '@agent-relay/config'` → `model: ClaudeModels.OPUS` | +| Thinking `agent-relay run` inspects exports | It executes the file as a subprocess. Only `.run()` invocations trigger steps | +| `pattern('single')` on cloud runner | Not supported — use `dag` | +| `pattern('supervisor')` with one agent | Same agent is owner + specialist. 
Use `dag` | +| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | +| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | +| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | +| Relying on agents to `git commit` | Agents emit markers without running git. Use deterministic commit step | +| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | +| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | +| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | +| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | +| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | +| Using `preset: 'worker'` for Codex in *interactive team* patterns when coordination is needed | Codex interactive mode works fine with PTY channel injection. 
Drop the preset for interactive team patterns (keep it for one-shot DAG workers where clean stdout matters) | +| Separate reviewer agent from lead in interactive team | Merge lead + reviewer into one interactive Claude agent — reviews between rounds, fewer agents | +| Not printing PR URL after `createGitHubStep({ action: 'createPR' })` | Capture `html_url` with `output: { mode: 'data', format: 'json', path: 'html_url' }` and echo or write it in a final deterministic step | +| Workflow ending without worktree + PR for cross-repo changes | Add `setup-worktree` at start and `push-and-pr` + `cleanup-worktree` at end | ### YAML Alternative @@ -620,6 +851,7 @@ workflows: type: exit_code ``` + ### Available Swarm Patterns `dag` (default), `fan-out`, `pipeline`, `hub-spoke`, `consensus`, `mesh`, `handoff`, `cascade`, `debate`, `hierarchical`, `map-reduce`, `scatter-gather`, `supervisor`, `reflection`, `red-team`, `verifier`, `auction`, `escalation`, `saga`, `circuit-breaker`, `blackboard`, `swarm` diff --git a/.claude/skills/writing-agent-relay-workflows/SKILL.md b/.claude/skills/writing-agent-relay-workflows/SKILL.md index 73762176e..ab5f87205 100644 --- a/.claude/skills/writing-agent-relay-workflows/SKILL.md +++ b/.claude/skills/writing-agent-relay-workflows/SKILL.md @@ -1,6 +1,6 @@ --- name: writing-agent-relay-workflows -description: Use when building multi-agent workflows with the relay broker-sdk - covers the WorkflowBuilder API, DAG step dependencies, agent definitions, step output chaining via {{steps.X.output}}, verification gates, evidence-based completion, owner decisions, dedicated channels, dynamic channel management (subscribe/unsubscribe/mute/unmute), swarm patterns, error handling, event listeners, step sizing rules, authoring best practices, and the lead+workers team pattern for complex steps +description: Use when building multi-agent workflows with the relay broker-sdk - covers conversation-shape vs pipeline-shape coordination, the WorkflowBuilder API, 
DAG step dependencies, agent definitions, step output chaining via {{steps.X.output}}, verification gates, evidence-based completion, owner decisions, dedicated channels, dynamic channel management (subscribe/unsubscribe/mute/unmute), swarm patterns, chat-native coordination recipes (Q/A, broadcast-ack, peer review, standup, hand-off), error handling, event listeners, step sizing rules, authoring best practices, and the lead+workers team pattern for complex steps --- # Writing Agent Relay Workflows @@ -21,8 +21,25 @@ The relay broker-sdk workflow system orchestrates multiple AI agents (Claude, Co - Needing verification gates, retries, or step output chaining - Dynamic channel management: agents joining/leaving/muting channels mid-workflow -## Quick Reference +## Choose Your Coordination Style — Conversation vs Pipeline +Before writing the workflow, decide *how the agents will coordinate*. The relay primitive supports two very different shapes, and picking the wrong one wastes the most valuable thing the SDK gives you. + +| Shape | What it is | Use when | +|---|---|---| +| **Conversation** (chat-native) | Interactive agents share a channel; messages, `@-mentions`, and ambient awareness drive coordination. Lead and workers spawn in parallel and self-organize. The relay is the coordination layer, not just transport. | Multi-file work, peer review loops, cross-agent feedback, dynamic re-planning, multi-PR coordination, anything with a human-in-the-loop escape, swarms where workers pick up each other's output. | +| **Pipeline** (one-shot DAG) | Each step runs as a one-shot subprocess (`claude -p`, `codex exec`); steps hand off via `{{steps.X.output}}` text injection. No agents are alive at the same time; no chat happens. | Linear, well-specified transformations; deterministic data passing; no review loop expected; the work could be expressed as a `bash \| bash \| bash` pipe.
| + +**Default to Conversation for any non-trivial work.** Pipeline DAGs are simpler to reason about but they do not exercise the relay primitive — they are a Unix pipe with extra steps. If you would happily write the same task as a single shell pipeline, pipeline-shape is fine. Otherwise, you almost certainly want a Conversation shape. + +The two shapes can mix within one workflow: pipeline-style deterministic preflight → conversation in the middle → pipeline-style commit-and-PR at the end. See **Quick Reference (Conversation)** below and **[Common Patterns → Interactive Team](#interactive-team-lead--workers-on-shared-channel)** for the canonical recipe. + +> **A blunt rule of thumb:** if your workflow only uses `agent` steps with `preset: 'worker'` chained by `{{steps.X.output}}`, you are not using the relay — you are using `claude -p | codex exec`. That may still be the right answer; just make it a deliberate choice. + +## Quick Reference (Pipeline shape) + +> Use this when steps are linear, well-specified, and need no agent-to-agent feedback. For anything with iteration, review, or coordination, jump to **Quick Reference (Conversation shape)** below. +> > **Note:** this Quick Reference assumes an **ESM** workflow file (the host `package.json` has `"type": "module"`). For CJS repos, see rule #1 in **Critical TypeScript rules** below — convert `import { workflow } from '@agent-relay/sdk/workflows'` to `const { workflow } = require('@agent-relay/sdk/workflows')` and wrap the workflow in `async function main() { ... } main().catch(console.error)` since CJS does not support top-level `await`. 
**Always check `package.json` before copy-pasting the snippet.** ```typescript @@ -54,11 +71,103 @@ const result = await workflow('my-workflow') .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) .run({ cwd: process.cwd() }); -console.log('Result:', result.status); + console.log('Result:', result.status); ``` -**Critical TypeScript rules:** +## Quick Reference (Conversation shape) + +> Use this for any non-trivial work — peer review, multi-file edits, cross-agent feedback, dynamic re-planning. Lead and workers spawn **in parallel** on a shared channel and self-organize via messages. The relay primitive does the coordinating; verification gates downstream of the lead close the workflow. + +```typescript +import { workflow } from '@agent-relay/sdk/workflows'; +import { ClaudeModels, CodexModels } from '@agent-relay/config'; + +const result = await workflow('my-workflow') + .description('Multi-file change with peer review') + .pattern('dag') + .channel('wf-my-feature') // dedicated channel — agents share it + .maxConcurrency(4) + .timeout(3_600_000) + + // Interactive agents — no preset, they live on the channel + .agent('lead', { + cli: 'claude', + model: ClaudeModels.OPUS, + role: 'Architect + reviewer. Plans, assigns, reviews, posts feedback.', + retries: 1, + }) + .agent('impl-a', { + cli: 'codex', + model: CodexModels.GPT_5_4, + role: 'Implementer. Listens on channel for assignments and feedback.', + retries: 2, + }) + .agent('impl-b', { + cli: 'codex', + model: CodexModels.GPT_5_4, + role: 'Implementer. Listens on channel for assignments and feedback.', + retries: 2, + }) + + // Deterministic context — pre-reads files once, posts to the channel for everyone + .step('context', { + type: 'deterministic', + command: 'git ls-files src/', + captureOutput: true, + }) + + // Lead and workers all depend on `context` — they start CONCURRENTLY. + // They coordinate over #wf-my-feature, not via {{steps.X.output}}. 
+ .step('lead-coordinate', { + agent: 'lead', + dependsOn: ['context'], + task: `You are the lead on #wf-my-feature. Workers: impl-a, impl-b. +Post the plan. Assign files. Review their PRs/diffs. Post feedback in-channel. +Workers iterate based on your feedback. Exit when both files pass review.`, + }) + .step('impl-a-work', { + agent: 'impl-a', + dependsOn: ['context'], // SAME dep as lead → starts in parallel, no deadlock + task: `You are impl-a on #wf-my-feature. Wait for the lead's plan. +Implement your assigned file. Post a completion message. Address feedback.`, + }) + .step('impl-b-work', { + agent: 'impl-b', + dependsOn: ['context'], // SAME dep as lead + task: `You are impl-b on #wf-my-feature. Wait for the lead's plan. +Implement your assigned file. Post a completion message. Address feedback.`, + }) + + // Downstream gates on the lead — lead exits when satisfied. + .step('verify', { + type: 'deterministic', + dependsOn: ['lead-coordinate'], + command: 'npm run typecheck && npm test', + failOnError: true, + }) + + .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) + .run({ cwd: process.cwd() }); +``` +**What this exercises that pipeline-shape does not:** + +- **Ambient awareness** — workers see each other's completion messages and start dependent work without the lead relaying. +- **Lead-as-reviewer** — the lead reads actual files between rounds and posts diff-aware feedback in chat. One agent does coordination + review; no separate reviewer step. +- **Iterative correction** — when the lead pings *"impl-a, the type on line 42 is wrong"*, impl-a fixes and re-posts. No new step, no re-spawn, no `{{output}}` chaining. + +**Critical workflow rules for this shape:** + +1. Lead and workers MUST share the same `dependsOn` (e.g., both depend on `context`). If a worker depends on the lead, you have a deadlock — the lead is waiting for worker output, the worker is waiting for the lead step to "complete." +2. 
Drop `preset: 'worker'` on the implementer agents — interactive mode is what lets them receive channel messages via PTY injection. +3. Downstream gates depend on the **lead step**, not the workers. The lead exits when it's satisfied; that's the workflow's signal of completion. +4. Use a dedicated `.channel('wf-...')` so the team is isolated from other workflows and the global `general` channel. + +See [Common Patterns → Interactive Team](#interactive-team-lead--workers-on-shared-channel) for production notes from real runs and decision criteria for picking this shape over one-shot DAG. + +--- + +**Critical TypeScript rules:** 1. Check the project's `package.json` for `"type": "module"` — if ESM, use `import` and top-level `await`. If CJS, use `require()` and wrap in `async function main()`. 2. `agent-relay run ` executes the file as a standalone subprocess — it does NOT inspect exports. The file MUST call `.run()`. 3. Use `.run({ cwd: process.cwd() })` — `createWorkflowRenderer` does not exist @@ -100,13 +209,13 @@ git add -A && git commit -m "Wave 2" Two workflows can run in parallel if they don't have write-write or write-read file conflicts: -| Touch Zone | Can Parallelize? | -| --------------------------------------------- | ------------------------------------------ | -| Different `packages/*/src/` dirs | ✅ Yes | -| Different `app/` routes | ✅ Yes | -| Same package, different subdirs | ⚠️ Usually yes | +| Touch Zone | Can Parallelize? 
| +|---|---| +| Different `packages/*/src/` dirs | ✅ Yes | +| Different `app/` routes | ✅ Yes | +| Same package, different subdirs | ⚠️ Usually yes | | Same files (shared config, root package.json) | ❌ No — sequential or same wave with merge | -| Explicit dependency | ❌ No — ordered waves | +| Explicit dependency | ❌ No — ordered waves | ### Declare File Scope for Planning @@ -114,9 +223,9 @@ Help wave planners (human or automated) understand what each workflow touches: ```typescript workflow('48-comparison-mode') - .packages(['web', 'core']) // monorepo packages touched - .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize - .requiresBefore(['46-admin-dashboard']); // explicit ordering constraint + .packages(['web', 'core']) // monorepo packages touched + .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize + .requiresBefore(['46-admin-dashboard']) // explicit ordering constraint ``` ### Within-Workflow Parallelism @@ -137,13 +246,11 @@ Use shared `dependsOn` to fan out independent sub-tasks: ### Impact Real-world example (Relayed — 60 workflows): - - **Sequential**: ~30 min × 60 = **30 hours** - **Parallel waves (4-6 per wave)**: ~12 waves × 35 min = **~7 hours** (4x faster) - **Aggressive parallelism (8-way)**: **~4 hours** (7.5x faster) --- - ## Failure Prevention These workflow files are easy to break in ways that only appear mid-run. Follow these rules when authoring or editing workflow `.ts` files. @@ -173,9 +280,28 @@ runWorkflow().catch((error) => { Do not end workflow files with bare top-level `await workflow(...).run(...)`. +### 1b. Make commit and PR boundaries explicit + +Workflows do **not** get a PR for free just because they pass validation. If the intended deliverable is a branch, commit, push, or GitHub PR, the workflow itself must own that boundary explicitly and document the expected file scope. + +Use this pattern only when the workflow is supposed to own repository delivery: + +1. 
Preflight the git state and fail on unexpected staged changes. +2. Create or verify the intended branch. +3. Run implementation, review, soft validation, fix, and hard validation gates. +4. Stage only the declared target files and review/signoff artifacts. +5. Commit with a deterministic message. +6. Push the branch. +7. Use `createGitHubStep({ action: 'createPR', ... })` from `@agent-relay/sdk/github` to open the PR. +8. Verify the PR URL/state deterministically and write it into the final signoff artifact. + +Do not hide commit/PR work in agent prose. Model it as deterministic steps whenever possible. For PR creation, issue updates, file reads, or any GitHub operation, prefer `createGitHubStep` over shelling out to `gh`; it is bundled with `@agent-relay/sdk`. The downstream hard gate must still verify the PR exists before signoff. + +If commit or PR creation is intentionally outside the workflow, say that directly in the workflow description and signoff so the operator knows to do it after completion. + ### 2. Avoid raw fenced code blocks inside workflow task template literals -Raw triple-backtick code fences inside large inline `task: \`...\``template strings are fragile and can break outer TypeScript parsing, especially when they contain language tags like`swift`or`diff`. +Raw triple-backtick code fences inside large inline `task: \`...\`` template strings are fragile and can break outer TypeScript parsing, especially when they contain language tags like `swift` or `diff`. Preferred options, in order: @@ -224,7 +350,7 @@ The battle-tested template: - **Use `grep -vE "^(...)$"` for full-line match.** Substring matches bleed across unrelated files (e.g., `setup.ts` would also match `packages/core/src/bootstrap/setup.ts`). - **Append `|| true` to the grep.** Without it, an empty result triggers `set -e` and the whole preflight fails before the `if` can even run. 
- **Check the staging area separately.** A dirty index is different from a dirty working tree and both must be clean (modulo allow-list). -- **Check `gh auth status` early** if any downstream step uses `gh pr create` or similar. Failing on auth at the end of a long DAG is painful. +- **Check `gh auth status` early** if downstream GitHub operations will use the local transport. Failing on auth at the end of a long DAG is painful. **Never use `git diff --quiet` alone as your "clean tree" check.** It fails on any dirty file, including the ones the workflow is expected to rewrite, which causes false failures on every resume / re-run. @@ -267,7 +393,7 @@ command: [ Results in `set -e && cat > /tmp/f <` | ❌ replays **stored config from DB** | ✅ from same run id | -| `--start-from --previous-run-id ` | ✅ reads fresh file | ✅ from previous run id's cached outputs | +| Flag | Reads workflow file fresh? | Uses cached step outputs? | +|---|---|---| +| `--resume ` | ❌ replays **stored config from DB** | ✅ from same run id | +| `--start-from --previous-run-id ` | ✅ reads fresh file | ✅ from previous run id's cached outputs | **Rule:** if you edited the workflow file to fix the failing step, use `--start-from --previous-run-id `, **not** `--resume `. `--resume` pulls the entire workflow config from the run's DB record and replays it — your edits to the workflow file are ignored, and the step re-runs with its original (broken) definition. @@ -471,13 +597,16 @@ For bug-fix or reliability workflows, do **not** stop at unit or integration tes - Show that the original failure no longer occurs 8. **Record residual risks** - Call out what was not covered +9. 
**Ship the result as a PR** + - Open the pull request from the workflow itself with `createGitHubStep` + - See [Shipping the Result — Open a PR via `createGitHubStep`](#shipping-the-result--open-a-pr-via-creategithubstep) below + - A workflow that fixes a bug and stops short of the PR has only done half the loop ### Clean-environment validation guidance When the bug involves install, bootstrap, PATH/shims, auth, brokers, background services, OS-specific packaging, or first-run UX, add a second workflow (or second phase) that validates the fix in a **fresh environment**. Preferred order of proving environments: - 1. disposable sandbox / cloud workspace 2. Docker / containerized environment 3. fresh local shell with isolated paths @@ -485,7 +614,6 @@ Preferred order of proving environments: ### Meta-workflow guidance If the right proving environment is unclear, first write a **meta-workflow** that: - - compares candidate validation environments - defines the acceptance contract - chooses the best swarm pattern @@ -493,18 +621,108 @@ If the right proving environment is unclear, first write a **meta-workflow** tha This is often better than jumping straight to implementation. +## Shipping the Result — Open a PR via `createGitHubStep` + +A workflow whose final artifact is "a clean working tree on a sandbox you'll throw away" has not shipped anything. **End every code-changing workflow by opening a pull request, and do it from inside the workflow** using `createGitHubStep` from `@agent-relay/sdk/github`. Don't tell the operator to follow up with `gh pr create` — make the workflow's own last step the PR. 
+ +### Why `createGitHubStep` (and not raw `gh` / `octokit`) + +The primitive picks the right transport at runtime: + +| Where the workflow runs | Transport `createGitHubStep` uses | What you provide | +|---|---|---| +| Local (`agent-relay run`) | `gh` CLI | `gh auth status` works | +| Cloud (`agent-relay cloud run`) — tenant-scoped | Nango → workspace's GitHub App installation | Nothing — cloud injects credentials | +| Cloud — fallback | Relay-cloud GitHub proxy | Nothing — cloud injects credentials | + +You write **one** workflow. The same `createPR` step opens a PR via your local `gh` when you iterate on it on a laptop, and via the workspace's GitHub App when the same file runs in `agent-relay cloud run`. No branching by environment, no env-var sniffing in your task strings, no "this part only works in cloud" caveats. That's the whole point of the adapter. + +> **Phase C interaction (cloud only):** `agent-relay cloud run` already auto-pushes per-`paths[]` diffs as separate PRs after the workflow callback when the repos are allowlisted (see `pushedTo` in the run record). Phase C is the *catch-all* — if your workflow does nothing else, you still get one PR per declared path. Use `createGitHubStep` **on top of** that when you need PRs the catch-all can't produce: cross-cutting issues, follow-up tracking issues, opening one PR that spans multiple paths, draft PRs you want labeled/assigned in specific ways, or PRs against a repo you didn't `paths[]` in. + +### The minimal "open a PR" recipe + +```typescript +import { workflow } from '@agent-relay/sdk/workflows'; +import { createGitHubStep } from '@agent-relay/sdk/github'; + +const REPO = 'AgentWorkforce/cloud'; +const BRANCH = `agent-relay/run-${Date.now()}`; + +await workflow('feature-x') + // ... your real steps that produce code changes ... + .step('write-marker', { + type: 'deterministic', + command: `echo "fix landed at $(date -u)" >> CHANGELOG.md`, + }) + + // Branch off main on the remote. 
+ .step('create-branch', createGitHubStep({ + dependsOn: ['write-marker'], + action: 'createBranch', + repo: REPO, + params: { branch: BRANCH, source: 'main' }, + })) + + // Commit the change to the branch via Contents API. + .step('commit-change', createGitHubStep({ + dependsOn: ['create-branch'], + action: 'createFile', + repo: REPO, + params: { + path: 'CHANGELOG.md', + branch: BRANCH, + content: '', + message: 'chore: changelog entry', + }, + })) + + // Open the PR. This is the load-bearing step. + .step('open-pr', createGitHubStep({ + dependsOn: ['commit-change'], + action: 'createPR', + repo: REPO, + params: { + title: 'feat: ship feature X', + head: BRANCH, + base: 'main', + body: '## Summary\n\n- ...\n\n## Test plan\n\n- [x] ...', + draft: false, + }, + output: { mode: 'data', format: 'json', path: 'html_url' }, + })) + + .run({ cwd: process.cwd() }); +``` + +`createGitHubStep` is bundled with `@agent-relay/sdk`; do not add a separate install. Its actions are stable across runtimes: `getRepo`, `createBranch`, `createFile`, `updateFile`, `createPR`, `updatePR`, `getPR`, `listPRs`, `mergePR`, `createIssue`, etc. See the SDK GitHub primitive docs for the full enum. + +### Authoring rules for PR-shipping workflows + +1. **Open the PR from the workflow, not from the operator's shell.** "Tell the user to run `gh pr create`" is a regression to a manual step the workflow could have done. The whole point of running this in cloud is that there is no operator's shell. +2. **One PR per workflow, by default.** A workflow that opens five PRs from one run is almost always wrong — humans review one PR at a time. If you genuinely need multiple, prefer a tracking issue + linked PRs, or split into separate workflows. +3. **Branch name encodes the run.** `agent-relay/run-${runId}` or `agent-relay/${workflow-name}-${timestamp}` so reviewers can tell the PR apart from other automation, and so reruns don't clash. +4. 
**`draft: true` while iterating.** Once the workflow is stable end-to-end, flip to `draft: false`. +5. **Body is a real PR description.** Summary + Test plan, generated from the workflow's own evidence (verification step output, diff stats, test run output). If you find yourself writing a placeholder body, the workflow isn't done — capture the real evidence in an earlier step and template it in. +6. **Don't use `createGitHubStep` to substitute for `paths[]` push-back in cloud.** If the diff lives in a tarballed `paths[]` mount, let cloud's Phase C push-back open that PR (it handles the patch generation, branch lifecycle, and per-repo allowlist). Use `createGitHubStep` when you need a PR against a repo or branch outside the `paths[]` set, or when you want to add an extra PR (e.g. a tracking issue, a follow-up against a sibling repo, a docs-only PR). +7. **Failure is a real failure.** If `createPR` errors (auth, permissions, branch conflict), the workflow should fail the step, not warn-and-continue. A "successful" workflow that silently failed to open the PR is the worst-case outcome — the human thinks the work shipped. + +### Where this fits in the bug-fix phases + +[End-to-End Bug Fix Workflows](#end-to-end-bug-fix-workflows) lists "Ship the result as a PR" as phase 9. Concretely that means: after phase 7 (compare before/after evidence) succeeds, the workflow's next step is `createPR` with that evidence templated into the body. The PR opening **is** the ship — there is no further manual step. + ## Key Concepts ### Step Output Chaining Use `{{steps.STEP_NAME.output}}` in a downstream step's task to inject the prior step's terminal output. -**Only chain output from clean sources:** +> **Mental model:** this is a **Unix pipe**, not agent communication. `{{steps.A.output}}` flowing into step B is `A | B` — A is dead by the time B reads its stdout. There is no chat, no feedback, no addressing. 
If your workflow's coordination story is *only* output chaining, you're using the relay as transport, not as a coordination layer. See **[Choose Your Coordination Style](#choose-your-coordination-style--conversation-vs-pipeline)** before defaulting to this. +**Only chain output from clean sources:** - Deterministic steps (shell commands — always clean) - Non-interactive agents (`preset: 'worker'` — clean stdout) -**Never chain from interactive agents** (`cli: 'claude'` without preset) — PTY output includes spinners, ANSI codes, and TUI chrome. Instead, have the agent write to a file, then read it in a deterministic step. +**Never chain from interactive agents** (`cli: 'claude'` without preset) — PTY output includes spinners, ANSI codes, and TUI chrome. Instead, have the agent write to a file, then read it in a deterministic step. (Or: don't use chaining at all — let the agents coordinate over the channel.) ### Verification Gates @@ -568,38 +786,30 @@ Agent-level methods are also available: ```typescript const agent = await relay.claude.spawn({ name: 'auditor', channels: ['ch-a'] }); -await agent.subscribe(['ch-b']); // now subscribed to ch-a and ch-b -await agent.mute('ch-a'); // ch-a messages silenced (still in history) -await agent.unmute('ch-a'); // ch-a messages resume -await agent.unsubscribe(['ch-b']); // leaves ch-b -console.log(agent.channels); // ['ch-a'] -console.log(agent.mutedChannels); // [] +await agent.subscribe(['ch-b']); // now subscribed to ch-a and ch-b +await agent.mute('ch-a'); // ch-a messages silenced (still in history) +await agent.unmute('ch-a'); // ch-a messages resume +await agent.unsubscribe(['ch-b']); // leaves ch-b +console.log(agent.channels); // ['ch-a'] +console.log(agent.mutedChannels); // [] ``` #### Semantics -| Operation | Channel membership | PTY injection | History access | -| ------------- | ------------------ | ------------- | --------------- | -| `subscribe` | Yes | Yes | Yes | -| `unsubscribe` | No | No | No (leaves) | -| 
`mute` | Yes (stays) | No (silenced) | Yes (can query) | -| `unmute` | Yes | Yes (resumes) | Yes | +| Operation | Channel membership | PTY injection | History access | +|---------------|-------------------|---------------|----------------| +| `subscribe` | Yes | Yes | Yes | +| `unsubscribe` | No | No | No (leaves) | +| `mute` | Yes (stays) | No (silenced) | Yes (can query)| +| `unmute` | Yes | Yes (resumes) | Yes | #### Events ```typescript -relay.onChannelSubscribed = (agent, channels) => { - /* ... */ -}; -relay.onChannelUnsubscribed = (agent, channels) => { - /* ... */ -}; -relay.onChannelMuted = (agent, channel) => { - /* ... */ -}; -relay.onChannelUnmuted = (agent, channel) => { - /* ... */ -}; +relay.onChannelSubscribed = (agent, channels) => { /* ... */ }; +relay.onChannelUnsubscribed = (agent, channels) => { /* ... */ }; +relay.onChannelMuted = (agent, channel) => { /* ... */ }; +relay.onChannelUnmuted = (agent, channel) => { /* ... */ }; ``` #### When to Use in Workflows @@ -612,7 +822,6 @@ relay.onChannelUnmuted = (agent, channel) => { #### What This Eliminates With broker-managed subscriptions, you no longer need: - 1. Client-side persona filtering (`personaNames.has(from)` checks) 2. Channel prefix regex for message routing 3. 
Manual peer fanout (iterating agents to forward messages) @@ -661,12 +870,12 @@ relay.mute({ agent: string, channel: string }): Promise relay.unmute({ agent: string, channel: string }): Promise ``` -| Preset | Interactive | Relay access | Use for | -| ---------- | --------------- | ------------ | ------------------------------------- | -| `lead` | yes (PTY) | yes | Coordination, monitoring channels | -| `worker` | no (subprocess) | no | Bounded tasks, structured stdout | -| `reviewer` | no (subprocess) | no | Reading artifacts, producing verdicts | -| `analyst` | no (subprocess) | no | Reading code/files, writing findings | +| Preset | Interactive | Relay access | Use for | +| ---------- | ------------- | ------------ | ---------------------------------------------------- | +| `lead` | yes (PTY) | yes | Coordination, monitoring channels | +| `worker` | no (subprocess) | no | Bounded tasks, structured stdout | +| `reviewer` | no (subprocess) | no | Reading artifacts, producing verdicts | +| `analyst` | no (subprocess) | no | Reading code/files, writing findings | Non-interactive presets run via one-shot mode (`claude -p`, `codex exec`). Output is clean and available via `{{steps.X.output}}`. @@ -764,12 +973,119 @@ Edit files as assigned. Report completion. 
Fix issues from feedback.`, **When to use interactive team vs one-shot DAG:** -| Scenario | Pattern | -| ---------------------------------- | ------------------------------------ | -| 4+ files, likely needs iteration | Interactive team | -| Simple edits, well-specified | One-shot DAG with `preset: 'worker'` | -| Cross-agent review feedback loop | Interactive team | +| Scenario | Pattern | +|----------|---------| +| 4+ files, likely needs iteration | Interactive team | +| Simple edits, well-specified | One-shot DAG with `preset: 'worker'` | +| Cross-agent review feedback loop | Interactive team | | Independent tasks, no coordination | Fan-out with non-interactive workers | +| Anything where the answer to "could this be `cmd1 \| cmd2`?" is *no* | Interactive team | + +### Chat-Native Coordination Recipes + +Once you're in the Interactive Team shape, the channel is your coordination medium. These are recipes for using it well — they are *prompt-authoring patterns*, not new SDK surface. All of them assume interactive agents (no `preset`) sharing a `.channel('wf-...')`. + +#### 1. Question / Answer (blocking ask) + +When agent A needs information only agent B has, instruct A to **post a direct question and wait for a reply** rather than guessing or proceeding. + +```typescript +.step('integrate', { + agent: 'integrator', + dependsOn: ['context'], + task: `You are the integrator on #wf-feature. +Before writing code, post a direct question to @schema-owner asking which +table owns the new field. Do NOT proceed until @schema-owner replies in +channel. If no reply arrives in 5 minutes, @-mention the lead.`, +}) +``` + +**Why it beats `{{steps.X.output}}`:** the answer depends on something only an agent (or human) can decide at runtime; encoding it as a prior step's stdout is wrong. + +#### 2. Broadcast / Ack + +When a lead needs *N workers to confirm receipt* before proceeding (e.g., to make sure the plan was actually read), require explicit acks.
+ +```typescript +.step('lead-coordinate', { + agent: 'lead', + dependsOn: ['context'], + task: `Post the plan to #wf-feature, then @impl-a @impl-b @impl-c. +Wait for each to reply with "ACK <worker-name>" before issuing assignments. +If any worker hasn't acked in 3 minutes, re-post and ping again. +Only after all three have acked, post per-worker assignments.`, +}) +``` + +**Why it matters:** in the Codex history, the most common silent failure is a worker step that started but never read the channel. An ack gate makes "did you actually receive this?" deterministic without a separate verification step. + +#### 3. Peer Review Handoff + +The substantive form of "review my work." Worker pings reviewer in-channel with a concrete artifact reference; reviewer reads the actual files (not the chat); reviewer replies with a verdict. + +```typescript +.step('impl-a-work', { + agent: 'impl-a', + dependsOn: ['context'], + task: `Implement src/foo.ts per the lead's assignment. +When done, post to #wf-feature: "@reviewer ready: src/foo.ts" — include the +commit SHA. Then wait for @reviewer's verdict in channel. +- If "APPROVED", you're done. +- If "CHANGES_REQUESTED <notes>", apply the notes and re-post. +- If no verdict in 5 min, @-mention the lead.`, +}) +``` + +**Pattern note:** the reviewer must read the files themselves — never let the worker paste the diff into chat. Channel messages are for *coordination*, not *content*. That's also what keeps you under output-token limits. + +#### 4. Standup / Status Probe + +For long-running workflows, have the lead post periodic `@-mention` probes so silently-stuck workers surface fast. + +```typescript +.step('lead-coordinate', { + agent: 'lead', + task: `... coordinate the team ... + +Every 10 minutes, post a status probe: "@impl-a @impl-b status?"
+Each worker should reply with one of: + - "RUNNING <current-task>" (still working) + - "BLOCKED <reason>" (@-mention the lead with the blocker) + - "DONE <artifact>" (ready for review) + +If a worker is silent for two probes in a row, mark them stalled and +reassign their work to a peer.`, +}) +``` + +#### 5. Hand-Off with Context + +When work flows from agent A to agent B *during* a workflow (not just between steps), have A post a structured handoff message so B doesn't re-derive context. + +```typescript +.step('impl-a-work', { + agent: 'impl-a', + task: `... finish your part ... + +When done, post a handoff to #wf-feature targeting the next worker: +"@impl-b HANDOFF: src/foo.ts ready. Touched: <files>. Open question: <question>. +Tests: <status>. Commit: <sha>."`, +}) +``` + +**Vs `{{steps.X.output}}`:** an output-chain forces B to parse A's entire stdout. A handoff message is a curated summary A writes for B — much higher signal, no PTY/ANSI noise. + +#### Picking a recipe + +| Need | Recipe | +|---|---| +| One agent needs an answer from another at runtime | **Q/A** | +| Lead needs to confirm workers received the plan | **Broadcast/Ack** | +| Agent-to-agent code review | **Peer Review Handoff** | +| Long-running team, want stalled-worker visibility | **Standup/Probe** | +| Sequential agent work that needs context curation | **Hand-Off with Context** | + +> **Authoring rule:** if your workflow has interactive agents on a channel but their task strings don't *instruct them to talk to each other*, you're not using the chat primitive — you've just paid the overhead of starting it. Either add an explicit recipe above, or drop to `preset: 'worker'` and pipeline-shape.
### Pipeline (sequential handoff) @@ -848,7 +1164,6 @@ steps: ``` **Key rules:** - - Read the file in a deterministic step right before the edit (not all files upfront) - Tell the agent "Only edit this one file" to prevent it touching other files - Verify with `git diff --quiet` after each edit — fail fast if the agent didn't write @@ -873,7 +1188,6 @@ After any step that creates files, add a deterministic `file_exists` check befor ``` **Rules for file-writing tasks:** - 1. Use full paths from project root — say `src/auth/credentials.ts`, not `credentials.ts` 2. Add `IMPORTANT: Write the file to disk. Do NOT output to stdout.` 3. Use `file_exists` verification for creation steps (not just `exit_code`) @@ -908,7 +1222,6 @@ steps: **One agent, one deliverable.** A step's task prompt should be 10-20 lines max. Split into a **lead + workers team** when: - - The task requires a 50+ line prompt - The deliverable is multiple files that must be consistent - You need one agent to verify another's output @@ -925,7 +1238,7 @@ steps: - name: track-worker-1-impl agent: track-worker-1 - dependsOn: [prior-step] # same dep as lead — starts concurrently + dependsOn: [prior-step] # same dep as lead — starts concurrently task: | Join #my-track. track-lead will post your assignment. Implement the file as directed. @@ -933,7 +1246,7 @@ steps: type: exit_code - name: next-step - dependsOn: [track-lead-coord] # downstream depends on lead, not workers + dependsOn: [track-lead-coord] # downstream depends on lead, not workers ``` ## Supervisor Pattern @@ -942,67 +1255,69 @@ When you set `.pattern('supervisor')` (or `hub-spoke`, `fan-out`), the runner au **Auto-hardening only activates for hub patterns** — not `pipeline` or `dag`. 
-| Use case | Pattern | Why | -| ------------------------- | ------------------- | -------------------------------- | -| Sequential, no monitoring | `pipeline` | Simple, no overhead | -| Workers need oversight | `supervisor` | Auto-owner monitors | -| Local/small models | `supervisor` | Supervisor catches stuck workers | -| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | +| Use case | Pattern | Why | +|----------|---------|-----| +| Sequential, no monitoring | `pipeline` | Simple, no overhead | +| Workers need oversight | `supervisor` | Auto-owner monitors | +| Local/small models | `supervisor` | Supervisor catches stuck workers | +| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | ## Concurrency **Cap `maxConcurrency` at 4-6.** Spawning 10+ agents simultaneously causes broker timeouts. | Parallel agents | `maxConcurrency` | -| --------------- | ---------------- | -| 2-4 | 4 (default safe) | -| 5-10 | 5 | -| 10+ | 6-8 max | +|-----------------|-------------------| +| 2-4 | 4 (default safe) | +| 5-10 | 5 | +| 10+ | 6-8 max | ## Common Mistakes -| Mistake | Fix | -| --------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | -| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | -| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | -| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | -| Adding exit instructions to tasks | Runner handles self-termination automatically | -| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | -| Using `general` 
channel | Set `.channel('wf-name')` for isolation | -| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | -| Requiring exact sentinel as only completion gate | Use `exit_code` or `file_exists` verification | -| Writing 100-line task prompts | Split into lead + workers on a channel | -| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | -| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | -| Workers depending on lead step (deadlock) | Both depend on shared context step | -| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | -| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. Use `.pattern('supervisor')` | -| Workers without `preset: 'worker'` in one-shot DAG lead+worker flows | Add preset for clean stdout when chaining `{{steps.X.output}}` (not needed for interactive team patterns) | -| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | -| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | -| Using `require()` in ESM projects | Check `package.json` for `"type": "module"` — use `import` if ESM | -| Wrapping in `async function main()` in ESM | ESM supports top-level `await` — no wrapper needed | -| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | -| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` — the file must call `.run()`, not just export config | -| Relative import `'../workflows/builder.js'` | Use `import { workflow } from '@agent-relay/sdk/workflows'` | -| Hardcoded model strings (`model: 'opus'`) | Use constants: `import { ClaudeModels } from '@agent-relay/config'` → `model: ClaudeModels.OPUS` | -| Thinking `agent-relay run` inspects exports | It executes the file as a subprocess. 
Only `.run()` invocations trigger steps | -| `pattern('single')` on cloud runner | Not supported — use `dag` | -| `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag` | -| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | -| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | -| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | -| Relying on agents to `git commit` | Agents emit markers without running git. Use deterministic commit step | -| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | -| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | -| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | -| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | -| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | -| Using `preset: 'worker'` for Codex in _interactive team_ patterns when coordination is needed | Codex interactive mode works fine with PTY channel injection. 
Drop the preset for interactive team patterns (keep it for one-shot DAG workers where clean stdout matters) | -| Separate reviewer agent from lead in interactive team | Merge lead + reviewer into one interactive Claude agent — reviews between rounds, fewer agents | -| Not printing PR URL after `gh pr create` | Add a final deterministic step: `echo "PR: $(cat pr-url.txt)"` or capture in the `gh pr create` command | -| Workflow ending without worktree + PR for cross-repo changes | Add `setup-worktree` at start and `push-and-pr` + `cleanup-worktree` at end | +| Mistake | Fix | +|---------|-----| +| Treating relay as transport, not as a coordination layer (every step is `preset: 'worker'`, every handoff is `{{steps.X.output}}`) | Default to **Conversation shape** for non-trivial work — interactive agents on a shared channel. Pipeline-shape is only correct when the work could be expressed as a `bash | bash | bash` pipe. | +| Interactive agents on a channel whose task strings don't tell them to talk to each other | Pick a [Chat-Native Coordination Recipe](#chat-native-coordination-recipes) (Q/A, Broadcast/Ack, Peer Review, Standup, Hand-Off) and bake it into the task prompt — otherwise you're paying for a chat substrate you're not using | +| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | +| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | +| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | +| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | +| Adding exit instructions to tasks | Runner handles self-termination automatically | +| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | +| Using `general` channel | Set `.channel('wf-name')` for isolation | +| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | +| Requiring exact 
sentinel as only completion gate | Use `exit_code` or `file_exists` verification | +| Writing 100-line task prompts | Split into lead + workers on a channel | +| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | +| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | +| Workers depending on lead step (deadlock) | Both depend on shared context step | +| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | +| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. Use `.pattern('supervisor')` | +| Workers without `preset: 'worker'` in one-shot DAG lead+worker flows | Add preset for clean stdout when chaining `{{steps.X.output}}` (not needed for interactive team patterns) | +| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | +| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | +| Using `require()` in ESM projects | Check `package.json` for `"type": "module"` — use `import` if ESM | +| Wrapping in `async function main()` in ESM | ESM supports top-level `await` — no wrapper needed | +| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | +| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` — the file must call `.run()`, not just export config | +| Relative import `'../workflows/builder.js'` | Use `import { workflow } from '@agent-relay/sdk/workflows'` | +| Hardcoded model strings (`model: 'opus'`) | Use constants: `import { ClaudeModels } from '@agent-relay/config'` → `model: ClaudeModels.OPUS` | +| Thinking `agent-relay run` inspects exports | It executes the file as a subprocess. Only `.run()` invocations trigger steps | +| `pattern('single')` on cloud runner | Not supported — use `dag` | +| `pattern('supervisor')` with one agent | Same agent is owner + specialist. 
Use `dag` | +| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | +| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | +| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | +| Relying on agents to `git commit` | Agents emit markers without running git. Use deterministic commit step | +| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | +| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | +| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | +| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | +| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | +| Using `preset: 'worker'` for Codex in *interactive team* patterns when coordination is needed | Codex interactive mode works fine with PTY channel injection. 
Drop the preset for interactive team patterns (keep it for one-shot DAG workers where clean stdout matters) | +| Separate reviewer agent from lead in interactive team | Merge lead + reviewer into one interactive Claude agent — reviews between rounds, fewer agents | +| Not printing PR URL after `createGitHubStep({ action: 'createPR' })` | Capture `html_url` with `output: { mode: 'data', format: 'json', path: 'html_url' }` and echo or write it in a final deterministic step | +| Workflow ending without worktree + PR for cross-repo changes | Add `setup-worktree` at start and `push-and-pr` + `cleanup-worktree` at end | ## YAML Alternative diff --git a/.gitignore b/.gitignore index 87909f171..f2f71f14a 100644 --- a/.gitignore +++ b/.gitignore @@ -87,6 +87,7 @@ web/waitlist.json web/.open-next .msd/ /workflows/* +.workflow-artifacts/* !/workflows/ci/ !/workflows/refactor/ !/workflows/relayauth-integration/ diff --git a/package-lock.json b/package-lock.json index 21defb95b..82da6ac92 100644 --- a/package-lock.json +++ b/package-lock.json @@ -35,7 +35,7 @@ "@relayfile/local-mount": "^0.2.2", "@relayfile/sdk": "^0.6.0", "@sinclair/typebox": "^0.34.14", - "agent-trajectories": "^0.5.7", + "agent-trajectories": "^0.5.8", "chalk": "^4.1.2", "chokidar": "^5.0.0", "commander": "^12.1.0", @@ -6342,9 +6342,9 @@ } }, "node_modules/agent-trajectories": { - "version": "0.5.7", - "resolved": "https://registry.npmjs.org/agent-trajectories/-/agent-trajectories-0.5.7.tgz", - "integrity": "sha512-XOD35i7h6JftQytnG0F1mI0FndqR8QuhO73xEsqXa+bI3dyK+sQ3+s4zhKtZxhR9pQZRkRV3nclkSmp6EjQfPQ==", + "version": "0.5.8", + "resolved": "https://registry.npmjs.org/agent-trajectories/-/agent-trajectories-0.5.8.tgz", + "integrity": "sha512-Cu/+uyxAy+eNSlpzuOhk62kM/i0BdlfG8Z4avyzfbHbQ3I9EQLqiUikl3WcG75m3v+4MwTbJq9e6YTG8/ykKPw==", "license": "MIT", "dependencies": { "@clack/prompts": "^0.7.0", diff --git a/package.json b/package.json index 78a0edbb7..e9bc02e69 100644 --- a/package.json +++ b/package.json @@ 
-147,7 +147,7 @@ "@relayfile/local-mount": "^0.2.2", "@relayfile/sdk": "^0.6.0", "@sinclair/typebox": "^0.34.14", - "agent-trajectories": "^0.5.7", + "agent-trajectories": "^0.5.8", "chalk": "^4.1.2", "chokidar": "^5.0.0", "commander": "^12.1.0", @@ -204,5 +204,9 @@ "flatted": "^3.4.2", "react": "^18.3.1", "react-dom": "^18.3.1" - } + }, + "bundleDependencies": [ + "@relaycast/sdk", + "@relayfile/local-mount" + ] } diff --git a/prpm.lock b/prpm.lock index 844cbb1ea..5f4f9d8f2 100644 --- a/prpm.lock +++ b/prpm.lock @@ -154,9 +154,9 @@ "installedPath": ".claude/skills/choosing-swarm-patterns/SKILL.md" }, "@agent-relay/writing-agent-relay-workflows#claude": { - "version": "1.5.1", - "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fwriting-agent-relay-workflows/1.5.1.tar.gz", - "integrity": "sha256-745f43edd930c5f930065e8f92a3f2fae6ef902dd768576d574f9c3a50837042", + "version": "1.6.2", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fwriting-agent-relay-workflows/1.6.2.tar.gz", + "integrity": "sha256-2394f6b0cd6ef65e871720646d23b9406a2193a314bd82e1f9a6eb0416c672ed", "format": "claude", "subtype": "skill", "sourceFormat": "claude", @@ -194,9 +194,9 @@ "installedPath": ".agents/skills/choosing-swarm-patterns/SKILL.md" }, "@agent-relay/writing-agent-relay-workflows#codex": { - "version": "1.5.1", - "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fwriting-agent-relay-workflows/1.5.1.tar.gz", - "integrity": "sha256-745f43edd930c5f930065e8f92a3f2fae6ef902dd768576d574f9c3a50837042", + "version": "1.6.2", + "resolved": "https://registry.prpm.dev/api/v1/packages/%40agent-relay%2Fwriting-agent-relay-workflows/1.6.2.tar.gz", + "integrity": "sha256-2394f6b0cd6ef65e871720646d23b9406a2193a314bd82e1f9a6eb0416c672ed", "format": "codex", "subtype": "skill", "sourceFormat": "claude", @@ -244,5 +244,5 @@ "installedPath": ".agents/skills/relay-80-100-workflow/SKILL.md" } }, - "generated": "2026-05-04T08:36:28.666Z" + 
"generated": "2026-05-09T09:15:37.156Z" } \ No newline at end of file From 7624f9a3e6830dd15b9db95ec86d95c0b87f299d Mon Sep 17 00:00:00 2001 From: Khaliq Date: Sat, 9 May 2026 11:17:16 +0200 Subject: [PATCH 3/4] skill formatting --- .../writing-agent-relay-workflows/SKILL.md | 233 ++++++------ .../writing-agent-relay-workflows/SKILL.md | 343 ++++++++++-------- 2 files changed, 303 insertions(+), 273 deletions(-) diff --git a/.agents/skills/writing-agent-relay-workflows/SKILL.md b/.agents/skills/writing-agent-relay-workflows/SKILL.md index ce25a5e88..5c89d7f34 100644 --- a/.agents/skills/writing-agent-relay-workflows/SKILL.md +++ b/.agents/skills/writing-agent-relay-workflows/SKILL.md @@ -21,12 +21,12 @@ The relay broker-sdk workflow system orchestrates multiple AI agents (Claude, Co ### Choose Your Coordination Style — Conversation vs Pipeline -Before writing the workflow, decide *how the agents will coordinate*. The relay primitive supports two very different shapes, and picking the wrong one wastes the most valuable thing the SDK gives you. +Before writing the workflow, decide _how the agents will coordinate_. The relay primitive supports two very different shapes, and picking the wrong one wastes the most valuable thing the SDK gives you. -| Shape | What it is | Use when | -|---|---|---| +| Shape | What it is | Use when | +| ------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **Conversation** (chat-native) | Interactive agents share a channel; messages, `@-mentions`, and ambient awareness drive coordination. 
Lead and workers spawn in parallel and self-organize. The relay is the coordination layer, not just transport. | Multi-file work, peer review loops, cross-agent feedback, dynamic re-planning, multi-PR coordination, anything with a human-in-the-loop escape, swarms where workers pick up each other's output. | -| **Pipeline** (one-shot DAG) | Each step runs as a one-shot subprocess (`claude -p`, `codex exec`); steps hand off via `{{steps.X.output}}` text injection. No agents are alive at the same time; no chat happens. | Linear, well-specified transformations; deterministic data passing; no review loop expected; the work could be expressed as a `bash | bash | bash` pipe. | +| **Pipeline** (one-shot DAG) | Each step runs as a one-shot subprocess (`claude -p`, `codex exec`); steps hand off via `{{steps.X.output}}` text injection. No agents are alive at the same time; no chat happens. | Linear, well-specified transformations; deterministic data passing; no review loop expected; the work could be expressed as a `bash \| bash \| bash` pipe. | **Default to Conversation for any non-trivial work.** Pipeline DAGs are simpler to reason about but they do not exercise the relay primitive — they are a Unix pipe with extra steps. If you would happily write the same task as a single shell pipeline, pipeline-shape is fine. Otherwise, you almost certainly want a Conversation shape. @@ -67,10 +67,9 @@ const result = await workflow('my-workflow') .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) .run({ cwd: process.cwd() }); - console.log('Result:', result.status); +console.log('Result:', result.status); ``` - ### Quick Reference (Conversation shape) #### > Use this for any non-trivial work — peer review, multi-file edits, cross-agent feedback, dynamic re-planning. Lead and workers spawn **in parallel** on a shared channel and self-organize via messages. The relay primitive does the coordinating; verification gates downstream of the lead close the workflow. 
@@ -82,7 +81,7 @@ import { ClaudeModels, CodexModels } from '@agent-relay/config'; const result = await workflow('my-workflow') .description('Multi-file change with peer review') .pattern('dag') - .channel('wf-my-feature') // dedicated channel — agents share it + .channel('wf-my-feature') // dedicated channel — agents share it .maxConcurrency(4) .timeout(3_600_000) @@ -124,13 +123,13 @@ Workers iterate based on your feedback. Exit when both files pass review.`, }) .step('impl-a-work', { agent: 'impl-a', - dependsOn: ['context'], // SAME dep as lead → starts in parallel, no deadlock + dependsOn: ['context'], // SAME dep as lead → starts in parallel, no deadlock task: `You are impl-a on #wf-my-feature. Wait for the lead's plan. Implement your assigned file. Post a completion message. Address feedback.`, }) .step('impl-b-work', { agent: 'impl-b', - dependsOn: ['context'], // SAME dep as lead + dependsOn: ['context'], // SAME dep as lead task: `You are impl-b on #wf-my-feature. Wait for the lead's plan. Implement your assigned file. Post a completion message. Address feedback.`, }) @@ -147,7 +146,6 @@ Implement your assigned file. Post a completion message. 
Address feedback.`, .run({ cwd: process.cwd() }); ``` - ### ⚡ Parallelism — Design for Speed #### Cross-Workflow Parallelism: Wave Planning @@ -180,9 +178,9 @@ git add -A && git commit -m "Wave 2" ```typescript workflow('48-comparison-mode') - .packages(['web', 'core']) // monorepo packages touched - .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize - .requiresBefore(['46-admin-dashboard']) // explicit ordering constraint + .packages(['web', 'core']) // monorepo packages touched + .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize + .requiresBefore(['46-admin-dashboard']); // explicit ordering constraint ``` #### Within-Workflow Parallelism @@ -198,7 +196,6 @@ workflow('48-comparison-mode') .step('verify-all', { agent: 'reviewer', dependsOn: ['fix-component-a', 'fix-component-b'] }) ``` - ### Failure Prevention #### 1. Do not use raw top-level `await` @@ -287,7 +284,6 @@ export function applyCloudRepoSetup(wf: T, opts: CloudRepoSetupOptions): T { } ``` - ### End-to-End Bug Fix Workflows - **Capture the original failure** @@ -340,45 +336,53 @@ await workflow('feature-x') }) // Branch off main on the remote. - .step('create-branch', createGitHubStep({ - dependsOn: ['write-marker'], - action: 'createBranch', - repo: REPO, - params: { branch: BRANCH, source: 'main' }, - })) + .step( + 'create-branch', + createGitHubStep({ + dependsOn: ['write-marker'], + action: 'createBranch', + repo: REPO, + params: { branch: BRANCH, source: 'main' }, + }) + ) // Commit the change to the branch via Contents API. 
- .step('commit-change', createGitHubStep({ - dependsOn: ['create-branch'], - action: 'createFile', - repo: REPO, - params: { - path: 'CHANGELOG.md', - branch: BRANCH, - content: '', - message: 'chore: changelog entry', - }, - })) + .step( + 'commit-change', + createGitHubStep({ + dependsOn: ['create-branch'], + action: 'createFile', + repo: REPO, + params: { + path: 'CHANGELOG.md', + branch: BRANCH, + content: '', + message: 'chore: changelog entry', + }, + }) + ) // Open the PR. This is the load-bearing step. - .step('open-pr', createGitHubStep({ - dependsOn: ['commit-change'], - action: 'createPR', - repo: REPO, - params: { - title: 'feat: ship feature X', - head: BRANCH, - base: 'main', - body: '## Summary\n\n- ...\n\n## Test plan\n\n- [x] ...', - draft: false, - }, - output: { mode: 'data', format: 'json', path: 'html_url' }, - })) + .step( + 'open-pr', + createGitHubStep({ + dependsOn: ['commit-change'], + action: 'createPR', + repo: REPO, + params: { + title: 'feat: ship feature X', + head: BRANCH, + base: 'main', + body: '## Summary\n\n- ...\n\n## Test plan\n\n- [x] ...', + draft: false, + }, + output: { mode: 'data', format: 'json', path: 'html_url' }, + }) + ) .run({ cwd: process.cwd() }); ``` - ### Key Concepts #### Verification Gates @@ -416,13 +420,20 @@ relay.unmute({ agent: 'security-auditor', channel: 'review-pr-123' }); #### Events ```typescript -relay.onChannelSubscribed = (agent, channels) => { /* ... */ }; -relay.onChannelUnsubscribed = (agent, channels) => { /* ... */ }; -relay.onChannelMuted = (agent, channel) => { /* ... */ }; -relay.onChannelUnmuted = (agent, channel) => { /* ... */ }; +relay.onChannelSubscribed = (agent, channels) => { + /* ... */ +}; +relay.onChannelUnsubscribed = (agent, channels) => { + /* ... */ +}; +relay.onChannelMuted = (agent, channel) => { + /* ... */ +}; +relay.onChannelUnmuted = (agent, channel) => { + /* ... 
*/ +}; ``` - ### Agent Definition #### ```typescript @@ -448,7 +459,6 @@ import { ClaudeModels, CodexModels, GeminiModels } from '@agent-relay/config'; .agent('coder', { cli: 'codex', model: CodexModels.GPT_5_4 }) // not 'gpt-5.4' ``` - ### Step Definition #### Agent Steps @@ -475,7 +485,6 @@ import { ClaudeModels, CodexModels, GeminiModels } from '@agent-relay/config'; }) ``` - ### Common Patterns #### Interactive Team (lead + workers on shared channel) @@ -617,7 +626,6 @@ Tests: . Commit: ."`, .onError('retry', { maxRetries: 3, retryDelayMs: 5000 }) ``` - ### Multi-File Edit Pattern #### When a workflow needs to modify multiple existing files, **use one agent step per file** with a deterministic verify gate after each. Agents reliably edit 1-2 files per step but fail on 4+. @@ -677,7 +685,6 @@ steps: failOnError: true ``` - ### File Materialization: Verify Before Proceeding #### After any step that creates files, add a deterministic `file_exists` check before proceeding. Non-interactive agents may exit 0 without writing anything (wrong cwd, stdout instead of disk). @@ -696,7 +703,6 @@ steps: failOnError: true ``` - ### DAG Deadlock Anti-Pattern #### ```yaml @@ -721,7 +727,6 @@ steps: dependsOn: [work-a, coordinate] ``` - ### Step Sizing #### **One agent, one deliverable.** A step's task prompt should be 10-20 lines max. @@ -738,7 +743,7 @@ steps: - name: track-worker-1-impl agent: track-worker-1 - dependsOn: [prior-step] # same dep as lead — starts concurrently + dependsOn: [prior-step] # same dep as lead — starts concurrently task: | Join #my-track. track-lead will post your assignment. Implement the file as directed. 
@@ -746,79 +751,78 @@ steps: type: exit_code - name: next-step - dependsOn: [track-lead-coord] # downstream depends on lead, not workers + dependsOn: [track-lead-coord] # downstream depends on lead, not workers ``` - ### Supervisor Pattern When you set `.pattern('supervisor')` (or `hub-spoke`, `fan-out`), the runner auto-assigns a supervisor agent as owner for worker steps. The supervisor monitors progress, nudges idle workers, and issues `OWNER_DECISION`. **Auto-hardening only activates for hub patterns** — not `pipeline` or `dag`. -| Use case | Pattern | Why | -|----------|---------|-----| -| Sequential, no monitoring | `pipeline` | Simple, no overhead | -| Workers need oversight | `supervisor` | Auto-owner monitors | -| Local/small models | `supervisor` | Supervisor catches stuck workers | -| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | +| Use case | Pattern | Why | +| ------------------------- | ------------------- | -------------------------------- | +| Sequential, no monitoring | `pipeline` | Simple, no overhead | +| Workers need oversight | `supervisor` | Auto-owner monitors | +| Local/small models | `supervisor` | Supervisor catches stuck workers | +| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | ### Concurrency **Cap `maxConcurrency` at 4-6.** Spawning 10+ agents simultaneously causes broker timeouts. | Parallel agents | `maxConcurrency` | -|-----------------|-------------------| -| 2-4 | 4 (default safe) | -| 5-10 | 5 | -| 10+ | 6-8 max | +| --------------- | ---------------- | +| 2-4 | 4 (default safe) | +| 5-10 | 5 | +| 10+ | 6-8 max | ### Common Mistakes -| Mistake | Fix | -|---------|-----| -| Treating relay as transport, not as a coordination layer (every step is `preset: 'worker'`, every handoff is `{{steps.X.output}}`) | Default to **Conversation shape** for non-trivial work — interactive agents on a shared channel. 
Pipeline-shape is only correct when the work could be expressed as a `bash | bash | bash` pipe. | -| Interactive agents on a channel whose task strings don't tell them to talk to each other | Pick a [Chat-Native Coordination Recipe](#chat-native-coordination-recipes) (Q/A, Broadcast/Ack, Peer Review, Standup, Hand-Off) and bake it into the task prompt — otherwise you're paying for a chat substrate you're not using | -| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | -| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | -| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | -| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | -| Adding exit instructions to tasks | Runner handles self-termination automatically | -| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | -| Using `general` channel | Set `.channel('wf-name')` for isolation | -| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | -| Requiring exact sentinel as only completion gate | Use `exit_code` or `file_exists` verification | -| Writing 100-line task prompts | Split into lead + workers on a channel | -| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | -| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | -| Workers depending on lead step (deadlock) | Both depend on shared context step | -| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | -| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. 
Use `.pattern('supervisor')` | -| Workers without `preset: 'worker'` in one-shot DAG lead+worker flows | Add preset for clean stdout when chaining `{{steps.X.output}}` (not needed for interactive team patterns) | -| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | -| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | -| Using `require()` in ESM projects | Check `package.json` for `"type": "module"` — use `import` if ESM | -| Wrapping in `async function main()` in ESM | ESM supports top-level `await` — no wrapper needed | -| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | -| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` — the file must call `.run()`, not just export config | -| Relative import `'../workflows/builder.js'` | Use `import { workflow } from '@agent-relay/sdk/workflows'` | -| Hardcoded model strings (`model: 'opus'`) | Use constants: `import { ClaudeModels } from '@agent-relay/config'` → `model: ClaudeModels.OPUS` | -| Thinking `agent-relay run` inspects exports | It executes the file as a subprocess. Only `.run()` invocations trigger steps | -| `pattern('single')` on cloud runner | Not supported — use `dag` | -| `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag` | -| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | -| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | -| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | -| Relying on agents to `git commit` | Agents emit markers without running git. 
Use deterministic commit step | -| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | -| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | -| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | -| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | -| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | -| Using `preset: 'worker'` for Codex in *interactive team* patterns when coordination is needed | Codex interactive mode works fine with PTY channel injection. Drop the preset for interactive team patterns (keep it for one-shot DAG workers where clean stdout matters) | -| Separate reviewer agent from lead in interactive team | Merge lead + reviewer into one interactive Claude agent — reviews between rounds, fewer agents | -| Not printing PR URL after `createGitHubStep({ action: 'createPR' })` | Capture `html_url` with `output: { mode: 'data', format: 'json', path: 'html_url' }` and echo or write it in a final deterministic step | -| Workflow ending without worktree + PR for cross-repo changes | Add `setup-worktree` at start and `push-and-pr` + `cleanup-worktree` at end | +| Mistake | Fix | +| ---------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Treating relay as transport, not as a coordination layer (every step is `preset: 
'worker'`, every handoff is `{{steps.X.output}}`) | Default to **Conversation shape** for non-trivial work — interactive agents on a shared channel. Pipeline-shape is only correct when the work could be expressed as a `bash \| bash \| bash` pipe. | +| Interactive agents on a channel whose task strings don't tell them to talk to each other | Pick a [Chat-Native Coordination Recipe](#chat-native-coordination-recipes) (Q/A, Broadcast/Ack, Peer Review, Standup, Hand-Off) and bake it into the task prompt — otherwise you're paying for a chat substrate you're not using | +| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | +| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | +| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | +| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | +| Adding exit instructions to tasks | Runner handles self-termination automatically | +| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | +| Using `general` channel | Set `.channel('wf-name')` for isolation | +| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | +| Requiring exact sentinel as only completion gate | Use `exit_code` or `file_exists` verification | +| Writing 100-line task prompts | Split into lead + workers on a channel | +| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | +| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | +| Workers depending on lead step (deadlock) | Both depend on shared context step | +| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | +| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. 
Use `.pattern('supervisor')` | +| Workers without `preset: 'worker'` in one-shot DAG lead+worker flows | Add preset for clean stdout when chaining `{{steps.X.output}}` (not needed for interactive team patterns) | +| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | +| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | +| Using `require()` in ESM projects | Check `package.json` for `"type": "module"` — use `import` if ESM | +| Wrapping in `async function main()` in ESM | ESM supports top-level `await` — no wrapper needed | +| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | +| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` — the file must call `.run()`, not just export config | +| Relative import `'../workflows/builder.js'` | Use `import { workflow } from '@agent-relay/sdk/workflows'` | +| Hardcoded model strings (`model: 'opus'`) | Use constants: `import { ClaudeModels } from '@agent-relay/config'` → `model: ClaudeModels.OPUS` | +| Thinking `agent-relay run` inspects exports | It executes the file as a subprocess. Only `.run()` invocations trigger steps | +| `pattern('single')` on cloud runner | Not supported — use `dag` | +| `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag` | +| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | +| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | +| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | +| Relying on agents to `git commit` | Agents emit markers without running git. 
Use deterministic commit step | +| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | +| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | +| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | +| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | +| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | +| Using `preset: 'worker'` for Codex in _interactive team_ patterns when coordination is needed | Codex interactive mode works fine with PTY channel injection. Drop the preset for interactive team patterns (keep it for one-shot DAG workers where clean stdout matters) | +| Separate reviewer agent from lead in interactive team | Merge lead + reviewer into one interactive Claude agent — reviews between rounds, fewer agents | +| Not printing PR URL after `createGitHubStep({ action: 'createPR' })` | Capture `html_url` with `output: { mode: 'data', format: 'json', path: 'html_url' }` and echo or write it in a final deterministic step | +| Workflow ending without worktree + PR for cross-repo changes | Add `setup-worktree` at start and `push-and-pr` + `cleanup-worktree` at end | ### YAML Alternative @@ -851,7 +855,6 @@ workflows: type: exit_code ``` - ### Available Swarm Patterns `dag` (default), `fan-out`, `pipeline`, `hub-spoke`, `consensus`, `mesh`, `handoff`, `cascade`, `debate`, `hierarchical`, `map-reduce`, `scatter-gather`, `supervisor`, `reflection`, `red-team`, `verifier`, `auction`, `escalation`, `saga`, `circuit-breaker`, `blackboard`, `swarm` diff --git a/.claude/skills/writing-agent-relay-workflows/SKILL.md 
b/.claude/skills/writing-agent-relay-workflows/SKILL.md index ab5f87205..8f67c3f91 100644 --- a/.claude/skills/writing-agent-relay-workflows/SKILL.md +++ b/.claude/skills/writing-agent-relay-workflows/SKILL.md @@ -23,12 +23,12 @@ The relay broker-sdk workflow system orchestrates multiple AI agents (Claude, Co ## Choose Your Coordination Style — Conversation vs Pipeline -Before writing the workflow, decide *how the agents will coordinate*. The relay primitive supports two very different shapes, and picking the wrong one wastes the most valuable thing the SDK gives you. +Before writing the workflow, decide _how the agents will coordinate_. The relay primitive supports two very different shapes, and picking the wrong one wastes the most valuable thing the SDK gives you. -| Shape | What it is | Use when | -|---|---|---| +| Shape | What it is | Use when | +| ------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **Conversation** (chat-native) | Interactive agents share a channel; messages, `@-mentions`, and ambient awareness drive coordination. Lead and workers spawn in parallel and self-organize. The relay is the coordination layer, not just transport. | Multi-file work, peer review loops, cross-agent feedback, dynamic re-planning, multi-PR coordination, anything with a human-in-the-loop escape, swarms where workers pick up each other's output. | -| **Pipeline** (one-shot DAG) | Each step runs as a one-shot subprocess (`claude -p`, `codex exec`); steps hand off via `{{steps.X.output}}` text injection. No agents are alive at the same time; no chat happens.
| Linear, well-specified transformations; deterministic data passing; no review loop expected; the work could be expressed as a `bash | bash | bash` pipe. | +| **Pipeline** (one-shot DAG) | Each step runs as a one-shot subprocess (`claude -p`, `codex exec`); steps hand off via `{{steps.X.output}}` text injection. No agents are alive at the same time; no chat happens. | Linear, well-specified transformations; deterministic data passing; no review loop expected; the work could be expressed as a `bash \| bash \| bash` pipe. | **Default to Conversation for any non-trivial work.** Pipeline DAGs are simpler to reason about but they do not exercise the relay primitive — they are a Unix pipe with extra steps. If you would happily write the same task as a single shell pipeline, pipeline-shape is fine. Otherwise, you almost certainly want a Conversation shape. @@ -71,7 +71,7 @@ const result = await workflow('my-workflow') .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) .run({ cwd: process.cwd() }); - console.log('Result:', result.status); +console.log('Result:', result.status); ``` ## Quick Reference (Conversation shape) @@ -85,7 +85,7 @@ import { ClaudeModels, CodexModels } from '@agent-relay/config'; const result = await workflow('my-workflow') .description('Multi-file change with peer review') .pattern('dag') - .channel('wf-my-feature') // dedicated channel — agents share it + .channel('wf-my-feature') // dedicated channel — agents share it .maxConcurrency(4) .timeout(3_600_000) @@ -127,13 +127,13 @@ Workers iterate based on your feedback. Exit when both files pass review.`, }) .step('impl-a-work', { agent: 'impl-a', - dependsOn: ['context'], // SAME dep as lead → starts in parallel, no deadlock + dependsOn: ['context'], // SAME dep as lead → starts in parallel, no deadlock task: `You are impl-a on #wf-my-feature. Wait for the lead's plan. Implement your assigned file. Post a completion message.
Address feedback.`, }) .step('impl-b-work', { agent: 'impl-b', - dependsOn: ['context'], // SAME dep as lead + dependsOn: ['context'], // SAME dep as lead task: `You are impl-b on #wf-my-feature. Wait for the lead's plan. Implement your assigned file. Post a completion message. Address feedback.`, }) @@ -154,7 +154,7 @@ Implement your assigned file. Post a completion message. Address feedback.`, - **Ambient awareness** — workers see each other's completion messages and start dependent work without the lead relaying. - **Lead-as-reviewer** — the lead reads actual files between rounds and posts diff-aware feedback in chat. One agent does coordination + review; no separate reviewer step. -- **Iterative correction** — when the lead pings *"impl-a, the type on line 42 is wrong"*, impl-a fixes and re-posts. No new step, no re-spawn, no `{{output}}` chaining. +- **Iterative correction** — when the lead pings _"impl-a, the type on line 42 is wrong"_, impl-a fixes and re-posts. No new step, no re-spawn, no `{{output}}` chaining. **Critical workflow rules for this shape:** @@ -168,6 +168,7 @@ See [Common Patterns → Interactive Team](#interactive-team-lead--workers-on-sh --- **Critical TypeScript rules:** + 1. Check the project's `package.json` for `"type": "module"` — if ESM, use `import` and top-level `await`. If CJS, use `require()` and wrap in `async function main()`. 2. `agent-relay run ` executes the file as a standalone subprocess — it does NOT inspect exports. The file MUST call `.run()`. 3. Use `.run({ cwd: process.cwd() })` — `createWorkflowRenderer` does not exist @@ -209,13 +210,13 @@ git add -A && git commit -m "Wave 2" Two workflows can run in parallel if they don't have write-write or write-read file conflicts: -| Touch Zone | Can Parallelize? | -|---|---| -| Different `packages/*/src/` dirs | ✅ Yes | -| Different `app/` routes | ✅ Yes | -| Same package, different subdirs | ⚠️ Usually yes | +| Touch Zone | Can Parallelize? 
| +| --------------------------------------------- | ------------------------------------------ | +| Different `packages/*/src/` dirs | ✅ Yes | +| Different `app/` routes | ✅ Yes | +| Same package, different subdirs | ⚠️ Usually yes | | Same files (shared config, root package.json) | ❌ No — sequential or same wave with merge | -| Explicit dependency | ❌ No — ordered waves | +| Explicit dependency | ❌ No — ordered waves | ### Declare File Scope for Planning @@ -223,9 +224,9 @@ Help wave planners (human or automated) understand what each workflow touches: ```typescript workflow('48-comparison-mode') - .packages(['web', 'core']) // monorepo packages touched - .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize - .requiresBefore(['46-admin-dashboard']) // explicit ordering constraint + .packages(['web', 'core']) // monorepo packages touched + .isolatedFrom(['49-feedback-system']) // explicitly safe to parallelize + .requiresBefore(['46-admin-dashboard']); // explicit ordering constraint ``` ### Within-Workflow Parallelism @@ -246,11 +247,13 @@ Use shared `dependsOn` to fan out independent sub-tasks: ### Impact Real-world example (Relayed — 60 workflows): + - **Sequential**: ~30 min × 60 = **30 hours** - **Parallel waves (4-6 per wave)**: ~12 waves × 35 min = **~7 hours** (4x faster) - **Aggressive parallelism (8-way)**: **~4 hours** (7.5x faster) --- + ## Failure Prevention These workflow files are easy to break in ways that only appear mid-run. Follow these rules when authoring or editing workflow `.ts` files. @@ -301,7 +304,7 @@ If commit or PR creation is intentionally outside the workflow, say that directl ### 2. Avoid raw fenced code blocks inside workflow task template literals -Raw triple-backtick code fences inside large inline `task: \`...\`` template strings are fragile and can break outer TypeScript parsing, especially when they contain language tags like `swift` or `diff`. 
+Raw triple-backtick code fences inside large inline `` task: `...` `` template strings are fragile and can break outer TypeScript parsing, especially when they contain language tags like `swift` or `diff`. Preferred options, in order: @@ -411,12 +414,12 @@ This pattern is specifically recommended over `git commit -m "$(cat <<'EOF' ... ### 2d. Template-literal escape sequences are processed once before the string is rendered -If your file generates code as a giant template literal (the pattern used by `packages/core/src/bootstrap/script-generator.ts` in cloud), every backslash in that template gets processed by JavaScript before the string is returned. This silently breaks regexes and escape sequences that are meant to appear in the *generated* output. +If your file generates code as a giant template literal (the pattern used by `packages/core/src/bootstrap/script-generator.ts` in cloud), every backslash in that template gets processed by JavaScript before the string is returned. This silently breaks regexes and escape sequences that are meant to appear in the _generated_ output. Specifically: - `\s` is not a recognized string escape → the backslash is stripped → `\s` renders as a literal `s` -- `\b` *is* a recognized string escape (backspace, U+0008) → `\b` renders as a backspace character in the output +- `\b` _is_ a recognized string escape (backspace, U+0008) → `\b` renders as a backspace character in the output - `\n`, `\t`, `\r`, `\\`, `\0`, `\uXXXX`, `\xXX` all get resolved at template time The footgun: the outer TypeScript compiles cleanly, the rendered code parses and runs, and the regex/escape just never matches what the author intended. See AgentWorkforce/cloud#113 for the exact incident (`hasConfigExport = /^export\s+.../m` silently became `/^exports+.../m` in the generated bootstrap, making every TS workflow fall through to the standalone-script fallback). @@ -427,7 +430,7 @@ Guidelines: 2.
If you want to write a long string-literal newline into the output, `'\\n'` in the template renders to `'\n'` in the output, which the runtime JS interprets as a newline. Using a literal `'\n'` would render an actual newline into the JS source — visually messy and sometimes surprising. 3. If you add anything non-trivial to a generator file that returns a big template literal, add a unit test that calls the generator with canonical inputs and asserts something about the rendered output — either exact string matches or, for regexes, `eval`/construct the regex and test it against known samples. See `tests/orchestrator/script-generator.test.ts` in cloud for prior art. -Task-prompt workaround: for agent-relay workflow *task prompts* (where the contents go into a template literal but the inner content is plain text for an LLM), it's often cleaner to build the string as an array and `.join('\n')` at the boundary. That sidesteps the "does this backslash survive?" question entirely — no backslashes in the source, no processing to reason about. Several workflows in `cloud/workflows/` use this pattern (see the sage migration PRs). +Task-prompt workaround: for agent-relay workflow _task prompts_ (where the contents go into a template literal but the inner content is plain text for an LLM), it's often cleaner to build the string as an array and `.join('\n')` at the boundary. That sidesteps the "does this backslash survive?" question entirely — no backslashes in the source, no processing to reason about. Several workflows in `cloud/workflows/` use this pattern (see the sage migration PRs). ### 3. Keep final verification boring and deterministic @@ -494,10 +497,10 @@ Do not assume users will infer the behavior. In particular, `--wave N` should be When a workflow fails at step X and you want to re-run it after editing the workflow file, the flag choice matters: -| Flag | Reads workflow file fresh? | Uses cached step outputs? 
| -|---|---|---| -| `--resume ` | ❌ replays **stored config from DB** | ✅ from same run id | -| `--start-from --previous-run-id ` | ✅ reads fresh file | ✅ from previous run id's cached outputs | +| Flag | Reads workflow file fresh? | Uses cached step outputs? | +| -------------------------------------------- | ------------------------------------ | ---------------------------------------- | +| `--resume ` | ❌ replays **stored config from DB** | ✅ from same run id | +| `--start-from --previous-run-id ` | ✅ reads fresh file | ✅ from previous run id's cached outputs | **Rule:** if you edited the workflow file to fix the failing step, use `--start-from --previous-run-id `, **not** `--resume `. `--resume` pulls the entire workflow config from the run's DB record and replays it — your edits to the workflow file are ignored, and the step re-runs with its original (broken) definition. @@ -607,6 +610,7 @@ For bug-fix or reliability workflows, do **not** stop at unit or integration tes When the bug involves install, bootstrap, PATH/shims, auth, brokers, background services, OS-specific packaging, or first-run UX, add a second workflow (or second phase) that validates the fix in a **fresh environment**. Preferred order of proving environments: + 1. disposable sandbox / cloud workspace 2. Docker / containerized environment 3. 
fresh local shell with isolated paths @@ -614,6 +618,7 @@ Preferred order of proving environments: ### Meta-workflow guidance If the right proving environment is unclear, first write a **meta-workflow** that: + - compares candidate validation environments - defines the acceptance contract - chooses the best swarm pattern @@ -629,15 +634,15 @@ A workflow whose final artifact is "a clean working tree on a sandbox you'll thr The primitive picks the right transport at runtime: -| Where the workflow runs | Transport `createGitHubStep` uses | What you provide | -|---|---|---| -| Local (`agent-relay run`) | `gh` CLI | `gh auth status` works | +| Where the workflow runs | Transport `createGitHubStep` uses | What you provide | +| ----------------------------------------------- | ------------------------------------------- | ----------------------------------- | +| Local (`agent-relay run`) | `gh` CLI | `gh auth status` works | | Cloud (`agent-relay cloud run`) — tenant-scoped | Nango → workspace's GitHub App installation | Nothing — cloud injects credentials | -| Cloud — fallback | Relay-cloud GitHub proxy | Nothing — cloud injects credentials | +| Cloud — fallback | Relay-cloud GitHub proxy | Nothing — cloud injects credentials | You write **one** workflow. The same `createPR` step opens a PR via your local `gh` when you iterate on it on a laptop, and via the workspace's GitHub App when the same file runs in `agent-relay cloud run`. No branching by environment, no env-var sniffing in your task strings, no "this part only works in cloud" caveats. That's the whole point of the adapter. -> **Phase C interaction (cloud only):** `agent-relay cloud run` already auto-pushes per-`paths[]` diffs as separate PRs after the workflow callback when the repos are allowlisted (see `pushedTo` in the run record). Phase C is the *catch-all* — if your workflow does nothing else, you still get one PR per declared path. 
Use `createGitHubStep` **on top of** that when you need PRs the catch-all can't produce: cross-cutting issues, follow-up tracking issues, opening one PR that spans multiple paths, draft PRs you want labeled/assigned in specific ways, or PRs against a repo you didn't `paths[]` in. +> **Phase C interaction (cloud only):** `agent-relay cloud run` already auto-pushes per-`paths[]` diffs as separate PRs after the workflow callback when the repos are allowlisted (see `pushedTo` in the run record). Phase C is the _catch-all_ — if your workflow does nothing else, you still get one PR per declared path. Use `createGitHubStep` **on top of** that when you need PRs the catch-all can't produce: cross-cutting issues, follow-up tracking issues, opening one PR that spans multiple paths, draft PRs you want labeled/assigned in specific ways, or PRs against a repo you didn't `paths[]` in. ### The minimal "open a PR" recipe @@ -656,40 +661,49 @@ await workflow('feature-x') }) // Branch off main on the remote. - .step('create-branch', createGitHubStep({ - dependsOn: ['write-marker'], - action: 'createBranch', - repo: REPO, - params: { branch: BRANCH, source: 'main' }, - })) + .step( + 'create-branch', + createGitHubStep({ + dependsOn: ['write-marker'], + action: 'createBranch', + repo: REPO, + params: { branch: BRANCH, source: 'main' }, + }) + ) // Commit the change to the branch via Contents API. - .step('commit-change', createGitHubStep({ - dependsOn: ['create-branch'], - action: 'createFile', - repo: REPO, - params: { - path: 'CHANGELOG.md', - branch: BRANCH, - content: '', - message: 'chore: changelog entry', - }, - })) + .step( + 'commit-change', + createGitHubStep({ + dependsOn: ['create-branch'], + action: 'createFile', + repo: REPO, + params: { + path: 'CHANGELOG.md', + branch: BRANCH, + content: '', + message: 'chore: changelog entry', + }, + }) + ) // Open the PR. This is the load-bearing step. 
- .step('open-pr', createGitHubStep({ - dependsOn: ['commit-change'], - action: 'createPR', - repo: REPO, - params: { - title: 'feat: ship feature X', - head: BRANCH, - base: 'main', - body: '## Summary\n\n- ...\n\n## Test plan\n\n- [x] ...', - draft: false, - }, - output: { mode: 'data', format: 'json', path: 'html_url' }, - })) + .step( + 'open-pr', + createGitHubStep({ + dependsOn: ['commit-change'], + action: 'createPR', + repo: REPO, + params: { + title: 'feat: ship feature X', + head: BRANCH, + base: 'main', + body: '## Summary\n\n- ...\n\n## Test plan\n\n- [x] ...', + draft: false, + }, + output: { mode: 'data', format: 'json', path: 'html_url' }, + }) + ) .run({ cwd: process.cwd() }); ``` @@ -716,9 +730,10 @@ await workflow('feature-x') Use `{{steps.STEP_NAME.output}}` in a downstream step's task to inject the prior step's terminal output. -> **Mental model:** this is a **Unix pipe**, not agent communication. `{{steps.A.output}}` flowing into step B is `A | B` — A is dead by the time B reads its stdout. There is no chat, no feedback, no addressing. If your workflow's coordination story is *only* output chaining, you're using the relay as transport, not as a coordination layer. See **[Choose Your Coordination Style](#choose-your-coordination-style--conversation-vs-pipeline)** before defaulting to this. +> **Mental model:** this is a **Unix pipe**, not agent communication. `{{steps.A.output}}` flowing into step B is `A | B` — A is dead by the time B reads its stdout. There is no chat, no feedback, no addressing. If your workflow's coordination story is _only_ output chaining, you're using the relay as transport, not as a coordination layer. See **[Choose Your Coordination Style](#choose-your-coordination-style--conversation-vs-pipeline)** before defaulting to this. 
**Only chain output from clean sources:** + - Deterministic steps (shell commands — always clean) - Non-interactive agents (`preset: 'worker'` — clean stdout) @@ -786,30 +801,38 @@ Agent-level methods are also available: ```typescript const agent = await relay.claude.spawn({ name: 'auditor', channels: ['ch-a'] }); -await agent.subscribe(['ch-b']); // now subscribed to ch-a and ch-b -await agent.mute('ch-a'); // ch-a messages silenced (still in history) -await agent.unmute('ch-a'); // ch-a messages resume -await agent.unsubscribe(['ch-b']); // leaves ch-b -console.log(agent.channels); // ['ch-a'] -console.log(agent.mutedChannels); // [] +await agent.subscribe(['ch-b']); // now subscribed to ch-a and ch-b +await agent.mute('ch-a'); // ch-a messages silenced (still in history) +await agent.unmute('ch-a'); // ch-a messages resume +await agent.unsubscribe(['ch-b']); // leaves ch-b +console.log(agent.channels); // ['ch-a'] +console.log(agent.mutedChannels); // [] ``` #### Semantics -| Operation | Channel membership | PTY injection | History access | -|---------------|-------------------|---------------|----------------| -| `subscribe` | Yes | Yes | Yes | -| `unsubscribe` | No | No | No (leaves) | -| `mute` | Yes (stays) | No (silenced) | Yes (can query)| -| `unmute` | Yes | Yes (resumes) | Yes | +| Operation | Channel membership | PTY injection | History access | +| ------------- | ------------------ | ------------- | --------------- | +| `subscribe` | Yes | Yes | Yes | +| `unsubscribe` | No | No | No (leaves) | +| `mute` | Yes (stays) | No (silenced) | Yes (can query) | +| `unmute` | Yes | Yes (resumes) | Yes | #### Events ```typescript -relay.onChannelSubscribed = (agent, channels) => { /* ... */ }; -relay.onChannelUnsubscribed = (agent, channels) => { /* ... */ }; -relay.onChannelMuted = (agent, channel) => { /* ... */ }; -relay.onChannelUnmuted = (agent, channel) => { /* ... */ }; +relay.onChannelSubscribed = (agent, channels) => { + /* ... 
*/ +}; +relay.onChannelUnsubscribed = (agent, channels) => { + /* ... */ +}; +relay.onChannelMuted = (agent, channel) => { + /* ... */ +}; +relay.onChannelUnmuted = (agent, channel) => { + /* ... */ +}; ``` #### When to Use in Workflows @@ -822,6 +845,7 @@ relay.onChannelUnmuted = (agent, channel) => { /* ... */ }; #### What This Eliminates With broker-managed subscriptions, you no longer need: + 1. Client-side persona filtering (`personaNames.has(from)` checks) 2. Channel prefix regex for message routing 3. Manual peer fanout (iterating agents to forward messages) @@ -870,12 +894,12 @@ relay.mute({ agent: string, channel: string }): Promise relay.unmute({ agent: string, channel: string }): Promise ``` -| Preset | Interactive | Relay access | Use for | -| ---------- | ------------- | ------------ | ---------------------------------------------------- | -| `lead` | yes (PTY) | yes | Coordination, monitoring channels | -| `worker` | no (subprocess) | no | Bounded tasks, structured stdout | -| `reviewer` | no (subprocess) | no | Reading artifacts, producing verdicts | -| `analyst` | no (subprocess) | no | Reading code/files, writing findings | +| Preset | Interactive | Relay access | Use for | +| ---------- | --------------- | ------------ | ------------------------------------- | +| `lead` | yes (PTY) | yes | Coordination, monitoring channels | +| `worker` | no (subprocess) | no | Bounded tasks, structured stdout | +| `reviewer` | no (subprocess) | no | Reading artifacts, producing verdicts | +| `analyst` | no (subprocess) | no | Reading code/files, writing findings | Non-interactive presets run via one-shot mode (`claude -p`, `codex exec`). Output is clean and available via `{{steps.X.output}}`. @@ -973,17 +997,17 @@ Edit files as assigned. Report completion. 
Fix issues from feedback.`, **When to use interactive team vs one-shot DAG:** -| Scenario | Pattern | -|----------|---------| -| 4+ files, likely needs iteration | Interactive team | -| Simple edits, well-specified | One-shot DAG with `preset: 'worker'` | -| Cross-agent review feedback loop | Interactive team | -| Independent tasks, no coordination | Fan-out with non-interactive workers | -| Anything where the answer to "could this be `cmd1 | cmd2`?" is *no* | Interactive team | +| Scenario | Pattern | +| ------------------------------------------------- | ------------------------------------ | +| 4+ files, likely needs iteration | Interactive team | +| Simple edits, well-specified | One-shot DAG with `preset: 'worker'` | +| Cross-agent review feedback loop | Interactive team | +| Independent tasks, no coordination | Fan-out with non-interactive workers | +| Anything where the answer to "could this be `cmd1 \| cmd2`?" is _no_ | Interactive team | ### Chat-Native Coordination Recipes -Once you're in the Interactive Team shape, the channel is your coordination medium. These are recipes for using it well — they are *prompt-authoring patterns*, not new SDK surface. All of them assume interactive agents (no `preset`) sharing a `.channel('wf-...')`. +Once you're in the Interactive Team shape, the channel is your coordination medium. These are recipes for using it well — they are _prompt-authoring patterns_, not new SDK surface. All of them assume interactive agents (no `preset`) sharing a `.channel('wf-...')`. #### 1. Question / Answer (blocking ask) @@ -1004,7 +1028,7 @@ channel. If no reply arrives in 5 minutes, @-mention the lead.`, #### 2. Broadcast / Ack -When a lead needs *N workers to confirm receipt* before proceeding (e.g., to make sure the plan was actually read), require explicit acks. +When a lead needs _N workers to confirm receipt_ before proceeding (e.g., to make sure the plan was actually read), require explicit acks.
```typescript .step('lead-coordinate', { @@ -1036,7 +1060,7 @@ commit SHA. Then wait for @reviewer's verdict in channel. }) ``` -**Pattern note:** the reviewer must read the files themselves — never let the worker paste the diff into chat. Channel messages are for *coordination*, not *content*. That's also what keeps you under output-token limits. +**Pattern note:** the reviewer must read the files themselves — never let the worker paste the diff into chat. Channel messages are for _coordination_, not _content_. That's also what keeps you under output-token limits. #### 4. Standup / Status Probe @@ -1060,7 +1084,7 @@ reassign their work to a peer.`, #### 5. Hand-Off with Context -When work flows from agent A to agent B *during* a workflow (not just between steps), have A post a structured handoff message so B doesn't re-derive context. +When work flows from agent A to agent B _during_ a workflow (not just between steps), have A post a structured handoff message so B doesn't re-derive context. ```typescript .step('impl-a-work', { @@ -1077,15 +1101,15 @@ Tests: . 
Commit: ."`, #### Picking a recipe -| Need | Recipe | -|---|---| -| One agent needs an answer from another at runtime | **Q/A** | -| Lead needs to confirm workers received the plan | **Broadcast/Ack** | -| Agent-to-agent code review | **Peer Review Handoff** | -| Long-running team, want stalled-worker visibility | **Standup/Probe** | +| Need | Recipe | +| ------------------------------------------------- | ------------------------- | +| One agent needs an answer from another at runtime | **Q/A** | +| Lead needs to confirm workers received the plan | **Broadcast/Ack** | +| Agent-to-agent code review | **Peer Review Handoff** | +| Long-running team, want stalled-worker visibility | **Standup/Probe** | | Sequential agent work that needs context curation | **Hand-Off with Context** | -> **Authoring rule:** if your workflow has interactive agents on a channel but their task strings don't *instruct them to talk to each other*, you're not using the chat primitive — you've just paid the overhead of starting it. Either add an explicit recipe above, or drop to `preset: 'worker'` and pipeline-shape. +> **Authoring rule:** if your workflow has interactive agents on a channel but their task strings don't _instruct them to talk to each other_, you're not using the chat primitive — you've just paid the overhead of starting it. Either add an explicit recipe above, or drop to `preset: 'worker'` and pipeline-shape. ### Pipeline (sequential handoff) @@ -1164,6 +1188,7 @@ steps: ``` **Key rules:** + - Read the file in a deterministic step right before the edit (not all files upfront) - Tell the agent "Only edit this one file" to prevent it touching other files - Verify with `git diff --quiet` after each edit — fail fast if the agent didn't write @@ -1188,6 +1213,7 @@ After any step that creates files, add a deterministic `file_exists` check befor ``` **Rules for file-writing tasks:** + 1. Use full paths from project root — say `src/auth/credentials.ts`, not `credentials.ts` 2. 
Add `IMPORTANT: Write the file to disk. Do NOT output to stdout.` 3. Use `file_exists` verification for creation steps (not just `exit_code`) @@ -1222,6 +1248,7 @@ steps: **One agent, one deliverable.** A step's task prompt should be 10-20 lines max. Split into a **lead + workers team** when: + - The task requires a 50+ line prompt - The deliverable is multiple files that must be consistent - You need one agent to verify another's output @@ -1238,7 +1265,7 @@ steps: - name: track-worker-1-impl agent: track-worker-1 - dependsOn: [prior-step] # same dep as lead — starts concurrently + dependsOn: [prior-step] # same dep as lead — starts concurrently task: | Join #my-track. track-lead will post your assignment. Implement the file as directed. @@ -1246,7 +1273,7 @@ steps: type: exit_code - name: next-step - dependsOn: [track-lead-coord] # downstream depends on lead, not workers + dependsOn: [track-lead-coord] # downstream depends on lead, not workers ``` ## Supervisor Pattern @@ -1255,69 +1282,69 @@ When you set `.pattern('supervisor')` (or `hub-spoke`, `fan-out`), the runner au **Auto-hardening only activates for hub patterns** — not `pipeline` or `dag`. 
-| Use case | Pattern | Why | -|----------|---------|-----| -| Sequential, no monitoring | `pipeline` | Simple, no overhead | -| Workers need oversight | `supervisor` | Auto-owner monitors | -| Local/small models | `supervisor` | Supervisor catches stuck workers | -| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | +| Use case | Pattern | Why | +| ------------------------- | ------------------- | -------------------------------- | +| Sequential, no monitoring | `pipeline` | Simple, no overhead | +| Workers need oversight | `supervisor` | Auto-owner monitors | +| Local/small models | `supervisor` | Supervisor catches stuck workers | +| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | ## Concurrency **Cap `maxConcurrency` at 4-6.** Spawning 10+ agents simultaneously causes broker timeouts. | Parallel agents | `maxConcurrency` | -|-----------------|-------------------| -| 2-4 | 4 (default safe) | -| 5-10 | 5 | -| 10+ | 6-8 max | +| --------------- | ---------------- | +| 2-4 | 4 (default safe) | +| 5-10 | 5 | +| 10+ | 6-8 max | ## Common Mistakes -| Mistake | Fix | -|---------|-----| -| Treating relay as transport, not as a coordination layer (every step is `preset: 'worker'`, every handoff is `{{steps.X.output}}`) | Default to **Conversation shape** for non-trivial work — interactive agents on a shared channel. Pipeline-shape is only correct when the work could be expressed as a `bash | bash | bash` pipe. 
| -| Interactive agents on a channel whose task strings don't tell them to talk to each other | Pick a [Chat-Native Coordination Recipe](#chat-native-coordination-recipes) (Q/A, Broadcast/Ack, Peer Review, Standup, Hand-Off) and bake it into the task prompt — otherwise you're paying for a chat substrate you're not using | -| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | -| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | -| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | -| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | -| Adding exit instructions to tasks | Runner handles self-termination automatically | -| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | -| Using `general` channel | Set `.channel('wf-name')` for isolation | -| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | -| Requiring exact sentinel as only completion gate | Use `exit_code` or `file_exists` verification | -| Writing 100-line task prompts | Split into lead + workers on a channel | -| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | -| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | -| Workers depending on lead step (deadlock) | Both depend on shared context step | -| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | -| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. 
Use `.pattern('supervisor')` | -| Workers without `preset: 'worker'` in one-shot DAG lead+worker flows | Add preset for clean stdout when chaining `{{steps.X.output}}` (not needed for interactive team patterns) | -| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | -| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | -| Using `require()` in ESM projects | Check `package.json` for `"type": "module"` — use `import` if ESM | -| Wrapping in `async function main()` in ESM | ESM supports top-level `await` — no wrapper needed | -| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | -| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` — the file must call `.run()`, not just export config | -| Relative import `'../workflows/builder.js'` | Use `import { workflow } from '@agent-relay/sdk/workflows'` | -| Hardcoded model strings (`model: 'opus'`) | Use constants: `import { ClaudeModels } from '@agent-relay/config'` → `model: ClaudeModels.OPUS` | -| Thinking `agent-relay run` inspects exports | It executes the file as a subprocess. Only `.run()` invocations trigger steps | -| `pattern('single')` on cloud runner | Not supported — use `dag` | -| `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag` | -| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | -| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | -| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | -| Relying on agents to `git commit` | Agents emit markers without running git. 
Use deterministic commit step | -| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | -| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | -| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | -| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | -| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | -| Using `preset: 'worker'` for Codex in *interactive team* patterns when coordination is needed | Codex interactive mode works fine with PTY channel injection. Drop the preset for interactive team patterns (keep it for one-shot DAG workers where clean stdout matters) | -| Separate reviewer agent from lead in interactive team | Merge lead + reviewer into one interactive Claude agent — reviews between rounds, fewer agents | -| Not printing PR URL after `createGitHubStep({ action: 'createPR' })` | Capture `html_url` with `output: { mode: 'data', format: 'json', path: 'html_url' }` and echo or write it in a final deterministic step | -| Workflow ending without worktree + PR for cross-repo changes | Add `setup-worktree` at start and `push-and-pr` + `cleanup-worktree` at end | +| Mistake | Fix | +| ---------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Treating relay as transport, not as a coordination layer (every step is `preset: 
'worker'`, every handoff is `{{steps.X.output}}`) | Default to **Conversation shape** for non-trivial work — interactive agents on a shared channel. Pipeline-shape is only correct when the work could be expressed as a `bash \| bash \| bash` pipe. | +| Interactive agents on a channel whose task strings don't tell them to talk to each other | Pick a [Chat-Native Coordination Recipe](#chat-native-coordination-recipes) (Q/A, Broadcast/Ack, Peer Review, Standup, Hand-Off) and bake it into the task prompt — otherwise you're paying for a chat substrate you're not using | +| All workflows run sequentially | Group independent workflows into parallel waves (4-7x speedup) | +| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency | +| Self-review step with no timeout | Set `timeout: 300_000` (5 min) — Codex hangs in non-interactive review | +| One giant workflow per feature | Split into smaller workflows that can run in parallel waves | +| Adding exit instructions to tasks | Runner handles self-termination automatically | +| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only | +| Using `general` channel | Set `.channel('wf-name')` for isolation | +| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet | +| Requiring exact sentinel as only completion gate | Use `exit_code` or `file_exists` verification | +| Writing 100-line task prompts | Split into lead + workers on a channel | +| `maxConcurrency: 16` with many parallel steps | Cap at 5-6 | +| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}` | +| Workers depending on lead step (deadlock) | Both depend on shared context step | +| `fan-out`/`hub-spoke` for simple parallel workers | Use `dag` instead | +| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. 
Use `.pattern('supervisor')` | +| Workers without `preset: 'worker'` in one-shot DAG lead+worker flows | Add preset for clean stdout when chaining `{{steps.X.output}}` (not needed for interactive team patterns) | +| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators | +| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default | +| Using `require()` in ESM projects | Check `package.json` for `"type": "module"` — use `import` if ESM | +| Wrapping in `async function main()` in ESM | ESM supports top-level `await` — no wrapper needed | +| Using `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })` | +| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` — the file must call `.run()`, not just export config | +| Relative import `'../workflows/builder.js'` | Use `import { workflow } from '@agent-relay/sdk/workflows'` | +| Hardcoded model strings (`model: 'opus'`) | Use constants: `import { ClaudeModels } from '@agent-relay/config'` → `model: ClaudeModels.OPUS` | +| Thinking `agent-relay run` inspects exports | It executes the file as a subprocess. Only `.run()` invocations trigger steps | +| `pattern('single')` on cloud runner | Not supported — use `dag` | +| `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag` | +| Invalid verification type (`type: 'deterministic'`) | Only `exit_code`, `output_contains`, `file_exists`, `custom` are valid | +| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'` | +| Single step editing 4+ files | Agents modify 1-2 then exit. Split to one file per step with verify gates | +| Relying on agents to `git commit` | Agents emit markers without running git. 
Use deterministic commit step | +| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written | +| Manual peer fanout in `handleChannelMessage()` | Use broker-managed channel subscriptions — broker fans out to all subscribers automatically | +| Client-side `personaNames.has(from)` filtering | Use `relay.subscribe()`/`relay.unsubscribe()` — only subscribed agents receive messages | +| Agents receiving noisy cross-channel messages during focused work | Use `relay.mute({ agent, channel })` to silence non-primary channels without leaving them | +| Hardcoding all channels at spawn time | Use `agent.subscribe()` / `agent.unsubscribe()` for dynamic channel membership post-spawn | +| Using `preset: 'worker'` for Codex in _interactive team_ patterns when coordination is needed | Codex interactive mode works fine with PTY channel injection. Drop the preset for interactive team patterns (keep it for one-shot DAG workers where clean stdout matters) | +| Separate reviewer agent from lead in interactive team | Merge lead + reviewer into one interactive Claude agent — reviews between rounds, fewer agents | +| Not printing PR URL after `createGitHubStep({ action: 'createPR' })` | Capture `html_url` with `output: { mode: 'data', format: 'json', path: 'html_url' }` and echo or write it in a final deterministic step | +| Workflow ending without worktree + PR for cross-repo changes | Add `setup-worktree` at start and `push-and-pr` + `cleanup-worktree` at end | ## YAML Alternative From b7f77c7f482a7dc2e8ec4c9c86b13009efd79ea9 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Sat, 9 May 2026 11:24:56 +0200 Subject: [PATCH 4/4] fix: address PR feedback (timeouts, duplicate bundle field) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - verify-publish-sdk.yml: add --connect-timeout 5 / --max-time 15 to the tarball HEAD probe so a network stall fails fast instead of distorting the backoff loop (CodeRabbit). 
- package.json: remove duplicate "bundleDependencies" block — the file already declared "bundledDependencies" with the same two packages (Devin Review). Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/verify-publish-sdk.yml | 3 ++- package.json | 6 +----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/verify-publish-sdk.yml b/.github/workflows/verify-publish-sdk.yml index b46a3533b..8d7b3a2bb 100644 --- a/.github/workflows/verify-publish-sdk.yml +++ b/.github/workflows/verify-publish-sdk.yml @@ -129,7 +129,8 @@ jobs: if [ -z "$TARBALL_URL" ]; then missing+=("${EXPECTED_BROKER}@$VERSION (no tarball url)") else - STATUS=$(curl -s -o /dev/null -w "%{http_code}" -I "$TARBALL_URL" || echo "000") + STATUS=$(curl -sS -o /dev/null -w "%{http_code}" -I \ + --connect-timeout 5 --max-time 15 "$TARBALL_URL" || echo "000") if [ "$STATUS" != "200" ]; then missing+=("${EXPECTED_BROKER}@$VERSION (tarball HTTP $STATUS)") fi diff --git a/package.json b/package.json index e9bc02e69..3cdcc4727 100644 --- a/package.json +++ b/package.json @@ -204,9 +204,5 @@ "flatted": "^3.4.2", "react": "^18.3.1", "react-dom": "^18.3.1" - }, - "bundleDependencies": [ - "@relaycast/sdk", - "@relayfile/local-mount" - ] + } }