diff --git a/.claude/rules/docs-sync.md b/.claude/rules/docs-sync.md index 85f3b435a..b7423b243 100644 --- a/.claude/rules/docs-sync.md +++ b/.claude/rules/docs-sync.md @@ -1,21 +1,30 @@ -# Documentation Sync Rule - -The docs exist in two locations that **must stay in sync**: - -- `web/content/docs/*.mdx` — MDX source (used by the Next.js web app) -- `docs/*.md` — Plain markdown mirror (for LLMs, CLI users, GitHub readers) - -## Rules - -1. **Any change to an `.mdx` file must be mirrored to the corresponding `.md` file**, and vice versa. -2. The markdown files should have the same content but with MDX components converted to plain markdown: - - `` / `` → remove (just keep the code blocks) - - `` → `> **Note:**` - - `` → `> **Warning:**` - - `` / `` → use headers or separate code blocks - - Frontmatter (`---` YAML block) → remove from `.md` files -3. **File mapping** (flat structure, no subdirectories): - - `web/content/docs/{slug}.mdx` ↔ `docs/{slug}.md` - - e.g. `web/content/docs/reference-sdk.mdx` ↔ `docs/reference-sdk.md` -4. If you add a new `.mdx` doc, create the corresponding `.md` mirror. -5. If you update default values, API signatures, or examples — update **both** files. +# Documentation Rule + +## Public docs live in `web/content/docs/*.mdx` + +This is the single source of truth — the Next.js web app under +`web/` reads this directory directly to build the published docs +site. New pages go here, period. + +When you add a new page: + +- Create `web/content/docs/{slug}.mdx` with the standard frontmatter + (`title:` and `description:`). +- Add an entry to the navigation in `web/lib/docs-nav.ts` under the + appropriate group, OR to the `ALL_SLUGS` "hidden but routable" + list if it shouldn't appear in the sidebar. + +That's it. Don't create or update files in the top-level `docs/` +directory. + +## The top-level `docs/` directory is legacy + +It contains a partial mirror of a handful of MDX pages converted to +plain markdown. 
It was originally maintained as an "LLMs / CLI users +/ GitHub readers" alternate, but it drifted out of sync long ago +(5 pages survive vs 40+ in `web/content/docs/`) and is no longer +authoritative. Do not add new files to it, and do not "mirror" your +MDX changes into it. + +Existing pages there will be cleaned up separately. Treat the +directory as read-only legacy until that happens. diff --git a/web/content/docs/reference-broker-api.mdx b/web/content/docs/reference-broker-api.mdx new file mode 100644 index 000000000..5af68886b --- /dev/null +++ b/web/content/docs/reference-broker-api.mdx @@ -0,0 +1,247 @@ +--- +title: 'Broker HTTP / WS API' +description: 'Reference for the listen API that the broker exposes for dashboards, the CLI, and custom integrations.' +--- + +The broker daemon (`agent-relay-broker`) exposes an HTTP + WebSocket +listen API once it's running. This is the surface that the dashboard, +the `agent-relay` CLI, the SDKs, and any custom integration use to +spawn agents, inject input into PTYs, observe live events, and +shut the broker down. + +## Base URL and port + +The listen API binds to the port you pass to `agent-relay up` (default +**3888**), on `127.0.0.1` by default. Bind to a non-loopback address +with `--api-bind` if you need remote access: + +```bash +agent-relay up --port 3888 # local only +agent-relay up --port 3888 --api-bind 0.0.0.0 # accept remote +``` + +All routes below live under `http://<host>:<port>`. + +## Authentication + +The broker requires an API key on every protected route. Pass it as +either header: + +``` +X-API-Key: <token> +Authorization: Bearer <token> +``` + +The expected token is read from the `RELAY_BROKER_API_KEY` environment +variable. If that variable is unset, the broker runs **unauthenticated** +— protected routes accept any request. In production, always set it. + +The only route exempt from auth is `GET /health`. 
+ +## Routes + +### Health and configuration + +| Method | Path | Purpose | +| ------ | ---- | ------- | +| `GET` | `/health` | Liveness probe + workspace/startup status. Unauthenticated. | +| `GET` | `/api/session` | Broker version, protocol version, persist/ephemeral mode, uptime. | +| `POST` | `/api/session/renew` | Renew the broker lease (persist mode only). | +| `GET` | `/api/config` | Relaycast workspace key and workspace memberships. | +| `GET` | `/api/metrics` | Broker metrics. Optional `?agent=<name>` filter. | +| `GET` | `/api/status` | Aggregate broker status. | +| `GET` | `/api/crash-insights` | Recent crash diagnostics. | +| `GET` | `/api/history/stats` | Stub message-history counters. | +| `POST` | `/api/preflight` | Check that each `{ name, cli }` agent can be spawned. | +| `POST` | `/api/shutdown` | Graceful broker shutdown. | + +### Agent lifecycle + +| Method | Path | Purpose | +| ------ | ---- | ------- | +| `POST` | `/api/spawn` | Spawn an agent as a child of the broker. | +| `GET` | `/api/spawned` | List running agents. | +| `DELETE` | `/api/spawned/{name}` | Release / kill an agent. Optional body `{ "reason": "..." }`. | +| `POST` | `/api/spawned/{name}/model` | Change an agent's model. Body `{ "model": "...", "timeoutMs"?: number }`. | +| `POST` | `/api/spawned/{name}/subscribe` | Subscribe an agent to channels. Body `{ "channels": ["..."] }`. | +| `POST` | `/api/spawned/{name}/unsubscribe` | Unsubscribe an agent from channels. Body `{ "channels": ["..."] }`. | +| `POST` | `/api/agents/by-name/{name}/interrupt` | Interrupt an agent (not yet implemented — returns 501). 
| + +#### `POST /api/spawn` + +Body (fields accept both camelCase and snake_case): + +```json +{ + "name": "Alice", + "cli": "claude", + "model": "sonnet", + "args": ["--no-color"], + "task": "Read the README and summarize it", + "channels": ["#general"], + "cwd": "/Users/me/project", + "team": "demo", + "shadowOf": null, + "shadowMode": null, + "continueFrom": null, + "idleThresholdSecs": 0, + "skipRelayPrompt": false, + "restartPolicy": null, + "agentToken": null +} +``` + +`name` and `cli` are required. Returns `{ "success": true, ... }` on +success or `{ "success": false, "error": "..." }` with a non-2xx +status on failure. + +### PTY interaction + +| Method | Path | Purpose | +| ------ | ---- | ------- | +| `POST` | `/api/input/{name}` | Send raw bytes to an agent's PTY stdin. Body `{ "data": "..." }`. | +| `POST` | `/api/resize/{name}` | Resize an agent's PTY. Body `{ "rows": <n>, "cols": <n> }`. | +| `POST` | `/api/send` | Inject a relay message into an agent. | + +#### `POST /api/input/{name}` + +This is the keystroke channel. The `data` string is written to the +target agent's stdin verbatim — escape characters are passed through. + +```bash +curl -X POST localhost:3888/api/input/Alice \ + -H "X-API-Key: $RELAY_BROKER_API_KEY" \ + -d '{"data":"hello\n"}' +``` + +Returns 404 if the agent isn't found. + +#### `POST /api/send` + +```json +{ + "to": "Alice", + "message": "Please review PR #837", + "from": "Bob", + "thread": "thr_abc", + "workspaceId": "ws_demo", + "mode": "wait" +} +``` + +The message text field accepts any of `message`, `text`, `body`, or +`content`. The `mode` field accepts `wait` (default — queue and +inject when the agent is idle) or `steer` (inject immediately, +even mid-response). Returns 504 on a 30s broker timeout; 404 if +the target agent isn't registered. + +### Event stream + +| Method | Path | Purpose | +| ------ | ---- | ------- | +| `GET` | `/ws` | WebSocket: subscribe to broker events. 
| +| `GET` | `/api/events/replay` | HTTP-based replay of events since a sequence number. | + +#### `GET /ws` + +Upgrade to WebSocket and you'll receive every broker event as a JSON +text frame. The broker pings every 30s; respond with pong to stay +connected. + +To resume after a disconnect without missing durable events, include +the last sequence number you saw: + +``` +ws://localhost:3888/ws?sinceSeq=12345 +``` + +If the requested sequence is older than the replay buffer's window, +the first frame you receive will be: + +```json +{ "kind": "replay_gap", "requestedSinceSeq": 12345, "oldestAvailable": 14000, "seq": 14999 } +``` + +> **Note:** Two event kinds are **ephemeral** and never stored in the +> replay buffer: `worker_stream` (PTY output chunks — high frequency) +> and `delivery_active` (in-flight delivery progress). If you +> disconnect, you cannot replay them. + +#### Event kinds emitted on `/ws` + +Durable (replayable via `?sinceSeq=...`): + +| `kind` | When it fires | +| ------ | ------------- | +| `agent_spawned` | An agent was successfully spawned. | +| `agent_exit` / `agent_exited` | Worker process exited. | +| `agent_idle` | Worker has been quiet past `idleThresholdSecs`. | +| `agent_restarting` / `agent_restarted` | Worker is being restarted under the restart policy. | +| `agent_released` | Worker was released by `/api/spawned/{name}` DELETE. | +| `agent_permanently_dead` | Worker exhausted restart attempts. | +| `worker_ready` | Worker finished startup and is ready for input. | +| `worker_error` | Worker emitted an error frame. | +| `relay_inbound` | Inbound relay message routed to an agent. | +| `delivery_ack` | Message delivery acknowledged. | +| `delivery_verified` | Echo verification confirmed the message was delivered. | +| `delivery_failed` | Message delivery failed. | +| `delivery_dropped` | Delivery was dropped (e.g. agent gone). | +| `delivery_retry` | Delivery is being retried. 
| + +Ephemeral (broadcast only, no replay): + +| `kind` | When it fires | +| ------ | ------------- | +| `worker_stream` | A chunk of stdout from a wrapped CLI. Payload contains `stream` (`"stdout"`) and `chunk` (the raw bytes — typically still ANSI-escaped). | +| `delivery_active` | High-frequency progress events for in-flight deliveries. | + +## Worked example: control a spawned agent end-to-end + +```bash +KEY="$RELAY_BROKER_API_KEY" + +# 1. Start the broker +agent-relay up --port 3888 & + +# 2. Spawn Alice running claude +curl -sX POST localhost:3888/api/spawn \ + -H "X-API-Key: $KEY" \ + -d '{"name":"Alice","cli":"claude"}' + +# 3. Stream her PTY output (filter for her worker_stream frames) +websocat ws://localhost:3888/ws \ + -H "X-API-Key: $KEY" \ + | jq -r 'select(.kind=="worker_stream" and .name=="Alice") | .chunk' + +# 4. Send a keystroke to Alice's CLI +curl -sX POST localhost:3888/api/input/Alice \ + -H "X-API-Key: $KEY" \ + -d '{"data":"hello\n"}' + +# 5. Inject a relay message +curl -sX POST localhost:3888/api/send \ + -H "X-API-Key: $KEY" \ + -d '{"to":"Alice","from":"Bob","message":"please review #837"}' + +# 6. Release her +curl -sX DELETE localhost:3888/api/spawned/Alice \ + -H "X-API-Key: $KEY" +``` + +## Error envelope + +Failed responses return a consistent envelope: + +```json +{ + "error": { + "code": "agent_not_found", + "message": "no agent named Alice", + "statusCode": 404 + } +} +``` + +Common codes: `agent_not_found` (404), `invalid_request` (400), +`unsupported_operation` (400), `unauthorized` (401), `request_failed` +(400), `internal_error` (500). 
diff --git a/web/lib/docs-nav.ts b/web/lib/docs-nav.ts index 1dcc38027..43a1eb988 100644 --- a/web/lib/docs-nav.ts +++ b/web/lib/docs-nav.ts @@ -58,6 +58,7 @@ export const docsNav: NavGroup[] = [ { title: 'Cloud commands', slug: 'cli-cloud-commands' }, { title: 'On the relay', slug: 'cli-on-the-relay' }, { title: 'CLI reference', slug: 'reference-cli' }, + { title: 'Broker HTTP / WS API', slug: 'reference-broker-api' }, ], }, {