diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index ddce6fa31..c01f29238 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -1,31 +1,51 @@ {"id":"agent-relay-0bn","title":"Dashboard doesn't show real-time connection status","description":"Dashboard shows agent status from messages but doesn't show live connection status (connected/disconnected). The daemon tracks this in agents.json but dashboard could show online/offline indicators with last-seen timestamps.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T00:18:04.647965+01:00","updated_at":"2025-12-20T00:18:04.647965+01:00"} -{"id":"agent-relay-2z1","title":"ACK messages not used for reliability","description":"In connection.ts:114-116, ACK messages are accepted but not processed. The protocol supports reliable delivery with ACK/NACK but it's not implemented. Need to: (1) Track unACKed messages, (2) Implement retry logic, (3) Add configurable TTL for messages.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T00:17:43.615251+01:00","updated_at":"2025-12-20T00:17:43.615251+01:00"} +{"id":"agent-relay-0uh","title":"Add session discovery for better status output","description":"Auto-discover relay sessions via 'tmux list-sessions'. Filter to relay-* sessions and extract agent names. Enhance 'agent-relay status' command output. See docs/TMUX_IMPROVEMENTS.md for implementation details.","status":"closed","priority":2,"issue_type":"feature","assignee":"LeadDev","created_at":"2025-12-20T21:28:49.058578+01:00","updated_at":"2025-12-20T21:36:23.10406+01:00","closed_at":"2025-12-20T21:36:23.10406+01:00"} +{"id":"agent-relay-1ek","title":"Add agent registry for persistence across restarts","description":"Store agent metadata in persistent registry (agents.json): id, name, cli, workingDirectory, firstSeen, lastSeen, messagesSent, messagesReceived. Enables agent history and stats. 
See docs/TMUX_IMPROVEMENTS.md for implementation details.","notes":"No progress today; agent registry idea remains open. Consider aligning with Swarm Mail 'agents' projection + last_active tracking.","status":"open","priority":3,"issue_type":"feature","created_at":"2025-12-20T21:28:50.235444+01:00","updated_at":"2025-12-20T22:00:47.791303+01:00"} +{"id":"agent-relay-2lw","title":"Add agent metadata tracking","description":"Track program, model, task description in agents.json. Better agent discovery.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T21:36:19.741328+01:00","updated_at":"2025-12-20T21:36:19.741328+01:00"} +{"id":"agent-relay-2sn","title":"Competitive Analysis: mcp_agent_mail vs agent-relay","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-20T21:34:44.49366+01:00","updated_at":"2025-12-20T21:36:26.362391+01:00","closed_at":"2025-12-20T21:36:26.362391+01:00"} +{"id":"agent-relay-2uf","title":"Add message threading support","description":"@relay:Bob [thread:feature-123] pattern. Group related messages for better context.","notes":"Current state: thread parsing + protocol/storage support implemented (ParsedCommand.thread, SendPayload.thread, DB messages.thread + filter). Remaining: wire cmd.thread through tmux-wrapper sendRelayCommand -\u003e RelayClient.sendMessage(..., thread) and include thread in injected display/Inbox. See src/wrapper/tmux-wrapper.ts sendRelayCommand + handleIncomingMessage.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T21:36:19.631448+01:00","updated_at":"2025-12-20T21:59:53.973954+01:00"} +{"id":"agent-relay-2z1","title":"ACK messages not used for reliability","description":"In connection.ts:114-116, ACK messages are accepted but not processed. The protocol supports reliable delivery with ACK/NACK but it's not implemented. 
Need to: (1) Track unACKed messages, (2) Implement retry logic, (3) Add configurable TTL for messages.","status":"closed","priority":2,"issue_type":"feature","assignee":"LeadDev","created_at":"2025-12-20T00:17:43.615251+01:00","updated_at":"2025-12-20T21:56:07.202292+01:00","closed_at":"2025-12-20T21:56:07.202292+01:00"} {"id":"agent-relay-37i","title":"Message deduplication uses in-memory Set without limits","description":"In tmux-wrapper.ts:65, sentMessageHashes is a Set that grows unbounded. For long-running sessions, this could cause memory issues. Add: (1) Max size with LRU eviction, (2) Time-based expiration, (3) Bloom filter alternative for memory efficiency.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-20T00:18:47.229988+01:00","updated_at":"2025-12-20T00:18:47.229988+01:00"} +{"id":"agent-relay-41f","title":"BUG: better-sqlite3 bindings fail on Node 25","description":"agent-relay read command fails with 'Could not locate the bindings file' on Node v25.2.1. Need to rebuild bindings or use compatible Node version.","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-20T21:46:24.882216+01:00","updated_at":"2025-12-20T21:49:56.89756+01:00","closed_at":"2025-12-20T21:49:56.89756+01:00"} {"id":"agent-relay-47z","title":"Express 5 may have breaking changes from Express 4 patterns","description":"package.json uses express@5.2.1 which is a major version with breaking changes from Express 4. Verify: (1) Error handling middleware patterns, (2) Router behavior, (3) Body parsing (express.json vs body-parser).","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-20T00:18:49.269841+01:00","updated_at":"2025-12-20T00:18:49.269841+01:00"} {"id":"agent-relay-4e0","title":"Fix message truncation - messages cut off at source","description":"Root cause found: parser.ts:40 inline regex only captures single line. Multi-line messages are split by parsePassThrough() at line 206. 
Fix options: (1) Allow continuation lines in inline format, (2) Use block format for multi-line, (3) Add heuristic to join lines until next @relay pattern.","status":"closed","priority":2,"issue_type":"bug","assignee":"MistyShelter","created_at":"2025-12-19T23:40:35.082717+01:00","updated_at":"2025-12-20T00:03:54.806087+01:00","closed_at":"2025-12-20T00:03:54.806087+01:00"} {"id":"agent-relay-4ft","title":"Merge project info into status command","description":"","status":"closed","priority":2,"issue_type":"task","assignee":"Pruner","created_at":"2025-12-19T21:59:52.685495+01:00","updated_at":"2025-12-19T22:06:44.276187+01:00","closed_at":"2025-12-19T22:06:44.276187+01:00"} +{"id":"agent-relay-51u","title":"Outgoing messages truncated before delivery","description":"Agent messages are being cut off mid-sentence. Examples: 'Updates for mcl/2z1:' and 'Signing off. Progress report:' - both end abruptly. May be related to capture-pane buffer limits, parser issues, or injection timing. Investigate: (1) capture-pane -S - scrollback limits, (2) parser line joining, (3) message storage truncation.","notes":"PROGRESS: Reproduced truncation in tmux wrapper/relay flow. Likely due to display truncation on injection length and terminal capture limitations. No code changes yet.","status":"open","priority":1,"issue_type":"bug","created_at":"2025-12-20T21:50:11.411952+01:00","updated_at":"2025-12-20T21:58:01.000293+01:00"} {"id":"agent-relay-52d","title":"Add metrics/observability for daemon","description":"No way to monitor daemon health, message throughput, or agent activity. 
Add: (1) /metrics endpoint for Prometheus, (2) Message count/rate stats, (3) Connection lifecycle events, (4) Error rate tracking.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T00:18:48.378728+01:00","updated_at":"2025-12-20T00:18:48.378728+01:00"} {"id":"agent-relay-5af","title":"Hook doesn't integrate with daemon-based messaging","description":"hooks/inbox-check/hook.ts reads from file-based inbox but the daemon uses SQLite. When using daemon mode, the hook won't see messages. Need to: (1) Query daemon storage, (2) Or ensure inbox files are written in daemon mode too.","status":"open","priority":2,"issue_type":"bug","created_at":"2025-12-20T00:18:35.503078+01:00","updated_at":"2025-12-20T00:18:35.503078+01:00"} +{"id":"agent-relay-5fa","title":"Add exponential backoff for daemon reconnection","description":"Implement graceful reconnection with exponential backoff delays [100, 500, 1000, 2000, 5000ms]. After max attempts, operate offline gracefully. See docs/TMUX_IMPROVEMENTS.md for implementation details.","status":"closed","priority":2,"issue_type":"feature","assignee":"LeadDev","created_at":"2025-12-20T21:28:48.055013+01:00","updated_at":"2025-12-20T21:33:42.229756+01:00","closed_at":"2025-12-20T21:33:42.229756+01:00"} {"id":"agent-relay-5g0","title":"Heartbeat timeout could be more configurable","description":"In connection.ts:196, heartbeat timeout is hardcoded as 2x heartbeatMs. This should be independently configurable. 
Also, heartbeat failures immediately kill the connection - could implement exponential backoff for transient issues.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-20T00:18:03.556614+01:00","updated_at":"2025-12-20T00:18:03.556614+01:00"} +{"id":"agent-relay-68a","title":"Ask pattern: request/response messaging (replyTo + deferred)","description":"Implement an RPC-like ask() over agent-relay: sender creates a short-lived deferred handle, sends message with replyTo, receiver responds resolving the deferred. Similar to swarm-mail DurableDeferred + ask/respond.","acceptance_criteria":"- ask() returns response or times out\\n- Deferreds are persisted with TTL\\n- Works across process restarts/reconnects","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-20T21:44:33.930531+01:00","updated_at":"2025-12-20T21:44:33.930531+01:00","labels":["coordination","protocol"]} +{"id":"agent-relay-6nx","title":"Add activity state tracking for better injection timing","description":"Track active/idle/disconnected state with timestamps. When session goes idle (30s no activity), trigger message injection opportunity. Improves injection timing vs current fixed 1.5s wait. 
See docs/TMUX_IMPROVEMENTS.md for implementation details.","status":"closed","priority":2,"issue_type":"feature","assignee":"LeadDev","created_at":"2025-12-20T21:28:46.993856+01:00","updated_at":"2025-12-20T21:33:42.223042+01:00","closed_at":"2025-12-20T21:33:42.223042+01:00"} {"id":"agent-relay-6ny","title":"Fix message truncation: store full message in SQLite first, include ID in relay","description":"","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-12-19T22:04:36.168862+01:00","updated_at":"2025-12-19T22:08:28.207532+01:00","closed_at":"2025-12-19T22:08:28.207532+01:00"} {"id":"agent-relay-6rz","title":"Message injection timing can cause race conditions","description":"In tmux-wrapper.ts:564-569, injection waits for 'idle' (1.5s since last output) but this is fragile. If agent produces output during injection, messages could interleave. Consider: (1) Input buffer detection, (2) Bracketed paste mode, (3) Agent-specific injection strategies.","status":"open","priority":2,"issue_type":"bug","created_at":"2025-12-20T00:18:17.76865+01:00","updated_at":"2025-12-20T00:18:17.76865+01:00"} {"id":"agent-relay-7bp","title":"Memory storage adapter has fixed 1000 message limit","description":"In storage/adapter.ts:60-63, MemoryStorageAdapter hard-codes 1000 message limit. This should be configurable and potentially use LRU eviction instead of slice.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-20T00:17:46.132735+01:00","updated_at":"2025-12-20T00:17:46.132735+01:00"} +{"id":"agent-relay-7tu","title":"Fix storage portability: better-sqlite3 native binding failures","description":"The read command fails in some Node versions (e.g., Node 25) due to missing better-sqlite3 bindings. 
Decide path: pin supported Node LTS + ensure rebuild, or replace with a non-native option (e.g., Node sqlite, wasm, or a pure JS adapter).","design":"Implement driver selection/fallback: try better-sqlite3, fallback to node:sqlite DatabaseSync when bindings missing. Add env override AGENT_RELAY_SQLITE_DRIVER=node|better-sqlite3. Convert queries to positional params so both drivers share code.","acceptance_criteria":"- dist CLI read works on supported Node versions\\n- CI/build docs reflect supported runtime or replacement adapter","notes":"Implemented sqlite portability: dynamic better-sqlite3 import + fallback to node:sqlite (env override AGENT_RELAY_SQLITE_DRIVER). Updated sqlite adapter to use positional params for compatibility; added/updated tests; npx vitest run passes (216).","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-20T21:44:57.868091+01:00","updated_at":"2025-12-20T21:56:13.384399+01:00","closed_at":"2025-12-20T21:56:13.3844+01:00","labels":["build","storage"]} {"id":"agent-relay-7yo","title":"Update CLAUDE.md with new CLI commands","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-19T22:00:02.661859+01:00","updated_at":"2025-12-19T22:07:27.766701+01:00","closed_at":"2025-12-19T22:07:27.766701+01:00","dependencies":[{"issue_id":"agent-relay-7yo","depends_on_id":"agent-relay-85z","type":"blocks","created_at":"2025-12-19T22:00:25.731921+01:00","created_by":"daemon"},{"issue_id":"agent-relay-7yo","depends_on_id":"agent-relay-4ft","type":"blocks","created_at":"2025-12-19T22:00:25.772241+01:00","created_by":"daemon"},{"issue_id":"agent-relay-7yo","depends_on_id":"agent-relay-bd0","type":"blocks","created_at":"2025-12-19T22:00:25.80776+01:00","created_by":"daemon"},{"issue_id":"agent-relay-7yo","depends_on_id":"agent-relay-f3q","type":"blocks","created_at":"2025-12-19T22:00:25.843131+01:00","created_by":"daemon"}]} {"id":"agent-relay-85z","title":"Merge dashboard into start 
command","description":"","status":"closed","priority":2,"issue_type":"task","assignee":"InterfaceManager","created_at":"2025-12-19T21:59:51.61716+01:00","updated_at":"2025-12-19T22:06:44.27487+01:00","closed_at":"2025-12-19T22:06:44.27487+01:00"} {"id":"agent-relay-8ff","title":"Add agent list command to CLI","description":"CLI has up/down/status/read but no way to list connected agents or see message history from command line. Add: (1) agent-relay agents - list connected agents, (2) agent-relay history - show recent messages, (3) agent-relay send \u003cagent\u003e \u003cmsg\u003e - send from CLI.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T00:18:34.400432+01:00","updated_at":"2025-12-20T00:18:34.400432+01:00"} {"id":"agent-relay-8nx","title":"SIGINT/SIGTERM handlers don't await cleanup","description":"In cli/index.ts:74-77 and daemon setup, SIGINT handlers call stop() but process.exit(0) runs immediately. The stop() is async but not awaited, potentially leaving socket files or incomplete shutdown.","status":"open","priority":2,"issue_type":"bug","created_at":"2025-12-20T00:18:19.189514+01:00","updated_at":"2025-12-20T00:18:19.189514+01:00"} -{"id":"agent-relay-8z1","title":"Add CLI tests for new command 
structure","description":"","status":"in_progress","priority":2,"issue_type":"task","created_at":"2025-12-19T22:00:04.561793+01:00","updated_at":"2025-12-19T22:19:19.658435+01:00","dependencies":[{"issue_id":"agent-relay-8z1","depends_on_id":"agent-relay-85z","type":"blocks","created_at":"2025-12-19T22:00:27.632396+01:00","created_by":"daemon"},{"issue_id":"agent-relay-8z1","depends_on_id":"agent-relay-4ft","type":"blocks","created_at":"2025-12-19T22:00:27.671868+01:00","created_by":"daemon"},{"issue_id":"agent-relay-8z1","depends_on_id":"agent-relay-bd0","type":"blocks","created_at":"2025-12-19T22:00:27.713889+01:00","created_by":"daemon"},{"issue_id":"agent-relay-8z1","depends_on_id":"agent-relay-f3q","type":"blocks","created_at":"2025-12-19T22:00:27.752052+01:00","created_by":"daemon"}]} +{"id":"agent-relay-8z1","title":"Add CLI tests for new command structure","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-19T22:00:04.561793+01:00","updated_at":"2025-12-20T21:57:43.401385+01:00","dependencies":[{"issue_id":"agent-relay-8z1","depends_on_id":"agent-relay-85z","type":"blocks","created_at":"2025-12-19T22:00:27.632396+01:00","created_by":"daemon"},{"issue_id":"agent-relay-8z1","depends_on_id":"agent-relay-4ft","type":"blocks","created_at":"2025-12-19T22:00:27.671868+01:00","created_by":"daemon"},{"issue_id":"agent-relay-8z1","depends_on_id":"agent-relay-bd0","type":"blocks","created_at":"2025-12-19T22:00:27.713889+01:00","created_by":"daemon"},{"issue_id":"agent-relay-8z1","depends_on_id":"agent-relay-f3q","type":"blocks","created_at":"2025-12-19T22:00:27.752052+01:00","created_by":"daemon"}]} +{"id":"agent-relay-9uy","title":"Durable inbox state (unread/read/acked per recipient)","description":"Implement per-recipient inbox state (unread/read/acked) similar to swarm-mail’s message_recipients projection. 
Persist and query unread/urgent; expose to dashboard + CLI read/lookup.","acceptance_criteria":"- Can mark message read and acked per recipient\\n- Dashboard/API can query unreadOnly and urgentOnly\\n- State survives daemon restart","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-20T21:43:46.201965+01:00","updated_at":"2025-12-20T21:43:46.201965+01:00","labels":["durability","storage"]} {"id":"agent-relay-ahe","title":"Session resume not implemented - RESUME_TOO_OLD always sent","description":"In connection.ts:140-143, session resume tokens are received but not persisted or validated. The server always responds with RESUME_TOO_OLD. This breaks the resume capability advertised in the protocol. Need to implement token persistence and session state recovery.","status":"open","priority":2,"issue_type":"bug","created_at":"2025-12-20T00:17:30.605649+01:00","updated_at":"2025-12-20T00:17:30.605649+01:00"} {"id":"agent-relay-bd0","title":"Consolidate team-* commands under team subcommand","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-19T21:59:54.102815+01:00","updated_at":"2025-12-19T22:06:16.220013+01:00","closed_at":"2025-12-19T22:06:16.220013+01:00"} {"id":"agent-relay-bei","title":"Gemini (CleverBeacon) bash syntax errors - unexpected EOF","description":"Gemini agent gets bash errors: 'unexpected EOF while looking for matching quote' and 'syntax error: unexpected end of file' when running shell commands. Possibly related to quote escaping in the relay wrapper.","status":"closed","priority":2,"issue_type":"bug","assignee":"GraniteElk","created_at":"2025-12-19T23:40:36.464079+01:00","updated_at":"2025-12-19T23:45:46.05609+01:00","closed_at":"2025-12-19T23:45:46.05609+01:00"} {"id":"agent-relay-cli-simplify","title":"Simplify agent-relay CLI interface","description":"The CLI has too many commands. Consolidate into a clean, simple interface:\n\n1. 
`relay start` should also kick off the dashboard automatically\n2. Merge redundant team-* commands\n3. Remove rarely-used commands or make them subcommands\n4. Target: 5-7 top-level commands max\n\nCurrent commands to evaluate:\n- start, stop, status (keep)\n- wrap (keep)\n- project (keep or merge into status)\n- send (keep)\n- team-setup, team-status, team-send, team-check, team-listen, team-start (consolidate)\n- msg-read (make subcommand or integrate)\n- dashboard (merge into start)","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-19T22:00:00Z","updated_at":"2025-12-19T22:08:44.107992+01:00","closed_at":"2025-12-19T22:08:44.107992+01:00"} +{"id":"agent-relay-d07","title":"Message metadata: subject/thread/importance/replyTo","description":"Add first-class metadata fields to messages (subject, threadId, importance, ackRequired, replyTo/correlationId). Carry them via SendPayload.data and/or [[RELAY]] blocks; persist in storage; update injection formatting + dashboard filters.","acceptance_criteria":"- Can send/receive messages with threadId + importance\\n- Dashboard can filter/group by threadId\\n- Injection shows importance/thread hint without breaking TUIs","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-20T21:44:18.094598+01:00","updated_at":"2025-12-20T21:44:18.094598+01:00","labels":["protocol","ux"]} +{"id":"agent-relay-d2f","title":"Add optional Git audit trail","description":"Commit messages to .agent-relay/messages/ directory. Recoverable history, compliance-friendly.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T21:36:19.689795+01:00","updated_at":"2025-12-20T21:36:19.689795+01:00"} +{"id":"agent-relay-dd8","title":"Add file reservation system (from mcp_agent_mail)","description":"Advisory locks with TTL, @relay:lock pattern, pre-commit hook integration. 
Critical for multi-agent file editing.","notes":"No progress today; added as future work from swarm-mail comparison: implement reservations/locks with TTL + conflict detection. See swarm-mail reservations + locks patterns.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T21:36:19.573504+01:00","updated_at":"2025-12-20T22:00:37.608017+01:00"} {"id":"agent-relay-dyr","title":"No authentication between agents","description":"Any process can connect to the daemon socket and impersonate any agent name. Consider: (1) Per-agent tokens/secrets, (2) Socket permission checks, (3) Optional TLS for non-localhost deployments.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T00:18:16.215889+01:00","updated_at":"2025-12-20T00:18:16.215889+01:00"} {"id":"agent-relay-f3q","title":"Make msg-read a subcommand of send or message","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-19T21:59:55.123772+01:00","updated_at":"2025-12-19T22:04:51.112763+01:00","closed_at":"2025-12-19T22:04:51.112763+01:00"} -{"id":"agent-relay-ghy","title":"Team config: auto-spawn agents or auto-assign names from teams.json","description":"When a project has a teams.json config file, agent-relay up should either:\n\n1. **Auto-spawn option**: Automatically kick off terminal sessions for each agent defined in teams.json\n2. 
**Auto-assign option**: When users manually start sessions with `agent-relay -n \u003cname\u003e claude`, validate the name against teams.json and auto-assign roles/permissions\n\n## teams.json format\n```json\n{\n \"team\": \"my-project\",\n \"agents\": [\n {\"name\": \"Coordinator\", \"cli\": \"claude\", \"role\": \"coordinator\"},\n {\"name\": \"LeadDev\", \"cli\": \"claude\", \"role\": \"developer\"},\n {\"name\": \"Reviewer\", \"cli\": \"claude\", \"role\": \"reviewer\"}\n ],\n \"autoSpawn\": false\n}\n```\n\n## Behavior\n- If `autoSpawn: true`, `agent-relay up` spawns tmux sessions for each agent\n- If `autoSpawn: false`, validate names against config when agents connect\n- Store teams.json in project root or .agent-relay/teams.json\n\n## Commands\n- `agent-relay up --spawn` - force spawn all agents\n- `agent-relay up --no-spawn` - just start daemon, manual agent starts","status":"in_progress","priority":2,"issue_type":"feature","created_at":"2025-12-19T23:13:02.482971+01:00","updated_at":"2025-12-19T23:56:21.713993+01:00"} +{"id":"agent-relay-fb3","title":"Add configurable relay prefix for Gemini compatibility","description":"Gemini CLI uses @ for file references, conflicting with @relay:. Add --prefix flag to CLI and relayPrefix config option. Auto-detect CLI type and use appropriate default (\u003e\u003e for Gemini, @relay: for Claude/Codex). Update parser to use dynamic prefix. See docs/TMUX_IMPROVEMENTS.md for full implementation plan.","status":"closed","priority":2,"issue_type":"feature","assignee":"Coordinator","created_at":"2025-12-20T21:28:52.685002+01:00","updated_at":"2025-12-20T21:40:13.066766+01:00","closed_at":"2025-12-20T21:40:13.066766+01:00"} +{"id":"agent-relay-ghy","title":"Team config: auto-spawn agents or auto-assign names from teams.json","description":"When a project has a teams.json config file, agent-relay up should either:\n\n1. **Auto-spawn option**: Automatically kick off terminal sessions for each agent defined in teams.json\n2. 
**Auto-assign option**: When users manually start sessions with `agent-relay -n \u003cname\u003e claude`, validate the name against teams.json and auto-assign roles/permissions\n\n## teams.json format\n```json\n{\n \"team\": \"my-project\",\n \"agents\": [\n {\"name\": \"Coordinator\", \"cli\": \"claude\", \"role\": \"coordinator\"},\n {\"name\": \"LeadDev\", \"cli\": \"claude\", \"role\": \"developer\"},\n {\"name\": \"Reviewer\", \"cli\": \"claude\", \"role\": \"reviewer\"}\n ],\n \"autoSpawn\": false\n}\n```\n\n## Behavior\n- If `autoSpawn: true`, `agent-relay up` spawns tmux sessions for each agent\n- If `autoSpawn: false`, validate names against config when agents connect\n- Store teams.json in project root or .agent-relay/teams.json\n\n## Commands\n- `agent-relay up --spawn` - force spawn all agents\n- `agent-relay up --no-spawn` - just start daemon, manual agent starts","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-19T23:13:02.482971+01:00","updated_at":"2025-12-20T21:57:43.362401+01:00"} {"id":"agent-relay-go9","title":"PostgreSQL storage adapter not implemented","description":"In storage/adapter.ts:152-162, PostgreSQL is listed as a storage option but throws 'not yet implemented'. For production multi-node deployments, SQLite won't scale. Implement PostgreSQL adapter for distributed storage.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T00:17:45.065487+01:00","updated_at":"2025-12-20T00:17:45.065487+01:00"} +{"id":"agent-relay-hgd","title":"Reliable delivery: resume/replay via cursor/checkpoint","description":"Make ACK/RESUME real: persist a per-agent delivery cursor (last delivered/acked seq) and on reconnect replay missed messages from storage. 
Use existing delivery.seq as stream position, similar to swarm-mail DurableCursor checkpointing.","acceptance_criteria":"- Client can reconnect and receive missed messages\\n- Duplicate suppression is deterministic (id/seq based)\\n- Cursor persists across daemon restarts","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-20T21:44:02.016428+01:00","updated_at":"2025-12-20T21:44:02.016428+01:00","labels":["durability","protocol"]} {"id":"agent-relay-hks","title":"Increase test coverage for daemon/server.ts, dashboard, and CLI","description":"Current coverage is only 39% overall. Key files with 0% coverage: daemon/server.ts, dashboard/server.ts, cli/index.ts, wrapper/client.ts, wrapper/tmux-wrapper.ts, utils/project-namespace.ts. Need integration tests for the daemon startup/shutdown lifecycle and CLI commands.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-20T00:17:29.603137+01:00","updated_at":"2025-12-20T00:17:29.603137+01:00"} +{"id":"agent-relay-j9z","title":"Message injection corrupts human input in progress","description":"When a relay message arrives while the human is actively typing, the injection (Esc + Ctrl-U + message + Enter) destroys their partial input. Need to detect active input state before injecting. Options: (1) Check tmux pane input buffer, (2) Queue messages until prompt detected, (3) Use tmux display-message instead of send-keys for non-intrusive notification, (4) Add visual indicator that message is pending.","design":"Mitigation ideas: 1) Prefer tmux non-intrusive notify: tmux display-message (and always write to inbox/storage) instead of send-keys when user is typing. 2) Gate send-keys injection on 'safe' state: pane idle AND prompt detected AND no partial input; otherwise queue. 
3) If queued, show pending count via display-message.","status":"open","priority":1,"issue_type":"bug","created_at":"2025-12-20T21:48:03.280298+01:00","updated_at":"2025-12-20T22:00:04.196161+01:00","dependencies":[{"issue_id":"agent-relay-j9z","depends_on_id":"agent-relay-6rz","type":"blocks","created_at":"2025-12-20T21:48:10.118963+01:00","created_by":"daemon"}]} {"id":"agent-relay-kzw","title":"Project namespace uses /tmp which can be cleared on reboot","description":"In utils/project-namespace.ts:13, BASE_DIR is /tmp/agent-relay. On macOS/Linux, /tmp is cleared on reboot, losing all message history. Consider: (1) XDG_DATA_HOME fallback, (2) ~/.agent-relay option, (3) Per-project .agent-relay folder.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-20T00:18:33.224965+01:00","updated_at":"2025-12-20T00:18:33.224965+01:00"} -{"id":"agent-relay-mcl","title":"Gemini agent cannot send messages properly","description":"Gemini agent cannot autonomously send relay messages. When Gemini outputs @relay: patterns, it enters shell mode instead of being intercepted by the relay wrapper. Manual typing works fine. This suggests a PTY parsing issue specific to how Gemini outputs the @relay: pattern - possibly timing, escaping, or output buffering differences compared to Claude.","status":"open","priority":2,"issue_type":"bug","created_at":"2025-12-20T00:29:15.797552+01:00","updated_at":"2025-12-20T00:31:48.062831+01:00"} +{"id":"agent-relay-l99","title":"Add output filtering for cleaner logs","description":"Filter noisy patterns from logs: thinking indicators [1/418], empty ANSI-only lines, etc. Add config.filterLogs option. 
See docs/TMUX_IMPROVEMENTS.md for implementation details.","status":"open","priority":4,"issue_type":"feature","created_at":"2025-12-20T21:28:51.199736+01:00","updated_at":"2025-12-20T21:28:51.199736+01:00","dependencies":[{"issue_id":"agent-relay-l99","depends_on_id":"agent-relay-1ek","type":"blocks","created_at":"2025-12-20T21:29:26.987526+01:00","created_by":"daemon"}]} +{"id":"agent-relay-mcl","title":"Gemini agent cannot send messages properly","description":"Gemini agent cannot autonomously send relay messages. When Gemini outputs @relay: patterns, it enters shell mode instead of being intercepted by the relay wrapper. Manual typing works fine. This suggests a PTY parsing issue specific to how Gemini outputs the @relay: pattern - possibly timing, escaping, or output buffering differences compared to Claude.","status":"closed","priority":2,"issue_type":"bug","assignee":"LeadDev","created_at":"2025-12-20T00:29:15.797552+01:00","updated_at":"2025-12-20T21:56:07.20143+01:00","closed_at":"2025-12-20T21:56:07.20143+01:00"} +{"id":"agent-relay-oiw","title":"BUG: No per-agent inbox files created","description":"Expected /tmp/agent-relay/\u003cproject\u003e/\u003cagent\u003e/inbox.md but only team/agents.json exists. Agents cannot read their inbox via file.","status":"open","priority":2,"issue_type":"bug","created_at":"2025-12-20T21:46:24.826442+01:00","updated_at":"2025-12-20T21:46:24.826442+01:00"} {"id":"agent-relay-sio","title":"Add graceful degradation when relay daemon is unavailable","description":"In wrapper/tmux-wrapper.ts:195-197, daemon connection failures are silently caught. 
Consider: (1) Periodic reconnection attempts, (2) Queueing messages for later delivery, (3) Visual indicator in terminal showing connection status.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-20T00:17:32.600333+01:00","updated_at":"2025-12-20T00:17:32.600333+01:00"} {"id":"agent-relay-ucw","title":"Dashboard: multi-project navigation or dynamic port allocation","description":"When the dashboard is already running for one project, users should be able to either: (1) Navigate between different projects in a single dashboard view, OR (2) Start a new dashboard instance on an automatically allocated available port for a different project. Currently if a dashboard is running, starting another project conflicts.","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-19T23:40:18.667766+01:00","updated_at":"2025-12-20T00:18:05.10495+01:00","closed_at":"2025-12-20T00:18:05.10495+01:00"} {"id":"agent-relay-v57","title":"No message expiration/cleanup in SQLite storage","description":"SQLite adapter has no TTL or cleanup mechanism for old messages. Over time, the database will grow unbounded. 
Add: (1) Configurable message retention period, (2) Automatic cleanup job, (3) Index on ts column is there but no cleanup uses it.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-20T00:18:01.86766+01:00","updated_at":"2025-12-20T00:18:01.86766+01:00"} diff --git a/README.md b/README.md index 79a9d86d8..b5834a5b0 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,8 @@ agent-relay read abc123 `agent-relay up` starts a web dashboard at http://localhost:3888 +![Agent Relay Dashboard](dashboard.png) + ## Troubleshooting | Issue | Solution | @@ -112,6 +114,16 @@ cd agent-relay npm install && npm run build ``` +## Inspiration + +This project was inspired by some excellent work in the multi-agent coordination space: + +- **[mcp_agent_mail](https://github.com/Dicklesworthstone/mcp_agent_mail)** - A brilliant MCP-based approach to agent messaging with file-based inboxes and structured message handling. Great patterns for durable, asynchronous agent communication. + +- **[swarm-tools / swarm-mail](https://github.com/joelhooks/swarm-tools)** - An exceptional event-sourced coordination system with durable cursors, locks, deferred responses, and ask/respond patterns. The gold standard for robust multi-agent workflows with full audit trails. + +Both projects informed our thinking around durability, message threading, and coordination primitives. Check them out! + ## License MIT diff --git a/dashboard.png b/dashboard.png new file mode 100644 index 000000000..3faa94ca1 Binary files /dev/null and b/dashboard.png differ diff --git a/docs/DESIGN_V2.md b/docs/DESIGN_V2.md new file mode 100644 index 000000000..b63d515d5 --- /dev/null +++ b/docs/DESIGN_V2.md @@ -0,0 +1,1079 @@ +# Agent-Relay v2 Design Document + +## Overview + +This document outlines improvements to agent-relay while preserving its core philosophy: **simple, transparent agent-to-agent communication via terminal output patterns**. + +The `@relay:` pattern is the killer feature. 
Agents communicate naturally by just printing text. No APIs, no SDKs, no special integrations. This must remain the foundation. + +--- + +## Current Pain Points + +### 1. Ephemeral Storage (`/tmp`) +- Data lives in `/tmp/agent-relay/<project>/` +- Cleared on reboot (macOS/Linux) +- Message history lost unexpectedly + +### 2. Dead Code +- ACK/NACK protocol defined but not implemented +- Session resume tokens always return `RESUME_TOO_OLD` +- PostgreSQL adapter throws "not implemented" + +### 3. Memory Leaks +- `sentMessageHashes` Set grows unbounded +- Long-running sessions will OOM + +### 4. Polling Overhead +- `capture-pane` every 200ms consumes CPU +- Latency up to 200ms for message detection + +### 5. Fragile Injection Timing +- 1.5s idle detection is a heuristic +- Race conditions if agent outputs during injection + +--- + +## Design Principles + +1. **Keep it simple** - Every feature must justify its complexity +2. **Terminal-native** - Users stay in tmux, not a browser +3. **Pattern-based** - `@relay:` is the API +4. **Zero config** - Works out of the box +5. 
**Debuggable** - Easy to understand what's happening + +--- + +## Proposed Changes + +### Phase 1: Foundation Fixes + +#### 1.1 Persistent Storage Location + +Move from `/tmp` to XDG-compliant location: + +``` +~/.local/share/agent-relay/ # XDG_DATA_HOME fallback +├── projects/ +│ └── <project>/ +│ ├── relay.sock # Unix socket +│ ├── messages.db # SQLite +│ └── agents.json # Connected agents +└── config.json # Global settings (optional) +``` + +**Migration path:** +- Check for existing `/tmp/agent-relay/` data on startup +- Offer one-time migration prompt +- Fall back to new location for fresh installs + +#### 1.2 Remove Dead Code + +Delete these unimplemented features: + +| Feature | Location | Action | +|---------|----------|--------| +| ACK handling | `connection.ts:114-116` | Remove | +| Resume tokens | `connection.ts:140-143` | Remove | +| PostgreSQL adapter | `storage/adapter.ts:152-162` | Remove | +| Topic subscriptions | `router.ts` | Keep but mark experimental | + +**Protocol simplification:** +```typescript +// Before: 11 message types +type MessageType = 'HELLO' | 'WELCOME' | 'SEND' | 'DELIVER' | 'ACK' | + 'PING' | 'PONG' | 'SUBSCRIBE' | 'UNSUBSCRIBE' | 'ERROR' | 'BYE'; + +// After: 7 message types +type MessageType = 'HELLO' | 'WELCOME' | 'SEND' | 'DELIVER' | + 'PING' | 'PONG' | 'ERROR'; +``` + +#### 1.3 Fix Memory Leak + +Replace unbounded Set with LRU cache: + +```typescript +// Before +private sentMessageHashes: Set<string> = new Set(); + +// After +import { LRUCache } from 'lru-cache'; + +private sentMessageHashes = new LRUCache<string, boolean>({ + max: 10000, // Max 10k unique messages tracked + ttl: 1000 * 60 * 60, // Expire after 1 hour +}); +``` + +#### 1.4 Simplify Binary Protocol + +Replace 4-byte length prefix with newline-delimited JSON: + +```typescript +// Before: Binary framing +[4-byte length][JSON payload] + +// After: NDJSON (newline-delimited JSON) +{"v":1,"type":"SEND","to":"Bob","payload":{"body":"Hello"}}\n 
+{"v":1,"type":"DELIVER","from":"Alice","payload":{"body":"Hello"}}\n +``` + +**Benefits:** +- Human-readable when debugging (`nc -U relay.sock`) +- Simpler parser (~20 lines vs ~50 lines) +- Standard format (NDJSON) + +**Trade-off:** Messages cannot contain literal newlines in body. Since we already sanitize newlines for injection (`replace(/[\r\n]+/g, ' ')`), this is acceptable. + +--- + +### Phase 2: Reliability Improvements + +#### 2.1 Improved Injection Strategy + +Replace time-based idle detection with input buffer detection: + +```typescript +// Current: Wait 1.5s after last output (fragile) +if (Date.now() - lastOutputTime > 1500) { + inject(); +} + +// Proposed: Check if input line is empty +async function isInputClear(): Promise { + // Capture current pane content + const { stdout } = await execAsync( + `tmux capture-pane -t ${session} -p -J` + ); + const lines = stdout.split('\n'); + const lastLine = lines[lines.length - 1] || ''; + + // Check if last line is just a prompt (no partial input) + return /^[>$%#➜]\s*$/.test(lastLine); +} +``` + +#### 2.2 Bracketed Paste Mode + +Use bracketed paste for safer injection: + +```typescript +// Wrap injection in bracketed paste markers +const PASTE_START = '\x1b[200~'; +const PASTE_END = '\x1b[201~'; + +async function injectSafe(text: string): Promise { + await sendKeysLiteral(PASTE_START + text + PASTE_END); + await sendKeys('Enter'); +} +``` + +**Benefits:** +- Prevents shell interpretation of special characters +- Atomic paste (no interleaving) +- Supported by most modern terminals/shells + +#### 2.3 Message Queue for Offline Agents + +Queue messages when target agent is disconnected: + +```typescript +interface QueuedMessage { + id: string; + from: string; + to: string; + payload: SendPayload; + queuedAt: number; + attempts: number; +} + +// In router.ts +if (!targetConnection || targetConnection.state !== 'ACTIVE') { + this.messageQueue.enqueue({ + id: envelope.id, + from: connection.agentName, + to: 
envelope.to, + payload: envelope.payload, + queuedAt: Date.now(), + attempts: 0, + }); + + // Notify sender + connection.send({ + type: 'QUEUED', + id: envelope.id, + reason: 'recipient_offline', + }); +} + +// On agent connect, flush queued messages +onAgentConnect(agentName: string) { + const queued = this.messageQueue.getForRecipient(agentName); + for (const msg of queued) { + this.deliverMessage(msg); + this.messageQueue.remove(msg.id); + } +} +``` + +--- + +### Phase 3: Developer Experience + +#### 3.1 Structured Logging + +Replace scattered `console.log` with leveled logging: + +```typescript +import { createLogger } from './logger.js'; + +const log = createLogger('daemon'); + +log.info('Agent registered', { name: 'Alice', cli: 'claude' }); +log.debug('Message routed', { from: 'Alice', to: 'Bob', id: '...' }); +log.error('Connection failed', { error: err.message }); +``` + +Output format (when `DEBUG=agent-relay`): +``` +[14:23:01.234] INFO daemon: Agent registered name=Alice cli=claude +[14:23:01.456] DEBUG router: Message routed from=Alice to=Bob id=abc123 +``` + +#### 3.2 Health Check Endpoint + +Add simple HTTP health check (optional, disabled by default): + +```typescript +// Enable with: agent-relay up --health-port 3889 +// Or: AGENT_RELAY_HEALTH_PORT=3889 + +GET http://localhost:3889/health +{ + "status": "ok", + "uptime": 3600, + "agents": ["Alice", "Bob"], + "messages": { + "sent": 42, + "delivered": 41, + "queued": 1 + } +} +``` + +#### 3.3 CLI Improvements + +```bash +# Current +agent-relay up +agent-relay -n Alice claude +agent-relay status +agent-relay read + +# Add +agent-relay agents # List connected agents +agent-relay send Alice "Hello" # Send from CLI (for testing) +agent-relay logs # Tail daemon logs +agent-relay logs Alice # Tail agent's relay activity +``` + +--- + +### Phase 4: Optional Enhancements + +#### 4.1 WebSocket Streaming (Optional) + +Replace polling with WebSocket-based output streaming: + +```typescript +// Instead of 
polling capture-pane, attach via PTY +import { spawn } from 'node-pty'; + +const pty = spawn('tmux', ['attach-session', '-t', session, '-r'], { + // Read-only attach +}); + +pty.onData((data) => { + // Real-time output, no polling + const { commands } = parser.parse(data); + for (const cmd of commands) { + sendRelayCommand(cmd); + } +}); +``` + +**Trade-offs:** +| Aspect | Polling | WebSocket/PTY | +|--------|---------|---------------| +| Latency | 0-200ms | ~1-10ms | +| CPU | Higher | Lower | +| Complexity | Simple | More complex | +| Dependencies | None | node-pty | + +**Recommendation:** Keep polling as default, offer streaming as `--experimental-streaming` flag. + +#### 4.2 Message Encryption (Optional) + +For sensitive inter-agent communication: + +```typescript +// Generate per-project key on first run +const projectKey = await generateKey(); +fs.writeFileSync(keyPath, projectKey, { mode: 0o600 }); + +// Encrypt message bodies +const encrypted = await encrypt(payload.body, projectKey); +``` + +**Scope:** Only encrypt message body, not metadata (to/from/timestamp). + +--- + +## Migration Plan + +### v1.x → v2.0 + +1. **Storage migration** + - Detect existing `/tmp/agent-relay/` data + - Copy to `~/.local/share/agent-relay/` + - Remove old location after successful migration + +2. **Protocol compatibility** + - v2 daemon accepts both binary and NDJSON + - v2 clients send NDJSON only + - Deprecation warning for binary clients + +3. 
**Breaking changes** + - Remove ACK/resume/PostgreSQL (unused) + - Change default storage location + +--- + +## File Structure (Post-Refactor) + +``` +src/ +├── cli/ +│ └── index.ts # CLI entry point +├── daemon/ +│ ├── server.ts # Main daemon +│ ├── connection.ts # Connection handling (simplified) +│ └── router.ts # Message routing + queue +├── wrapper/ +│ ├── tmux-wrapper.ts # Agent wrapper +│ ├── parser.ts # @relay: pattern parser +│ └── client.ts # Relay client +├── protocol/ +│ └── types.ts # Message types (reduced) +├── storage/ +│ └── sqlite-adapter.ts # SQLite only (removed abstraction) +└── utils/ + ├── logger.ts # Structured logging + ├── paths.ts # XDG-compliant paths + └── lru-cache.ts # For deduplication +``` + +--- + +## Success Metrics + +| Metric | Current | Target | +|--------|---------|--------| +| Lines of code | ~2500 | ~2800 (with TUI) | +| Message types | 11 | 9 (added GROUP, TOPIC) | +| Max agents | ~3 practical | 10+ comfortable | +| Dependencies | 12 | 14 (adds blessed for TUI) | +| Memory (1hr session) | Unbounded | <100MB (10 agents) | +| Message detection latency | 0-200ms | 0-200ms | +| Data persistence | Lost on reboot | Permanent | +| Visibility | None | TUI dashboard | + +--- + +## Phase 5: Multi-Agent Coordination (5-10 Agents) + +Scaling from 2-3 agents to 5-10 requires better visibility, organization, and coordination patterns. 
+ +### 5.1 Agent Groups + +Group agents for targeted messaging: + +```bash +# Define groups in teams.json +{ + "groups": { + "backend": ["ApiDev", "DbAdmin", "AuthService"], + "frontend": ["UiDev", "Stylist"], + "review": ["Reviewer", "QA"] + } +} + +# Send to group +@relay:@backend We need to refactor the user service +# → Message delivered to ApiDev, DbAdmin, AuthService + +# Broadcast to all +@relay:* Starting deployment in 5 minutes +``` + +**Implementation:** +```typescript +// In router.ts +route(from: Connection, envelope: Envelope) { + const to = envelope.to; + + if (to === '*') { + this.broadcast(from, envelope); + } else if (to.startsWith('@')) { + // Group message + const groupName = to.slice(1); + const members = this.groups.get(groupName) || []; + for (const member of members) { + if (member !== from.agentName) { + this.sendTo(member, envelope); + } + } + } else { + this.sendTo(to, envelope); + } +} +``` + +### 5.2 Terminal-Based Dashboard (TUI) + +A simple terminal UI for monitoring all agents without leaving the terminal: + +```bash +agent-relay watch +``` + +``` +┌─ Agent Relay ──────────────────────────────────────────────┐ +│ Agents (8 connected) │ +├─────────────────────────────────────────────────────────────┤ +│ ● Coordinator idle 2m msgs: 12↑ 8↓ │ +│ ● ApiDev active msgs: 5↑ 14↓ typing... │ +│ ● DbAdmin active msgs: 3↑ 6↓ │ +│ ● AuthService idle 45s msgs: 2↑ 4↓ │ +│ ● UiDev active msgs: 8↑ 10↓ typing... │ +│ ● Stylist idle 5m msgs: 1↑ 2↓ │ +│ ● Reviewer active msgs: 0↑ 15↓ │ +│ ○ QA offline queued: 3 │ +├─────────────────────────────────────────────────────────────┤ +│ Recent Messages │ +│ 14:23:01 ApiDev → DbAdmin: Can you check the user table? 
│ +│ 14:23:15 DbAdmin → ApiDev: Schema looks correct │ +│ 14:23:30 Coordinator → @backend: Stand up in 5 mins │ +│ 14:24:01 UiDev → Reviewer: PR ready for auth flow │ +├─────────────────────────────────────────────────────────────┤ +│ [a]ttach [s]end [g]roups [q]uit │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Features:** +- Real-time agent status (active/idle/offline) +- Message counts and queue depth +- Recent message feed +- Quick attach to any agent's tmux session +- Send messages from dashboard + +**Implementation:** Use `blessed` or `ink` for terminal UI: +```typescript +// src/cli/watch.ts +import blessed from 'blessed'; + +const screen = blessed.screen({ smartCSR: true }); +const agentList = blessed.list({ + parent: screen, + label: 'Agents', + // ... +}); + +// Subscribe to daemon events via WebSocket +const ws = new WebSocket(`ws+unix://${socketPath}`); +ws.on('message', (data) => { + const event = JSON.parse(data); + updateDisplay(event); +}); +``` + +### 5.3 Coordination Patterns + +#### Pattern 1: Coordinator Agent + +One agent orchestrates the others: + +``` +Coordinator + ├── @relay:ApiDev Implement /api/users endpoint + ├── @relay:DbAdmin Create users table + └── @relay:UiDev Build user profile page + +ApiDev → Coordinator: Done, endpoint at /api/users +DbAdmin → Coordinator: Table created with schema... +UiDev → Coordinator: Need API spec first + +Coordinator → UiDev: Here's the spec: GET /api/users... 
+``` + +#### Pattern 2: Pipeline + +Agents pass work sequentially: + +``` +Developer → Reviewer → QA → Deployer + +@relay:Reviewer PR #123 ready for review + ↓ +@relay:QA Review passed, ready for testing + ↓ +@relay:Deployer Tests passed, deploy when ready +``` + +#### Pattern 3: Pub/Sub Topics + +Agents subscribe to topics of interest: + +```bash +# Agent subscribes to topic +@relay:+security-alerts + +# Any agent can publish +@relay:#security-alerts Found SQL injection in auth.ts + +# All subscribers receive the message +``` + +**Implementation:** +```typescript +// Subscribe syntax +@relay:+topic-name # Subscribe +@relay:-topic-name # Unsubscribe +@relay:#topic-name msg # Publish to topic + +// In parser.ts +const TOPIC_SUBSCRIBE = /^@relay:\+(\S+)$/; +const TOPIC_UNSUBSCRIBE = /^@relay:-(\S+)$/; +const TOPIC_PUBLISH = /^@relay:#(\S+)\s+(.+)$/; +``` + +### 5.4 Tmux Layout Helper + +Quickly set up multi-agent tmux layouts: + +```bash +# Create tiled layout with all agents +agent-relay layout tile + +# Create layout from teams.json +agent-relay layout teams + +# Custom layout +agent-relay layout grid 3x3 +``` + +**Generated tmux layout:** +``` +┌─────────────┬─────────────┬─────────────┐ +│ Coordinator │ ApiDev │ DbAdmin │ +├─────────────┼─────────────┼─────────────┤ +│ AuthService │ UiDev │ Stylist │ +├─────────────┼─────────────┼─────────────┤ +│ Reviewer │ QA │ (empty) │ +└─────────────┴─────────────┴─────────────┘ +``` + +**Implementation:** +```bash +#!/bin/bash +# agent-relay layout tile +AGENTS=$(agent-relay agents --json | jq -r '.[].name') +COUNT=$(echo "$AGENTS" | wc -l) + +tmux new-session -d -s relay-overview +for agent in $AGENTS; do + tmux split-window -t relay-overview + tmux send-keys -t relay-overview "tmux attach -t relay-$agent-*" Enter +done +tmux select-layout -t relay-overview tiled +tmux attach -t relay-overview +``` + +### 5.5 Agent Roles & Capabilities + +Define what each agent can do: + +```json +// teams.json +{ + "agents": 
{ + "Coordinator": { + "role": "coordinator", + "canMessage": ["*"], + "canReceiveFrom": ["*"] + }, + "ApiDev": { + "role": "developer", + "groups": ["backend"], + "canMessage": ["Coordinator", "@backend", "Reviewer"], + "canReceiveFrom": ["Coordinator", "@backend"] + }, + "Reviewer": { + "role": "reviewer", + "canMessage": ["Coordinator", "QA"], + "canReceiveFrom": ["*"] + } + } +} +``` + +**Use cases:** +- Prevent junior agents from messaging senior ones directly +- Ensure QA only receives from Reviewer (enforced pipeline) +- Coordinator can message anyone + +### 5.6 Message Priority & Filtering + +With more agents, message prioritization becomes important: + +```bash +# Urgent message (interrupts immediately) +@relay:!ApiDev Production is down, check auth service + +# Normal message (waits for idle) +@relay:ApiDev When you have time, review this PR + +# Low priority (batched, delivered during quiet periods) +@relay:?ApiDev FYI: Updated the style guide +``` + +**Injection behavior:** +| Priority | Syntax | Behavior | +|----------|--------|----------| +| Urgent | `@relay:!Name` | Inject immediately, even if busy | +| Normal | `@relay:Name` | Wait for idle (current behavior) | +| Low | `@relay:?Name` | Batch and deliver during long idle | + +### 5.7 Status Broadcasts + +Agents automatically announce state changes: + +```typescript +// Automatic status messages +@relay:* STATUS: ApiDev is now idle +@relay:* STATUS: Reviewer completed task (closed PR #123) +@relay:* STATUS: QA disconnected + +// Agents can filter these +// In wrapper config: +{ + "hideStatusMessages": true, // Don't inject STATUS broadcasts + "showStatusInLogs": true // But log them for visibility +} +``` + +--- + +## Why They Scale Better (And How We Can Too) + +### The Scaling Problem + +With 2-3 agents, our current approach works well: +- Open 2-3 terminal tabs +- Switch between them manually +- Remember who's doing what + +With 5-10 agents, this breaks down: + +| Problem | Impact at 5-10 Agents | 
+|---------|----------------------| +| **No visibility** | Can't see what all agents are doing at once | +| **No status** | Don't know if agent is busy, idle, or stuck | +| **Lost context** | Forget which agent is working on what | +| **Message chaos** | Too many messages to track manually | +| **Terminal sprawl** | 10 tabs is unmanageable | + +### Why Their Approach Scales + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ THEIR ARCHITECTURE │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ BROWSER DASHBOARD │ │ +│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │ +│ │ │ Agent 1 │ │ Agent 2 │ │ Agent 3 │ │ Agent 4 │ ... │ │ +│ │ │ ● active│ │ ○ idle │ │ ● active│ │ ✗ error │ │ │ +│ │ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ │ +│ │ │ │ +│ │ [Live message feed] [Inbox: 3 unread] [Agent graph] │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ │ +│ Single pane of glass │ +│ │ +└─────────────────────────────────────────────────────────────────┘ + +Key insight: ONE place to see EVERYTHING +``` + +Their specific advantages at scale: + +| Feature | Why It Helps at Scale | +|---------|----------------------| +| **Dashboard** | See all 10 agents at once without switching | +| **Activity state** | Know instantly who's busy vs idle | +| **Message inbox** | Messages don't disappear into terminal history | +| **Agent discovery** | Auto-finds agents, no manual tracking | +| **Persistent storage** | Query historical messages anytime | + +### How We Keep Our Strengths AND Scale + +The goal: **Single pane of glass, but in the terminal** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ OUR IMPROVED ARCHITECTURE │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ TUI DASHBOARD (agent-relay watch) │ │ +│ 
│ │ │ +│ │ Agents: Status: Messages: │ │ +│ │ ● Coordinator active 12↑ 8↓ │ │ +│ │ ● ApiDev typing... 5↑ 14↓ │ │ +│ │ ● DbAdmin idle 30s 3↑ 6↓ │ │ +│ │ ○ QA offline queued: 3 │ │ +│ │ │ │ +│ │ [Press 'a' to attach, 's' to send, 'q' to quit] │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ │ +│ │ 'a' to attach │ +│ ▼ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ NATIVE TMUX SESSION │ │ +│ │ │ │ +│ │ claude> Working on the API endpoint... │ │ +│ │ @relay:DbAdmin Need the users table schema │ │ +│ │ │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ │ +│ │ Ctrl+B d to detach │ +│ ▼ │ +│ Back to TUI dashboard │ +│ │ +└─────────────────────────────────────────────────────────────────┘ + +Key insight: TUI for overview, native tmux for work +``` + +### Specific Scaling Improvements + +#### 1. Daemon Event Stream + +The daemon must broadcast events, not just route messages: + +```typescript +// NEW: Daemon broadcasts events to all listeners +interface DaemonEvent { + type: 'agent_connected' | 'agent_disconnected' | 'agent_active' | + 'agent_idle' | 'message_sent' | 'message_delivered' | 'message_queued'; + timestamp: number; + data: Record; +} + +// In daemon/server.ts +class Daemon { + private eventSubscribers: Set = new Set(); + + broadcast(event: DaemonEvent): void { + const envelope = { type: 'EVENT', event }; + for (const subscriber of this.eventSubscribers) { + subscriber.send(envelope); + } + } + + // Called when agent output detected + onAgentActivity(agentName: string): void { + this.broadcast({ + type: 'agent_active', + timestamp: Date.now(), + data: { agent: agentName } + }); + } +} +``` + +#### 2. 
Activity Reporting from Wrapper + +Wrappers must report activity state to daemon: + +```typescript +// In tmux-wrapper.ts +private reportActivity(): void { + const now = Date.now(); + const timeSinceOutput = now - this.lastOutputTime; + + let state: 'active' | 'idle' | 'typing'; + if (timeSinceOutput < 1000) { + state = 'active'; + } else if (this.detectTypingIndicator()) { + state = 'typing'; // Agent is thinking/working + } else if (timeSinceOutput < 30000) { + state = 'idle'; + } else { + state = 'idle'; + } + + // Only send if state changed + if (state !== this.lastReportedState) { + this.client.sendStatus(state); + this.lastReportedState = state; + } +} + +private detectTypingIndicator(): boolean { + // Claude Code shows "[1/418]" when thinking + // Detect this pattern in recent output + return /\[\d+\/\d+\]/.test(this.recentOutput); +} +``` + +#### 3. TUI Dashboard Implementation + +```typescript +// src/cli/watch.ts +import blessed from 'blessed'; + +export async function watchCommand(socketPath: string): Promise { + const screen = blessed.screen({ smartCSR: true }); + + // Agent list panel + const agentList = blessed.list({ + parent: screen, + label: ' Agents ', + top: 0, + left: 0, + width: '50%', + height: '60%', + border: { type: 'line' }, + style: { + selected: { bg: 'blue' } + }, + keys: true, + vi: true, + }); + + // Message feed panel + const messageFeed = blessed.log({ + parent: screen, + label: ' Messages ', + top: 0, + right: 0, + width: '50%', + height: '60%', + border: { type: 'line' }, + scrollable: true, + }); + + // Status bar + const statusBar = blessed.box({ + parent: screen, + bottom: 0, + height: 3, + content: ' [a]ttach [s]end [r]efresh [q]uit ', + }); + + // Connect to daemon event stream + const client = new RelayClient({ socketPath, subscribe: true }); + + client.onEvent = (event: DaemonEvent) => { + switch (event.type) { + case 'agent_connected': + updateAgentList(); + break; + case 'message_sent': + 
messageFeed.log(`${event.data.from} → ${event.data.to}: ${event.data.preview}`); + break; + // ... + } + screen.render(); + }; + + // Keyboard handlers + screen.key(['a'], () => attachToSelected()); + screen.key(['s'], () => showSendDialog()); + screen.key(['q'], () => process.exit(0)); + + screen.render(); +} + +function attachToSelected(): void { + const agent = getSelectedAgent(); + // Detach from blessed, attach to tmux + screen.destroy(); + execSync(`tmux attach-session -t relay-${agent}-*`, { stdio: 'inherit' }); + // When user detaches (Ctrl+B d), restart watch + watchCommand(socketPath); +} +``` + +#### 4. Message History Query + +```typescript +// src/cli/index.ts +program + .command('history') + .description('Show message history') + .option('-n ', 'Number of messages', '20') + .option('-f, --from ', 'Filter by sender') + .option('-t, --to ', 'Filter by recipient') + .option('--since