From 4da9b3491972b448a1c5d482371247157b458c2b Mon Sep 17 00:00:00 2001 From: Proactive Runtime Bot Date: Fri, 15 May 2026 11:35:59 +0200 Subject: [PATCH 1/3] Fix headless broker readiness --- .../completed/2026-05/traj_0o6gb2wvk59t.json | 89 ++++ .../completed/2026-05/traj_0o6gb2wvk59t.md | 38 ++ .../completed/2026-05/traj_4vucir4qvqa2.json | 75 ++++ .../completed/2026-05/traj_4vucir4qvqa2.md | 32 ++ .../completed/2026-05/traj_6sjeohtm3php.json | 53 +++ .../completed/2026-05/traj_6sjeohtm3php.md | 31 ++ .../completed/2026-05/traj_7uznwzoxbao6.json | 73 ++++ .../completed/2026-05/traj_7uznwzoxbao6.md | 32 ++ .../completed/2026-05/traj_9fdv7hxm0b60.json | 65 +++ .../completed/2026-05/traj_9fdv7hxm0b60.md | 36 ++ .../completed/2026-05/traj_erzd7j9nto9r.json | 73 ++++ .../completed/2026-05/traj_erzd7j9nto9r.md | 32 ++ .../completed/2026-05/traj_v1wexlfur5zr.json | 65 +++ .../completed/2026-05/traj_v1wexlfur5zr.md | 32 ++ .trajectories/index.json | 53 ++- src/cli/commands/core.test.ts | 379 +++++++++++++++++- src/cli/commands/core.ts | 5 +- src/cli/lib/broker-lifecycle.ts | 274 ++++++++++--- .../broker/utils/broker-harness.ts | 3 + 19 files changed, 1369 insertions(+), 71 deletions(-) create mode 100644 .trajectories/completed/2026-05/traj_0o6gb2wvk59t.json create mode 100644 .trajectories/completed/2026-05/traj_0o6gb2wvk59t.md create mode 100644 .trajectories/completed/2026-05/traj_4vucir4qvqa2.json create mode 100644 .trajectories/completed/2026-05/traj_4vucir4qvqa2.md create mode 100644 .trajectories/completed/2026-05/traj_6sjeohtm3php.json create mode 100644 .trajectories/completed/2026-05/traj_6sjeohtm3php.md create mode 100644 .trajectories/completed/2026-05/traj_7uznwzoxbao6.json create mode 100644 .trajectories/completed/2026-05/traj_7uznwzoxbao6.md create mode 100644 .trajectories/completed/2026-05/traj_9fdv7hxm0b60.json create mode 100644 .trajectories/completed/2026-05/traj_9fdv7hxm0b60.md create mode 100644 .trajectories/completed/2026-05/traj_erzd7j9nto9r.json create mode 100644 .trajectories/completed/2026-05/traj_erzd7j9nto9r.md create mode 100644 .trajectories/completed/2026-05/traj_v1wexlfur5zr.json create mode 100644 .trajectories/completed/2026-05/traj_v1wexlfur5zr.md diff --git a/.trajectories/completed/2026-05/traj_0o6gb2wvk59t.json b/.trajectories/completed/2026-05/traj_0o6gb2wvk59t.json new file mode 100644 index 000000000..6cb84fec6 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_0o6gb2wvk59t.json @@ -0,0 +1,89 @@ +{ + "id": "traj_0o6gb2wvk59t", + "version": 1, + "task": { + "title": "Fresh end-to-end validation for headless readiness" + }, + "status": "completed", + "startedAt": "2026-05-15T10:55:49.188Z", + "completedAt": "2026-05-15T11:11:52.324Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-15T10:56:20.988Z" + } + ], + "chapters": [ + { + "id": "chap_agsepzuxybr2", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-15T10:56:20.988Z", + "endedAt": "2026-05-15T11:11:52.324Z", + "events": [ + { + "ts": 1778842580989, + "type": "decision", + "content": "Validate via four independent surfaces: Validate via four independent surfaces", + "raw": { + "question": "Validate via four independent surfaces", + "chosen": "Validate via four independent surfaces", + "alternatives": [], + "reasoning": "For a fresh end-to-end pass, unit tests alone are not enough. Run the full Vitest suite, broker integration tests, direct built-CLI headless lifecycle, and packaged Bun standalone smoke so both Node and compiled binary invocation shapes are exercised." + }, + "significance": "high" + }, + { + "ts": 1778842631500, + "type": "reflection", + "content": "Full Vitest suite passed: 66 files, 849 tests. Continuing into built CLI and broker integration surfaces.", + "raw": { + "confidence": 0.85 + }, + "significance": "high", + "tags": [ + "confidence:0.85" + ] + }, + { + "ts": 1778842685777, + "type": "decision", + "content": "Fix broker integration harness strict typing before running E2E: Fix broker integration harness strict typing before running E2E", + "raw": { + "question": "Fix broker integration harness strict typing before running E2E", + "chosen": "Fix broker integration harness strict typing before running E2E", + "alternatives": [], + "reasoning": "Fresh validation uncovered that the broker integration suite could not compile because createWorkspace().apiKey is typed optional. The harness now throws if the workspace response lacks an API key instead of assigning undefined to RELAY_API_KEY." + }, + "significance": "high" + }, + { + "ts": 1778843512026, + "type": "reflection", + "content": "Fresh E2E pass results: full Vitest, clean build, built CLI headless lifecycle, packaged standalone smoke, and npm tarball validation passed. Broker integration runner now compiles with the harness fix, but the broad live broker suite still has pre-existing runtime failures/skips unrelated to this headless change, including stale PID-file expectations.", + "raw": { + "confidence": 0.88 + }, + "significance": "high", + "tags": [ + "confidence:0.88" + ] + } + ] + } + ], + "retrospective": { + "summary": "Ran a fresh end-to-end validation pass for headless broker readiness. Fixed the broker integration harness strict typing issue discovered during validation, verified full Vitest/build/direct headless CLI/standalone smoke/package tarball validation, and documented the remaining broad broker integration suite limitations.", + "approach": "Standard approach", + "confidence": 0.88 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc", + "tags": [], + "_trace": { + "startRef": "ccd34408916ca0ae9b897aad20ddeb7b8fb1a171", + "endRef": "ccd34408916ca0ae9b897aad20ddeb7b8fb1a171" + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-05/traj_0o6gb2wvk59t.md b/.trajectories/completed/2026-05/traj_0o6gb2wvk59t.md new file mode 100644 index 000000000..f3c073d28 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_0o6gb2wvk59t.md @@ -0,0 +1,38 @@ +# Trajectory: Fresh end-to-end validation for headless readiness + +> **Status:** ✅ Completed +> **Confidence:** 88% +> **Started:** May 15, 2026 at 12:55 PM +> **Completed:** May 15, 2026 at 01:11 PM + +--- + +## Summary + +Ran a fresh end-to-end validation pass for headless broker readiness. Fixed the broker integration harness strict typing issue discovered during validation, verified full Vitest/build/direct headless CLI/standalone smoke/package tarball validation, and documented the remaining broad broker integration suite limitations. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Validate via four independent surfaces +- **Chose:** Validate via four independent surfaces +- **Reasoning:** For a fresh end-to-end pass, unit tests alone are not enough. Run the full Vitest suite, broker integration tests, direct built-CLI headless lifecycle, and packaged Bun standalone smoke so both Node and compiled binary invocation shapes are exercised. + +### Fix broker integration harness strict typing before running E2E +- **Chose:** Fix broker integration harness strict typing before running E2E +- **Reasoning:** Fresh validation uncovered that the broker integration suite could not compile because createWorkspace().apiKey is typed optional. The harness now throws if the workspace response lacks an API key instead of assigning undefined to RELAY_API_KEY. + +--- + +## Chapters + +### 1. Work +*Agent: default* + +- Validate via four independent surfaces: Validate via four independent surfaces +- Full Vitest suite passed: 66 files, 849 tests. Continuing into built CLI and broker integration surfaces. +- Fix broker integration harness strict typing before running E2E: Fix broker integration harness strict typing before running E2E +- Fresh E2E pass results: full Vitest, clean build, built CLI headless lifecycle, packaged standalone smoke, and npm tarball validation passed. Broker integration runner now compiles with the harness fix, but the broad live broker suite still has pre-existing runtime failures/skips unrelated to this headless change, including stale PID-file expectations. diff --git a/.trajectories/completed/2026-05/traj_4vucir4qvqa2.json b/.trajectories/completed/2026-05/traj_4vucir4qvqa2.json new file mode 100644 index 000000000..33da76d7b --- /dev/null +++ b/.trajectories/completed/2026-05/traj_4vucir4qvqa2.json @@ -0,0 +1,75 @@ +{ + "id": "traj_4vucir4qvqa2", + "version": 1, + "task": { + "title": "Harden headless broker readiness semantics" + }, + "status": "completed", + "startedAt": "2026-05-15T09:46:07.617Z", + "completedAt": "2026-05-15T09:59:00.460Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-15T09:53:03.774Z" + } + ], + "chapters": [ + { + "id": "chap_kfz7mc7l9gg4", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-15T09:53:03.774Z", + "endedAt": "2026-05-15T09:59:00.460Z", + "events": [ + { + "ts": 1778838783775, + "type": "decision", + "content": "Gate detached broker start on API readiness and report STARTING separately: Gate detached broker start on API readiness and report STARTING separately", + "raw": { + "question": "Gate detached broker start on API readiness and report STARTING separately", + "chosen": "Gate detached broker start on API readiness and report STARTING separately", + "alternatives": [], + "reasoning": "Headless orchestrators need command success to mean usable broker, and live-process/API-unready must not be collapsed into STOPPED." + }, + "significance": "high" + }, + { + "ts": 1778839134173, + "type": "reflection", + "content": "Readiness hardening implemented and verified with focused command tests, typecheck, lint, and diff checks; remaining lint output is pre-existing complexity/depth warnings outside the new readiness helper.", + "raw": { + "focalPoints": [ + "readiness semantics", + "detached start", + "status truthfulness", + "verification" + ], + "confidence": 0.9 + }, + "significance": "high", + "tags": [ + "focal:readiness semantics", + "focal:detached start", + "focal:status truthfulness", + "focal:verification", + "confidence:0.9" + ] + } + ] + } + ], + "retrospective": { + "summary": "Hardened headless broker startup so detached up waits for API readiness, status --wait-for distinguishes STARTING from STOPPED and exits non-zero on timeout, and docs/skills/tests now encode the readiness contract.", + "approach": "Standard approach", + "confidence": 0.92 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc", + "tags": [], + "_trace": { + "startRef": "67d37e44cecdab1be65a8ee2bf3fb9a60e61c162", + "endRef": "67d37e44cecdab1be65a8ee2bf3fb9a60e61c162" + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-05/traj_4vucir4qvqa2.md b/.trajectories/completed/2026-05/traj_4vucir4qvqa2.md new file mode 100644 index 000000000..8b6a01bf1 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_4vucir4qvqa2.md @@ -0,0 +1,32 @@ +# Trajectory: Harden headless broker readiness semantics + +> **Status:** ✅ Completed +> **Confidence:** 92% +> **Started:** May 15, 2026 at 11:46 AM +> **Completed:** May 15, 2026 at 11:59 AM + +--- + +## Summary + +Hardened headless broker startup so detached up waits for API readiness, status --wait-for distinguishes STARTING from STOPPED and exits non-zero on timeout, and docs/skills/tests now encode the readiness contract. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Gate detached broker start on API readiness and report STARTING separately +- **Chose:** Gate detached broker start on API readiness and report STARTING separately +- **Reasoning:** Headless orchestrators need command success to mean usable broker, and live-process/API-unready must not be collapsed into STOPPED. + +--- + +## Chapters + +### 1. Work +*Agent: default* + +- Gate detached broker start on API readiness and report STARTING separately: Gate detached broker start on API readiness and report STARTING separately +- Readiness hardening implemented and verified with focused command tests, typecheck, lint, and diff checks; remaining lint output is pre-existing complexity/depth warnings outside the new readiness helper. diff --git a/.trajectories/completed/2026-05/traj_6sjeohtm3php.json b/.trajectories/completed/2026-05/traj_6sjeohtm3php.json new file mode 100644 index 000000000..ce8725e0c --- /dev/null +++ b/.trajectories/completed/2026-05/traj_6sjeohtm3php.json @@ -0,0 +1,53 @@ +{ + "id": "traj_6sjeohtm3php", + "version": 1, + "task": { + "title": "Address broker headless reliability review findings" + }, + "status": "completed", + "startedAt": "2026-05-15T09:30:56.316Z", + "completedAt": "2026-05-15T09:32:47.870Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-15T09:32:34.860Z" + } + ], + "chapters": [ + { + "id": "chap_nc9dfosyefib", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-15T09:32:34.860Z", + "endedAt": "2026-05-15T09:32:47.870Z", + "events": [ + { + "ts": 1778837554861, + "type": "decision", + "content": "Require broker API readiness for status --wait-for: Require broker API readiness for status --wait-for", + "raw": { + "question": "Require broker API readiness for status --wait-for", + "chosen": "Require broker API readiness for status --wait-for", + "alternatives": [], + "reasoning": "connection.json is written before Relaycast handshake and ready router setup, so PID-only polling can report RUNNING before spawn/control endpoints are usable." + }, + "significance": "high" + } + ] + } + ], + "retrospective": { + "summary": "Addressed review findings: status --wait-for now waits for broker HTTP API readiness instead of PID-only readiness, added a regression test for live PID with 503 startup API, verified TypeScript and focused CLI tests, and prepared trajectory records for commit with index.json.", + "approach": "Standard approach", + "confidence": 0.92 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc", + "tags": [], + "_trace": { + "startRef": "a7ef2fccd2cb971474f193edaba910a944545c3a", + "endRef": "a7ef2fccd2cb971474f193edaba910a944545c3a" + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-05/traj_6sjeohtm3php.md b/.trajectories/completed/2026-05/traj_6sjeohtm3php.md new file mode 100644 index 000000000..e356c63bb --- /dev/null +++ b/.trajectories/completed/2026-05/traj_6sjeohtm3php.md @@ -0,0 +1,31 @@ +# Trajectory: Address broker headless reliability review findings + +> **Status:** ✅ Completed +> **Confidence:** 92% +> **Started:** May 15, 2026 at 11:30 AM +> **Completed:** May 15, 2026 at 11:32 AM + +--- + +## Summary + +Addressed review findings: status --wait-for now waits for broker HTTP API readiness instead of PID-only readiness, added a regression test for live PID with 503 startup API, verified TypeScript and focused CLI tests, and prepared trajectory records for commit with index.json. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Require broker API readiness for status --wait-for +- **Chose:** Require broker API readiness for status --wait-for +- **Reasoning:** connection.json is written before Relaycast handshake and ready router setup, so PID-only polling can report RUNNING before spawn/control endpoints are usable. + +--- + +## Chapters + +### 1. Work +*Agent: default* + +- Require broker API readiness for status --wait-for: Require broker API readiness for status --wait-for diff --git a/.trajectories/completed/2026-05/traj_7uznwzoxbao6.json b/.trajectories/completed/2026-05/traj_7uznwzoxbao6.json new file mode 100644 index 000000000..90a98fd69 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_7uznwzoxbao6.json @@ -0,0 +1,73 @@ +{ + "id": "traj_7uznwzoxbao6", + "version": 1, + "task": { + "title": "Fix standalone detached headless startup" + }, + "status": "completed", + "startedAt": "2026-05-15T10:18:46.273Z", + "completedAt": "2026-05-15T10:25:00.598Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-15T10:18:48.168Z" + } + ], + "chapters": [ + { + "id": "chap_z4scd0rnsf1j", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-15T10:18:48.168Z", + "endedAt": "2026-05-15T10:25:00.598Z", + "events": [ + { + "ts": 1778840328180, + "type": "decision", + "content": "Use invocation-shape aware detached re-exec: Use invocation-shape aware detached re-exec", + "raw": { + "question": "Use invocation-shape aware detached re-exec", + "chosen": "Use invocation-shape aware detached re-exec", + "alternatives": [], + "reasoning": "Standalone Bun binaries do not have a separate Node script path; spawning execPath with cliScript as an extra argv item prevents the foreground child from running the intended up command." + }, + "significance": "high" + }, + { + "ts": 1778840700442, + "type": "reflection", + "content": "Strict pass found and fixed the standalone binary regression surfaced by CI: detached start now builds child argv based on whether the current invocation is a Node script or a compiled executable.", + "raw": { + "focalPoints": [ + "standalone smoke", + "re-exec argv", + "CI failure" + ], + "confidence": 0.9 + }, + "significance": "high", + "tags": [ + "focal:standalone smoke", + "focal:re-exec argv", + "focal:CI failure", + "confidence:0.9" + ] + } + ] + } + ], + "retrospective": { + "summary": "Fixed standalone detached headless startup by avoiding an extra script argv for compiled binaries, added a regression test, and reran typecheck, core CLI tests, lint, and diff checks.", + "approach": "Standard approach", + "confidence": 0.9 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc", + "tags": [], + "_trace": { + "startRef": "4aa1139a9b70761f3d5fec825da10d5dac3512b4", + "endRef": "4aa1139a9b70761f3d5fec825da10d5dac3512b4" + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-05/traj_7uznwzoxbao6.md b/.trajectories/completed/2026-05/traj_7uznwzoxbao6.md new file mode 100644 index 000000000..0612eaa1e --- /dev/null +++ b/.trajectories/completed/2026-05/traj_7uznwzoxbao6.md @@ -0,0 +1,32 @@ +# Trajectory: Fix standalone detached headless startup + +> **Status:** ✅ Completed +> **Confidence:** 90% +> **Started:** May 15, 2026 at 12:18 PM +> **Completed:** May 15, 2026 at 12:25 PM + +--- + +## Summary + +Fixed standalone detached headless startup by avoiding an extra script argv for compiled binaries, added a regression test, and reran typecheck, core CLI tests, lint, and diff checks. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Use invocation-shape aware detached re-exec +- **Chose:** Use invocation-shape aware detached re-exec +- **Reasoning:** Standalone Bun binaries do not have a separate Node script path; spawning execPath with cliScript as an extra argv item prevents the foreground child from running the intended up command. + +--- + +## Chapters + +### 1. Work +*Agent: default* + +- Use invocation-shape aware detached re-exec: Use invocation-shape aware detached re-exec +- Strict pass found and fixed the standalone binary regression surfaced by CI: detached start now builds child argv based on whether the current invocation is a Node script or a compiled executable. diff --git a/.trajectories/completed/2026-05/traj_9fdv7hxm0b60.json b/.trajectories/completed/2026-05/traj_9fdv7hxm0b60.json new file mode 100644 index 000000000..33f2346d2 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_9fdv7hxm0b60.json @@ -0,0 +1,65 @@ +{ + "id": "traj_9fdv7hxm0b60", + "version": 1, + "task": { + "title": "Strict standalone smoke follow-up" + }, + "status": "completed", + "startedAt": "2026-05-15T10:37:17.693Z", + "completedAt": "2026-05-15T10:43:11.587Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-15T10:37:18.176Z" + } + ], + "chapters": [ + { + "id": "chap_orqi2e5m9fp1", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-15T10:37:18.176Z", + "endedAt": "2026-05-15T10:43:11.587Z", + "events": [ + { + "ts": 1778841438177, + "type": "decision", + "content": "Treat Bun //root argv as a virtual executable entrypoint: Treat Bun //root argv as a virtual executable entrypoint", + "raw": { + "question": "Treat Bun //root argv as a virtual executable entrypoint", + "chosen": "Treat Bun //root argv as a virtual executable entrypoint", + "alternatives": [], + "reasoning": "CI showed compiled standalone still timed out; Bun compiled binaries report argv[1] as a virtual //root path while execPath is the real binary, so detached re-exec must preserve user args from argv[2] but omit the virtual path when spawning the child." + }, + "significance": "high" + }, + { + "ts": 1778841667818, + "type": "decision", + "content": "Align headless success output with standalone smoke readiness contract: Align headless success output with standalone smoke readiness contract", + "raw": { + "question": "Align headless success output with standalone smoke readiness contract", + "chosen": "Align headless success output with standalone smoke readiness contract", + "alternatives": [], + "reasoning": "The fixed Bun re-exec path started the broker locally, but the CI smoke contract asserts exactly one 'Broker started.' readiness line. The detached path now emits that stable line and logs the PID separately." + }, + "significance": "high" + } + ] + } + ], + "retrospective": { + "summary": "Fixed the remaining standalone macOS smoke failure by detecting Bun compiled binary argv shape, omitting the virtual //root entrypoint during detached re-exec, and aligning detached startup success output with the smoke readiness contract.", + "approach": "Standard approach", + "confidence": 0.92 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc", + "tags": [], + "_trace": { + "startRef": "28234d39ac210dad49ddcb979f28924dd8f93e6f", + "endRef": "28234d39ac210dad49ddcb979f28924dd8f93e6f" + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-05/traj_9fdv7hxm0b60.md b/.trajectories/completed/2026-05/traj_9fdv7hxm0b60.md new file mode 100644 index 000000000..92142bbd3 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_9fdv7hxm0b60.md @@ -0,0 +1,36 @@ +# Trajectory: Strict standalone smoke follow-up + +> **Status:** ✅ Completed +> **Confidence:** 92% +> **Started:** May 15, 2026 at 12:37 PM +> **Completed:** May 15, 2026 at 12:43 PM + +--- + +## Summary + +Fixed the remaining standalone macOS smoke failure by detecting Bun compiled binary argv shape, omitting the virtual //root entrypoint during detached re-exec, and aligning detached startup success output with the smoke readiness contract. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Treat Bun //root argv as a virtual executable entrypoint +- **Chose:** Treat Bun //root argv as a virtual executable entrypoint +- **Reasoning:** CI showed compiled standalone still timed out; Bun compiled binaries report argv[1] as a virtual //root path while execPath is the real binary, so detached re-exec must preserve user args from argv[2] but omit the virtual path when spawning the child. + +### Align headless success output with standalone smoke readiness contract +- **Chose:** Align headless success output with standalone smoke readiness contract +- **Reasoning:** The fixed Bun re-exec path started the broker locally, but the CI smoke contract asserts exactly one 'Broker started.' readiness line. The detached path now emits that stable line and logs the PID separately. + +--- + +## Chapters + +### 1. Work +*Agent: default* + +- Treat Bun //root argv as a virtual executable entrypoint: Treat Bun //root argv as a virtual executable entrypoint +- Align headless success output with standalone smoke readiness contract: Align headless success output with standalone smoke readiness contract diff --git a/.trajectories/completed/2026-05/traj_erzd7j9nto9r.json b/.trajectories/completed/2026-05/traj_erzd7j9nto9r.json new file mode 100644 index 000000000..c63453a18 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_erzd7j9nto9r.json @@ -0,0 +1,73 @@ +{ + "id": "traj_erzd7j9nto9r", + "version": 1, + "task": { + "title": "Strict review and PR prep for headless broker readiness" + }, + "status": "completed", + "startedAt": "2026-05-15T10:02:10.164Z", + "completedAt": "2026-05-15T10:06:38.127Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-15T10:03:26.093Z" + } + ], + "chapters": [ + { + "id": "chap_5g2lfephd4w2", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-15T10:03:26.093Z", + "endedAt": "2026-05-15T10:06:38.127Z", + "events": [ + { + "ts": 1778839406094, + "type": "decision", + "content": "Report broker PID on detached readiness timeout: Report broker PID on detached readiness timeout", + "raw": { + "question": "Report broker PID on detached readiness timeout", + "chosen": "Report broker PID on detached readiness timeout", + "alternatives": [], + "reasoning": "When wrapper and broker PIDs differ, diagnostics must point operators at the process represented by connection.json, especially for live-but-API-unready failures." + }, + "significance": "high" + }, + { + "ts": 1778839598021, + "type": "reflection", + "content": "Strict pass completed: removed the standalone reliability doc, moved skill guidance to the skills repo, and tightened timeout diagnostics to report the broker PID rather than the wrapper PID.", + "raw": { + "focalPoints": [ + "relay branch scope", + "skill repo split", + "diagnostics" + ], + "confidence": 0.9 + }, + "significance": "high", + "tags": [ + "focal:relay branch scope", + "focal:skill repo split", + "focal:diagnostics", + "confidence:0.9" + ] + } + ] + } + ], + "retrospective": { + "summary": "Strict PR prep removed local doc and skill edits from relay, retained trajectory records, added broker-PID timeout diagnostics, and moved the orchestrator guidance update to the sibling skills repo.", + "approach": "Standard approach", + "confidence": 0.91 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc", + "tags": [], + "_trace": { + "startRef": "9b017e2de113a019e9563f8cfd6ab0de77b14b16", + "endRef": "9b017e2de113a019e9563f8cfd6ab0de77b14b16" + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-05/traj_erzd7j9nto9r.md b/.trajectories/completed/2026-05/traj_erzd7j9nto9r.md new file mode 100644 index 000000000..68e937ea4 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_erzd7j9nto9r.md @@ -0,0 +1,32 @@ +# Trajectory: Strict review and PR prep for headless broker readiness + +> **Status:** ✅ Completed +> **Confidence:** 91% +> **Started:** May 15, 2026 at 12:02 PM +> **Completed:** May 15, 2026 at 12:06 PM + +--- + +## Summary + +Strict PR prep removed local doc and skill edits from relay, retained trajectory records, added broker-PID timeout diagnostics, and moved the orchestrator guidance update to the sibling skills repo. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Report broker PID on detached readiness timeout +- **Chose:** Report broker PID on detached readiness timeout +- **Reasoning:** When wrapper and broker PIDs differ, diagnostics must point operators at the process represented by connection.json, especially for live-but-API-unready failures. + +--- + +## Chapters + +### 1. Work +*Agent: default* + +- Report broker PID on detached readiness timeout: Report broker PID on detached readiness timeout +- Strict pass completed: removed the standalone reliability doc, moved skill guidance to the skills repo, and tightened timeout diagnostics to report the broker PID rather than the wrapper PID. diff --git a/.trajectories/completed/2026-05/traj_v1wexlfur5zr.json b/.trajectories/completed/2026-05/traj_v1wexlfur5zr.json new file mode 100644 index 000000000..f47b4a946 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_v1wexlfur5zr.json @@ -0,0 +1,65 @@ +{ + "id": "traj_v1wexlfur5zr", + "version": 1, + "task": { + "title": "Fix broker headless reliability doc" + }, + "status": "completed", + "startedAt": "2026-05-15T09:04:51.316Z", + "completedAt": "2026-05-15T09:13:50.970Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-15T09:13:34.194Z" + } + ], + "chapters": [ + { + "id": "chap_b3mn4r0wp8sw", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-15T09:13:34.194Z", + "endedAt": "2026-05-15T09:13:50.970Z", + "events": [ + { + "ts": 1778836414195, + "type": "decision", + "content": "Treat docs/BROKER_HEADLESS_RELIABILITY.md as an issue brief and implement the missing reliability fixes: Treat docs/BROKER_HEADLESS_RELIABILITY.md as an issue brief and implement the missing reliability fixes", + "raw": { + "question": "Treat docs/BROKER_HEADLESS_RELIABILITY.md as an issue brief and implement the missing reliability fixes", + "chosen": "Treat docs/BROKER_HEADLESS_RELIABILITY.md as an issue brief and implement the missing reliability fixes", + "alternatives": [], + "reasoning": "The file was untracked in the original checkout and absent from latest main, so I copied its intent into the new worktree, corrected stale file references, and fixed the high-impact no-dashboard/status gaps in current main." + }, + "significance": "high" + }, + { + "ts": 1778836414208, + "type": "reflection", + "content": "Core reliability patch is implemented, focused tests and TypeScript pass, remaining work is final trajectory completion and optional commit/stage if requested", + "raw": { + "confidence": 0.9 + }, + "significance": "high", + "tags": [ + "confidence:0.9" + ] + } + ] + } + ], + "retrospective": { + "summary": "Implemented headless broker reliability fixes: no-dashboard startup now detaches by default with a foreground escape hatch, status supports --wait-for polling, orchestration skills now recommend detached startup/readiness polling, and docs/BROKER_HEADLESS_RELIABILITY.md captures the corrected current-main plan and verification.", + "approach": "Standard approach", + "confidence": 0.9 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc", + "tags": [], + "_trace": { + "startRef": "a7ef2fccd2cb971474f193edaba910a944545c3a", + "endRef": "a7ef2fccd2cb971474f193edaba910a944545c3a" + } +} \ No newline at end of file diff --git a/.trajectories/completed/2026-05/traj_v1wexlfur5zr.md b/.trajectories/completed/2026-05/traj_v1wexlfur5zr.md new file mode 100644 index 000000000..d3d6d48e2 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_v1wexlfur5zr.md @@ -0,0 +1,32 @@ +# Trajectory: Fix broker headless reliability doc + +> **Status:** ✅ Completed +> **Confidence:** 90% +> **Started:** May 15, 2026 at 11:04 AM +> **Completed:** May 15, 2026 at 11:13 AM + +--- + +## Summary + +Implemented headless broker reliability fixes: no-dashboard startup now detaches by default with a foreground escape hatch, status supports --wait-for polling, orchestration skills now recommend detached startup/readiness polling, and docs/BROKER_HEADLESS_RELIABILITY.md captures the corrected current-main plan and verification. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Treat docs/BROKER_HEADLESS_RELIABILITY.md as an issue brief and implement the missing reliability fixes +- **Chose:** Treat docs/BROKER_HEADLESS_RELIABILITY.md as an issue brief and implement the missing reliability fixes +- **Reasoning:** The file was untracked in the original checkout and absent from latest main, so I copied its intent into the new worktree, corrected stale file references, and fixed the high-impact no-dashboard/status gaps in current main. + +--- + +## Chapters + +### 1. Work +*Agent: default* + +- Treat docs/BROKER_HEADLESS_RELIABILITY.md as an issue brief and implement the missing reliability fixes: Treat docs/BROKER_HEADLESS_RELIABILITY.md as an issue brief and implement the missing reliability fixes +- Core reliability patch is implemented, focused tests and TypeScript pass, remaining work is final trajectory completion and optional commit/stage if requested diff --git a/.trajectories/index.json b/.trajectories/index.json index 807cf704c..bb1005713 100644 --- a/.trajectories/index.json +++ b/.trajectories/index.json @@ -1,6 +1,6 @@ { "version": 1, - "lastUpdated": "2026-05-13T11:00:43.267Z", + "lastUpdated": "2026-05-15T11:11:52.427Z", "trajectories": { "traj_1775914133873_35667beb": { "title": "fix-sdk-build-resolution-workflow", @@ -402,6 +402,55 @@ "startedAt": "2026-05-13T10:57:02.796Z", "completedAt": "2026-05-13T11:00:43.100Z", "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay/.trajectories/completed/2026-05/traj_whd40oxptlhn.json" + }, + "traj_v1wexlfur5zr": { + "title": "Fix broker headless reliability doc", + "status": "completed", + "startedAt": "2026-05-15T09:04:51.316Z", + "completedAt": "2026-05-15T09:13:50.970Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_v1wexlfur5zr.json" + }, + "traj_6sjeohtm3php": { + "title": "Address broker headless reliability review findings", + "status": "completed", + "startedAt": "2026-05-15T09:30:56.316Z", + "completedAt": "2026-05-15T09:32:47.870Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_6sjeohtm3php.json" + }, + "traj_4vucir4qvqa2": { + "title": "Harden headless broker readiness semantics", + "status": "completed", + "startedAt": "2026-05-15T09:46:07.617Z", + "completedAt": "2026-05-15T09:59:00.460Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_4vucir4qvqa2.json" + }, + "traj_erzd7j9nto9r": { + "title": "Strict review and PR prep for headless broker readiness", + "status": "completed", + "startedAt": "2026-05-15T10:02:10.164Z", + "completedAt": "2026-05-15T10:06:38.127Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_erzd7j9nto9r.json" + }, + "traj_7uznwzoxbao6": { + "title": "Fix standalone detached headless startup", + "status": "completed", + "startedAt": "2026-05-15T10:18:46.273Z", + "completedAt": "2026-05-15T10:25:00.598Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_7uznwzoxbao6.json" + }, + "traj_9fdv7hxm0b60": { + "title": "Strict standalone smoke follow-up", + "status": "completed", + "startedAt": "2026-05-15T10:37:17.693Z", + "completedAt": "2026-05-15T10:43:11.587Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_9fdv7hxm0b60.json" + }, + "traj_0o6gb2wvk59t": { + "title": "Fresh end-to-end validation for headless readiness", + "status": "completed", + "startedAt": "2026-05-15T10:55:49.188Z", + "completedAt": "2026-05-15T11:11:52.324Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_0o6gb2wvk59t.json" } } -} +} \ No newline at end of file diff --git a/src/cli/commands/core.test.ts b/src/cli/commands/core.test.ts index 0eba8cf8e..147ab417a 100644 --- a/src/cli/commands/core.test.ts +++ b/src/cli/commands/core.test.ts @@ -1,5 +1,5 @@ import { Command } from 'commander'; -import { describe, expect, it, vi } from 'vitest'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; const sdkStatusClient = { getStatus: vi.fn(async () => ({ agent_count: 0, pending_delivery_count: 0 })), @@ -11,6 +11,14 @@ vi.mock('@agent-relay/sdk', () => ({ AgentRelayClient: vi.fn().mockImplementation(() => sdkStatusClient), })); +beforeEach(() => { + sdkStatusClient.getStatus.mockReset(); + sdkStatusClient.getStatus.mockResolvedValue({ agent_count: 0, pending_delivery_count: 0 }); + sdkStatusClient.getSession.mockReset(); + sdkStatusClient.getSession.mockResolvedValue({ workspace_key: '' }); + sdkStatusClient.disconnect.mockClear(); +}); + import { registerCoreCommands, type BridgeProject, @@ -77,6 +85,7 @@ function createFsMock(initialFiles: Record = {}): CoreFileSystem }; } +// eslint-disable-next-line complexity function createHarness(options?: { fs?: CoreFileSystem; relay?: CoreRelay; @@ -90,6 +99,11 @@ function createHarness(options?: { spawnedProcess?: SpawnedProcess; spawnImpl?: CoreDependencies['spawnProcess']; killImpl?: CoreDependencies['killProcess']; + nowImpl?: CoreDependencies['now']; + sleepImpl?: CoreDependencies['sleep']; + execPath?: string; + cliScript?: string; + argv?: string[]; checkForUpdatesResult?: Awaited>; }) { const projectRoot = '/tmp/project'; @@ -134,14 +148,14 @@ function createHarness(options?: { ) as unknown as CoreDependencies['checkForUpdates'], getVersion: vi.fn(() => '1.2.3'), env: options?.env ?? {}, - argv: ['node', '/tmp/agent-relay.js', 'up'], - execPath: '/usr/bin/node', - cliScript: '/tmp/agent-relay.js', + argv: options?.argv ?? ['node', '/tmp/agent-relay.js', 'up'], + execPath: options?.execPath ?? '/usr/bin/node', + cliScript: options?.cliScript ?? '/tmp/agent-relay.js', pid: 4242, - now: vi.fn(() => Date.now()), + now: options?.nowImpl ?? vi.fn(() => Date.now()), isPortInUse: vi.fn(async () => false), findBrokerApiPort: vi.fn(async () => 3889), - sleep: vi.fn(async () => undefined), + sleep: options?.sleepImpl ?? vi.fn(async () => undefined), onSignal: vi.fn(() => undefined), holdOpen: vi.fn(async () => undefined), log: vi.fn(() => undefined), @@ -373,7 +387,7 @@ describe('registerCoreCommands', () => { }, }); - const exitCode = await runCommand(program, ['up', '--no-dashboard']); + const exitCode = await runCommand(program, ['up', '--no-dashboard', '--foreground']); expect(exitCode).toBeUndefined(); expect(relay.spawn).toHaveBeenCalledWith({ @@ -438,7 +452,7 @@ describe('registerCoreCommands', () => { const relay = createRelayMock(); const { program, deps } = createHarness({ relay }); - const exitCode = await runCommand(program, ['up', '--no-dashboard', '--port', '3888']); + const exitCode = await runCommand(program, ['up', '--no-dashboard', '--foreground', '--port', '3888']); expect(exitCode).toBeUndefined(); expect(deps.createRelay).toHaveBeenCalledTimes(1); @@ -446,6 +460,222 @@ describe('registerCoreCommands', () => { expect(relay.getStatus).toHaveBeenCalledTimes(1); }); + it('up --no-dashboard detaches by default for headless sessions', async () => { + const spawnedProcess = createSpawnedProcessMock(); + let now = 0; + const fs = createFsMock(); + const sleepImpl = vi.fn(async (ms: number) => { + now += ms; + fs.writeFileSync('/tmp/project/.agent-relay/connection.json', connectionFile(4242)); + }); + const killImpl = vi.fn((pid: number, signal?: NodeJS.Signals | number) => { + if ((pid === 9001 || pid === 4242) && signal === 0) return; + throw new Error('unexpected kill check'); + }); + const { program, deps, relay } = createHarness({ + fs, + spawnedProcess, + killImpl, + nowImpl: vi.fn(() => now), + sleepImpl, + }); + + const exitCode = await runCommand(program, ['up', '--no-dashboard']); + + expect(exitCode).toBe(0); + expect(deps.spawnProcess).toHaveBeenCalledWith( + '/usr/bin/node', + ['/tmp/agent-relay.js', 'up', '--no-dashboard', '--foreground'], + { + detached: true, + stdio: 'ignore', + env: deps.env, + } + ); + expect(spawnedProcess.unref).toHaveBeenCalled(); + expect(sleepImpl).toHaveBeenCalledWith(500); + expect(sdkStatusClient.getStatus).toHaveBeenCalledTimes(1); + expect(deps.log).toHaveBeenCalledWith('Broker started.'); + expect(deps.log).toHaveBeenCalledWith('Broker PID: 4242'); + expect(deps.log).toHaveBeenCalledWith('Stop with: agent-relay down'); + expect(relay.getStatus).not.toHaveBeenCalled(); + }); + + it('up --background --no-dashboard preserves state and workspace args in the foreground child', async () => { + const spawnedProcess = createSpawnedProcessMock(); + let now = 0; + const fs = createFsMock(); + const stateDir = '/tmp/custom-agent-relay-state'; + const sleepImpl = vi.fn(async (ms: number) => { + now += ms; + fs.writeFileSync(`${stateDir}/connection.json`, connectionFile(5151)); + }); + const killImpl = vi.fn((pid: number, signal?: NodeJS.Signals | number) => { + if ((pid === 9001 || pid === 5151) && signal === 0) return; + throw new Error('unexpected kill check'); + }); + const { program, deps } = createHarness({ + fs, + spawnedProcess, + killImpl, + nowImpl: vi.fn(() => now), + sleepImpl, + }); + deps.argv = [ + 'node', + '/tmp/agent-relay.js', + 'up', + '--background', + '--no-dashboard', + '--state-dir', + stateDir, + '--workspace-key', + 'rk_live_custom', + ]; + + const exitCode = await runCommand(program, [ + 'up', + '--background', + '--no-dashboard', + '--state-dir', + stateDir, + '--workspace-key', + 'rk_live_custom', + ]); + + expect(exitCode).toBe(0); + expect(deps.spawnProcess).toHaveBeenCalledWith( + '/usr/bin/node', + [ + '/tmp/agent-relay.js', + 'up', + '--no-dashboard', + '--state-dir', + stateDir, + '--workspace-key', + 'rk_live_custom', + '--foreground', + ], + { + detached: true, + stdio: 'ignore', + env: deps.env, + } + ); + expect(deps.env.AGENT_RELAY_STATE_DIR).toBe(stateDir); + expect(deps.log).toHaveBeenCalledWith('Broker started.'); + expect(deps.log).toHaveBeenCalledWith('Broker PID: 5151'); + }); + + it('up --no-dashboard re-execs a Bun standalone binary without adding its virtual entrypoint', async () => { + const spawnedProcess = createSpawnedProcessMock(); + let now = 0; + const fs = createFsMock(); + const sleepImpl = vi.fn(async (ms: number) => { + now += ms; + fs.writeFileSync('/tmp/project/.agent-relay/connection.json', connectionFile(4242)); + }); + const killImpl = vi.fn((pid: number, signal?: NodeJS.Signals | number) => { + if ((pid === 9001 || pid === 4242) && signal === 0) return; + throw new Error('unexpected kill check'); + }); + const { program, deps } = createHarness({ + fs, + spawnedProcess, + killImpl, + nowImpl: vi.fn(() => now), + sleepImpl, + execPath: '/tmp/agent-relay-darwin-arm64', + cliScript: '/$bunfs/root/agent-relay-darwin-arm64', + argv: ['bun', '/$bunfs/root/agent-relay-darwin-arm64', 'up', '--no-dashboard'], + }); + + const exitCode = await runCommand(program, ['up', '--no-dashboard']); + + expect(exitCode).toBe(0); + expect(deps.spawnProcess).toHaveBeenCalledWith( + '/tmp/agent-relay-darwin-arm64', + ['up', '--no-dashboard', '--foreground'], + { + detached: true, + stdio: 'ignore', + env: deps.env, + } + ); + }); + + it('up --no-dashboard exits non-zero when the detached broker never becomes ready', async () => { + const spawnedProcess = createSpawnedProcessMock(); + let now = 0; + const sleepImpl = vi.fn(async (ms: number) => { + now += ms; + }); + const killImpl = vi.fn((pid: number, signal?: NodeJS.Signals | number) => { + if (pid === 9001 && signal === 0) return; + throw new Error('unexpected kill check'); + }); + const { program, deps } = createHarness({ + spawnedProcess, + killImpl, + nowImpl: vi.fn(() => now), + sleepImpl, + }); + + const exitCode = await runCommand(program, ['up', '--no-dashboard']); + + expect(exitCode).toBe(1); + expect(deps.error).toHaveBeenCalledWith( + 'Broker background start did not become ready within 10s (pid: 9001).' + ); + expect(deps.error).toHaveBeenCalledWith( + 'Run `agent-relay status --wait-for=10` for details, or `agent-relay down --force` to clean up.' + ); + expect(deps.log).not.toHaveBeenCalledWith('Broker started.'); + }); + + it('up --no-dashboard reports the broker PID when the detached broker is live but API-unready', async () => { + const spawnedProcess = createSpawnedProcessMock({ pid: 9001 }); + let now = 0; + const fs = createFsMock({ ['/tmp/project/.agent-relay/connection.json']: connectionFile(4242) }); + const sleepImpl = vi.fn(async (ms: number) => { + now += ms; + }); + const killImpl = vi.fn((pid: number, signal?: NodeJS.Signals | number) => { + if ((pid === 9001 || pid === 4242) && signal === 0) return; + throw new Error('unexpected kill check'); + }); + sdkStatusClient.getStatus.mockRejectedValue(new Error('503 Service Unavailable')); + const { program, deps } = createHarness({ + fs, + spawnedProcess, + killImpl, + nowImpl: vi.fn(() => now), + sleepImpl, + }); + + const exitCode = await runCommand(program, ['up', '--no-dashboard']); + + expect(exitCode).toBe(1); + expect(deps.error).toHaveBeenCalledWith( + 'Broker background start did not become ready within 10s (pid: 4242).' + ); + expect(deps.error).toHaveBeenCalledWith('Broker process is running, but the API did not become ready.'); + }); + + it('up --no-dashboard reports spawn failures without claiming background success', async () => { + const { program, deps } = createHarness({ + spawnImpl: vi.fn(() => { + throw new Error('spawn EACCES'); + }) as unknown as CoreDependencies['spawnProcess'], + }); + + const exitCode = await runCommand(program, ['up', '--no-dashboard']); + + expect(exitCode).toBe(1); + expect(deps.error).toHaveBeenCalledWith('Failed to start broker in background: spawn EACCES'); + expect(deps.log).not.toHaveBeenCalledWith('Broker started.'); + }); + it('up force exits on repeated SIGINT during hung shutdown and suppresses expected dashboard signal noise', async () => { const relay = createRelayMock({ shutdown: vi.fn(() => new Promise(() => undefined)), @@ -596,6 +826,113 @@ describe('registerCoreCommands', () => { expect(deps.log).toHaveBeenCalledWith('Status: STOPPED'); }); + it('status --wait-for polls until broker connection metadata appears', async () => { + let now = 0; + const fs = createFsMock(); + const sleepImpl = vi.fn(async (ms: number) => { + now += ms; + fs.writeFileSync('/tmp/project/.agent-relay/connection.json', connectionFile(4242)); + }); + const killImpl = vi.fn((pid: number, signal?: NodeJS.Signals | number) => { + if (pid === 4242 && signal === 0) return; + throw new Error('unexpected kill check'); + }); + const { program, deps } = createHarness({ + fs, + killImpl, + nowImpl: vi.fn(() => now), + sleepImpl, + }); + + const exitCode = await runCommand(program, ['status', '--wait-for', '1']); + + expect(exitCode).toBeUndefined(); + expect(sleepImpl).toHaveBeenCalledWith(500); + expect(deps.log).toHaveBeenCalledWith('Status: RUNNING'); + expect(deps.log).toHaveBeenCalledWith('PID: 4242'); + }); + + it('status --wait-for waits for the broker API after the PID appears', async () => { + let now = 0; + const connectionPath = '/tmp/project/.agent-relay/connection.json'; + const fs = createFsMock({ [connectionPath]: connectionFile(4242) }); + const sleepImpl = vi.fn(async (ms: number) => { + now += ms; + }); + const killImpl = vi.fn((pid: number, signal?: NodeJS.Signals | number) => { + if (pid === 4242 && signal === 0) return; + throw new Error('unexpected kill check'); + }); + sdkStatusClient.getStatus + .mockRejectedValueOnce(new Error('503 Service Unavailable')) + .mockResolvedValueOnce({ agent_count: 1, pending_delivery_count: 0 }); + sdkStatusClient.getSession.mockResolvedValueOnce({ workspace_key: 'rk_live_ready' }); + + const { program, deps } = createHarness({ + fs, + killImpl, + nowImpl: vi.fn(() => now), + sleepImpl, + }); + + const exitCode = await runCommand(program, ['status', '--wait-for', '1']); + + expect(exitCode).toBeUndefined(); + expect(sleepImpl).toHaveBeenCalledWith(500); + expect(sdkStatusClient.getStatus).toHaveBeenCalledTimes(2); + expect(fs.unlinkSync).not.toHaveBeenCalledWith(connectionPath); + expect(deps.log).toHaveBeenCalledWith('Status: RUNNING'); + expect(deps.log).toHaveBeenCalledWith('Agents: 1'); + expect(deps.log).toHaveBeenCalledWith('Workspace Key: rk_live_ready'); + }); + + it('status --wait-for reports STARTING and exits non-zero when the PID is live but the API is unready', async () => { + let now = 0; + const connectionPath = '/tmp/project/.agent-relay/connection.json'; + const fs = createFsMock({ [connectionPath]: connectionFile(4242) }); + const sleepImpl = vi.fn(async (ms: number) => { + now += ms; + }); + const killImpl = vi.fn((pid: number, signal?: NodeJS.Signals | number) => { + if (pid === 4242 && signal === 0) return; + throw new Error('unexpected kill check'); + }); + sdkStatusClient.getStatus.mockRejectedValue(new Error('503 Service Unavailable')); + + const { program, deps } = createHarness({ + fs, + killImpl, + nowImpl: vi.fn(() => now), + sleepImpl, + }); + + const exitCode = await runCommand(program, ['status', '--wait-for', '1']); + + expect(exitCode).toBe(1); + expect(fs.unlinkSync).not.toHaveBeenCalledWith(connectionPath); + expect(deps.log).toHaveBeenCalledWith('Status: STARTING'); + expect(deps.log).toHaveBeenCalledWith('PID: 4242'); + expect(deps.warn).toHaveBeenCalledWith( + 'Broker process is running, but the API did not become ready before timeout.' + ); + }); + + it('status --wait-for exits non-zero when no broker becomes ready before timeout', async () => { + let now = 0; + const sleepImpl = vi.fn(async (ms: number) => { + now += ms; + }); + const { program, deps } = createHarness({ + nowImpl: vi.fn(() => now), + sleepImpl, + }); + + const exitCode = await runCommand(program, ['status', '--wait-for', '1']); + + expect(exitCode).toBe(1); + expect(deps.log).toHaveBeenCalledWith('Status: STOPPED'); + }); + it('version prints current version', async () => { const { program, deps } = createHarness(); @@ -653,7 +990,7 @@ describe('registerCoreCommands', () => { const relay = createRelayMock(); const { program, deps } = createHarness({ relay }); - const exitCode = await runCommand(program, ['up', '--no-dashboard']); + const exitCode = await runCommand(program, ['up', '--no-dashboard', '--foreground']); expect(exitCode).toBeUndefined(); expect(deps.log).toHaveBeenCalledWith('Workspace Key: rk_live_default'); @@ -674,7 +1011,13 @@ describe('registerCoreCommands', () => { const relay = createRelayMock({ workspaceKey: 'rk_live_custom' }); const { program, deps } = createHarness({ relay, env }); - const exitCode = await runCommand(program, ['up', '--no-dashboard', '--workspace-key', 'rk_live_custom']); + const exitCode = await runCommand(program, [ + 'up', + '--no-dashboard', + '--foreground', + '--workspace-key', + 'rk_live_custom', + ]); expect(exitCode).toBeUndefined(); expect(env.RELAY_API_KEY).toBe('rk_live_custom'); @@ -687,7 +1030,7 @@ describe('registerCoreCommands', () => { const relay = createRelayMock(); const { program } = createHarness({ relay, env }); - const exitCode = await runCommand(program, ['up', '--no-dashboard']); + const exitCode = await runCommand(program, ['up', '--no-dashboard', '--foreground']); expect(exitCode).toBeUndefined(); expect(env.RELAY_API_KEY).toBeUndefined(); @@ -699,7 +1042,7 @@ describe('registerCoreCommands', () => { const relay = createRelayMock(); const { program } = createHarness({ relay, env, fs }); - const exitCode = await runCommand(program, ['up', '--no-dashboard']); + const exitCode = await runCommand(program, ['up', '--no-dashboard', '--foreground']); expect(exitCode).toBeUndefined(); expect(env.RELAYCAST_MCP_COMMAND).toBe('/usr/bin/node /tmp/relaycast-mcp.js'); @@ -711,7 +1054,7 @@ describe('registerCoreCommands', () => { const relay = createRelayMock(); const { program } = createHarness({ relay, env, fs }); - const exitCode = await runCommand(program, ['up', '--no-dashboard']); + const exitCode = await runCommand(program, ['up', '--no-dashboard', '--foreground']); expect(exitCode).toBeUndefined(); expect(env.RELAYCAST_MCP_COMMAND).toBe('node /custom/relaycast-mcp.js'); @@ -721,7 +1064,7 @@ describe('registerCoreCommands', () => { const relay = createRelayMock({ workspaceKey: undefined }); const { program, deps } = createHarness({ relay }); - const exitCode = await runCommand(program, ['up', '--no-dashboard']); + const exitCode = await runCommand(program, ['up', '--no-dashboard', '--foreground']); expect(exitCode).toBeUndefined(); expect(deps.log).toHaveBeenCalledWith('Workspace Key: unknown'); @@ -755,7 +1098,13 @@ describe('registerCoreCommands', () => { const relay = createRelayMock({ workspaceKey: 'rk_live_new' }); const { program, deps } = createHarness({ relay, env }); - const exitCode = await runCommand(program, ['up', '--no-dashboard', '--workspace-key', 'rk_live_new']); + const exitCode = await runCommand(program, [ + 'up', + '--no-dashboard', + '--foreground', + '--workspace-key', + 'rk_live_new', + ]); expect(exitCode).toBeUndefined(); expect(env.RELAY_API_KEY).toBe('rk_live_new'); diff --git a/src/cli/commands/core.ts b/src/cli/commands/core.ts index 3fa1ac6ab..d02ca0e78 100644 --- a/src/cli/commands/core.ts +++ b/src/cli/commands/core.ts @@ -411,6 +411,7 @@ export function registerCoreCommands(program: Command, overrides: Partial', 'Use a pre-established Relaycast workspace key') .option('--state-dir ', 'Directory for broker state and connection files (default: .agent-relay/)') @@ -420,6 +421,7 @@ export function registerCoreCommands(program: Command, overrides: Partial', 'Directory for broker state and connection files') - .action(async (options: { stateDir?: string }) => { + .option('--wait-for ', 'Poll for broker readiness for up to this many seconds') + .action(async (options: { stateDir?: string; waitFor?: string }) => { await runStatusCommand(deps, options); }); diff --git a/src/cli/lib/broker-lifecycle.ts b/src/cli/lib/broker-lifecycle.ts index de341cef2..73925d21b 100644 --- a/src/cli/lib/broker-lifecycle.ts +++ b/src/cli/lib/broker-lifecycle.ts @@ -13,6 +13,7 @@ type UpOptions = { port?: string; spawn?: boolean; background?: boolean; + foreground?: boolean; verbose?: boolean; dashboardPath?: string; reuseExistingBroker?: boolean; @@ -33,6 +34,8 @@ const MAX_PORT = 65535; /** The broker writes this file with URL, port, API key, and PID. */ const CONNECTION_FILENAME = 'connection.json'; +const STATUS_POLL_INTERVAL_MS = 500; +const DETACHED_START_READY_TIMEOUT_MS = 10_000; export interface BrokerConnection { url: string; @@ -41,6 +44,25 @@ export interface BrokerConnection { pid: number; } +type BrokerStatusDetails = { + status: Awaited>; + session: Awaited>; +}; + +type BrokerReadiness = + | { + state: 'running'; + conn: BrokerConnection; + statusDetails?: BrokerStatusDetails | null; + } + | { + state: 'starting'; + conn: BrokerConnection; + } + | { + state: 'stopped'; + }; + type BrokerConnectionReader = { readFileSync: (filePath: string, encoding: BufferEncoding) => string; }; @@ -152,11 +174,7 @@ export function classifyBrokerStartError(err: unknown): string { } /** Exported for testing. */ -export function classifyBrokerStartStage( - err: unknown, - message: string, - wantsDashboard: boolean -): string { +export function classifyBrokerStartStage(err: unknown, message: string, wantsDashboard: boolean): string { if (errorCode(err) === 'EADDRINUSE' && wantsDashboard) return 'dashboard_port'; if (isBrokerAlreadyRunningError(message)) return 'already_running'; if (/fetch failed/i.test(message)) return 'connect'; @@ -372,6 +390,121 @@ function cleanupBrokerFiles(paths: CoreProjectPaths, deps: CoreDependencies): vo } } +function childUpArgsForDetachedStart(options: UpOptions, deps: CoreDependencies): string[] { + const args = cliUserArgs(deps) + .filter((arg) => !['--background', '--foreground'].some((name) => matchesCliOption(arg, name))); + if (options.dashboard === false && !args.includes('--no-dashboard')) { + args.push('--no-dashboard'); + } + if (options.stateDir && !hasCliOption(args, '--state-dir')) { + args.push('--state-dir', path.resolve(options.stateDir)); + } + if (options.workspaceKey && !hasCliOption(args, '--workspace-key')) { + args.push('--workspace-key', options.workspaceKey); + } + if (options.verbose === true && !args.includes('--verbose')) { + args.push('--verbose'); + } + if (options.dashboard === false && !args.includes('--foreground')) { + args.push('--foreground'); + } + return args; +} + +function cliUserArgs(deps: CoreDependencies): string[] { + return hasEntrypointArgvSlot(deps) ? deps.argv.slice(2) : deps.argv.slice(1); +} + +function detachedCliInvocation( + deps: CoreDependencies, + args: string[] +): { command: string; args: string[] } { + if (shouldReexecThroughScript(deps)) { + return { command: deps.execPath, args: [deps.cliScript, ...args] }; + } + return { command: deps.execPath, args }; +} + +function hasEntrypointArgvSlot(deps: CoreDependencies): boolean { + return isBundledBunExecutableEntrypoint(deps) || isCliScriptEntrypoint(deps); +} + +function shouldReexecThroughScript(deps: CoreDependencies): boolean { + return isCliScriptEntrypoint(deps) && !sameCliPath(deps.execPath, deps.cliScript); +} + +function isCliScriptEntrypoint(deps: CoreDependencies): boolean { + const cliScript = deps.cliScript.trim(); + if (!cliScript) { + return false; + } + if (isBundledBunExecutableEntrypoint(deps)) { + return false; + } + if (sameCliPath(deps.execPath, cliScript)) { + return true; + } + return path.isAbsolute(cliScript) || cliScript.includes('/') || cliScript.includes('\\') || /\.[cm]?js$/i.test(cliScript); +} + +function isBundledBunExecutableEntrypoint(deps: CoreDependencies): boolean { + // Bun --compile exposes argv[1] as a virtual path for the embedded executable. + return deps.argv[0] === 'bun' && deps.cliScript.startsWith('/$bunfs/root/'); +} + +function sameCliPath(left: string, right: string): boolean { + return path.resolve(left) === path.resolve(right); +} + +function hasCliOption(args: string[], name: string): boolean { + return args.some((arg) => matchesCliOption(arg, name)); +} + +function matchesCliOption(arg: string, name: string): boolean { + return arg === name || arg.startsWith(`${name}=`); +} + +async function checkBrokerReadiness( + paths: CoreProjectPaths, + deps: CoreDependencies, + requireApi: boolean +): Promise { + const conn = readBrokerConnectionFromFs(deps.fs, paths.dataDir); + if (!conn || conn.pid <= 0) { + return { state: 'stopped' }; + } + if (!isProcessRunning(conn.pid, deps)) { + safeUnlink(path.join(paths.dataDir, CONNECTION_FILENAME), deps); + return { state: 'stopped' }; + } + if (!requireApi) { + return { state: 'running', conn }; + } + + const statusDetails = await readBrokerStatusDetails(conn); + if (statusDetails) { + return { state: 'running', conn, statusDetails }; + } + return { state: 'starting', conn }; +} + +async function waitForBrokerReadiness( + paths: CoreProjectPaths, + deps: CoreDependencies, + waitMs: number, + requireApi: boolean +): Promise { + const deadline = deps.now() + waitMs; + let latest = await checkBrokerReadiness(paths, deps, requireApi); + + while (latest.state !== 'running' && waitMs > 0 && deps.now() < deadline) { + await deps.sleep(Math.min(STATUS_POLL_INTERVAL_MS, Math.max(0, deadline - deps.now()))); + latest = await checkBrokerReadiness(paths, deps, requireApi); + } + + return latest; +} + function pickDashboardStaticDir(candidates: string[], deps: CoreDependencies): string | null { const existingCandidates = Array.from(new Set(candidates)).filter((candidate) => deps.fs.existsSync(candidate) @@ -925,20 +1058,6 @@ async function shutdownUpResources( export async function runUpCommand(options: UpOptions, deps: CoreDependencies): Promise { ensureBundledRelaycastMcpCommand(deps); - if (options.background) { - const args = deps.argv.slice(2).filter((arg) => arg !== '--background'); - const child = deps.spawnProcess(deps.execPath, [deps.cliScript, ...args], { - detached: true, - stdio: 'ignore', - env: deps.env, - }); - child.unref?.(); - deps.log(`Broker started in background (pid: ${child.pid ?? 'unknown'})`); - deps.log('Stop with: agent-relay down'); - deps.exit(0); - return; - } - const paths = deps.getProjectPaths(); // --state-dir overrides where the broker writes state / connection files if (options.stateDir) { @@ -946,6 +1065,45 @@ export async function runUpCommand(options: UpOptions, deps: CoreDependencies): paths.dataDir = resolved; deps.env.AGENT_RELAY_STATE_DIR = resolved; } + + if (options.background || (options.dashboard === false && !options.foreground)) { + const args = childUpArgsForDetachedStart(options, deps); + const invocation = detachedCliInvocation(deps, args); + let child: SpawnedProcess; + try { + child = deps.spawnProcess(invocation.command, invocation.args, { + detached: true, + stdio: 'ignore', + env: deps.env, + }); + } catch (err: unknown) { + deps.error(`Failed to start broker in background: ${describeError(err)}`); + deps.exit(1); + return; + } + child.unref?.(); + const readiness = await waitForBrokerReadiness(paths, deps, DETACHED_START_READY_TIMEOUT_MS, true); + if (readiness.state !== 'running') { + const pid = readiness.state === 'starting' ? readiness.conn.pid : child.pid; + deps.error( + pid + ? `Broker background start did not become ready within ${DETACHED_START_READY_TIMEOUT_MS / 1000}s (pid: ${pid}).` + : `Broker background start did not become ready within ${DETACHED_START_READY_TIMEOUT_MS / 1000}s.` + ); + if (readiness.state === 'starting') { + deps.error('Broker process is running, but the API did not become ready.'); + } + deps.error('Run `agent-relay status --wait-for=10` for details, or `agent-relay down --force` to clean up.'); + deps.exit(1); + return; + } + deps.log('Broker started.'); + deps.log(`Broker PID: ${readiness.conn.pid}`); + deps.log('Stop with: agent-relay down'); + deps.exit(0); + return; + } + const wantsDashboard = options.dashboard !== false; const requestedDashboardPort = Number.parseInt(options.port ?? '3888', 10) || 3888; const shouldReuseExistingBroker = options.reuseExistingBroker === true; @@ -1342,56 +1500,66 @@ export async function runDownCommand(options: DownOptions, deps: CoreDependencie export async function runStatusCommand( deps: CoreDependencies, - options?: { stateDir?: string } + options?: { stateDir?: string; waitFor?: string } ): Promise { const paths = deps.getProjectPaths(); if (options?.stateDir) { paths.dataDir = path.resolve(options.stateDir); } - const conn = readBrokerConnectionFromFs(deps.fs, paths.dataDir); - - let running = false; - let brokerPid: number | undefined; + const waitSeconds = Number.parseFloat(options?.waitFor ?? '0'); + const waitMs = Number.isFinite(waitSeconds) && waitSeconds > 0 ? waitSeconds * 1000 : 0; - if (conn && conn.pid > 0 && isProcessRunning(conn.pid, deps)) { - brokerPid = conn.pid; - running = true; - } else if (conn) { - // Connection file exists but process is dead — clean up - safeUnlink(path.join(paths.dataDir, CONNECTION_FILENAME), deps); + const readiness = await waitForBrokerReadiness(paths, deps, waitMs, waitMs > 0); + if (readiness.state === 'stopped') { + deps.log('Status: STOPPED'); + if (waitMs > 0) { + deps.exit(1); + } + return; } - if (!running) { - deps.log('Status: STOPPED'); + if (readiness.state === 'starting') { + deps.log('Status: STARTING'); + deps.log('Mode: broker (stdio)'); + deps.log(`PID: ${readiness.conn.pid}`); + deps.log(`Project: ${paths.projectRoot}`); + deps.warn('Broker process is running, but the API did not become ready before timeout.'); + deps.exit(1); return; } deps.log('Status: RUNNING'); deps.log('Mode: broker (stdio)'); - deps.log(`PID: ${brokerPid}`); + deps.log(`PID: ${readiness.conn.pid}`); deps.log(`Project: ${paths.projectRoot}`); // Query the running broker for additional status info - const connInfo = readBrokerConnectionFromFs(deps.fs, paths.dataDir); - if (connInfo) { - const client = new AgentRelayClient({ baseUrl: connInfo.url, apiKey: connInfo.api_key }); - try { - const status = await client.getStatus(); - if (typeof status.agent_count === 'number') { - deps.log(`Agents: ${status.agent_count}`); - } - if (typeof status.pending_delivery_count === 'number' && status.pending_delivery_count > 0) { - deps.log(`Pending deliveries: ${status.pending_delivery_count}`); - } - const session = await client.getSession(); - if (session.workspace_key) { - deps.log(`Workspace Key: ${session.workspace_key}`); - deps.log(`Observer: https://agentrelay.com/observer?key=${session.workspace_key}`); - } - } catch { - // PID-based status is enough when broker query fails. - } finally { - client.disconnect(); + const statusDetails = + readiness.statusDetails ?? (waitMs > 0 ? null : await readBrokerStatusDetails(readiness.conn)); + if (statusDetails) { + const { status, session } = statusDetails; + if (typeof status.agent_count === 'number') { + deps.log(`Agents: ${status.agent_count}`); } + if (typeof status.pending_delivery_count === 'number' && status.pending_delivery_count > 0) { + deps.log(`Pending deliveries: ${status.pending_delivery_count}`); + } + if (session.workspace_key) { + deps.log(`Workspace Key: ${session.workspace_key}`); + deps.log(`Observer: https://agentrelay.com/observer?key=${session.workspace_key}`); + } + } +} + +async function readBrokerStatusDetails(conn: BrokerConnection): Promise { + const client = new AgentRelayClient({ baseUrl: conn.url, apiKey: conn.api_key }); + try { + const status = await client.getStatus(); + const session = await client.getSession(); + return { status, session }; + } catch { + return null; + } finally { + client.disconnect(); } } diff --git a/tests/integration/broker/utils/broker-harness.ts b/tests/integration/broker/utils/broker-harness.ts index db18e95fc..24be33dc6 100644 --- a/tests/integration/broker/utils/broker-harness.ts +++ b/tests/integration/broker/utils/broker-harness.ts @@ -36,6 +36,9 @@ export async function ensureApiKey(): Promise { } const ws = await RelayCast.createWorkspace(`test-${Date.now().toString(36)}`); const apiKey = ws.apiKey; + if (!apiKey) { + throw new Error('Relaycast workspace did not return an API key'); + } _cachedApiKey = apiKey; process.env.RELAY_API_KEY = apiKey; return apiKey; From fc4344a9468a8fabc4a2d48bf8f3e7e5609dcfaa Mon Sep 17 00:00:00 2001 From: Proactive Runtime Bot Date: Fri, 15 May 2026 13:52:57 +0200 Subject: [PATCH 2/3] Fix broker orphan cleanup matching --- .../completed/2026-05/traj_4chzkm724ufo.json | 76 ++++++++++++ .../completed/2026-05/traj_4chzkm724ufo.md | 40 ++++++ .trajectories/index.json | 11 +- src/cli/commands/core.test.ts | 26 +++- src/cli/lib/broker-lifecycle.ts | 115 ++++++++++++------ 5 files changed, 231 insertions(+), 37 deletions(-) create mode 100644 .trajectories/completed/2026-05/traj_4chzkm724ufo.json create mode 100644 .trajectories/completed/2026-05/traj_4chzkm724ufo.md diff --git a/.trajectories/completed/2026-05/traj_4chzkm724ufo.json b/.trajectories/completed/2026-05/traj_4chzkm724ufo.json new file mode 100644 index 000000000..b091851bb --- /dev/null +++ b/.trajectories/completed/2026-05/traj_4chzkm724ufo.json @@ -0,0 +1,76 @@ +{ + "id": "traj_4chzkm724ufo", + "version": 1, + "task": { + "title": "Fix headless orchestrator worktree CLI E2E issues" + }, + "status": "completed", + "startedAt": "2026-05-15T11:44:28.338Z", + "completedAt": "2026-05-15T11:51:05.319Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-15T11:44:28.513Z" + } + ], + "chapters": [ + { + "id": "chap_4065ik4d57ah", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-15T11:44:28.513Z", + "endedAt": "2026-05-15T11:51:05.319Z", + "events": [ + { + "ts": 1778845468515, + "type": "decision", + "content": "Investigate failed worktree E2E before editing: Investigate failed worktree E2E before editing", + "raw": { + "question": "Investigate failed worktree E2E before editing", + "chosen": "Investigate failed worktree E2E before editing", + "alternatives": [], + "reasoning": "The documented skill command failed in a real skills PR worktree with stale connection metadata and STOPPED status; need distinguish environmental port collision from CLI/doc defects before changing guidance." + }, + "significance": "high" + }, + { + "ts": 1778845739665, + "type": "decision", + "content": "Fixed orphan cleanup process matching: Fixed orphan cleanup process matching", + "raw": { + "question": "Fixed orphan cleanup process matching", + "chosen": "Fixed orphan cleanup process matching", + "alternatives": [], + "reasoning": "The worktree E2E harness was killed by agent-relay down --force because orphan cleanup grepped ps output for projectRoot and agent-relay-broker anywhere in the command line; shell wrappers can contain both strings without being broker processes. The fix parses ps output and only targets commands whose executable basename is agent-relay-broker." + }, + "significance": "high" + }, + { + "ts": 1778845804918, + "type": "reflection", + "content": "Worktree E2E now passes after fixing orphan cleanup false-positive process matching. The CLI scenario leaves .agent-relay/ and .mcp.json as expected runtime artifacts, so verification worktrees need cleanup before status checks.", + "raw": { + "focalPoints": ["worktree-e2e", "orphan-cleanup", "runtime-artifacts"], + "confidence": 0.85 + }, + "significance": "high", + "tags": ["focal:worktree-e2e", "focal:orphan-cleanup", "focal:runtime-artifacts", "confidence:0.85"] + } + ] + } + ], + "retrospective": { + "summary": "Fixed a headless CLI worktree E2E failure where orphan cleanup could match and terminate the caller shell when its command line contained both the worktree path and agent-relay-broker. Added robust ps parsing so down/up orphan cleanup only targets actual agent-relay-broker executables, added a regression test, refreshed build, reran the skills PR worktree CLI lifecycle end to end, and documented cleanup of generated .agent-relay/.mcp.json artifacts in the headless orchestrator skill.", + "approach": "Standard approach", + "confidence": 0.9 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc", + "tags": [], + "_trace": { + "startRef": "4da9b3491972b448a1c5d482371247157b458c2b", + "endRef": "4da9b3491972b448a1c5d482371247157b458c2b" + } +} diff --git a/.trajectories/completed/2026-05/traj_4chzkm724ufo.md b/.trajectories/completed/2026-05/traj_4chzkm724ufo.md new file mode 100644 index 000000000..e72d5c956 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_4chzkm724ufo.md @@ -0,0 +1,40 @@ +# Trajectory: Fix headless orchestrator worktree CLI E2E issues + +> **Status:** ✅ Completed +> **Confidence:** 90% +> **Started:** May 15, 2026 at 01:44 PM +> **Completed:** May 15, 2026 at 01:51 PM + +--- + +## Summary + +Fixed a headless CLI worktree E2E failure where orphan cleanup could match and terminate the caller shell when its command line contained both the worktree path and agent-relay-broker. Added robust ps parsing so down/up orphan cleanup only targets actual agent-relay-broker executables, added a regression test, refreshed build, reran the skills PR worktree CLI lifecycle end to end, and documented cleanup of generated .agent-relay/.mcp.json artifacts in the headless orchestrator skill. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Investigate failed worktree E2E before editing + +- **Chose:** Investigate failed worktree E2E before editing +- **Reasoning:** The documented skill command failed in a real skills PR worktree with stale connection metadata and STOPPED status; need distinguish environmental port collision from CLI/doc defects before changing guidance. + +### Fixed orphan cleanup process matching + +- **Chose:** Fixed orphan cleanup process matching +- **Reasoning:** The worktree E2E harness was killed by agent-relay down --force because orphan cleanup grepped ps output for projectRoot and agent-relay-broker anywhere in the command line; shell wrappers can contain both strings without being broker processes. The fix parses ps output and only targets commands whose executable basename is agent-relay-broker. + +--- + +## Chapters + +### 1. Work + +_Agent: default_ + +- Investigate failed worktree E2E before editing: Investigate failed worktree E2E before editing +- Fixed orphan cleanup process matching: Fixed orphan cleanup process matching +- Worktree E2E now passes after fixing orphan cleanup false-positive process matching. The CLI scenario leaves .agent-relay/ and .mcp.json as expected runtime artifacts, so verification worktrees need cleanup before status checks. diff --git a/.trajectories/index.json b/.trajectories/index.json index bb1005713..c180da086 100644 --- a/.trajectories/index.json +++ b/.trajectories/index.json @@ -1,6 +1,6 @@ { "version": 1, - "lastUpdated": "2026-05-15T11:11:52.427Z", + "lastUpdated": "2026-05-15T11:51:05.433Z", "trajectories": { "traj_1775914133873_35667beb": { "title": "fix-sdk-build-resolution-workflow", @@ -451,6 +451,13 @@ "startedAt": "2026-05-15T10:55:49.188Z", "completedAt": "2026-05-15T11:11:52.324Z", "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_0o6gb2wvk59t.json" + }, + "traj_4chzkm724ufo": { + "title": "Fix headless orchestrator worktree CLI E2E issues", + "status": "completed", + "startedAt": "2026-05-15T11:44:28.338Z", + "completedAt": "2026-05-15T11:51:05.319Z", + "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_4chzkm724ufo.json" } } -} \ No newline at end of file +} diff --git a/src/cli/commands/core.test.ts b/src/cli/commands/core.test.ts index 147ab417a..696eaeede 100644 --- a/src/cli/commands/core.test.ts +++ b/src/cli/commands/core.test.ts @@ -98,6 +98,7 @@ function createHarness(options?: { missingBridgeProjects?: BridgeProject[]; spawnedProcess?: SpawnedProcess; spawnImpl?: CoreDependencies['spawnProcess']; + execCommand?: CoreDependencies['execCommand']; killImpl?: CoreDependencies['killProcess']; nowImpl?: CoreDependencies['now']; sleepImpl?: CoreDependencies['sleep']; @@ -139,7 +140,7 @@ function createHarness(options?: { findDashboardBinary: vi.fn(() => options?.dashboardBinary ?? '/usr/local/bin/relay-dashboard-server'), spawnProcess: options?.spawnImpl ?? (vi.fn(() => spawnedProcess) as unknown as CoreDependencies['spawnProcess']), - execCommand: vi.fn(async () => ({ stdout: '', stderr: '' })), + execCommand: options?.execCommand ?? vi.fn(async () => ({ stdout: '', stderr: '' })), killProcess: options?.killImpl ?? vi.fn(() => undefined), fs, generateAgentName: vi.fn(() => 'AutoAgent'), @@ -633,6 +634,29 @@ describe('registerCoreCommands', () => { expect(deps.log).not.toHaveBeenCalledWith('Broker started.'); }); + it('down --force only kills actual orphaned broker executables for the project', async () => { + const execCommand = vi.fn(async () => ({ + stdout: [ + 'USER PID %CPU %MEM VSZ RSS TT STAT STARTED TIME COMMAND', + 'khaliqgant 111 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /bin/zsh -lc BROKER=/tmp/project/target/release/agent-relay-broker node /tmp/agent-relay.js down --force', + 'khaliqgant 222 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /opt/bin/agent-relay-broker init --name project --channels general --persist', + 'khaliqgant 333 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /opt/bin/agent-relay-broker init --name other-project --channels general --persist', + ].join('\n'), + stderr: '', + })); + const killImpl = vi.fn(() => undefined); + const { program, deps } = createHarness({ execCommand, killImpl }); + + const exitCode = await runCommand(program, ['down', '--force']); + + expect(exitCode).toBeUndefined(); + expect(killImpl).toHaveBeenCalledWith(222, 'SIGTERM'); + expect(killImpl).not.toHaveBeenCalledWith(111, 'SIGTERM'); + expect(killImpl).not.toHaveBeenCalledWith(333, 'SIGTERM'); + expect(deps.warn).toHaveBeenCalledWith('Killing orphaned broker process (pid: 222)'); + expect(deps.log).toHaveBeenCalledWith('Cleaned up (was not running)'); + }); + it('up --no-dashboard reports the broker PID when the detached broker is live but API-unready', async () => { const spawnedProcess = createSpawnedProcessMock({ pid: 9001 }); let now = 0; diff --git a/src/cli/lib/broker-lifecycle.ts b/src/cli/lib/broker-lifecycle.ts index 73925d21b..979e869af 100644 --- a/src/cli/lib/broker-lifecycle.ts +++ b/src/cli/lib/broker-lifecycle.ts @@ -291,48 +291,90 @@ function isProcessRunning(pid: number, deps: CoreDependencies): boolean { } } +type ProcessInfo = { + pid: number; + command: string; +}; + +function parsePsAuxLine(line: string): ProcessInfo | null { + const fields = line.trim().split(/\s+/); + if (fields.length < 11 || fields[0] === 'USER') { + return null; + } + const pid = Number.parseInt(fields[1], 10); + if (Number.isNaN(pid) || pid <= 0) { + return null; + } + return { + pid, + command: fields.slice(10).join(' '), + }; +} + +function commandExecutableBasename(command: string): string { + const executable = command.trim().split(/\s+/)[0] ?? ''; + return path.basename(executable.replace(/^["']|["']$/g, '')); +} + +function isBrokerExecutableCommand(command: string): boolean { + const basename = commandExecutableBasename(command); + return basename === 'agent-relay-broker' || basename.startsWith('agent-relay-broker-'); +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function commandHasBrokerName(command: string, brokerName: string): boolean { + const escapedName = escapeRegExp(brokerName); + return new RegExp(`(?:^|\\s)--name(?:\\s+|=)${escapedName}(?:\\s|$)`).test(command); +} + async function killOrphanedBrokerProcesses(projectRoot: string, deps: CoreDependencies): Promise { try { - const shellQuote = (s: string): string => "'" + s.replace(/'/g, "'\\''") + "'"; const brokerName = path.basename(projectRoot) || 'project'; - let stdout = ''; + let candidates: ProcessInfo[] = []; try { - const byName = await deps.execCommand( - `ps aux | grep '[a]gent-relay-broker' | grep -F ${shellQuote('--name ' + brokerName)}` - ); - stdout = byName.stdout; + const byName = await deps.execCommand('ps aux'); + candidates = byName.stdout + .split('\n') + .map(parsePsAuxLine) + .filter((process): process is ProcessInfo => process !== null) + .filter((process) => isBrokerExecutableCommand(process.command)) + .filter((process) => commandHasBrokerName(process.command, brokerName)); } catch { - // Name filter may not match older process invocations; try legacy path-based filter. + // Expected if ps is unavailable; fall through to no matches. } - if (!stdout.trim()) { + if (candidates.length === 0) { try { - const byPath = await deps.execCommand( - `ps aux | grep '[a]gent-relay-broker' | grep -F ${shellQuote(projectRoot)}` - ); - stdout = byPath.stdout; + const byPath = await deps.execCommand('ps aux'); + candidates = byPath.stdout + .split('\n') + .map(parsePsAuxLine) + .filter((process): process is ProcessInfo => process !== null) + .filter((process) => isBrokerExecutableCommand(process.command)) + .filter((process) => process.command.includes(projectRoot)); } catch { - // Expected when no orphaned processes are matched by either strategy. + // Expected if ps is unavailable; fall through to no matches. } } - const lines = stdout.trim().split('\n').filter(Boolean); - for (const line of lines) { - const parts = line.trim().split(/\s+/); - const pid = Number.parseInt(parts[1], 10); - if (!Number.isNaN(pid) && pid > 0 && pid !== deps.pid) { - deps.warn(`Killing orphaned broker process (pid: ${pid})`); - try { - deps.killProcess(pid, 'SIGTERM'); - } catch { - // Process may have already exited. - } + for (const { pid } of candidates) { + if (pid === deps.pid) { + continue; + } + deps.warn(`Killing orphaned broker process (pid: ${pid})`); + try { + deps.killProcess(pid, 'SIGTERM'); + } catch { + // Process may have already exited. } } // Give killed processes a moment to exit. - if (lines.length > 0) { + if (candidates.length > 0) { await deps.sleep(300); } } catch { - // grep returns exit code 1 when no matches found — this is expected. + // Best-effort orphan cleanup. } } @@ -391,8 +433,9 @@ function cleanupBrokerFiles(paths: CoreProjectPaths, deps: CoreDependencies): vo } function childUpArgsForDetachedStart(options: UpOptions, deps: CoreDependencies): string[] { - const args = cliUserArgs(deps) - .filter((arg) => !['--background', '--foreground'].some((name) => matchesCliOption(arg, name))); + const args = cliUserArgs(deps).filter( + (arg) => !['--background', '--foreground'].some((name) => matchesCliOption(arg, name)) + ); if (options.dashboard === false && !args.includes('--no-dashboard')) { args.push('--no-dashboard'); } @@ -415,10 +458,7 @@ function cliUserArgs(deps: CoreDependencies): string[] { return hasEntrypointArgvSlot(deps) ? deps.argv.slice(2) : deps.argv.slice(1); } -function detachedCliInvocation( - deps: CoreDependencies, - args: string[] -): { command: string; args: string[] } { +function detachedCliInvocation(deps: CoreDependencies, args: string[]): { command: string; args: string[] } { if (shouldReexecThroughScript(deps)) { return { command: deps.execPath, args: [deps.cliScript, ...args] }; } @@ -444,7 +484,12 @@ function isCliScriptEntrypoint(deps: CoreDependencies): boolean { if (sameCliPath(deps.execPath, cliScript)) { return true; } - return path.isAbsolute(cliScript) || cliScript.includes('/') || cliScript.includes('\\') || /\.[cm]?js$/i.test(cliScript); + return ( + path.isAbsolute(cliScript) || + cliScript.includes('/') || + cliScript.includes('\\') || + /\.[cm]?js$/i.test(cliScript) + ); } function isBundledBunExecutableEntrypoint(deps: CoreDependencies): boolean { @@ -1093,7 +1138,9 @@ export async function runUpCommand(options: UpOptions, deps: CoreDependencies): if (readiness.state === 'starting') { deps.error('Broker process is running, but the API did not become ready.'); } - deps.error('Run `agent-relay status --wait-for=10` for details, or `agent-relay down --force` to clean up.'); + deps.error( + 'Run `agent-relay status --wait-for=10` for details, or `agent-relay down --force` to clean up.' + ); deps.exit(1); return; } From 5e3424c8ce6b85a1498b638f9139e7857a617d2a Mon Sep 17 00:00:00 2001 From: Proactive Runtime Bot Date: Fri, 15 May 2026 14:16:40 +0200 Subject: [PATCH 3/3] Address headless broker PR feedback --- .../completed/2026-05/traj_f3arvbmmlomn.json | 63 ++++++++++++ .../completed/2026-05/traj_f3arvbmmlomn.md | 34 +++++++ .trajectories/index.json | 33 ++++--- src/cli/commands/core.test.ts | 81 ++++++++++++++-- src/cli/lib/broker-lifecycle.ts | 95 ++++++++++++++----- 5 files changed, 260 insertions(+), 46 deletions(-) create mode 100644 .trajectories/completed/2026-05/traj_f3arvbmmlomn.json create mode 100644 .trajectories/completed/2026-05/traj_f3arvbmmlomn.md diff --git a/.trajectories/completed/2026-05/traj_f3arvbmmlomn.json b/.trajectories/completed/2026-05/traj_f3arvbmmlomn.json new file mode 100644 index 000000000..b8d4f982e --- /dev/null +++ b/.trajectories/completed/2026-05/traj_f3arvbmmlomn.json @@ -0,0 +1,63 @@ +{ + "id": "traj_f3arvbmmlomn", + "version": 1, + "task": { + "title": "Address PR feedback for headless broker reliability" + }, + "status": "completed", + "startedAt": "2026-05-15T12:09:02.122Z", + "completedAt": "2026-05-15T12:15:11.435Z", + "agents": [ + { + "name": "default", + "role": "lead", + "joinedAt": "2026-05-15T12:10:45.107Z" + } + ], + "chapters": [ + { + "id": "chap_7cxvxcv9v11y", + "title": "Work", + "agentName": "default", + "startedAt": "2026-05-15T12:10:45.107Z", + "endedAt": "2026-05-15T12:15:11.435Z", + "events": [ + { + "ts": 1778847045108, + "type": "decision", + "content": "Matched orphan brokers by resolved project path or verified process cwd: Matched orphan brokers by resolved project path or verified process cwd", + "raw": { + "question": "Matched orphan brokers by resolved project path or verified process cwd", + "chosen": "Matched orphan brokers by resolved project path or verified process cwd", + "alternatives": [], + "reasoning": "PR feedback flagged basename and substring matching as unsafe; exact command-path boundaries plus lsof cwd verification preserve cleanup without killing sibling repos or shell harnesses" + }, + "significance": "high" + }, + { + "ts": 1778847282536, + "type": "reflection", + "content": "PR feedback fixes validated with focused CLI tests, full build/test, and live detached broker smoke including orphan cleanup after deleting connection metadata", + "raw": { + "confidence": 0.92 + }, + "significance": "high", + "tags": ["confidence:0.92"] + } + ] + } + ], + "retrospective": { + "summary": "Addressed PR feedback for headless broker reliability: rejected conflicting mode flags, strictly validated status wait durations, made broker readiness depend on status even if session lookup fails, tightened orphan cleanup with project-root command matching plus verified cwd fallback, normalized trajectory index paths, and validated with focused CLI tests, full build/test, lint, diff check, and live detached CLI smoke.", + "approach": "Standard approach", + "confidence": 0.92 + }, + "commits": [], + "filesChanged": [], + "projectId": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc", + "tags": [], + "_trace": { + "startRef": "fc4344a9468a8fabc4a2d48bf8f3e7e5609dcfaa", + "endRef": "fc4344a9468a8fabc4a2d48bf8f3e7e5609dcfaa" + } +} diff --git a/.trajectories/completed/2026-05/traj_f3arvbmmlomn.md b/.trajectories/completed/2026-05/traj_f3arvbmmlomn.md new file mode 100644 index 000000000..101c2e935 --- /dev/null +++ b/.trajectories/completed/2026-05/traj_f3arvbmmlomn.md @@ -0,0 +1,34 @@ +# Trajectory: Address PR feedback for headless broker reliability + +> **Status:** ✅ Completed +> **Confidence:** 92% +> **Started:** May 15, 2026 at 02:09 PM +> **Completed:** May 15, 2026 at 02:15 PM + +--- + +## Summary + +Addressed PR feedback for headless broker reliability: rejected conflicting mode flags, strictly validated status wait durations, made broker readiness depend on status even if session lookup fails, tightened orphan cleanup with project-root command matching plus verified cwd fallback, normalized trajectory index paths, and validated with focused CLI tests, full build/test, lint, diff check, and live detached CLI smoke. + +**Approach:** Standard approach + +--- + +## Key Decisions + +### Matched orphan brokers by resolved project path or verified process cwd + +- **Chose:** Matched orphan brokers by resolved project path or verified process cwd +- **Reasoning:** PR feedback flagged basename and substring matching as unsafe; exact command-path boundaries plus lsof cwd verification preserve cleanup without killing sibling repos or shell harnesses + +--- + +## Chapters + +### 1. Work + +_Agent: default_ + +- Matched orphan brokers by resolved project path or verified process cwd: Matched orphan brokers by resolved project path or verified process cwd +- PR feedback fixes validated with focused CLI tests, full build/test, and live detached broker smoke including orphan cleanup after deleting connection metadata diff --git a/.trajectories/index.json b/.trajectories/index.json index c180da086..d6f7f693d 100644 --- a/.trajectories/index.json +++ b/.trajectories/index.json @@ -1,6 +1,6 @@ { "version": 1, - "lastUpdated": "2026-05-15T11:51:05.433Z", + "lastUpdated": "2026-05-15T12:15:11.541Z", "trajectories": { "traj_1775914133873_35667beb": { "title": "fix-sdk-build-resolution-workflow", @@ -380,84 +380,91 @@ "status": "completed", "startedAt": "2026-05-11T18:43:20.429Z", "completedAt": "2026-05-11T18:43:20.733Z", - "path": "/home/runner/work/relay/relay/.trajectories/completed/2026-05/traj_dpgn0am1jq1c.json" + "path": ".trajectories/completed/2026-05/traj_dpgn0am1jq1c.json" }, "traj_mi9eqd4rjfea": { "title": "Address stdio fresh review findings", "status": "abandoned", "startedAt": "2026-05-11T18:25:24.626Z", "completedAt": "2026-05-11T18:37:05.318Z", - "path": "/home/runner/work/relay/relay/.trajectories/completed/2026-05/traj_mi9eqd4rjfea.json" + "path": ".trajectories/completed/2026-05/traj_mi9eqd4rjfea.json" }, "traj_wx00tjvpptvg": { "title": "Investigate agent-relay spawn persistence", "status": "completed", "startedAt": "2026-05-13T10:49:12.464Z", "completedAt": "2026-05-13T10:53:03.748Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay/.trajectories/completed/2026-05/traj_wx00tjvpptvg.json" + "path": ".trajectories/completed/2026-05/traj_wx00tjvpptvg.json" }, "traj_whd40oxptlhn": { "title": "Review spawn persistence fix and open PR", "status": "completed", "startedAt": "2026-05-13T10:57:02.796Z", "completedAt": "2026-05-13T11:00:43.100Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay/.trajectories/completed/2026-05/traj_whd40oxptlhn.json" + "path": ".trajectories/completed/2026-05/traj_whd40oxptlhn.json" }, "traj_v1wexlfur5zr": { "title": "Fix broker headless reliability doc", "status": "completed", "startedAt": "2026-05-15T09:04:51.316Z", "completedAt": "2026-05-15T09:13:50.970Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_v1wexlfur5zr.json" + "path": ".trajectories/completed/2026-05/traj_v1wexlfur5zr.json" }, "traj_6sjeohtm3php": { "title": "Address broker headless reliability review findings", "status": "completed", "startedAt": "2026-05-15T09:30:56.316Z", "completedAt": "2026-05-15T09:32:47.870Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_6sjeohtm3php.json" + "path": ".trajectories/completed/2026-05/traj_6sjeohtm3php.json" }, "traj_4vucir4qvqa2": { "title": "Harden headless broker readiness semantics", "status": "completed", "startedAt": "2026-05-15T09:46:07.617Z", "completedAt": "2026-05-15T09:59:00.460Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_4vucir4qvqa2.json" + "path": ".trajectories/completed/2026-05/traj_4vucir4qvqa2.json" }, "traj_erzd7j9nto9r": { "title": "Strict review and PR prep for headless broker readiness", "status": "completed", "startedAt": "2026-05-15T10:02:10.164Z", "completedAt": "2026-05-15T10:06:38.127Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_erzd7j9nto9r.json" + "path": ".trajectories/completed/2026-05/traj_erzd7j9nto9r.json" }, "traj_7uznwzoxbao6": { "title": "Fix standalone detached headless startup", "status": "completed", "startedAt": "2026-05-15T10:18:46.273Z", "completedAt": "2026-05-15T10:25:00.598Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_7uznwzoxbao6.json" + "path": ".trajectories/completed/2026-05/traj_7uznwzoxbao6.json" }, "traj_9fdv7hxm0b60": { "title": "Strict standalone smoke follow-up", "status": "completed", "startedAt": "2026-05-15T10:37:17.693Z", "completedAt": "2026-05-15T10:43:11.587Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_9fdv7hxm0b60.json" + "path": ".trajectories/completed/2026-05/traj_9fdv7hxm0b60.json" }, "traj_0o6gb2wvk59t": { "title": "Fresh end-to-end validation for headless readiness", "status": "completed", "startedAt": "2026-05-15T10:55:49.188Z", "completedAt": "2026-05-15T11:11:52.324Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_0o6gb2wvk59t.json" + "path": ".trajectories/completed/2026-05/traj_0o6gb2wvk59t.json" }, "traj_4chzkm724ufo": { "title": "Fix headless orchestrator worktree CLI E2E issues", "status": "completed", "startedAt": "2026-05-15T11:44:28.338Z", "completedAt": "2026-05-15T11:51:05.319Z", - "path": "/Users/khaliqgant/Projects/AgentWorkforce/relay-broker-headless-reliability-doc/.trajectories/completed/2026-05/traj_4chzkm724ufo.json" + "path": ".trajectories/completed/2026-05/traj_4chzkm724ufo.json" + }, + "traj_f3arvbmmlomn": { + "title": "Address PR feedback for headless broker reliability", + "status": "completed", + "startedAt": "2026-05-15T12:09:02.122Z", + "completedAt": "2026-05-15T12:15:11.435Z", + "path": ".trajectories/completed/2026-05/traj_f3arvbmmlomn.json" } } } diff --git a/src/cli/commands/core.test.ts b/src/cli/commands/core.test.ts index 696eaeede..50135b64a 100644 --- a/src/cli/commands/core.test.ts +++ b/src/cli/commands/core.test.ts @@ -568,6 +568,16 @@ describe('registerCoreCommands', () => { expect(deps.log).toHaveBeenCalledWith('Broker PID: 5151'); }); + it('up rejects mutually exclusive background and foreground flags', async () => { + const { program, deps } = createHarness(); + + const exitCode = await runCommand(program, ['up', '--no-dashboard', '--background', '--foreground']); + + expect(exitCode).toBe(1); + expect(deps.error).toHaveBeenCalledWith('Cannot use --background and --foreground together.'); + expect(deps.spawnProcess).not.toHaveBeenCalled(); + }); + it('up --no-dashboard re-execs a Bun standalone binary without adding its virtual entrypoint', async () => { const spawnedProcess = createSpawnedProcessMock(); let now = 0; @@ -635,15 +645,28 @@ describe('registerCoreCommands', () => { }); it('down --force only kills actual orphaned broker executables for the project', async () => { - const execCommand = vi.fn(async () => ({ - stdout: [ - 'USER PID %CPU %MEM VSZ RSS TT STAT STARTED TIME COMMAND', - 'khaliqgant 111 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /bin/zsh -lc BROKER=/tmp/project/target/release/agent-relay-broker node /tmp/agent-relay.js down --force', - 'khaliqgant 222 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /opt/bin/agent-relay-broker init --name project --channels general --persist', - 'khaliqgant 333 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /opt/bin/agent-relay-broker init --name other-project --channels general --persist', - ].join('\n'), - stderr: '', - })); + const execCommand = vi.fn(async (command: string) => { + if (command === 'ps aux') { + return { + stdout: [ + 'USER PID %CPU %MEM VSZ RSS TT STAT STARTED TIME COMMAND', + 'khaliqgant 111 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /bin/zsh -lc BROKER=/tmp/project/target/release/agent-relay-broker node /tmp/agent-relay.js down --force', + 'khaliqgant 222 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /opt/bin/agent-relay-broker init --name project --channels general --persist', + 'khaliqgant 333 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /opt/bin/agent-relay-broker init --name project --channels general --persist', + 'khaliqgant 444 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /opt/bin/agent-relay-broker init --state-dir /tmp/project/.agent-relay --persist', + 'khaliqgant 555 0.0 0.0 1 1 ?? S 1:00PM 0:00.01 /opt/bin/agent-relay-broker init --state-dir /tmp/project-other/.agent-relay --persist', + ].join('\n'), + stderr: '', + }; + } + if (command.includes('-p 222 ')) { + return { stdout: 'p222\nfcwd\nn/tmp/project\n', stderr: '' }; + } + if (command.includes('-p 333 ')) { + return { stdout: 'p333\nfcwd\nn/tmp/project-other\n', stderr: '' }; + } + throw new Error(`unexpected command: ${command}`); + }); const killImpl = vi.fn(() => undefined); const { program, deps } = createHarness({ execCommand, killImpl }); @@ -651,9 +674,12 @@ describe('registerCoreCommands', () => { expect(exitCode).toBeUndefined(); expect(killImpl).toHaveBeenCalledWith(222, 'SIGTERM'); + expect(killImpl).toHaveBeenCalledWith(444, 'SIGTERM'); expect(killImpl).not.toHaveBeenCalledWith(111, 'SIGTERM'); expect(killImpl).not.toHaveBeenCalledWith(333, 'SIGTERM'); + expect(killImpl).not.toHaveBeenCalledWith(555, 'SIGTERM'); expect(deps.warn).toHaveBeenCalledWith('Killing orphaned broker process (pid: 222)'); + expect(deps.warn).toHaveBeenCalledWith('Killing orphaned broker process (pid: 444)'); expect(deps.log).toHaveBeenCalledWith('Cleaned up (was not running)'); }); @@ -910,6 +936,43 @@ describe('registerCoreCommands', () => { expect(deps.log).toHaveBeenCalledWith('Workspace Key: rk_live_ready'); }); + it('status --wait-for treats getStatus success as ready even when session lookup fails', async () => { + const now = 0; + const connectionPath = '/tmp/project/.agent-relay/connection.json'; + const fs = createFsMock({ [connectionPath]: connectionFile(4242) }); + const killImpl = vi.fn((pid: number, signal?: NodeJS.Signals | number) => { + if (pid === 4242 && signal === 0) return; + throw new Error('unexpected kill check'); + }); + sdkStatusClient.getStatus.mockResolvedValueOnce({ agent_count: 2, pending_delivery_count: 0 }); + sdkStatusClient.getSession.mockRejectedValueOnce(new Error('503 Service Unavailable')); + + const { program, deps } = createHarness({ + fs, + killImpl, + nowImpl: vi.fn(() => now), + }); + + const exitCode = await runCommand(program, ['status', '--wait-for', '1']); + + expect(exitCode).toBeUndefined(); + expect(sdkStatusClient.getStatus).toHaveBeenCalledTimes(1); + expect(sdkStatusClient.getSession).toHaveBeenCalledTimes(1); + expect(deps.log).toHaveBeenCalledWith('Status: RUNNING'); + expect(deps.log).toHaveBeenCalledWith('Agents: 2'); + expect(deps.log).not.toHaveBeenCalledWith(expect.stringContaining('Workspace Key:')); + }); + + it.each(['10s', 'foo', '-1', ''])('status rejects invalid --wait-for value %j', async (waitFor) => { + const { program, deps } = createHarness(); + + const exitCode = await runCommand(program, ['status', '--wait-for', waitFor]); + + expect(exitCode).toBe(1); + expect(deps.error).toHaveBeenCalledWith('--wait-for must be a non-negative number of seconds.'); + expect(deps.log).not.toHaveBeenCalledWith('Status: STOPPED'); + }); + it('status --wait-for reports STARTING and exits non-zero when the PID is live but the API is unready', async () => { let now = 0; const connectionPath = '/tmp/project/.agent-relay/connection.json'; diff --git a/src/cli/lib/broker-lifecycle.ts b/src/cli/lib/broker-lifecycle.ts index 979e869af..286fc8a6e 100644 --- a/src/cli/lib/broker-lifecycle.ts +++ b/src/cli/lib/broker-lifecycle.ts @@ -46,7 +46,7 @@ export interface BrokerConnection { type BrokerStatusDetails = { status: Awaited>; - session: Awaited>; + session: Awaited> | null; }; type BrokerReadiness = @@ -330,34 +330,62 @@ function commandHasBrokerName(command: string, brokerName: string): boolean { return new RegExp(`(?:^|\\s)--name(?:\\s+|=)${escapedName}(?:\\s|$)`).test(command); } +function commandHasProjectRoot(command: string, projectRoot: string): boolean { + const escapedRoot = escapeRegExp(path.resolve(projectRoot)); + return new RegExp(`(?:^|\\s|=|["'])${escapedRoot}(?:$|\\s|["']|${escapeRegExp(path.sep)})`).test(command); +} + +async function processCwdMatchesProjectRoot( + processInfo: ProcessInfo, + projectRoot: string, + deps: CoreDependencies +): Promise { + try { + const cwdDetails = await deps.execCommand(`lsof -nP -a -p ${processInfo.pid} -d cwd -Fn`); + return cwdDetails.stdout + .split('\n') + .filter((line) => line.startsWith('n')) + .some((line) => path.resolve(line.slice(1)) === projectRoot); + } catch { + return false; + } +} + async function killOrphanedBrokerProcesses(projectRoot: string, deps: CoreDependencies): Promise { try { - const brokerName = path.basename(projectRoot) || 'project'; - let candidates: ProcessInfo[] = []; + const resolvedProjectRoot = path.resolve(projectRoot); + const brokerName = path.basename(resolvedProjectRoot) || 'project'; + const candidates: ProcessInfo[] = []; try { - const byName = await deps.execCommand('ps aux'); - candidates = byName.stdout + const processList = await deps.execCommand('ps aux'); + const brokerProcesses = processList.stdout .split('\n') .map(parsePsAuxLine) .filter((process): process is ProcessInfo => process !== null) - .filter((process) => isBrokerExecutableCommand(process.command)) - .filter((process) => commandHasBrokerName(process.command, brokerName)); + .filter((process) => isBrokerExecutableCommand(process.command)); + + const matchedPids = new Set(); + for (const processInfo of brokerProcesses) { + if (commandHasProjectRoot(processInfo.command, resolvedProjectRoot)) { + candidates.push(processInfo); + matchedPids.add(processInfo.pid); + } + } + + for (const processInfo of brokerProcesses) { + if ( + matchedPids.has(processInfo.pid) || + !commandHasBrokerName(processInfo.command, brokerName) || + !(await processCwdMatchesProjectRoot(processInfo, resolvedProjectRoot, deps)) + ) { + continue; + } + candidates.push(processInfo); + matchedPids.add(processInfo.pid); + } } catch { // Expected if ps is unavailable; fall through to no matches. } - if (candidates.length === 0) { - try { - const byPath = await deps.execCommand('ps aux'); - candidates = byPath.stdout - .split('\n') - .map(parsePsAuxLine) - .filter((process): process is ProcessInfo => process !== null) - .filter((process) => isBrokerExecutableCommand(process.command)) - .filter((process) => process.command.includes(projectRoot)); - } catch { - // Expected if ps is unavailable; fall through to no matches. - } - } for (const { pid } of candidates) { if (pid === deps.pid) { continue; @@ -1103,6 +1131,12 @@ async function shutdownUpResources( export async function runUpCommand(options: UpOptions, deps: CoreDependencies): Promise { ensureBundledRelaycastMcpCommand(deps); + if (options.background && options.foreground) { + deps.error('Cannot use --background and --foreground together.'); + deps.exit(1); + return; + } + const paths = deps.getProjectPaths(); // --state-dir overrides where the broker writes state / connection files if (options.stateDir) { @@ -1553,8 +1587,10 @@ export async function runStatusCommand( if (options?.stateDir) { paths.dataDir = path.resolve(options.stateDir); } - const waitSeconds = Number.parseFloat(options?.waitFor ?? '0'); - const waitMs = Number.isFinite(waitSeconds) && waitSeconds > 0 ? waitSeconds * 1000 : 0; + const waitMs = parseWaitForMs(options?.waitFor, deps); + if (waitMs === null) { + return; + } const readiness = await waitForBrokerReadiness(paths, deps, waitMs, waitMs > 0); if (readiness.state === 'stopped') { @@ -1591,18 +1627,29 @@ export async function runStatusCommand( if (typeof status.pending_delivery_count === 'number' && status.pending_delivery_count > 0) { deps.log(`Pending deliveries: ${status.pending_delivery_count}`); } - if (session.workspace_key) { + if (session?.workspace_key) { deps.log(`Workspace Key: ${session.workspace_key}`); deps.log(`Observer: https://agentrelay.com/observer?key=${session.workspace_key}`); } } } +function parseWaitForMs(rawValue: string | undefined, deps: CoreDependencies): number | null { + const rawWaitFor = rawValue?.trim(); + if (rawWaitFor !== undefined && !/^\d+(?:\.\d+)?$/.test(rawWaitFor)) { + deps.error('--wait-for must be a non-negative number of seconds.'); + deps.exit(1); + return null; + } + const waitSeconds = rawWaitFor === undefined ? 0 : Number.parseFloat(rawWaitFor); + return waitSeconds > 0 ? waitSeconds * 1000 : 0; +} + async function readBrokerStatusDetails(conn: BrokerConnection): Promise { const client = new AgentRelayClient({ baseUrl: conn.url, apiKey: conn.api_key }); try { const status = await client.getStatus(); - const session = await client.getSession(); + const session = await client.getSession().catch(() => null); return { status, session }; } catch { return null;