diff --git a/packages/codev/src/agent-farm/__tests__/claude-session-discovery.test.ts b/packages/codev/src/agent-farm/__tests__/claude-session-discovery.test.ts new file mode 100644 index 00000000..db17d8fd --- /dev/null +++ b/packages/codev/src/agent-farm/__tests__/claude-session-discovery.test.ts @@ -0,0 +1,97 @@ +/** + * Tests for Claude session discovery via on-disk jsonl introspection. + * + * Issue #829 — conversation resume. + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdtempSync, rmSync, mkdirSync, writeFileSync, utimesSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { + encodeClaudeProjectDir, + findLatestSessionId, +} from '../utils/claude-session-discovery.js'; + +describe('encodeClaudeProjectDir', () => { + it('replaces / with -', () => { + expect(encodeClaudeProjectDir('/Users/x/repo')).toBe('-Users-x-repo'); + }); + + it('replaces . with -', () => { + expect(encodeClaudeProjectDir('/Users/x/repo/.builders/pir-1')).toBe( + '-Users-x-repo--builders-pir-1', + ); + }); + + it('leaves dashes in the source path untouched', () => { + expect(encodeClaudeProjectDir('/Users/x/repo/pir-1298')).toBe( + '-Users-x-repo-pir-1298', + ); + }); + + it('handles paths with multiple dots and slashes', () => { + expect(encodeClaudeProjectDir('/a/b.c/.d.e/f')).toBe('-a-b-c--d-e-f'); + }); +}); + +describe('findLatestSessionId', () => { + let fakeHome: string; + let projectsRoot: string; + + beforeEach(() => { + fakeHome = mkdtempSync(join(tmpdir(), 'csd-test-')); + projectsRoot = join(fakeHome, '.claude', 'projects'); + mkdirSync(projectsRoot, { recursive: true }); + }); + + afterEach(() => { + rmSync(fakeHome, { recursive: true, force: true }); + }); + + function writeSession(absPath: string, uuid: string, mtime: number): void { + const dir = join(projectsRoot, encodeClaudeProjectDir(absPath)); + mkdirSync(dir, { recursive: true }); + const file = join(dir, `${uuid}.jsonl`); + 
writeFileSync(file, `{"sessionId":"${uuid}"}\n`, 'utf-8'); + const t = mtime / 1000; + utimesSync(file, t, t); + } + + it('returns the newest session UUID by mtime', () => { + const worktree = '/Users/x/repo/.builders/pir-1'; + writeSession(worktree, 'old-uuid', 1_000_000_000_000); + writeSession(worktree, 'newest-uuid', 1_700_000_000_000); + writeSession(worktree, 'middle-uuid', 1_400_000_000_000); + expect(findLatestSessionId(worktree, { homeDir: fakeHome })).toBe('newest-uuid'); + }); + + it('returns null when the project dir does not exist', () => { + expect(findLatestSessionId('/nonexistent/path', { homeDir: fakeHome })).toBeNull(); + }); + + it('returns null when the project dir exists but contains no jsonl files', () => { + const worktree = '/Users/x/repo/.builders/pir-2'; + const dir = join(projectsRoot, encodeClaudeProjectDir(worktree)); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, 'memory'), 'not a jsonl', 'utf-8'); + expect(findLatestSessionId(worktree, { homeDir: fakeHome })).toBeNull(); + }); + + it('ignores non-jsonl files and subdirectories', () => { + const worktree = '/Users/x/repo/.builders/pir-3'; + const dir = join(projectsRoot, encodeClaudeProjectDir(worktree)); + mkdirSync(dir, { recursive: true }); + mkdirSync(join(dir, 'some-uuid'), { recursive: true }); + writeFileSync(join(dir, 'history.txt'), 'text', 'utf-8'); + writeSession(worktree, 'the-uuid', 1_500_000_000_000); + expect(findLatestSessionId(worktree, { homeDir: fakeHome })).toBe('the-uuid'); + }); + + it('returns the single jsonl when only one exists', () => { + const worktree = '/Users/x/repo/.builders/pir-4'; + writeSession(worktree, 'only-uuid', 1_500_000_000_000); + expect(findLatestSessionId(worktree, { homeDir: fakeHome })).toBe('only-uuid'); + }); +}); diff --git a/packages/codev/src/agent-farm/__tests__/discover-resume-session.test.ts b/packages/codev/src/agent-farm/__tests__/discover-resume-session.test.ts new file mode 100644 index 00000000..c7c57bca --- 
/dev/null
+++ b/packages/codev/src/agent-farm/__tests__/discover-resume-session.test.ts
@@ -0,0 +1,92 @@
+/**
+ * Tests for the discoverResumeSession helper — the spawn-CLI wrapper that
+ * gates findLatestSessionId on the --resume flag and surfaces a user-facing
+ * log line. (Issues #829 / #831.)
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { mkdtempSync, rmSync, mkdirSync, writeFileSync, utimesSync } from 'node:fs';
+import { tmpdir, homedir } from 'node:os';
+import { join } from 'node:path';
+
+import { discoverResumeSession } from '../commands/spawn.js';
+import { encodeClaudeProjectDir } from '../utils/claude-session-discovery.js';
+
+// discoverResumeSession reads from $HOME via os.homedir() through
+// findLatestSessionId. Override the env var for the duration of the test so
+// the helper looks at our fake home instead of the user's real one.
+function pinHome<T>(fakeHome: string, fn: () => T): T {
+  const original = process.env.HOME;
+  process.env.HOME = fakeHome;
+  try {
+    return fn();
+  } finally {
+    if (original === undefined) delete process.env.HOME;
+    else process.env.HOME = original;
+  }
+}
+
+function writeSession(projectsRoot: string, absPath: string, uuid: string, mtimeMs: number): void {
+  const dir = join(projectsRoot, encodeClaudeProjectDir(absPath));
+  mkdirSync(dir, { recursive: true });
+  const file = join(dir, `${uuid}.jsonl`);
+  writeFileSync(file, `{"sessionId":"${uuid}"}\n`, 'utf-8');
+  const t = mtimeMs / 1000;
+  utimesSync(file, t, t);
+}
+
+describe('discoverResumeSession', () => {
+  let fakeHome: string;
+  let projectsRoot: string;
+
+  beforeEach(() => {
+    fakeHome = mkdtempSync(join(tmpdir(), 'drs-test-'));
+    projectsRoot = join(fakeHome, '.claude', 'projects');
+    mkdirSync(projectsRoot, { recursive: true });
+  });
+
+  afterEach(() => {
+    rmSync(fakeHome, { recursive: true, force: true });
+  });
+
+  it('returns undefined when isResume is false (no filesystem touch)', () => {
+    // Even if a 
jsonl exists, a non-resume spawn must not pick it up. + const worktree = '/Users/x/repo/.builders/spir-1'; + writeSession(projectsRoot, worktree, 'should-not-pick', 1_700_000_000_000); + pinHome(fakeHome, () => { + expect(discoverResumeSession(worktree, false)).toBeUndefined(); + }); + }); + + it('returns undefined when isResume is undefined', () => { + const worktree = '/Users/x/repo/.builders/spir-2'; + writeSession(projectsRoot, worktree, 'should-not-pick', 1_700_000_000_000); + pinHome(fakeHome, () => { + expect(discoverResumeSession(worktree, undefined)).toBeUndefined(); + }); + }); + + it('returns undefined when isResume is true but no jsonl exists', () => { + const worktree = '/Users/x/repo/.builders/spir-3-no-jsonl'; + pinHome(fakeHome, () => { + expect(discoverResumeSession(worktree, true)).toBeUndefined(); + }); + }); + + it('returns the newest jsonl UUID when isResume is true and jsonls exist', () => { + const worktree = '/Users/x/repo/.builders/pir-1661'; + writeSession(projectsRoot, worktree, 'older-uuid', 1_500_000_000_000); + writeSession(projectsRoot, worktree, 'newest-uuid', 1_700_000_000_000); + pinHome(fakeHome, () => { + expect(discoverResumeSession(worktree, true)).toBe('newest-uuid'); + }); + }); + + it('does not consult the filesystem when isResume is false (perf safety)', () => { + // Negative case: isResume=false short-circuits before any filesystem + // access happens. Tests pass even if HOME points at /nonexistent. 
+    pinHome('/nonexistent-home-path', () => {
+      expect(discoverResumeSession('/some/worktree', false)).toBeUndefined();
+    });
+  });
+});
diff --git a/packages/codev/src/agent-farm/__tests__/workspace-recover.test.ts b/packages/codev/src/agent-farm/__tests__/workspace-recover.test.ts
new file mode 100644
index 00000000..d392db73
--- /dev/null
+++ b/packages/codev/src/agent-farm/__tests__/workspace-recover.test.ts
@@ -0,0 +1,535 @@
+/**
+ * Tests for `afx workspace recover` — eligibility predicate, builder-info
+ * derivation, worktree resolution, and listAllProjects precedence.
+ *
+ * Issue #829.
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { mkdtempSync, rmSync, mkdirSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+
+import {
+  evaluateEligibility,
+  deriveBuilderInfo,
+  resolveWorktreePath,
+  formatRelativeAge,
+  type EligibilityInputs,
+  type BuilderInfo,
+} from '../commands/workspace-recover.js';
+import { listAllProjects } from '../../commands/porch/state.js';
+import type { ProjectState } from '../../commands/porch/types.js';
+import type { DbTerminalSession } from '../servers/tower-types.js';
+
+function makeState(overrides: Partial<ProjectState> = {}): ProjectState {
+  return {
+    id: '0087',
+    title: 'Test project',
+    protocol: 'spir',
+    phase: 'implement',
+    plan_phases: [],
+    current_plan_phase: null,
+    gates: {},
+    iteration: 1,
+    build_complete: false,
+    history: [],
+    started_at: '2026-05-20T00:00:00.000Z',
+    updated_at: new Date().toISOString(),
+    ...overrides,
+  };
+}
+
+function makeSession(overrides: Partial<DbTerminalSession> = {}): DbTerminalSession {
+  return {
+    id: 'term-123',
+    workspace_path: '/workspace',
+    type: 'builder',
+    role_id: 'builder-spir-87',
+    pid: null,
+    shellper_socket: '/tmp/shellper.sock',
+    shellper_pid: 12345,
+    shellper_start_time: Date.now(),
+    label: null,
+    cwd: null,
+    created_at: new Date().toISOString(),
+    ...overrides,
+  };
+}
+
+function makeBuilderInfo(overrides: Partial<BuilderInfo> = {}): BuilderInfo {
+  return { builderId: 'builder-spir-87', issueArg: '87', cliProtocol: 'spir', ...overrides };
+}
+
+function defaults(): Omit<EligibilityInputs, 'state' | 'builderInfo' | 'sessions' | 'worktreeExists' | 'ageDays'> {
+  return {
+    maxAgeDays: 7,
+    includeStale: false,
+    isProcessAlive: () => false,
+    socketExists: () => false,
+  };
+}
+
+describe('evaluateEligibility', () => {
+  it('skips terminal phase (verified) — comes before all other checks', () => {
+    const result = evaluateEligibility({
+      state: makeState({ phase: 'verified' }),
+      builderInfo: makeBuilderInfo(),
+      sessions: [makeSession()],
+      worktreeExists: true,
+      ageDays: 0,
+      ...defaults(),
+    });
+    expect(result).toEqual({ eligible: false, reason: 'terminal' });
+  });
+
+  it('skips terminal phase (complete)', () => {
+    const result = evaluateEligibility({
+      state: makeState({ phase: 'complete' }),
+      builderInfo: makeBuilderInfo(),
+      sessions: [makeSession()],
+      worktreeExists: true,
+      ageDays: 0,
+      ...defaults(),
+    });
+    expect(result).toEqual({ eligible: false, reason: 'terminal' });
+  });
+
+  it('skips when builderInfo is null (unsupported protocol)', () => {
+    const result = evaluateEligibility({
+      state: makeState({ protocol: 'experiment' }),
+      builderInfo: null,
+      sessions: [makeSession()],
+      worktreeExists: true,
+      ageDays: 0,
+      ...defaults(),
+    });
+    expect(result).toEqual({ eligible: false, reason: 'unsupported_protocol' });
+  });
+
+  it('revives when no session row exists (Tower already reconciled the dead row)', () => {
+    // The common post-reboot case: Tower startup runs reconciliation, fails
+    // to reconnect to the dead shellper, and deletes the row. By the time
+    // `workspace recover` runs, the row is gone. Absence means "needs revival."
+ const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [], + worktreeExists: true, + ageDays: 0, + ...defaults(), + }); + expect(result).toEqual({ eligible: true }); + }); + + describe('liveness probe (PID-first per Gemini #829 review)', () => { + it('skips when shellper PID is alive (socket also present)', () => { + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [makeSession({ shellper_pid: 12345, shellper_socket: '/sock' })], + worktreeExists: true, + ageDays: 0, + ...defaults(), + isProcessAlive: () => true, + socketExists: () => true, + }); + expect(result).toEqual({ eligible: false, reason: 'shellper_alive' }); + }); + + it('revives when PID is known dead even though stale socket file remains', () => { + // Critical for reboot recovery: sockets live in ~/.codev/run/ which the + // OS does not clear on reboot. The dead PID is definitive evidence. + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [makeSession({ shellper_pid: 12345, shellper_socket: '/sock' })], + worktreeExists: true, + ageDays: 0, + ...defaults(), + isProcessAlive: () => false, + socketExists: () => true, + }); + expect(result).toEqual({ eligible: true }); + }); + + it('falls back to socket check when shellper_pid is null (legacy rows)', () => { + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [makeSession({ shellper_pid: null, shellper_socket: '/sock' })], + worktreeExists: true, + ageDays: 0, + ...defaults(), + isProcessAlive: () => false, + socketExists: () => true, + }); + expect(result).toEqual({ eligible: false, reason: 'shellper_alive' }); + }); + + it('revives when shellper_pid is null AND no socket file', () => { + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [makeSession({ shellper_pid: null, 
shellper_socket: '/sock' })], + worktreeExists: true, + ageDays: 0, + ...defaults(), + isProcessAlive: () => false, + socketExists: () => false, + }); + expect(result).toEqual({ eligible: true }); + }); + }); + + describe('duplicate row aggregation (Codex #829 review)', () => { + it('treats builder as alive if ANY matching row has a live PID', () => { + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [ + makeSession({ id: 'dead-row', shellper_pid: 11111 }), + makeSession({ id: 'live-row', shellper_pid: 22222 }), + ], + worktreeExists: true, + ageDays: 0, + ...defaults(), + // pid 11111 dead, pid 22222 alive + isProcessAlive: (pid) => pid === 22222, + }); + expect(result).toEqual({ eligible: false, reason: 'shellper_alive' }); + }); + + it('revives only when ALL matching rows look dead', () => { + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [ + makeSession({ id: 'row-a', shellper_pid: 11111 }), + makeSession({ id: 'row-b', shellper_pid: 22222 }), + ], + worktreeExists: true, + ageDays: 0, + ...defaults(), + isProcessAlive: () => false, + }); + expect(result).toEqual({ eligible: true }); + }); + }); + + it('skips when worktree is missing', () => { + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [makeSession()], + worktreeExists: false, + ageDays: 0, + ...defaults(), + }); + expect(result).toEqual({ eligible: false, reason: 'worktree_missing' }); + }); + + it('skips stale projects when --include-stale not set', () => { + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [makeSession()], + worktreeExists: true, + ageDays: 30, + ...defaults(), + }); + expect(result).toEqual({ eligible: false, reason: 'stale' }); + }); + + it('honors --include-stale on otherwise-stale projects', () => { + const result = evaluateEligibility({ + state: 
makeState(), + builderInfo: makeBuilderInfo(), + sessions: [makeSession()], + worktreeExists: true, + ageDays: 30, + ...defaults(), + includeStale: true, + }); + expect(result).toEqual({ eligible: true }); + }); + + it('returns eligible when all conditions are met', () => { + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [makeSession()], + worktreeExists: true, + ageDays: 2, + ...defaults(), + }); + expect(result).toEqual({ eligible: true }); + }); + + it('checks predicates in cheap-first order (terminal beats unsupported)', () => { + const result = evaluateEligibility({ + state: makeState({ phase: 'verified', protocol: 'experiment' }), + builderInfo: null, + sessions: [], + worktreeExists: false, + ageDays: 999, + ...defaults(), + }); + expect(result).toEqual({ eligible: false, reason: 'terminal' }); + }); + + it('skips a stale project even with no session row (post-reconciliation + old)', () => { + // Without the predicate fix, this would have shown `no_session_row`; + // with the new ordering, an old project past the recency window is + // surfaced as `stale` — the more useful diagnostic for the operator. + const result = evaluateEligibility({ + state: makeState(), + builderInfo: makeBuilderInfo(), + sessions: [], + worktreeExists: true, + ageDays: 30, + ...defaults(), + }); + expect(result).toEqual({ eligible: false, reason: 'stale' }); + }); + + it('revives a recent active project even with no session row (the PIR-at-gate case)', () => { + // The motivating bug: a PIR builder sitting at plan-approval, killed by + // a reboot, with its session row cleaned up by Tower's reconciliation. + // Before the predicate fix, this incorrectly skipped with `no_session_row`. 
+ const result = evaluateEligibility({ + state: makeState({ protocol: 'pir', phase: 'plan' }), + builderInfo: makeBuilderInfo({ builderId: 'builder-pir-1661', issueArg: '1661', cliProtocol: 'pir' }), + sessions: [], + worktreeExists: true, + ageDays: 1, + ...defaults(), + }); + expect(result).toEqual({ eligible: true }); + }); +}); + +describe('deriveBuilderInfo', () => { + it('maps SPIR state to builder-spir-', () => { + expect(deriveBuilderInfo(makeState({ id: '0087', protocol: 'spir' }))).toEqual({ + builderId: 'builder-spir-87', + issueArg: '87', + cliProtocol: 'spir', + }); + }); + + it('handles bugfix project IDs (bugfix-693 → builder-bugfix-693, issue 693)', () => { + expect(deriveBuilderInfo(makeState({ id: 'bugfix-693', protocol: 'bugfix' }))).toEqual({ + builderId: 'builder-bugfix-693', + issueArg: '693', + cliProtocol: 'bugfix', + }); + }); + + it('handles PIR projects', () => { + expect(deriveBuilderInfo(makeState({ id: '0829', protocol: 'pir' }))).toEqual({ + builderId: 'builder-pir-829', + issueArg: '829', + cliProtocol: 'pir', + }); + }); + + it('handles ASPIR projects', () => { + expect(deriveBuilderInfo(makeState({ id: '0438', protocol: 'aspir' }))).toEqual({ + builderId: 'builder-aspir-438', + issueArg: '438', + cliProtocol: 'aspir', + }); + }); + + it('handles AIR projects', () => { + expect(deriveBuilderInfo(makeState({ id: '0501', protocol: 'air' }))).toEqual({ + builderId: 'builder-air-501', + issueArg: '501', + cliProtocol: 'air', + }); + }); + + describe('unsupported protocols return null', () => { + it.each(['experiment', 'maintain', 'task', 'protocol', 'release', 'spider'])( + 'returns null for protocol: %s', + (protocol) => { + expect(deriveBuilderInfo(makeState({ protocol }))).toBeNull(); + }, + ); + }); +}); + +describe('resolveWorktreePath', () => { + let tmp: string; + let buildersDir: string; + + beforeEach(() => { + tmp = mkdtempSync(join(tmpdir(), 'recover-test-')); + buildersDir = join(tmp, '.builders'); + mkdirSync(buildersDir, { 
recursive: true }); + }); + + afterEach(() => { + rmSync(tmp, { recursive: true, force: true }); + }); + + it('finds ID-only worktree (Spec 653 layout)', () => { + const wt = join(buildersDir, 'spir-87'); + mkdirSync(join(wt, '.git'), { recursive: true }); + const result = resolveWorktreePath(buildersDir, makeState({ id: '0087', protocol: 'spir' })); + expect(result).toBe(wt); + }); + + it('falls back to legacy title-suffixed worktree', () => { + const wt = join(buildersDir, 'spir-87-some-title-slug'); + mkdirSync(join(wt, '.git'), { recursive: true }); + const result = resolveWorktreePath(buildersDir, makeState({ id: '0087', protocol: 'spir' })); + expect(result).toBe(wt); + }); + + it('returns null when no worktree matches', () => { + const result = resolveWorktreePath(buildersDir, makeState({ id: '0087', protocol: 'spir' })); + expect(result).toBeNull(); + }); + + it('ignores directories with the right prefix but no .git', () => { + mkdirSync(join(buildersDir, 'spir-87'), { recursive: true }); + const result = resolveWorktreePath(buildersDir, makeState({ id: '0087', protocol: 'spir' })); + expect(result).toBeNull(); + }); + + it('resolves bugfix worktree by issue number', () => { + const wt = join(buildersDir, 'bugfix-693'); + mkdirSync(join(wt, '.git'), { recursive: true }); + const result = resolveWorktreePath(buildersDir, makeState({ id: 'bugfix-693', protocol: 'bugfix' })); + expect(result).toBe(wt); + }); + + it('returns null for unsupported protocols without filesystem lookups', () => { + // An experiment dir on disk would normally be found if the protocol were + // supported — but for unsupported protocols we short-circuit to null. 
+    const wt = join(buildersDir, 'experiment-abcd');
+    mkdirSync(join(wt, '.git'), { recursive: true });
+    const result = resolveWorktreePath(buildersDir, makeState({ id: 'abcd', protocol: 'experiment' }));
+    expect(result).toBeNull();
+  });
+});
+
+describe('listAllProjects (precedence + diagnostics)', () => {
+  let tmp: string;
+
+  beforeEach(() => {
+    tmp = mkdtempSync(join(tmpdir(), 'recover-list-'));
+  });
+
+  afterEach(() => {
+    rmSync(tmp, { recursive: true, force: true });
+  });
+
+  function writeStatus(dir: string, state: Partial<ProjectState>): void {
+    mkdirSync(dir, { recursive: true });
+    const full = { ...makeState(state) };
+    const yaml = [
+      `id: '${full.id}'`,
+      `title: '${full.title}'`,
+      `protocol: ${full.protocol}`,
+      `phase: ${full.phase}`,
+      'plan_phases: []',
+      'current_plan_phase: null',
+      'gates: {}',
+      `iteration: ${full.iteration}`,
+      `build_complete: ${full.build_complete}`,
+      'history: []',
+      `started_at: '${full.started_at}'`,
+      `updated_at: '${full.updated_at}'`,
+    ].join('\n');
+    writeFileSync(join(dir, 'status.yaml'), yaml + '\n', 'utf-8');
+  }
+
+  it('returns projects from codev/projects when no .builders copy exists', () => {
+    writeStatus(join(tmp, 'codev', 'projects', '0087-foo'), { id: '0087', phase: 'implement' });
+    const result = listAllProjects(tmp);
+    expect(result).toHaveLength(1);
+    expect(result[0].state.id).toBe('0087');
+    expect(result[0].statusPath).toBe(join(tmp, 'codev', 'projects', '0087-foo', 'status.yaml'));
+  });
+
+  it('prefers .builders/ copy when same project id exists in both', () => {
+    writeStatus(join(tmp, 'codev', 'projects', '0087-foo'), { id: '0087', phase: 'specify' });
+    writeStatus(
+      join(tmp, '.builders', 'spir-87', 'codev', 'projects', '0087-foo'),
+      { id: '0087', phase: 'review' },
+    );
+    const result = listAllProjects(tmp);
+    expect(result).toHaveLength(1);
+    expect(result[0].state.phase).toBe('review');
+    expect(result[0].statusPath).toContain('.builders');
+  });
+
+  it('returns empty array for a workspace 
with no projects', () => { + expect(listAllProjects(tmp)).toEqual([]); + }); + + it('skips unparseable status.yaml files silently by default', () => { + const dir = join(tmp, 'codev', 'projects', '0099-broken'); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, 'status.yaml'), 'this is: not\n valid:\nyaml: [\n', 'utf-8'); + writeStatus(join(tmp, 'codev', 'projects', '0087-foo'), { id: '0087' }); + const result = listAllProjects(tmp); + expect(result).toHaveLength(1); + expect(result[0].state.id).toBe('0087'); + }); + + it('invokes onParseError callback when provided', () => { + const dir = join(tmp, 'codev', 'projects', '0099-broken'); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, 'status.yaml'), 'this is: not\n valid:\nyaml: [\n', 'utf-8'); + const errors: Array<{ path: string; err: unknown }> = []; + const result = listAllProjects(tmp, { + onParseError: (path, err) => errors.push({ path, err }), + }); + expect(result).toEqual([]); + expect(errors).toHaveLength(1); + expect(errors[0].path).toBe(join(dir, 'status.yaml')); + expect(errors[0].err).toBeInstanceOf(Error); + }); +}); + +describe('formatRelativeAge', () => { + it('formats minutes', () => { + const iso = new Date(Date.now() - 30 * 60_000).toISOString(); + expect(formatRelativeAge(iso)).toMatch(/^\d+m ago$/); + }); + + it('formats hours', () => { + const iso = new Date(Date.now() - 3 * 3600_000).toISOString(); + expect(formatRelativeAge(iso)).toMatch(/^\d+h ago$/); + }); + + it('formats days', () => { + const iso = new Date(Date.now() - 5 * 86_400_000).toISOString(); + expect(formatRelativeAge(iso)).toMatch(/^\d+d ago$/); + }); + + it('rounds days UP so the label aligns with --max-age (25h shows 2d, not 1d)', () => { + const iso = new Date(Date.now() - 25 * 3600_000).toISOString(); + expect(formatRelativeAge(iso)).toBe('2d ago'); + }); + + it('rounds 47h up to 2d (still within the ceil(2) bucket)', () => { + const iso = new Date(Date.now() - 47 * 3600_000).toISOString(); + 
expect(formatRelativeAge(iso)).toBe('2d ago');
+  });
+
+  it('shows "2d ago" rather than "1d ago" for anything strictly older than 24h', () => {
+    // Just past the boundary — ensures the predicate boundary
+    // (`ageDays > maxAge`) matches what the label promises.
+    const iso = new Date(Date.now() - (24 * 3600_000 + 1_000)).toISOString();
+    expect(formatRelativeAge(iso)).toBe('2d ago');
+  });
+
+  it('returns placeholder for malformed ISO', () => {
+    expect(formatRelativeAge('not a date')).toBe('—');
+  });
+});
diff --git a/packages/codev/src/agent-farm/cli.ts b/packages/codev/src/agent-farm/cli.ts
index 6e4d89f2..dc1c8591 100644
--- a/packages/codev/src/agent-farm/cli.ts
+++ b/packages/codev/src/agent-farm/cli.ts
@@ -132,6 +132,35 @@ export async function runAgentFarm(args: string[]): Promise {
       }
     });
 
+  // Issue #829: revive builders whose shellper died (e.g. after machine reboot).
+  workspaceCmd
+    .command('recover')
+    .description('Revive builders whose shellper died (e.g. after machine reboot)')
+    .option('--apply', 'Actually respawn builders (default: dry-run preview only)')
+    .option('--max-age <days>', 'Skip projects with status.yaml older than N days', '7')
+    .option('--include-stale', 'Ignore --max-age (revive arbitrarily old projects)')
+    .option('-y, --yes', 'Skip --apply confirmation prompt')
+    .action(async (options: { apply?: boolean; maxAge?: string; includeStale?: boolean; yes?: boolean }) => {
+      const { workspaceRecover } = await import('./commands/workspace-recover.js');
+      try {
+        // Digits only: bare parseInt() would silently accept "7days" or "1.5".
+        if (options.maxAge !== undefined && !/^\d+$/.test(options.maxAge)) {
+          logger.error(`Invalid --max-age value: ${options.maxAge}`);
+          process.exit(1);
+        }
+        const parsedMaxAge = options.maxAge === undefined ? undefined : parseInt(options.maxAge, 10);
+        await workspaceRecover({
+          apply: options.apply,
+          maxAge: parsedMaxAge,
+          includeStale: options.includeStale,
+          yes: options.yes,
+        });
+      } catch (error) {
+        logger.error(error instanceof Error ? error.message : String(error));
+        process.exit(1);
+      }
+    });
+
   // Deprecated alias: `afx dash` → `afx workspace`
   const dashCmd = program
     .command('dash')
diff --git a/packages/codev/src/agent-farm/commands/spawn-worktree.ts b/packages/codev/src/agent-farm/commands/spawn-worktree.ts
index d7a6d96c..1203b44a 100644
--- a/packages/codev/src/agent-farm/commands/spawn-worktree.ts
+++ b/packages/codev/src/agent-farm/commands/spawn-worktree.ts
@@ -673,7 +673,13 @@ function writeWorktreeFiles(
 }
 
 /**
- * Start a terminal session for a builder
+ * Start a terminal session for a builder.
+ *
+ * When `resumeSessionId` is provided, the launch script invokes
+ * `claude --resume <session-id>` instead of a fresh prompt+role invocation. The
+ * saved Claude conversation contains the system prompt / role context
+ * already, so role injection and the initial prompt are intentionally
+ * skipped on that path.
  */
 export async function startBuilderSession(
   config: Config,
@@ -683,18 +689,32 @@ export async function startBuilderSession(
   prompt: string,
   roleContent: string | null,
   roleSource: string | null,
+  resumeSessionId?: string,
 ): Promise<{ terminalId: string }> {
   logger.info('Creating terminal session...');
 
-  // Write initial prompt to a file for reference
-  const promptFile = resolve(worktreePath, '.builder-prompt.txt');
-  writeFileSync(promptFile, prompt);
-
-  // Build the start script with role if provided
   const scriptPath = resolve(worktreePath, '.builder-start.sh');
   let scriptContent: string;
 
-  if (roleContent) {
+  if (resumeSessionId) {
+    // Resume path: load the prior Claude conversation by UUID. No prompt file,
+    // no role injection — both are already part of the saved conversation.
+    logger.info(`Resuming Claude session ${resumeSessionId.slice(0, 8)}…`);
+    scriptContent = `#!/bin/bash
+cd "${worktreePath}"
+while true; do
+  ${baseCmd} --resume "${resumeSessionId}"
+  echo ""
+  echo "Agent exited. Restarting in 2 seconds... 
(Ctrl+C to quit)"
+  sleep 2
+done
+`;
+  } else if (roleContent) {
+    // Fresh spawn with role injection.
+    // Write initial prompt to a file for reference.
+    const promptFile = resolve(worktreePath, '.builder-prompt.txt');
+    writeFileSync(promptFile, prompt);
+
     // Write role to a file for harness-based injection
     const roleFile = resolve(worktreePath, '.builder-role.md');
     // Inject the actual dashboard port into the role prompt
@@ -725,6 +745,9 @@ ${envBlock}while true; do
 done
 `;
   } else {
+    // Fresh spawn without role injection.
+    const promptFile = resolve(worktreePath, '.builder-prompt.txt');
+    writeFileSync(promptFile, prompt);
     scriptContent = `#!/bin/bash
 cd "${worktreePath}"
 while true; do
diff --git a/packages/codev/src/agent-farm/commands/spawn.ts b/packages/codev/src/agent-farm/commands/spawn.ts
index 0f5ed39e..3f1f8695 100644
--- a/packages/codev/src/agent-farm/commands/spawn.ts
+++ b/packages/codev/src/agent-farm/commands/spawn.ts
@@ -37,6 +37,7 @@ const SPAWNING_ARCHITECT_NAME =
   (process.env.CODEV_ARCHITECT_NAME && process.env.CODEV_ARCHITECT_NAME.trim()) || DEFAULT_ARCHITECT_NAME;
 import { loadRolePrompt } from '../utils/roles.js';
 import { buildAgentName, stripLeadingZeros } from '../utils/agent-names.js';
+import { findLatestSessionId } from '../utils/claude-session-discovery.js';
 import { fetchIssue as fetchIssueNonFatal } from '../../lib/github.js';
 import {
   type TemplateContext,
@@ -73,8 +74,26 @@ import { executeForgeCommand, loadForgeConfig } from '../../lib/forge.js';
 // =============================================================================
 
+/**
+ * On --resume, look up the prior Claude conversation jsonl for the worktree
+ * so the revived builder can pick up the saved conversation via
+ * `claude --resume <session-id>` instead of starting fresh with a resume-notice
+ * prompt. (Issue #831.) Returns undefined when not resuming or when no
+ * jsonl exists; callers fall back to the fresh-spawn path in that case.
+ */
+export function discoverResumeSession(worktreePath: string, isResume: boolean | undefined): string | undefined {
+  if (!isResume) return undefined;
+  const found = findLatestSessionId(worktreePath);
+  if (found) {
+    logger.kv('Claude session', `${found.slice(0, 8)}… (resuming conversation)`);
+    return found;
+  }
+  logger.info('No prior Claude conversation found for this worktree; starting a fresh session.');
+  return undefined;
+}
+
 /**
  * Log spawn success with terminal WebSocket URL
  */
 function logSpawnSuccess(label: string, terminalId: string, mode?: string): void {
   const client = getTowerClient();
   logger.blank();
@@ -440,6 +459,8 @@ async function spawnSpec(options: SpawnOptions, config: Config): Promise {
     templateContext.existing_branch = options.branch;
   }
 
+  const resumeSessionId = discoverResumeSession(worktreePath, options.resume);
+
   const initialPrompt = buildPromptFromTemplate(config, protocol, templateContext);
   const resumeNotice = options.resume ? `\n${buildResumeNotice(projectId)}\n` : '';
   const branchNotice = options.branch
@@ -452,6 +473,7 @@ async function spawnSpec(options: SpawnOptions, config: Config): Promise {
   const { terminalId } = await startBuilderSession(
     config, builderId, worktreePath, commands.builder, builderPrompt,
     role?.content ?? null, role?.source ?? null,
+    resumeSessionId,
   );
 
   upsertBuilder({
@@ -816,11 +838,13 @@ async function spawnIssueDrivenBuilder(
     : '';
   const builderPrompt = `You are a Builder. Read codev/roles/builder.md for your full role definition.\n${resumeNotice}${branchNotice}\n${prompt}`;
 
+  const resumeSessionId = discoverResumeSession(worktreePath, options.resume);
   const role = options.noRole ? null : loadRolePrompt(config, 'builder');
   const commands = getResolvedCommands();
   const { terminalId } = await startBuilderSession(
     config, builderId, worktreePath, commands.builder, builderPrompt,
     role?.content ?? null, role?.source ?? 
null, + resumeSessionId, ); upsertBuilder({ diff --git a/packages/codev/src/agent-farm/commands/workspace-recover.ts b/packages/codev/src/agent-farm/commands/workspace-recover.ts new file mode 100644 index 00000000..fc50221e --- /dev/null +++ b/packages/codev/src/agent-farm/commands/workspace-recover.ts @@ -0,0 +1,381 @@ +// workspace recover — revive builders whose shellper died (e.g. machine reboot). +// Issue #829. Dry-run by default; --apply actually respawns. + +import { spawn } from 'node:child_process'; +import { existsSync, readdirSync } from 'node:fs'; +import { join } from 'node:path'; +import chalk from 'chalk'; + +import { getConfig } from '../utils/index.js'; +import { logger } from '../utils/logger.js'; +import { buildAgentName, stripLeadingZeros } from '../utils/agent-names.js'; +import { processExists, getTerminalSessionsForWorkspace } from '../servers/tower-terminals.js'; +import { closeGlobalDb } from '../db/index.js'; +import { listAllProjects } from '../../commands/porch/state.js'; +import type { ProjectState } from '../../commands/porch/types.js'; +import type { DbTerminalSession } from '../servers/tower-types.js'; +import { confirm } from '../../lib/cli-prompts.js'; + +const TERMINAL_PHASES = new Set(['verified', 'complete']); +const DEFAULT_MAX_AGE_DAYS = 7; + +// Protocols that `afx spawn --resume --protocol ` can revive cleanly: +// issue-driven families with a positional issue arg and a stable worktree layout. +// `experiment` / `maintain` / `task` / bare protocol-mode builders can't be +// resumed by ID through spawn.ts and are skipped with `unsupported_protocol`. +// (Legacy `spider` is also excluded — it was retired long ago, and any stray +// project still carrying that protocol value should be treated as unsupported.) 
const REVIVABLE_PROTOCOLS = new Set(['spir', 'aspir', 'air', 'pir', 'bugfix']);

/** Options accepted by `workspaceRecover`. */
export interface WorkspaceRecoverOptions {
  /** Respawn eligible builders instead of only printing the preview. */
  apply?: boolean;
  /** Recency window in days; projects older than this count as stale. */
  maxAge?: number;
  /** Ignore the recency window (stale projects are shown and may be revived). */
  includeStale?: boolean;
  /** Skip the interactive confirmation before respawning. */
  yes?: boolean;
}

/** Why a project was not selected for revival. */
export type IneligibleReason =
  | 'terminal'
  | 'unsupported_protocol'
  | 'worktree_missing'
  | 'shellper_alive'
  | 'stale';

/** Outcome of `evaluateEligibility`: eligible, or skipped with a reason. */
export type EligibilityResult =
  | { eligible: true }
  | { eligible: false; reason: IneligibleReason };

/** Everything `evaluateEligibility` needs; all probes are injected. */
export interface EligibilityInputs {
  state: ProjectState;
  /** `null` when the project's protocol cannot be resumed via the spawn CLI. */
  builderInfo: BuilderInfo | null;
  /** Terminal-session rows matching this builder (may be empty or duplicated). */
  sessions: DbTerminalSession[];
  worktreeExists: boolean;
  /** Age of the project in days; `NaN` when the timestamp could not be parsed. */
  ageDays: number;
  maxAgeDays: number;
  includeStale: boolean;
  isProcessAlive: (pid: number) => boolean;
  socketExists: (socket: string) => boolean;
}

/**
 * Liveness rule per session row:
 *   - shellper_pid known + alive → ALIVE
 *   - shellper_pid known + dead  → DEAD (the socket is ignored: the OS doesn't
 *     clean ~/.codev/run/ on reboot, so a stale socket would otherwise falsely
 *     mark the row alive and defeat the primary recovery use case)
 *   - shellper_pid null + socket file exists → ALIVE (legacy / pre-PID rows)
 *   - shellper_pid null + no socket          → DEAD
 */
function isSessionAlive(
  session: DbTerminalSession,
  isProcessAlive: (pid: number) => boolean,
  socketExists: (socket: string) => boolean,
): boolean {
  if (session.shellper_pid !== null) {
    return isProcessAlive(session.shellper_pid);
  }
  return session.shellper_socket !== null && socketExists(session.shellper_socket);
}

/**
 * Decide whether a project's builder should be revived.
 *
 * Pure predicate — no I/O. All filesystem and process probes happen in the
 * caller and are passed in via `isProcessAlive` and `socketExists`, which
 * keeps the predicate trivially unit-testable.
 *
 * On terminal_sessions and the "no row" case:
 *   Tower's reconciliation deletes terminal_sessions rows whose shellper can't
 *   be reconnected (tower-terminals.ts:485-711). That runs on Tower startup,
 *   which is exactly what happens between a machine reboot and the user
 *   invoking `workspace recover`. So "no matching session row" is the COMMON
 *   case for a builder that needs revival — not a reason to skip. A row is
 *   only useful here as a positive "still alive" signal.
 *
 * Predicate order (cheap structural checks first):
 *   1. terminal phase
 *   2. unsupported protocol family
 *   3. worktree missing on disk
 *   4. any matching session row is alive → known still running, leave alone
 *   5. stale (not provably within maxAge, unless includeStale)
 *   6. otherwise → revive
 *
 * @param inputs - Project state plus pre-computed probes.
 * @returns `{ eligible: true }`, or the first reason the project is skipped.
 */
export function evaluateEligibility(inputs: EligibilityInputs): EligibilityResult {
  const {
    state, builderInfo, sessions, worktreeExists, ageDays, maxAgeDays, includeStale,
    isProcessAlive, socketExists,
  } = inputs;

  if (TERMINAL_PHASES.has(state.phase)) {
    return { eligible: false, reason: 'terminal' };
  }
  if (builderInfo === null) {
    return { eligible: false, reason: 'unsupported_protocol' };
  }
  if (!worktreeExists) {
    return { eligible: false, reason: 'worktree_missing' };
  }
  // If ANY matching session looks alive, treat the builder as alive. Duplicates
  // can occur when a prior recovery left a dead row behind (terminal_sessions
  // has no UNIQUE constraint on role_id) — the cautious read is "alive."
  if (sessions.some(s => isSessionAlive(s, isProcessAlive, socketExists))) {
    return { eligible: false, reason: 'shellper_alive' };
  }
  // Written as "not within the window" rather than `ageDays > maxAgeDays` so
  // that a NaN age (unparseable timestamp) counts as stale instead of fresh.
  // Any comparison with NaN is false, so the `>` form would wave such a
  // project through to revival.
  const withinWindow = ageDays <= maxAgeDays;
  if (!includeStale && !withinWindow) {
    return { eligible: false, reason: 'stale' };
  }
  return { eligible: true };
}

/** Identifiers needed to look up and respawn one builder. */
export interface BuilderInfo {
  /** Builder id, matched against the `role_id` of terminal-session rows. */
  builderId: string;
  /** Positional issue argument for `afx spawn`. */
  issueArg: string;
  /** Value passed to `afx spawn --protocol`. */
  cliProtocol: string;
}
/**
 * Derive the inputs needed to invoke `afx spawn --resume --protocol` and the
 * `role_id` used to look up the builder's terminal session.
 *
 * @param state - Parsed project status.
 * @returns The builder identifiers, or `null` for protocols that cannot be
 *   cleanly resumed via the spawn CLI (anything outside REVIVABLE_PROTOCOLS).
 *   Callers should skip those projects with an `unsupported_protocol` reason.
 */
export function deriveBuilderInfo(state: ProjectState): BuilderInfo | null {
  if (!REVIVABLE_PROTOCOLS.has(state.protocol)) {
    return null;
  }
  if (state.protocol === 'bugfix') {
    // Bugfix project ids carry a `bugfix-` prefix; the CLI wants the bare id.
    const numericId = state.id.replace(/^bugfix-/, '');
    return {
      builderId: buildAgentName('bugfix', numericId),
      issueArg: numericId,
      cliProtocol: 'bugfix',
    };
  }
  return {
    builderId: buildAgentName('spec', state.id, state.protocol),
    issueArg: stripLeadingZeros(state.id),
    cliProtocol: state.protocol,
  };
}

/**
 * Resolve the builder's worktree path on disk, handling both the Spec-653
 * ID-only layout (`<protocol>-<issue>`) and the legacy title-suffixed form
 * (`<protocol>-<issue>-<anything>`). A directory only counts as a worktree
 * when it contains a `.git` entry.
 *
 * @param buildersDir - Directory that holds all builder worktrees.
 * @param state - Parsed project status.
 * @returns The worktree path, or `null` when the protocol isn't revivable or
 *   no matching directory exists.
 */
export function resolveWorktreePath(buildersDir: string, state: ProjectState): string | null {
  const info = deriveBuilderInfo(state);
  if (info === null) return null;

  const idOnlyPath = join(buildersDir, `${info.cliProtocol}-${info.issueArg}`);
  // `<dir>/.git` existing implies `<dir>` exists, so one probe is enough.
  if (existsSync(join(idOnlyPath, '.git'))) {
    return idOnlyPath;
  }

  if (!existsSync(buildersDir)) return null;
  // The trailing '-' keeps e.g. `pir-12-` from matching `pir-123-title`.
  const prefix = `${info.cliProtocol}-${info.issueArg}-`;
  for (const entry of readdirSync(buildersDir, { withFileTypes: true })) {
    if (!entry.isDirectory() || !entry.name.startsWith(prefix)) continue;
    const candidate = join(buildersDir, entry.name);
    if (existsSync(join(candidate, '.git'))) return candidate;
  }
  return null;
}

/**
 * Render how long ago an ISO timestamp was, as `Nm ago`, `Nh ago` or `Nd ago`.
 *
 * Minutes and hours round down. Days round UP so the label aligns with
 * --max-age semantics: anything strictly older than 24h prints as "2d ago"
 * (not "1d ago"), so a row labelled "1d ago" is actually within --max-age 1.
 *
 * @param iso - Timestamp in a format `Date.parse` understands.
 * @param now - Reference instant in epoch milliseconds; defaults to the
 *   current time. Exposed so callers and tests can pin the clock.
 * @returns The relative label, or '—' when `iso` is unparseable or in the
 *   future relative to `now`.
 */
export function formatRelativeAge(iso: string, now: number = Date.now()): string {
  const ms = now - Date.parse(iso);
  if (Number.isNaN(ms) || ms < 0) return '—';
  const minutes = Math.floor(ms / 60_000);
  if (minutes < 60) return `${minutes}m ago`;
  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;
  // Use ms rather than the floored `hours` to preserve sub-hour precision —
  // 24h + 1s must ceil to 2 days, not 1.
  const days = Math.ceil(ms / 86_400_000);
  return `${days}d ago`;
}

// Typed as a full Record so adding a new IneligibleReason without a label is
// a compile error.
const REASON_LABELS: Record<IneligibleReason, string> = {
  terminal: 'terminal',
  unsupported_protocol: 'unsupported protocol',
  worktree_missing: 'worktree missing',
  shellper_alive: 'shellper alive',
  stale: 'stale',
};

/** Human-readable text for the REASON column of the preview table. */
function reasonLabel(reason: IneligibleReason): string {
  return REASON_LABELS[reason];
}

/** One project as shown in the preview and considered for respawn. */
interface RecoverRow {
  state: ProjectState;
  builderInfo: BuilderInfo | null;
  worktreePath: string | null;
  eligibility: EligibilityResult;
  ageDays: number;
}

/**
 * Print the preview table: one line per project with its revive/skip verdict
 * and, for skipped rows, the reason.
 */
function printPreview(rows: RecoverRow[]): void {
  const widths = [6, 9, 12, 14, 10, 20];
  logger.row(['ID', 'PROTOCOL', 'PHASE', 'UPDATED', 'STATUS', 'REASON'], widths);
  // Separator cells are derived from `widths` so the two cannot drift apart.
  logger.row(widths.map(width => '─'.repeat(width)), widths);
  for (const row of rows) {
    const verdict = row.eligibility;
    const status = verdict.eligible ? chalk.green('revive') : chalk.gray('skip');
    const reason = verdict.eligible ? '—' : reasonLabel(verdict.reason);
    logger.row(
      [
        row.state.id,
        row.state.protocol,
        row.state.phase,
        formatRelativeAge(row.state.updated_at),
        status,
        reason,
      ],
      widths,
    );
  }
}
/**
 * Re-run `spawn <issue> --resume --protocol <protocol>` for one builder in a
 * child process and wait for it to finish.
 *
 * Uses the current node binary and CLI entry point so the respawn invocation
 * matches the install method this command was started under (npm global,
 * npx, dev script, etc.) instead of relying on PATH lookup of 'afx'.
 *
 * @param info - Builder identifiers from `deriveBuilderInfo`.
 * @throws Error when the entry point is unknown, the child cannot be started,
 *   exits non-zero, or is terminated by a signal.
 */
async function respawnBuilder(info: BuilderInfo): Promise<void> {
  const cliEntry = process.argv[1];
  if (!cliEntry) {
    throw new Error('Cannot respawn: CLI entry point (process.argv[1]) is not set');
  }
  await new Promise<void>((resolvePromise, rejectPromise) => {
    const child = spawn(
      process.execPath,
      [cliEntry, 'spawn', info.issueArg, '--resume', '--protocol', info.cliProtocol],
      { stdio: 'inherit' },
    );
    child.on('error', rejectPromise);
    child.on('exit', (code, signal) => {
      if (code === 0) {
        resolvePromise();
      } else if (code === null) {
        // A null exit code means the child was killed by a signal.
        rejectPromise(new Error(`afx spawn was terminated by signal ${signal}`));
      } else {
        rejectPromise(new Error(`afx spawn exited with code ${code}`));
      }
    });
  });
}

/**
 * `workspace recover`: find builders whose shellper died and respawn them.
 *
 * Prints a preview table of porch projects and their eligibility. Without
 * `apply` nothing else happens (dry-run). With `apply`, asks for confirmation
 * (unless `yes`) and respawns each eligible builder sequentially; a failure
 * for one builder does not stop the others.
 *
 * Exits the process with status 1 when at least one respawn failed.
 *
 * @param options - See `WorkspaceRecoverOptions`.
 */
export async function workspaceRecover(options: WorkspaceRecoverOptions = {}): Promise<void> {
  const config = getConfig();
  const maxAgeDays = options.maxAge ?? DEFAULT_MAX_AGE_DAYS;
  const includeStale = options.includeStale ?? false;
  const apply = options.apply ?? false;

  logger.header(`Workspace Recover${apply ? '' : ' (dry-run)'}`);
  logger.kv('Workspace', config.workspaceRoot);
  if (!includeStale) logger.kv('Max age', `${maxAgeDays} day(s)`);
  logger.blank();

  const projects = listAllProjects(config.workspaceRoot, {
    onParseError: (statusPath, err) => {
      logger.debug(`Skipped unparseable ${statusPath}: ${err instanceof Error ? err.message : String(err)}`);
    },
  });
  if (projects.length === 0) {
    logger.info('No porch projects found.');
    return;
  }

  let sessions: DbTerminalSession[];
  try {
    sessions = getTerminalSessionsForWorkspace(config.workspaceRoot);
  } finally {
    // The session rows are the only thing read from the DB here.
    closeGlobalDb();
  }
  // role_id has no UNIQUE constraint in the schema, so collect every matching
  // row per builder id rather than collapsing to last-write-wins.
  const sessionsByRoleId = new Map<string, DbTerminalSession[]>();
  for (const s of sessions) {
    if (s.type !== 'builder' || !s.role_id) continue;
    const bucket = sessionsByRoleId.get(s.role_id);
    if (bucket) bucket.push(s);
    else sessionsByRoleId.set(s.role_id, [s]);
  }

  const nowMs = Date.now();
  const allRows: RecoverRow[] = projects.map(({ state }) => {
    const builderInfo = deriveBuilderInfo(state);
    const matchingSessions = builderInfo ? sessionsByRoleId.get(builderInfo.builderId) ?? [] : [];
    const worktreePath = resolveWorktreePath(config.buildersDir, state);
    // An unparseable `updated_at` would produce NaN, and every comparison with
    // NaN is false. That made such a project "not stale" for eligibility but
    // "not within the window" for the preview filter below, so it could be
    // respawned without ever being shown. Treat unknown age as infinitely old
    // so both checks agree.
    const updatedMs = Date.parse(state.updated_at);
    const ageDays = Number.isNaN(updatedMs) ? Infinity : (nowMs - updatedMs) / 86_400_000;
    const eligibility = evaluateEligibility({
      state, builderInfo,
      sessions: matchingSessions,
      worktreeExists: worktreePath !== null,
      ageDays, maxAgeDays, includeStale,
      isProcessAlive: processExists,
      socketExists: existsSync,
    });
    return { state, builderInfo, worktreePath, eligibility, ageDays };
  });

  // By default the preview hides projects beyond the recency window — for a
  // large workspace the stale tail dominates the table and obscures the few
  // rows the operator actually cares about. --include-stale shows everything.
  const visibleRows = includeStale
    ? allRows
    : allRows.filter(r => r.ageDays <= maxAgeDays);
  const hiddenStaleCount = allRows.length - visibleRows.length;

  printPreview(visibleRows);
  if (hiddenStaleCount > 0) {
    logger.blank();
    logger.info(`${hiddenStaleCount} project(s) older than ${maxAgeDays} day(s) hidden. Pass --include-stale to show them.`);
  }

  const eligible = allRows.filter(
    (r): r is RecoverRow & { builderInfo: BuilderInfo; eligibility: { eligible: true } } =>
      r.eligibility.eligible && r.builderInfo !== null,
  );
  logger.blank();
  if (hiddenStaleCount > 0) {
    logger.kv('Eligible', `${eligible.length} / ${visibleRows.length} visible (${allRows.length} scanned)`);
  } else {
    logger.kv('Eligible', `${eligible.length} / ${allRows.length}`);
  }

  if (eligible.length === 0) {
    logger.info(apply ? 'Nothing to revive.' : 'Nothing would be revived.');
    return;
  }

  if (!apply) {
    logger.info(`Run with --apply to respawn ${eligible.length} builder(s).`);
    return;
  }

  if (!options.yes) {
    const proceed = await confirm(`Proceed to respawn ${eligible.length} builder(s)?`, false);
    if (!proceed) {
      logger.info('Aborted.');
      return;
    }
  }

  let succeeded = 0;
  let failed = 0;
  // Sequential on purpose: each child inherits this process's stdio.
  for (const row of eligible) {
    logger.blank();
    logger.info(`Respawning ${row.builderInfo.builderId} (issue ${row.builderInfo.issueArg}, ${row.builderInfo.cliProtocol})...`);
    try {
      await respawnBuilder(row.builderInfo);
      succeeded++;
    } catch (err) {
      failed++;
      logger.error(`Failed to respawn ${row.builderInfo.builderId}: ${err instanceof Error ? err.message : String(err)}`);
    }
  }

  logger.blank();
  logger.kv('Respawned', String(succeeded));
  if (failed > 0) {
    logger.kv('Failed', String(failed));
    process.exit(1);
  }
}
this workspace cwd, resume it instead of starting fresh. Role + // injection is skipped on the resume path — the saved conversation + // already contains the role/system prompt. + // + // Lookup is unconditional here (unlike builders, where spawn.ts gates + // discovery behind `options.resume`). The asymmetry is intentional: + // launchInstance only runs when main isn't already alive, so the + // implicit intent is always "spawn the missing main; resume its + // prior conversation if one exists." There is no equivalent user + // intent surface (no flag) on the workspace-start path. + // + // Multi-architect collision guard (Codex / Gemini cmap-3 round 2): + // Named sibling architects (Spec 755) share `cwd = workspacePath` and + // write their jsonls into the same encoded-cwd directory as main. The + // newest-jsonl heuristic can't tell which jsonl belongs to which + // architect — so if any sibling is currently persisted in state.db, + // main risks resuming a sibling's conversation instead of its own. + // Until #832 lands per-architect session UUIDs, the conservative + // behavior is: if state.db reports >1 architect, skip resume for + // main and spawn fresh with role injection. Single-architect + // workspaces (the common case) keep conversation resume. + // + // Note: this only checks current persisted state, not history. A + // sibling that existed before but was removed leaves stale jsonl + // files behind; main could still pick one up. Acceptable until #832. + let safeToResume: boolean; + try { + // Single architect (main only, or empty before first spawn) → safe. + // Multiple → unsafe (sibling jsonls collide with main's in the same cwd). + // Bugfix #826: getArchitects is workspace-scoped — pass resolvedPath + // (the canonical workspace path used by the architect table). 
+ safeToResume = getArchitects(resolvedPath).length <= 1; + } catch { + // state.db read should never fail here, but if it does the safe + // default is to skip resume rather than risk attaching main to + // an unrelated jsonl. + safeToResume = false; + } + const resumeSessionId = safeToResume ? findLatestSessionId(workspacePath) : null; + if (!safeToResume) { + _deps.log('WARN', `Skipping main architect conversation resume for ${workspacePath}: persisted sibling architects detected (or state.db unreadable); cannot disambiguate jsonl by cwd. See #832.`); + } + let cmdArgs: string[]; + let harnessEnv: Record; + if (resumeSessionId) { + cmdArgs = [...cmdParts.slice(1), '--resume', resumeSessionId]; + harnessEnv = {}; + _deps.log('INFO', `Resuming main architect Claude session ${resumeSessionId.slice(0, 8)}… for ${workspacePath}`); + } else { + const built = buildArchitectArgs(cmdParts.slice(1), workspacePath); + cmdArgs = built.args; + harnessEnv = built.env; + } // Build env with CLAUDECODE removed so spawned Claude processes // don't detect a nested session, and merge harness env vars. diff --git a/packages/codev/src/agent-farm/utils/claude-session-discovery.ts b/packages/codev/src/agent-farm/utils/claude-session-discovery.ts new file mode 100644 index 00000000..b8d505aa --- /dev/null +++ b/packages/codev/src/agent-farm/utils/claude-session-discovery.ts @@ -0,0 +1,70 @@ +// Discover the most-recent Claude conversation session for a given working +// directory by inspecting Claude Code's on-disk session store. +// +// Claude Code automatically persists every interactive session to +// ~/.claude/projects//.jsonl +// where the encoding replaces both '/' and '.' in the absolute path with '-'. +// We use the newest jsonl by mtime as a stand-in for "the last conversation +// that ran in this directory" so reviving a dead builder (or architect) can +// resume via `claude --resume ` without any spawn-time bookkeeping. 
//
// This is intentionally a heuristic — multiple jsonl files in the same
// directory mean multiple past sessions, and we pick the most recent. For
// builder worktrees that almost always means the right one; for shared cwds
// the caller should be aware of the ambiguity.

import { existsSync, readdirSync, statSync } from 'node:fs';
import { homedir } from 'node:os';
import { join } from 'node:path';

const JSONL_EXT = '.jsonl';

/**
 * Encode an absolute path to the directory name Claude uses under
 * ~/.claude/projects/. The scheme is: replace every '/' and '.' with '-'.
 *
 * Example: '/Users/x/repos/foo/.builders/pir-1' → '-Users-x-repos-foo--builders-pir-1'
 *
 * NOTE(review): only '/' and '.' are rewritten here. Confirm against Claude
 * Code's actual behaviour whether other characters (spaces, underscores,
 * Windows separators) are rewritten too; a mismatch would make lookups miss.
 */
export function encodeClaudeProjectDir(absolutePath: string): string {
  return absolutePath.replace(/[/.]/g, '-');
}

/**
 * Directory in which Claude stores the session files for a given cwd.
 *
 * @param absolutePath - Absolute working directory of the Claude session.
 * @param homeDir - Home directory to resolve against; defaults to
 *   `os.homedir()`.
 */
export function getClaudeProjectDir(absolutePath: string, homeDir: string = homedir()): string {
  return join(homeDir, '.claude', 'projects', encodeClaudeProjectDir(absolutePath));
}

/**
 * List a directory, returning an empty list when it cannot be read
 * (removed between probe and read, permission denied, not a directory).
 */
function listEntriesOrEmpty(dir: string) {
  try {
    return readdirSync(dir, { withFileTypes: true });
  } catch {
    return [];
  }
}

/**
 * Return the session UUID of the most-recently-modified jsonl in the Claude
 * project dir for the given cwd, or null if none exists.
 *
 * Discovery is best-effort: an unreadable project directory or an unreadable
 * file is treated as "no session there" rather than raised, so callers can
 * always fall back to starting a fresh session.
 *
 * @param absolutePath - Absolute working directory of the Claude session.
 * @param opts - `opts.homeDir` lets tests pin the home directory; otherwise
 *   it resolves via `os.homedir()`.
 * @returns The session id (file name without `.jsonl`), or null.
 */
export function findLatestSessionId(
  absolutePath: string,
  opts?: { homeDir?: string },
): string | null {
  const dir = getClaudeProjectDir(absolutePath, opts?.homeDir ?? homedir());
  if (!existsSync(dir)) return null;

  let bestName: string | null = null;
  let bestMtime = -Infinity;

  for (const entry of listEntriesOrEmpty(dir)) {
    if (!entry.isFile() || !entry.name.endsWith(JSONL_EXT)) continue;
    // A file named exactly ".jsonl" has no session id; letting it win would
    // return '' and hide a real, older session.
    if (entry.name.length === JSONL_EXT.length) continue;
    try {
      const mtime = statSync(join(dir, entry.name)).mtimeMs;
      if (mtime > bestMtime) {
        bestMtime = mtime;
        bestName = entry.name;
      }
    } catch {
      // stat failed (race with deletion, permissions) — skip
    }
  }

  if (bestName === null) return null;
  return bestName.slice(0, -JSONL_EXT.length);
}
/**
 * Enumerate every project on disk, parsing each status.yaml.
 *
 * Directories are scanned in this order and the first copy seen for a given
 * project id wins:
 *   1. `<workspaceRoot>/.builders/<worktree>/<PROJECTS_DIR>` for each worktree
 *   2. `<workspaceRoot>/<PROJECTS_DIR>`
 * so a worktree copy takes precedence over the main copy of the same project.
 *
 * status.yaml files that fail to parse are skipped. Callers can observe these
 * skips by passing `onParseError`; otherwise failures are silent.
 *
 * NOTE(review): when several worktrees contain a status.yaml for the same
 * project id, the winner is whichever worktree the directory listing returns
 * first — confirm that is the intended tie-break.
 *
 * @param workspaceRoot - Root of the workspace to scan.
 * @param opts - Optional `onParseError(statusPath, err)` callback.
 * @returns One `{ statusPath, state }` entry per distinct project id.
 */
export function listAllProjects(
  workspaceRoot: string,
  opts?: { onParseError?: (statusPath: string, err: unknown) => void },
): Array<{ statusPath: string; state: ProjectState }> {
  type ProjectEntry = { statusPath: string; state: ProjectState };
  const byId = new Map<string, ProjectEntry>();

  // Scan order encodes precedence: worktree project dirs first, main last.
  const scanOrder: string[] = [];
  const worktreesRoot = path.join(workspaceRoot, '.builders');
  if (fs.existsSync(worktreesRoot)) {
    for (const worktree of fs.readdirSync(worktreesRoot, { withFileTypes: true })) {
      if (worktree.isDirectory()) {
        scanOrder.push(path.join(worktreesRoot, worktree.name, PROJECTS_DIR));
      }
    }
  }
  scanOrder.push(path.join(workspaceRoot, PROJECTS_DIR));

  for (const projectsDir of scanOrder) {
    if (!fs.existsSync(projectsDir)) continue;
    for (const child of fs.readdirSync(projectsDir, { withFileTypes: true })) {
      if (!child.isDirectory()) continue;
      const statusPath = path.join(projectsDir, child.name, 'status.yaml');
      if (!fs.existsSync(statusPath)) continue;

      let state: ProjectState;
      try {
        state = readState(statusPath);
      } catch (err) {
        opts?.onParseError?.(statusPath, err);
        continue;
      }
      // First copy seen wins; later copies of the same id are ignored.
      if (!byId.has(state.id)) {
        byId.set(state.id, { statusPath, state });
      }
    }
  }

  return Array.from(byId.values());
}