diff --git a/src/product/generation/workforce-persona-writer.test.ts b/src/product/generation/workforce-persona-writer.test.ts index f2d56407..9ba83980 100644 --- a/src/product/generation/workforce-persona-writer.test.ts +++ b/src/product/generation/workforce-persona-writer.test.ts @@ -360,6 +360,135 @@ describe('workforce persona workflow writer', () => { }), ).toThrow(/structured JSON or include fenced TypeScript artifact/); }); + + // Regression: when the writer is prompted to use the Write tool, claude + // sometimes emits a complete ```typescript fence whose body is a + // *placeholder* ("// (full source above — file written to disk)") + // followed by a complete ```json metadata fence. Both fences parse, + // so the parser took the fenced-response path and threw "does not + // call workflow()" — even though the actual workflow source had just + // been written to disk and the rest of the response was structurally + // fine. Result: ~25 minutes of writer work discarded, and 3 more + // 25-minute repair attempts burned chasing the same symptom. The + // parser must treat the placeholder-fence case as a stdout-format + // mismatch and prefer the freshly-written file on disk, exactly like + // it already does for outright truncated stdout. + const placeholderFenceOutput = [ + 'The file is complete and correct. Here is the response contract output:', + '', + '```typescript', + '// workflows/generated/persona.ts', + '// (full source above — file written to disk)', + '```', + '', + '```json', + JSON.stringify({ + artifact: { + path: 'workflows/generated/persona.ts', + language: 'typescript', + linesOfCode: 513, + writtenToDisk: true, + }, + metadata: { workflowName: 'persona', agents: ['lead'] }, + }, null, 2), + '```', + ].join('\n'); + + it('recovers from a placeholder ```typescript fence + complete ```json metadata fence', () => { + const artifactPath = 'workflows/generated/persona.ts'; + const writerInvokedAtMs = 100; + const parsed = parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + writerInvokedAtMs, + statFile: (path) => path.endsWith('persona.ts') ? { mtimeMs: writerInvokedAtMs + 1_000 } : undefined, + readFileText: () => workflowSource(), + }); + expect(parsed.responseFormat).toBe('fenced-artifact'); + expect(parsed.content).toContain('workflow("persona")'); + expect(parsed.metadata).toMatchObject({ recoveredFromDisk: true, reason: 'fenced-ts-placeholder' }); + }); + + it('does NOT recover from a placeholder fence when the on-disk file is STALE (mtime <= writerInvokedAtMs)', () => { + // Regression guard for the bypass identified during review: when + // `recoverArtifactFromTruncatedOutput` correctly rejects a stale + // on-disk file, the placeholder-fence helper used to return + // `undefined` and the parser fell through to + // `validateStructuredResponse` → `recoverExpectedArtifactContent`, + // which has no mtime check and would silently surface the stale + // artifact as if it were the current writer's output. The helper + // must now throw the original "does not call workflow()" error so + // no fallthrough is possible. + const artifactPath = 'workflows/generated/persona.ts'; + const writerInvokedAtMs = 1_000; + expect(() => + parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + writerInvokedAtMs, + statFile: () => ({ mtimeMs: writerInvokedAtMs - 1 }), + // Even though the on-disk file is a valid workflow, the mtime + // says it predates this writer run — must NOT be accepted. + readFileText: () => workflowSource(), + }), + ).toThrow(/does not call workflow\(\)/); + }); + + it('placeholder fence path falls through to disk recovery only when `writerInvokedAtMs` is provided', () => { + // Without `writerInvokedAtMs` the disk-recovery freshness guard + // short-circuits, so the placeholder-fence helper has no fresh + // file to fall back to and must re-throw the original + // "does not call workflow()" error rather than returning a + // misleading shape or silently consuming a stale artifact. + const artifactPath = 'workflows/generated/persona.ts'; + expect(() => + parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + // writerInvokedAtMs intentionally omitted + statFile: () => ({ mtimeMs: Date.now() }), + readFileText: () => workflowSource(), + }), + ).toThrow(/does not call workflow\(\)/); + }); + + it('does NOT recover from a placeholder fence when on-disk content also lacks `workflow(` (no silent bypass)', () => { + const artifactPath = 'workflows/generated/persona.ts'; + expect(() => + parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + writerInvokedAtMs: 100, + statFile: () => ({ mtimeMs: 200 }), + readFileText: () => 'export const broken = 1;', + }), + ).toThrow(/does not call workflow\(\)/); + }); + + it('placeholder fence path re-validates fenced metadata against the recovered disk content (mismatched path still throws)', () => { + // Regression guard for CodeRabbit's metadata concern: after the + // helper recovers content from disk, it must re-run the full + // fenced-response validator on the recovered content + metadata + // so metadata-level issues (mismatched `path`) still surface. + const artifactPath = 'workflows/generated/persona.ts'; + const writerInvokedAtMs = 100; + const mismatchedFenceOutput = [ + '```typescript', + '// (file written to disk)', + '```', + '', + '```json', + JSON.stringify({ + path: 'workflows/generated/SOMETHING-ELSE.ts', + workflowName: 'persona', + }, null, 2), + '```', + ].join('\n'); + expect(() => + parsePersonaWorkflowResponse(mismatchedFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + writerInvokedAtMs, + statFile: (path) => path.endsWith('persona.ts') ? { mtimeMs: writerInvokedAtMs + 1_000 } : undefined, + readFileText: () => workflowSource(), + }), + ).toThrow(/fenced metadata path .* did not match expected output path/); + }); }); it('invokes the spawned harness non-interactively (no TUI flag, structured-response contract)', async () => { diff --git a/src/product/generation/workforce-persona-writer.ts b/src/product/generation/workforce-persona-writer.ts index e84481d7..6b767352 100644 --- a/src/product/generation/workforce-persona-writer.ts +++ b/src/product/generation/workforce-persona-writer.ts @@ -1029,7 +1029,14 @@ export function parsePersonaWorkflowResponse( const clarification = metadataJson ? parseClarificationResponse(metadataJson) : null; if (clarification) return { metadata: {}, responseFormat: 'needs-clarification', clarification }; if (metadataJson && tsFence) { - return validateFencedResponse(tsFence, metadataJson, expectedPath); + // The writer is sometimes prompted to use the Write tool and only + // emit a placeholder ("// (file written to disk)") inside the ts + // fence. validateFencedResponse rejects that because the placeholder + // lacks a `workflow(` call. tryFencedResponseOrDiskRecovery prefers + // a fresh on-disk artifact in that case; it always returns or throws + // (never falls through to validateStructuredResponse below), so + // stale on-disk artifacts can't sneak in via recoverExpectedArtifactContent. + return tryFencedResponseOrDiskRecovery(tsFence, metadataJson, expectedPath, options); } if (metadataJson) { return validateStructuredResponse(metadataJson, expectedPath, 'structured-json', options); @@ -1040,7 +1047,7 @@ export function parsePersonaWorkflowResponse( const metadataFence = fencedBlock(output, 'metadata'); const metadata = metadataFence ? parseJsonObject(metadataFence) : null; if (tsFence && metadata) { - return validateFencedResponse(tsFence, metadata, expectedPath); + return tryFencedResponseOrDiskRecovery(tsFence, metadata, expectedPath, options); } // Tolerant fallback: Claude Sonnet has been observed to emit a prose @@ -1307,6 +1314,65 @@ function validateFencedResponse( }; } +/** + * Attempt `validateFencedResponse` first; if it rejects the ts fence for + * lacking a `workflow(` call AND a freshly-written workflow exists on disk + * at `expectedPath`, prefer the disk content. The placeholder-fence pattern + * shows up when the writer is prompted to use the Write tool: it emits + * something like `// (full source above — file written to disk)` inside + * the ts fence and the actual source lands on disk. Returning the disk + * content instead of throwing keeps the writer's successful work from + * being discarded over a stdout-format mismatch. + * + * Always returns or throws — never `undefined`. That contract is load- + * bearing: if the caller could fall through to `validateStructuredResponse` + * on `undefined`, an unguarded `recoverExpectedArtifactContent` call could + * silently surface a STALE on-disk artifact from a prior writer run (no + * mtime check). By throwing the original "no workflow() call" error when + * disk recovery fails, we preserve the freshness guarantee + * `recoverArtifactFromTruncatedOutput` enforces via `writerInvokedAtMs`. + * + * After a successful disk read, re-validates the recovered content + the + * fence metadata via the full `validateFencedResponse` pipeline — so + * metadata-level issues (missing fields, mismatched `path`) still surface + * even though the on-disk content saved us from the placeholder symptom. + */ +function tryFencedResponseOrDiskRecovery( + tsFence: string, + metadata: Record, + expectedPath: string, + options: PersonaResponseParseOptions, +): ParsedPersonaResponse { + try { + return validateFencedResponse(tsFence, metadata, expectedPath); + } catch (error) { + if (!(error instanceof WorkforcePersonaWriterError)) throw error; + if (!/does not call workflow\(\)/.test(error.message)) throw error; + const recovered = recoverArtifactFromTruncatedOutput(expectedPath, options); + if (!recovered) throw error; + let parsed: ParsedPersonaResponse; + try { + parsed = validateFencedResponse(recovered, metadata, expectedPath); + } catch (recoveredError) { + // If the on-disk file *also* lacks `workflow(`, surface the original + // error — the writer genuinely produced an invalid artifact. For any + // other validation failure (bad metadata, mismatched path), let it + // bubble up so the caller sees the real symptom. + if ( + recoveredError instanceof WorkforcePersonaWriterError && + /does not call workflow\(\)/.test(recoveredError.message) + ) { + throw error; + } + throw recoveredError; + } + return { + ...parsed, + metadata: { ...parsed.metadata, recoveredFromDisk: true, reason: 'fenced-ts-placeholder' }, + }; + } +} + function validateArtifactContent(content: string): void { if (!/\bworkflow\(/.test(content)) { throw new WorkforcePersonaWriterError('Workforce persona artifact does not call workflow().');