Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions src/product/generation/workforce-persona-writer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,135 @@ describe('workforce persona workflow writer', () => {
}),
).toThrow(/structured JSON or include fenced TypeScript artifact/);
});

// Regression: when the writer is prompted to use the Write tool, claude
// sometimes emits a complete ```typescript fence whose body is a
// *placeholder* ("// (full source above — file written to disk)")
// followed by a complete ```json metadata fence. Both fences parse,
// so the parser took the fenced-response path and threw "does not
// call workflow()" — even though the actual workflow source had just
// been written to disk and the rest of the response was structurally
// fine. Result: ~25 minutes of writer work discarded, and 3 more
// 25-minute repair attempts burned chasing the same symptom. The
// parser must treat the placeholder-fence case as a stdout-format
// mismatch and prefer the freshly-written file on disk, exactly like
// it already does for outright truncated stdout.
const placeholderFenceOutput = [
'The file is complete and correct. Here is the response contract output:',
'',
'```typescript',
'// workflows/generated/persona.ts',
'// (full source above — file written to disk)',
'```',
'',
'```json',
JSON.stringify({
artifact: {
path: 'workflows/generated/persona.ts',
language: 'typescript',
linesOfCode: 513,
writtenToDisk: true,
},
metadata: { workflowName: 'persona', agents: ['lead'] },
}, null, 2),
'```',
].join('\n');

it('recovers from a placeholder ```typescript fence + complete ```json metadata fence', () => {
const artifactPath = 'workflows/generated/persona.ts';
const writerInvokedAtMs = 100;
const parsed = parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, {
repoRoot: '/tmp/repo',
writerInvokedAtMs,
statFile: (path) => path.endsWith('persona.ts') ? { mtimeMs: writerInvokedAtMs + 1_000 } : undefined,
readFileText: () => workflowSource(),
});
expect(parsed.responseFormat).toBe('fenced-artifact');
expect(parsed.content).toContain('workflow("persona")');
expect(parsed.metadata).toMatchObject({ recoveredFromDisk: true, reason: 'fenced-ts-placeholder' });
});

it('does NOT recover from a placeholder fence when the on-disk file is STALE (mtime <= writerInvokedAtMs)', () => {
// Regression guard for the bypass identified during review: when
// `recoverArtifactFromTruncatedOutput` correctly rejects a stale
// on-disk file, the placeholder-fence helper used to return
// `undefined` and the parser fell through to
// `validateStructuredResponse` → `recoverExpectedArtifactContent`,
// which has no mtime check and would silently surface the stale
// artifact as if it were the current writer's output. The helper
// must now throw the original "does not call workflow()" error so
// no fallthrough is possible.
const artifactPath = 'workflows/generated/persona.ts';
const writerInvokedAtMs = 1_000;
expect(() =>
parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, {
repoRoot: '/tmp/repo',
writerInvokedAtMs,
statFile: () => ({ mtimeMs: writerInvokedAtMs - 1 }),
// Even though the on-disk file is a valid workflow, the mtime
// says it predates this writer run — must NOT be accepted.
readFileText: () => workflowSource(),
}),
).toThrow(/does not call workflow\(\)/);
});

it('placeholder fence path falls through to disk recovery only when `writerInvokedAtMs` is provided', () => {
// Without `writerInvokedAtMs` the disk-recovery freshness guard
// short-circuits, so the placeholder-fence helper has no fresh
// file to fall back to and must re-throw the original
// "does not call workflow()" error rather than returning a
// misleading shape or silently consuming a stale artifact.
const artifactPath = 'workflows/generated/persona.ts';
expect(() =>
parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, {
repoRoot: '/tmp/repo',
// writerInvokedAtMs intentionally omitted
statFile: () => ({ mtimeMs: Date.now() }),
readFileText: () => workflowSource(),
}),
).toThrow(/does not call workflow\(\)/);
});

it('does NOT recover from a placeholder fence when on-disk content also lacks `workflow(` (no silent bypass)', () => {
const artifactPath = 'workflows/generated/persona.ts';
expect(() =>
parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, {
repoRoot: '/tmp/repo',
writerInvokedAtMs: 100,
statFile: () => ({ mtimeMs: 200 }),
readFileText: () => 'export const broken = 1;',
}),
).toThrow(/does not call workflow\(\)/);
});

it('placeholder fence path re-validates fenced metadata against the recovered disk content (mismatched path still throws)', () => {
// Regression guard for CodeRabbit's metadata concern: after the
// helper recovers content from disk, it must re-run the full
// fenced-response validator on the recovered content + metadata
// so metadata-level issues (mismatched `path`) still surface.
const artifactPath = 'workflows/generated/persona.ts';
const writerInvokedAtMs = 100;
const mismatchedFenceOutput = [
'```typescript',
'// (file written to disk)',
'```',
'',
'```json',
JSON.stringify({
path: 'workflows/generated/SOMETHING-ELSE.ts',
workflowName: 'persona',
}, null, 2),
'```',
].join('\n');
expect(() =>
parsePersonaWorkflowResponse(mismatchedFenceOutput, artifactPath, {
repoRoot: '/tmp/repo',
writerInvokedAtMs,
statFile: (path) => path.endsWith('persona.ts') ? { mtimeMs: writerInvokedAtMs + 1_000 } : undefined,
readFileText: () => workflowSource(),
}),
).toThrow(/fenced metadata path .* did not match expected output path/);
});
});

it('invokes the spawned harness non-interactively (no TUI flag, structured-response contract)', async () => {
Expand Down
70 changes: 68 additions & 2 deletions src/product/generation/workforce-persona-writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1029,7 +1029,14 @@ export function parsePersonaWorkflowResponse(
const clarification = metadataJson ? parseClarificationResponse(metadataJson) : null;
if (clarification) return { metadata: {}, responseFormat: 'needs-clarification', clarification };
if (metadataJson && tsFence) {
return validateFencedResponse(tsFence, metadataJson, expectedPath);
// The writer is sometimes prompted to use the Write tool and only
// emit a placeholder ("// (file written to disk)") inside the ts
// fence. validateFencedResponse rejects that because the placeholder
// lacks a `workflow(` call. tryFencedResponseOrDiskRecovery prefers
// a fresh on-disk artifact in that case; it always returns or throws
// (never falls through to validateStructuredResponse below), so
// stale on-disk artifacts can't sneak in via recoverExpectedArtifactContent.
return tryFencedResponseOrDiskRecovery(tsFence, metadataJson, expectedPath, options);
}
if (metadataJson) {
return validateStructuredResponse(metadataJson, expectedPath, 'structured-json', options);
Expand All @@ -1040,7 +1047,7 @@ export function parsePersonaWorkflowResponse(
const metadataFence = fencedBlock(output, 'metadata');
const metadata = metadataFence ? parseJsonObject(metadataFence) : null;
if (tsFence && metadata) {
return validateFencedResponse(tsFence, metadata, expectedPath);
return tryFencedResponseOrDiskRecovery(tsFence, metadata, expectedPath, options);
}

// Tolerant fallback: Claude Sonnet has been observed to emit a prose
Expand Down Expand Up @@ -1307,6 +1314,65 @@ function validateFencedResponse(
};
}

/**
* Attempt `validateFencedResponse` first; if it rejects the ts fence for
* lacking a `workflow(` call AND a freshly-written workflow exists on disk
* at `expectedPath`, prefer the disk content. The placeholder-fence pattern
* shows up when the writer is prompted to use the Write tool: it emits
* something like `// (full source above — file written to disk)` inside
* the ts fence and the actual source lands on disk. Returning the disk
* content instead of throwing keeps the writer's successful work from
* being discarded over a stdout-format mismatch.
*
* Always returns or throws — never `undefined`. That contract is load-
* bearing: if the caller could fall through to `validateStructuredResponse`
* on `undefined`, an unguarded `recoverExpectedArtifactContent` call could
* silently surface a STALE on-disk artifact from a prior writer run (no
* mtime check). By throwing the original "no workflow() call" error when
* disk recovery fails, we preserve the freshness guarantee
* `recoverArtifactFromTruncatedOutput` enforces via `writerInvokedAtMs`.
*
* After a successful disk read, re-validates the recovered content + the
* fence metadata via the full `validateFencedResponse` pipeline — so
* metadata-level issues (missing fields, mismatched `path`) still surface
* even though the on-disk content saved us from the placeholder symptom.
*/
function tryFencedResponseOrDiskRecovery(
tsFence: string,
metadata: Record<string, unknown>,
expectedPath: string,
options: PersonaResponseParseOptions,
): ParsedPersonaResponse {
try {
return validateFencedResponse(tsFence, metadata, expectedPath);
} catch (error) {
if (!(error instanceof WorkforcePersonaWriterError)) throw error;
if (!/does not call workflow\(\)/.test(error.message)) throw error;
const recovered = recoverArtifactFromTruncatedOutput(expectedPath, options);
if (!recovered) throw error;
let parsed: ParsedPersonaResponse;
try {
parsed = validateFencedResponse(recovered, metadata, expectedPath);
} catch (recoveredError) {
// If the on-disk file *also* lacks `workflow(`, surface the original
// error — the writer genuinely produced an invalid artifact. For any
// other validation failure (bad metadata, mismatched path), let it
// bubble up so the caller sees the real symptom.
if (
recoveredError instanceof WorkforcePersonaWriterError &&
/does not call workflow\(\)/.test(recoveredError.message)
) {
throw error;
}
throw recoveredError;
}
return {
...parsed,
metadata: { ...parsed.metadata, recoveredFromDisk: true, reason: 'fenced-ts-placeholder' },
};
}
}

function validateArtifactContent(content: string): void {
if (!/\bworkflow\(/.test(content)) {
throw new WorkforcePersonaWriterError('Workforce persona artifact does not call workflow().');
Expand Down
Loading