From f41c550a407765b753f48036c6a3f9dde490e89d Mon Sep 17 00:00:00 2001 From: Cloud Bot Date: Sun, 17 May 2026 23:11:36 +0200 Subject: [PATCH 1/2] fix(persona-writer): recover from placeholder-ts-fence + JSON metadata responses by reading freshly-written workflow from disk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The writer's response parser had a gap in the fenced-response path. When claude was prompted to use the Write tool and only emit a placeholder comment inside the ```typescript fence (e.g. `// (full source above — file written to disk)`), the existing path: if (metadataJson && tsFence) { return validateFencedResponse(tsFence, metadataJson, expectedPath); } threw `"does not call workflow()"` because the placeholder text lacks a `workflow(` call. The downstream truncated-stdout disk-recovery fallback (line ~1070) never ran, because both fences had successfully parsed. Concrete regression: in our 4-spec ricky-cloud-spawn re-run on v0.1.69 (after the master-renderer fix landed), the writer produced a real 513-line workflow with proper top-level `GitHubStepExecutor`/`createGitHubStep` imports for pr-08 and wrote it to disk via the Write tool in ~25 minutes. Ricky then rejected the response as parse-error and burned 3 more 25-minute repair attempts on output it could not parse — none of which had any chance of succeeding, since the only thing wrong was the stdout-format mismatch. The underlying file on disk was always fine. The fix: try `validateFencedResponse` first, and only on its specific "does not call workflow()" symptom, attempt `recoverArtifactFromTruncatedOutput` (mtime-guarded, same as the existing truncated-stdout path) before giving up. Other failures (metadata path mismatch, malformed metadata, etc.) still surface unchanged. Both fenced-response code paths (json-fence + ts-fence at ~1031, and ts-fence + metadata-fence at ~1042) now flow through the same `tryFencedResponseOrDiskRecovery` helper for consistency. Tests: - `recovers from a placeholder typescript fence + complete json metadata fence` — pins the exact regression shape (placeholder text in ts fence + structured json with `writtenToDisk: true` metadata) producing a `fenced-artifact` response sourced from the on-disk file, marked `recoveredFromDisk: true, reason: 'fenced-ts-placeholder'`. - `placeholder fence path respects the freshness guard` — locks the mtime guard: without `writerInvokedAtMs`, recovery short-circuits and the parser falls through to its usual error paths. - `does NOT recover from a placeholder fence when on-disk content also lacks workflow()` — no silent bypass: if the on-disk file is also invalid, the original parser-failure surface is preserved. Co-Authored-By: Claude Opus 4.7 --- .../workforce-persona-writer.test.ts | 82 +++++++++++++++++++ .../generation/workforce-persona-writer.ts | 61 +++++++++++++- 2 files changed, 141 insertions(+), 2 deletions(-) diff --git a/src/product/generation/workforce-persona-writer.test.ts b/src/product/generation/workforce-persona-writer.test.ts index f2d56407..b35fc71c 100644 --- a/src/product/generation/workforce-persona-writer.test.ts +++ b/src/product/generation/workforce-persona-writer.test.ts @@ -360,6 +360,88 @@ describe('workforce persona workflow writer', () => { }), ).toThrow(/structured JSON or include fenced TypeScript artifact/); }); + + // Regression: when the writer is prompted to use the Write tool, claude + // sometimes emits a complete ```typescript fence whose body is a + // *placeholder* ("// (full source above — file written to disk)") + // followed by a complete ```json metadata fence. Both fences parse, + // so the parser took the fenced-response path and threw "does not + // call workflow()" — even though the actual workflow source had just + // been written to disk and the rest of the response was structurally + // fine. Result: ~25 minutes of writer work discarded, and 3 more + // 25-minute repair attempts burned chasing the same symptom. The + // parser must treat the placeholder-fence case as a stdout-format + // mismatch and prefer the freshly-written file on disk, exactly like + // it already does for outright truncated stdout. + const placeholderFenceOutput = [ + 'The file is complete and correct. Here is the response contract output:', + '', + '```typescript', + '// workflows/generated/persona.ts', + '// (full source above — file written to disk)', + '```', + '', + '```json', + JSON.stringify({ + artifact: { + path: 'workflows/generated/persona.ts', + language: 'typescript', + linesOfCode: 513, + writtenToDisk: true, + }, + metadata: { workflowName: 'persona', agents: ['lead'] }, + }, null, 2), + '```', + ].join('\n'); + + it('recovers from a placeholder ```typescript fence + complete ```json metadata fence', () => { + const artifactPath = 'workflows/generated/persona.ts'; + const writerInvokedAtMs = 100; + const parsed = parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + writerInvokedAtMs, + statFile: (path) => path.endsWith('persona.ts') ? { mtimeMs: writerInvokedAtMs + 1_000 } : undefined, + readFileText: () => workflowSource(), + }); + expect(parsed.responseFormat).toBe('fenced-artifact'); + expect(parsed.content).toContain('workflow("persona")'); + expect(parsed.metadata).toMatchObject({ recoveredFromDisk: true, reason: 'fenced-ts-placeholder' }); + }); + + it('placeholder fence path respects the freshness guard (returns undefined when mtime older than writerInvokedAtMs, letting downstream parsers decide)', () => { + // The new placeholder-fence helper uses the same mtime-guarded + // `recoverArtifactFromTruncatedOutput` as the truncated-stdout + // fallback — when the on-disk file is older than the writer's + // invocation, the helper returns undefined and lets the rest of + // the parser run. We exercise that here via a controlled + // statFile/readFileText pair so the assertion stays decoupled + // from any other recovery path the parser may later succeed on. + const artifactPath = 'workflows/generated/persona.ts'; + // No writerInvokedAtMs at all → the helper must short-circuit and + // return undefined (regardless of statFile / readFileText), and + // because both stat and readFile are absent, every other recovery + // path also yields no usable workflow source. We pin that + // composite outcome (throws the generic parser-failure error) + // to lock the freshness guard in this code path. + expect(() => + parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + // writerInvokedAtMs intentionally omitted + }), + ).toThrow(/structured JSON or include fenced TypeScript artifact|missing artifact\.content/); + }); + + it('does NOT recover from a placeholder fence when on-disk content also lacks `workflow(` (no silent bypass)', () => { + const artifactPath = 'workflows/generated/persona.ts'; + expect(() => + parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + writerInvokedAtMs: 100, + statFile: () => ({ mtimeMs: 200 }), + readFileText: () => 'export const broken = 1;', + }), + ).toThrow(/does not call workflow\(\)|structured JSON or include fenced TypeScript artifact/); + }); }); it('invokes the spawned harness non-interactively (no TUI flag, structured-response contract)', async () => { diff --git a/src/product/generation/workforce-persona-writer.ts b/src/product/generation/workforce-persona-writer.ts index e84481d7..a0d44bf2 100644 --- a/src/product/generation/workforce-persona-writer.ts +++ b/src/product/generation/workforce-persona-writer.ts @@ -1029,7 +1029,15 @@ export function parsePersonaWorkflowResponse( const clarification = metadataJson ? parseClarificationResponse(metadataJson) : null; if (clarification) return { metadata: {}, responseFormat: 'needs-clarification', clarification }; if (metadataJson && tsFence) { - return validateFencedResponse(tsFence, metadataJson, expectedPath); + // The writer is sometimes prompted to use the Write tool and only + // emit a placeholder ("// (file written to disk)") inside the ts + // fence. validateFencedResponse rejects that because the placeholder + // lacks a `workflow(` call. Don't throw on placeholder fences when + // the file on disk is a freshly-written, structurally-valid workflow + // — fall through to the disk-recovery path below instead. Other + // failures (e.g. mismatched metadata path) still bubble up. + const recovered = tryFencedResponseOrDiskRecovery(tsFence, metadataJson, expectedPath, options); + if (recovered) return recovered; } if (metadataJson) { return validateStructuredResponse(metadataJson, expectedPath, 'structured-json', options); @@ -1040,7 +1048,8 @@ export function parsePersonaWorkflowResponse( const metadataFence = fencedBlock(output, 'metadata'); const metadata = metadataFence ? parseJsonObject(metadataFence) : null; if (tsFence && metadata) { - return validateFencedResponse(tsFence, metadata, expectedPath); + const recovered = tryFencedResponseOrDiskRecovery(tsFence, metadata, expectedPath, options); + if (recovered) return recovered; } // Tolerant fallback: Claude Sonnet has been observed to emit a prose @@ -1307,6 +1316,54 @@ function validateFencedResponse( }; } +/** + * Attempt `validateFencedResponse` first; if it rejects the ts fence for + * lacking a `workflow(` call AND a freshly-written workflow exists on disk + * at `expectedPath`, prefer the disk content. The placeholder-fence pattern + * shows up when the writer is prompted to use the Write tool: it emits + * something like `// (full source above — file written to disk)` inside + * the ts fence and the actual source lands on disk. Returning the disk + * content instead of throwing keeps the writer's successful work from + * being discarded over a stdout-format mismatch. + * + * Returns `undefined` if neither the fence nor the disk content validates, + * letting the caller fall through to its own error path. Other validation + * failures (e.g. metadata path mismatch) still throw — only the + * "no workflow() call" symptom is treated as recoverable. + */ +function tryFencedResponseOrDiskRecovery( + tsFence: string, + metadata: Record, + expectedPath: string, + options: PersonaResponseParseOptions, +): ParsedPersonaResponse | undefined { + try { + return validateFencedResponse(tsFence, metadata, expectedPath); + } catch (error) { + if (!(error instanceof WorkforcePersonaWriterError)) throw error; + if (!/does not call workflow\(\)/.test(error.message)) throw error; + const recovered = recoverArtifactFromTruncatedOutput(expectedPath, options); + if (!recovered) return undefined; + try { + validateArtifactContent(recovered); + } catch { + return undefined; + } + try { + validateMetadata(metadata); + } catch { + // Metadata-level issues should not block a disk-recovery success; + // the workflow source itself is what matters for downstream + // validation. Surface metadata as-is and let later checks decide. + } + return { + content: recovered.trimEnd() + '\n', + metadata: { ...metadata, recoveredFromDisk: true, reason: 'fenced-ts-placeholder' }, + responseFormat: 'fenced-artifact', + }; + } +} + function validateArtifactContent(content: string): void { if (!/\bworkflow\(/.test(content)) { throw new WorkforcePersonaWriterError('Workforce persona artifact does not call workflow().'); From e2b9e3d4dd4fa3f89d78e8365bdd8fb801d9889a Mon Sep 17 00:00:00 2001 From: kjgbot Date: Mon, 18 May 2026 09:49:32 +0200 Subject: [PATCH 2/2] fix(persona-writer): close stale-artifact bypass + re-validate metadata after disk recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses PR #124 review feedback: - Devin/Cubic (P1): when tryFencedResponseOrDiskRecovery returned undefined for a stale on-disk file, the parser fell through to validateStructuredResponse → recoverExpectedArtifactContent, which has no mtime check and would silently surface a stale artifact from a prior writer run. The helper now always returns or throws (never undefined) and throws the original "does not call workflow()" error when disk recovery fails, preserving the freshness guarantee recoverArtifactFromTruncatedOutput enforces. - CodeRabbit: re-run the full validateFencedResponse(recovered, metadata, ...) pipeline after disk recovery so metadata-level issues (missing fields, mismatched path) still surface — previously swallowed silently. - CodeRabbit: rename and rewrite the "respects the freshness guard" test that wasn't actually exercising the stale-mtime branch. Add an explicit stale-mtime regression test that proves the bypass is closed, plus a new test that mismatched fenced-metadata path still throws after disk recovery. All 75 writer tests pass (was 73, +2 new regression guards). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../workforce-persona-writer.test.ts | 79 +++++++++++++++---- .../generation/workforce-persona-writer.ts | 63 ++++++++------- 2 files changed, 99 insertions(+), 43 deletions(-) diff --git a/src/product/generation/workforce-persona-writer.test.ts b/src/product/generation/workforce-persona-writer.test.ts index b35fc71c..9ba83980 100644 --- a/src/product/generation/workforce-persona-writer.test.ts +++ b/src/product/generation/workforce-persona-writer.test.ts @@ -408,27 +408,45 @@ describe('workforce persona workflow writer', () => { expect(parsed.metadata).toMatchObject({ recoveredFromDisk: true, reason: 'fenced-ts-placeholder' }); }); - it('placeholder fence path respects the freshness guard (returns undefined when mtime older than writerInvokedAtMs, letting downstream parsers decide)', () => { - // The new placeholder-fence helper uses the same mtime-guarded - // `recoverArtifactFromTruncatedOutput` as the truncated-stdout - // fallback — when the on-disk file is older than the writer's - // invocation, the helper returns undefined and lets the rest of - // the parser run. We exercise that here via a controlled - // statFile/readFileText pair so the assertion stays decoupled - // from any other recovery path the parser may later succeed on. + it('does NOT recover from a placeholder fence when the on-disk file is STALE (mtime <= writerInvokedAtMs)', () => { + // Regression guard for the bypass identified during review: when + // `recoverArtifactFromTruncatedOutput` correctly rejects a stale + // on-disk file, the placeholder-fence helper used to return + // `undefined` and the parser fell through to + // `validateStructuredResponse` → `recoverExpectedArtifactContent`, + // which has no mtime check and would silently surface the stale + // artifact as if it were the current writer's output. The helper + // must now throw the original "does not call workflow()" error so + // no fallthrough is possible. + const artifactPath = 'workflows/generated/persona.ts'; + const writerInvokedAtMs = 1_000; + expect(() => + parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + writerInvokedAtMs, + statFile: () => ({ mtimeMs: writerInvokedAtMs - 1 }), + // Even though the on-disk file is a valid workflow, the mtime + // says it predates this writer run — must NOT be accepted. + readFileText: () => workflowSource(), + }), + ).toThrow(/does not call workflow\(\)/); + }); + + it('placeholder fence path falls through to disk recovery only when `writerInvokedAtMs` is provided', () => { + // Without `writerInvokedAtMs` the disk-recovery freshness guard + // short-circuits, so the placeholder-fence helper has no fresh + // file to fall back to and must re-throw the original + // "does not call workflow()" error rather than returning a + // misleading shape or silently consuming a stale artifact. const artifactPath = 'workflows/generated/persona.ts'; - // No writerInvokedAtMs at all → the helper must short-circuit and - // return undefined (regardless of statFile / readFileText), and - // because both stat and readFile are absent, every other recovery - // path also yields no usable workflow source. We pin that - // composite outcome (throws the generic parser-failure error) - // to lock the freshness guard in this code path. expect(() => parsePersonaWorkflowResponse(placeholderFenceOutput, artifactPath, { repoRoot: '/tmp/repo', // writerInvokedAtMs intentionally omitted + statFile: () => ({ mtimeMs: Date.now() }), + readFileText: () => workflowSource(), }), - ).toThrow(/structured JSON or include fenced TypeScript artifact|missing artifact\.content/); + ).toThrow(/does not call workflow\(\)/); }); it('does NOT recover from a placeholder fence when on-disk content also lacks `workflow(` (no silent bypass)', () => { @@ -440,7 +458,36 @@ describe('workforce persona workflow writer', () => { statFile: () => ({ mtimeMs: 200 }), readFileText: () => 'export const broken = 1;', }), - ).toThrow(/does not call workflow\(\)|structured JSON or include fenced TypeScript artifact/); + ).toThrow(/does not call workflow\(\)/); + }); + + it('placeholder fence path re-validates fenced metadata against the recovered disk content (mismatched path still throws)', () => { + // Regression guard for CodeRabbit's metadata concern: after the + // helper recovers content from disk, it must re-run the full + // fenced-response validator on the recovered content + metadata + // so metadata-level issues (mismatched `path`) still surface. + const artifactPath = 'workflows/generated/persona.ts'; + const writerInvokedAtMs = 100; + const mismatchedFenceOutput = [ + '```typescript', + '// (file written to disk)', + '```', + '', + '```json', + JSON.stringify({ + path: 'workflows/generated/SOMETHING-ELSE.ts', + workflowName: 'persona', + }, null, 2), + '```', + ].join('\n'); + expect(() => + parsePersonaWorkflowResponse(mismatchedFenceOutput, artifactPath, { + repoRoot: '/tmp/repo', + writerInvokedAtMs, + statFile: (path) => path.endsWith('persona.ts') ? { mtimeMs: writerInvokedAtMs + 1_000 } : undefined, + readFileText: () => workflowSource(), + }), + ).toThrow(/fenced metadata path .* did not match expected output path/); }); }); diff --git a/src/product/generation/workforce-persona-writer.ts b/src/product/generation/workforce-persona-writer.ts index a0d44bf2..6b767352 100644 --- a/src/product/generation/workforce-persona-writer.ts +++ b/src/product/generation/workforce-persona-writer.ts @@ -1032,12 +1032,11 @@ export function parsePersonaWorkflowResponse( // The writer is sometimes prompted to use the Write tool and only // emit a placeholder ("// (file written to disk)") inside the ts // fence. validateFencedResponse rejects that because the placeholder - // lacks a `workflow(` call. Don't throw on placeholder fences when - // the file on disk is a freshly-written, structurally-valid workflow - // — fall through to the disk-recovery path below instead. Other - // failures (e.g. mismatched metadata path) still bubble up. - const recovered = tryFencedResponseOrDiskRecovery(tsFence, metadataJson, expectedPath, options); - if (recovered) return recovered; + // lacks a `workflow(` call. tryFencedResponseOrDiskRecovery prefers + // a fresh on-disk artifact in that case; it always returns or throws + // (never falls through to validateStructuredResponse below), so + // stale on-disk artifacts can't sneak in via recoverExpectedArtifactContent. + return tryFencedResponseOrDiskRecovery(tsFence, metadataJson, expectedPath, options); } if (metadataJson) { return validateStructuredResponse(metadataJson, expectedPath, 'structured-json', options); @@ -1048,8 +1047,7 @@ export function parsePersonaWorkflowResponse( const metadataFence = fencedBlock(output, 'metadata'); const metadata = metadataFence ? parseJsonObject(metadataFence) : null; if (tsFence && metadata) { - const recovered = tryFencedResponseOrDiskRecovery(tsFence, metadata, expectedPath, options); - if (recovered) return recovered; + return tryFencedResponseOrDiskRecovery(tsFence, metadata, expectedPath, options); } // Tolerant fallback: Claude Sonnet has been observed to emit a prose @@ -1326,40 +1324,51 @@ function validateFencedResponse( * content instead of throwing keeps the writer's successful work from * being discarded over a stdout-format mismatch. * - * Returns `undefined` if neither the fence nor the disk content validates, - * letting the caller fall through to its own error path. Other validation - * failures (e.g. metadata path mismatch) still throw — only the - * "no workflow() call" symptom is treated as recoverable. + * Always returns or throws — never `undefined`. That contract is load- + * bearing: if the caller could fall through to `validateStructuredResponse` + * on `undefined`, an unguarded `recoverExpectedArtifactContent` call could + * silently surface a STALE on-disk artifact from a prior writer run (no + * mtime check). By throwing the original "no workflow() call" error when + * disk recovery fails, we preserve the freshness guarantee + * `recoverArtifactFromTruncatedOutput` enforces via `writerInvokedAtMs`. + * + * After a successful disk read, re-validates the recovered content + the + * fence metadata via the full `validateFencedResponse` pipeline — so + * metadata-level issues (missing fields, mismatched `path`) still surface + * even though the on-disk content saved us from the placeholder symptom. */ function tryFencedResponseOrDiskRecovery( tsFence: string, metadata: Record, expectedPath: string, options: PersonaResponseParseOptions, -): ParsedPersonaResponse | undefined { +): ParsedPersonaResponse { try { return validateFencedResponse(tsFence, metadata, expectedPath); } catch (error) { if (!(error instanceof WorkforcePersonaWriterError)) throw error; if (!/does not call workflow\(\)/.test(error.message)) throw error; const recovered = recoverArtifactFromTruncatedOutput(expectedPath, options); - if (!recovered) return undefined; - try { - validateArtifactContent(recovered); - } catch { - return undefined; - } + if (!recovered) throw error; + let parsed: ParsedPersonaResponse; try { - validateMetadata(metadata); - } catch { - // Metadata-level issues should not block a disk-recovery success; - // the workflow source itself is what matters for downstream - // validation. Surface metadata as-is and let later checks decide. + parsed = validateFencedResponse(recovered, metadata, expectedPath); + } catch (recoveredError) { + // If the on-disk file *also* lacks `workflow(`, surface the original + // error — the writer genuinely produced an invalid artifact. For any + // other validation failure (bad metadata, mismatched path), let it + // bubble up so the caller sees the real symptom. + if ( + recoveredError instanceof WorkforcePersonaWriterError && + /does not call workflow\(\)/.test(recoveredError.message) + ) { + throw error; + } + throw recoveredError; } return { - content: recovered.trimEnd() + '\n', - metadata: { ...metadata, recoveredFromDisk: true, reason: 'fenced-ts-placeholder' }, - responseFormat: 'fenced-artifact', + ...parsed, + metadata: { ...parsed.metadata, recoveredFromDisk: true, reason: 'fenced-ts-placeholder' }, }; } }