Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions src/product/generation/workforce-persona-writer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,90 @@ describe('workforce persona workflow writer', () => {
}
});

describe('truncated-stdout disk fallback', () => {
// The claude CLI buffers a full --print response then emits it; for
// large workflow artifacts the LLM's max_output_tokens cap kicks in
// and the JSON gets cut off mid-emit. The writer typically *did*
// succeed at writing the workflow to disk via the Write tool (now that
// bypass-permissions makes that reliable in headless mode), so the
// parser should fall back to reading the file rather than hard-failing.

const truncatedJsonOutput = [
'I need to author a workflow that ships this PR. Let me produce the structured JSON response.',
'',
'```json',
'{',
' "artifact": {',
' "path": "workflows/generated/persona.ts",',
' "language": "typescript",',
// cut off here — no `content` field, no closing braces
].join('\n');

it('recovers from truncated stdout by reading a fresh file at expectedPath', () => {
const artifactPath = 'workflows/generated/persona.ts';
const writerInvokedAtMs = 100;
const parsed = parsePersonaWorkflowResponse(truncatedJsonOutput, artifactPath, {
repoRoot: '/tmp/repo',
writerInvokedAtMs,
statFile: (path) => path.endsWith('persona.ts') ? { mtimeMs: writerInvokedAtMs + 1_000 } : undefined,
readFileText: () => workflowSource(),
});
expect(parsed.responseFormat).toBe('structured-json');
expect(parsed.content).toContain('workflow("persona")');
expect(parsed.metadata).toMatchObject({ recoveredFromDisk: true });
});

it('does NOT fall back to a STALE file (mtime older than writerInvokedAtMs)', () => {
const artifactPath = 'workflows/generated/persona.ts';
const writerInvokedAtMs = 1_000;
expect(() =>
parsePersonaWorkflowResponse(truncatedJsonOutput, artifactPath, {
repoRoot: '/tmp/repo',
writerInvokedAtMs,
statFile: () => ({ mtimeMs: writerInvokedAtMs - 1 }),
readFileText: () => workflowSource(),
}),
).toThrow(/structured JSON or include fenced TypeScript artifact/);
});

it('does NOT fall back when `writerInvokedAtMs` is absent (preserves prior behavior for callers that have not opted in)', () => {
const artifactPath = 'workflows/generated/persona.ts';
expect(() =>
parsePersonaWorkflowResponse(truncatedJsonOutput, artifactPath, {
repoRoot: '/tmp/repo',
// writerInvokedAtMs intentionally omitted
statFile: () => ({ mtimeMs: Date.now() }),
readFileText: () => workflowSource(),
}),
).toThrow(/structured JSON or include fenced TypeScript artifact/);
});

it('does NOT fall back when no file exists at expectedPath (stat returns undefined)', () => {
const artifactPath = 'workflows/generated/persona.ts';
expect(() =>
parsePersonaWorkflowResponse(truncatedJsonOutput, artifactPath, {
repoRoot: '/tmp/repo',
writerInvokedAtMs: 100,
statFile: () => undefined,
readFileText: () => workflowSource(),
}),
).toThrow(/structured JSON or include fenced TypeScript artifact/);
});

it('does NOT fall back when the on-disk file fails structural validation (re-raises original parser error)', () => {
const artifactPath = 'workflows/generated/persona.ts';
expect(() =>
parsePersonaWorkflowResponse(truncatedJsonOutput, artifactPath, {
repoRoot: '/tmp/repo',
writerInvokedAtMs: 100,
statFile: () => ({ mtimeMs: 200 }),
// File on disk is not a valid workflow (no `workflow(` call) — fallback should refuse it.
readFileText: () => 'export const broken = 1;',
}),
).toThrow(/structured JSON or include fenced TypeScript artifact/);
});
});

it('invokes the spawned harness non-interactively (no TUI flag, structured-response contract)', async () => {
const base = generate({
spec: spec({
Expand Down
91 changes: 90 additions & 1 deletion src/product/generation/workforce-persona-writer.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { createHash } from 'node:crypto';
import { readFileSync } from 'node:fs';
import { readFileSync, statSync } from 'node:fs';
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import { dirname, isAbsolute, join, resolve } from 'node:path';

Expand Down Expand Up @@ -158,6 +158,25 @@ export interface WorkforcePersonaWriterResult {
export interface PersonaResponseParseOptions {
repoRoot?: string;
readFileText?: (path: string) => string;
/**
* Optional `Date.now()`-style ms timestamp captured *before* the writer
* subprocess was spawned. When set, the parser's "stdout was truncated
* but a file exists on disk at expectedPath" fallback only fires for a
* file whose mtime is newer than this — i.e. one this writer call
* produced — so the parser cannot accidentally accept a stale leftover
* artifact from a prior attempt as the current writer's output.
*
* Without this timestamp the parser treats the file as untrusted and
* skips the disk fallback (preserving prior behavior).
*/
writerInvokedAtMs?: number;
/**
* Optional stat-by-path used by the disk fallback. Defaults to
* `fs.statSync(path)` swallowing ENOENT to `undefined`. Override for
* deterministic tests of the freshness check without touching the real
* filesystem.
*/
statFile?: (path: string) => { mtimeMs: number } | undefined;
}

export interface ResolvedWorkforcePersonaContext {
Expand Down Expand Up @@ -227,6 +246,13 @@ export async function writeWorkflowWithWorkforcePersona(
});
const promptDigest = digest(task);
const selection = resolved.context.selection;
// Pin "writer invoked at" immediately before the spawn so the parser's
// disk fallback can prove a file at outputPath was actually written by
// THIS sendMessage window (mtime > this timestamp) rather than left
// over from a prior attempt. Capturing earlier — e.g. before the
// resolver and task-builder awaits — opens a multi-second window in
// which a stale leftover could falsely satisfy the freshness check.
const writerInvokedAtMs = Date.now();
const run = resolved.context.sendMessage(task, {
workingDirectory: options.repoRoot,
name: `ricky-workflow-writer-${promptDigest.slice(0, 12)}`,
Expand Down Expand Up @@ -275,6 +301,7 @@ export async function writeWorkflowWithWorkforcePersona(
try {
parsed = parsePersonaWorkflowResponse(result.output, options.outputPath, {
repoRoot: options.repoRoot,
writerInvokedAtMs: writerInvokedAtMs,
});
} catch (error) {
await dumpDebug('parse-error');
Expand Down Expand Up @@ -955,11 +982,73 @@ export function parsePersonaWorkflowResponse(
return validateStructuredResponse(embedded, expectedPath, 'structured-json', options);
}

// Last-resort disk fallback. claude --print buffers a full response then
// returns; for large workflow artifacts the LLM's max_output_tokens cap
// kicks in and the JSON gets cut off mid-emit (observed: 38KB of valid
// JSON ending at `"language": "typescript",` with no closing braces). In
// that case the writer typically *did* succeed at writing the workflow
// source to disk via the Write tool — there's just no parseable stdout
// wrapper for the parser to read. Trust the file on disk only when (a)
// the caller provided `writerInvokedAtMs` so we can prove the file is
// fresh, and (b) the file's mtime is strictly after that timestamp.
const recovered = recoverArtifactFromTruncatedOutput(expectedPath, options);
if (recovered) {
try {
validateArtifactContent(recovered);
return {
content: recovered,
metadata: { recoveredFromDisk: true, reason: 'stdout-truncated-or-unparseable' },
responseFormat: 'structured-json',
};
} catch {
// Recovered content failed structural validation; fall through to
// the original parser-failure error so the caller sees the real
// truncation symptom rather than a misleading "no workflow()" error.
}
}

throw new WorkforcePersonaWriterError(
'Workforce persona response must be structured JSON or include fenced TypeScript artifact and JSON metadata blocks.',
);
}

/**
* Reads the artifact from disk as a last-resort fallback when the writer's
* stdout could not be parsed. Only fires when the caller pinned a
* `writerInvokedAtMs` timestamp AND the file at `expectedPath` was modified
* strictly after it — proving the file is the current writer's output, not
* a stale leftover from a prior attempt.
*/
function recoverArtifactFromTruncatedOutput(
expectedPath: string,
options: PersonaResponseParseOptions,
): string | undefined {
if (!options.repoRoot || options.writerInvokedAtMs === undefined) return undefined;
const expectedResolved = isAbsolute(expectedPath)
? expectedPath
: resolve(options.repoRoot, expectedPath);

const statFn = options.statFile ?? defaultStatFile;
const stat = statFn(expectedResolved);
if (!stat) return undefined;
if (stat.mtimeMs <= options.writerInvokedAtMs) return undefined;

try {
return options.readFileText?.(expectedResolved) ?? readFileSync(expectedResolved, 'utf8');
} catch {
return undefined;
}
}

function defaultStatFile(path: string): { mtimeMs: number } | undefined {
try {
const stats = statSync(path);
return { mtimeMs: stats.mtimeMs };
} catch {
return undefined;
}
}

/**
* Walks `text` looking for the first top-level balanced `{ ... }` object
* and parses it. String literals (including nested escape sequences) and
Expand Down
Loading