From f2447282c5bde2c6571ab997b10bb75e68d34863 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Thu, 7 May 2026 16:48:13 +0200 Subject: [PATCH 1/4] feat(auto-fix): add code-drift repair path for verify-style failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, when a workflow fails because it has detected drift between target source code and an external reference (provider docs, schema, etc.), ricky's auto-fix loop dispatches workforce-persona-repairer to patch the workflow file — the wrong target. The workflow already worked; the source code under inspection is what needs fixing. This commit adds a parallel auto-fix path that: 1. Detects "code drift" failures by scanning for structured drift reports under /.workflow-artifacts/**/*-drift.json with verdict === 'DRIFT' and at least one blocker- or major-severity finding. Reports are validated against a generic schema (verdict + findings shape) and freshness-checked against the run start time. 2. Dispatches a new repairer (repairCodeFromDriftArtifacts) that builds a code-fix prompt from the report content and sends it via the existing workforce-persona resolver. The agent edits target source files in place via its harness; ricky then retries the workflow normally, which re-validates by re-running the same gates that previously caught the drift. 3. Falls back to the existing workflow-repair path if the agent returns applied=false or throws — drift detection is convention- based, and a false positive should not block workflow repair. The dispatch is preferred over workflow repair: when both apply (drift artifacts present AND a workflow artifact resolves), code-drift wins. This matches the intent: verify-style workflows are designed to surface findings about target code, not about themselves. Detection design: - Convention-based: any workflow that writes drift-shaped JSON gets auto-repair for free (no per-workflow opt-in needed). 
- Schema-validated: file must parse with verdict + findings shape. - Freshness-checked: artifact mtime must be >= runStartTimeMs to ignore stale reports from prior runs. The new file (code-drift-repairer.ts, ~380 lines) mirrors the shape of workforce-persona-repairer.ts but with a code-fix prompt and a sentinel-based completion contract (CODE_DRIFT_REPAIR_COMPLETE) — the agent edits files in place rather than returning a workflow artifact for ricky to write. Default severity threshold: blocker + major. Minor findings are skipped unless trivial to bundle in. (Future: configurable per workflow.) The auto-fix-loop change is additive: existing workflow-repair behavior is unchanged when no drift artifacts are present. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/local/auto-fix-loop.ts | 81 +++++++ src/local/code-drift-repairer.ts | 382 +++++++++++++++++++++++++++++++ 2 files changed, 463 insertions(+) create mode 100644 src/local/code-drift-repairer.ts diff --git a/src/local/auto-fix-loop.ts b/src/local/auto-fix-loop.ts index 14680f80..f0ed69e0 100644 --- a/src/local/auto-fix-loop.ts +++ b/src/local/auto-fix-loop.ts @@ -13,6 +13,13 @@ import type { DebuggerResult } from '../product/specialists/debugger/types.js'; import type { WorkflowRunEvidence, WorkflowStepEvidence } from '../shared/models/workflow-evidence.js'; import { repairWorkflowWithWorkforcePersona } from '../product/generation/workforce-persona-repairer.js'; import type { WorkforcePersonaRepairAttempt } from '../product/generation/workforce-persona-repairer.js'; +import { + discoverDriftReports, + repairCodeFromDriftArtifacts, + type CodeDriftRepairOptions, + type CodeDriftRepairResult, + type CodeDriftTarget, +} from './code-drift-repairer.js'; import { localRunStateRoot } from '../shared/state-paths.js'; export interface AutoFixAttemptSummary { @@ -73,6 +80,14 @@ export interface RunWithAutoFixOptions { classification: FailureClassification; }) => DebuggerResult; workflowRepairer?: (input: 
WorkflowRepairInput) => Promise; + /** + * Optional repairer for "code drift" failures — when the workflow has + * generated structured drift reports under `.workflow-artifacts/**\/*-drift.json` + * indicating that target source code does not match an external reference. + * If unset, defaults to `repairCodeFromDriftArtifacts`. Dispatched before + * the workflow repairer when discoverable drift reports are present. + */ + codeDriftRepairer?: (options: CodeDriftRepairOptions) => Promise; artifactWriter?: (artifactPath: string, content: string, cwd: string) => Promise; repairRunner?: (command: string, cwd: string) => Promise<{ exitCode: number }>; sleep?: (ms: number) => Promise; @@ -89,6 +104,7 @@ export async function runWithAutoFix( const classifyFailure = options.classifyFailure ?? defaultClassifyFailure; const debugWorkflowRun = options.debugWorkflowRun ?? defaultDebugWorkflowRun; const workflowRepairer = options.workflowRepairer ?? defaultWorkflowRepairer; + const codeDriftRepairer = options.codeDriftRepairer ?? repairCodeFromDriftArtifacts; const artifactWriter = options.artifactWriter ?? writeWorkflowArtifact; const repairRunner = options.repairRunner ?? runShellCommand; const sleep = options.sleep ?? ((ms: number) => new Promise((resolve) => setTimeout(resolve, ms))); @@ -97,6 +113,9 @@ export async function runWithAutoFix( const previousRepairAttempts: WorkforcePersonaRepairAttempt[] = []; const warnings: string[] = []; const trackingRunId = resolveTrackingRunId(request) ?? `ricky-local-${randomUUID()}`; + // Used to ignore stale drift artifacts from prior runs when scanning for + // code-drift repair targets. Captured before the first attempt fires. 
+ const runStartTimeMs = Date.now(); let currentRequest: LocalInvocationRequest = { ...request, autoFix: undefined }; let lastResponse: LocalResponse | undefined; let retryOfRunId: string | undefined; @@ -174,6 +193,68 @@ export async function runWithAutoFix( }); pendingRepairAttempt = undefined; } + + // Code-drift dispatch — preferred over workflow repair when the + // workflow has emitted structured drift reports under + // .workflow-artifacts/**\/*-drift.json indicating that target source + // code (not the workflow itself) is what's wrong. Falls through to + // workflow repair if no actionable reports are found. + const codeDriftCwd = response.execution?.execution.cwd ?? request.invocationRoot ?? process.cwd(); + const driftReports = await discoverDriftReports(codeDriftCwd, runStartTimeMs); + if (driftReports) { + const driftTarget: CodeDriftTarget = { cwd: codeDriftCwd, reports: driftReports }; + try { + onProgress?.(`Ricky is fixing target code (${driftReports.length} drift report${driftReports.length === 1 ? '' : 's'})...`); + const driftRepair = await codeDriftRepairer({ + target: driftTarget, + attempt, + maxAttempts, + ...(failedStep ? { failedStep } : {}), + ...(runId ? { previousRunId: runId } : {}), + }); + if (driftRepair.applied) { + attemptSummary.applied_fix = { + mode: 'code-drift', + reports: driftReports.map((r) => r.filePath), + summary: driftRepair.summary, + ...(driftRepair.runId ? { persona_run_id: driftRepair.runId } : {}), + }; + warnings.push(...(driftRepair.warnings ?? [])); + if (!runId) { + const warning = 'Auto-fix retry could not resolve a previous run id; retrying without step-level resume.'; + attemptSummary.warning = warning; + warnings.push(warning); + } else if (!retryOfRunId) { + retryOfRunId = runId; + } + // Retry the workflow as-is — the agent has edited the source files + // in place. No workflow artifact was rewritten. 
+ currentRequest = { + ...retryBaseRequest(currentRequest, response), + autoFix: undefined, + retry: { + attempt: attempt + 1, + maxAttempts, + ...(runId ? { previousRunId: runId, retryOfRunId: retryOfRunId ?? runId } : {}), + ...(failedStep ? { startFromStep: failedStep } : {}), + reason: `auto-fix retry after code-drift repair (${driftReports.length} report${driftReports.length === 1 ? '' : 's'})`, + }, + }; + onProgress?.(`Retrying workflow${failedStep ? ` from ${failedStep}` : ''}...`); + continue; + } + // codeDriftRepairer returned applied=false: no-op, fall through to workflow repair. + warnings.push(`Code-drift repairer returned applied=false: ${driftRepair.summary}`); + } catch (error) { + warnings.push(...warningsFromError(error)); + warnings.push( + `Code-drift repair failed; falling back to workflow repair: ${error instanceof Error ? error.message : String(error)}`, + ); + // Fall through to the workflow-repair path below — the failure + // might still be a workflow bug that workforce-persona can fix. + } + } + const repairTarget = await resolveWorkflowRepairTarget(currentRequest, response); if (repairTarget) { diff --git a/src/local/code-drift-repairer.ts b/src/local/code-drift-repairer.ts new file mode 100644 index 00000000..f6f0fdba --- /dev/null +++ b/src/local/code-drift-repairer.ts @@ -0,0 +1,382 @@ +/** + * code-drift-repairer.ts + * + * Auto-fix path for "drift report" workflow failures — i.e. when a workflow + * has *successfully* generated structured findings showing that target code + * doesn't match an external reference (provider docs, schema, etc.) but the + * workflow's final gate exits non-zero because drift exists. 
+ * + * Distinct from workforce-persona-repairer: + * - workforce-persona-repairer: fixes the *workflow artifact itself* + * (treating the failure as a generation bug) + * - code-drift-repairer (this file): fixes the *target code* the workflow + * was inspecting (treating the failure as expected output that requires + * a follow-up code change before retry) + * + * Drift report schema (any file under .workflow-artifacts/**\/*-drift.json): + * { + * verdict: 'PASS' | 'DRIFT', + * findings: Array<{ severity: 'blocker'|'major'|'minor', axis: string, description: string }>, + * expected?: Record, + * actual?: Record, + * sources?: Record, + * ... + * } + * + * Detection (in discoverDriftReports, below): convention-based + schema-validated + * + freshness-checked. Called from runWithAutoFix in auto-fix-loop.ts. + */ + +import { createHash } from 'node:crypto'; +import { readFile } from 'node:fs/promises'; +import { isAbsolute, resolve } from 'node:path'; + +import { + defaultWorkforcePersonaResolver, + WORKFORCE_PERSONA_INTENT_CANDIDATES, + WorkforcePersonaWriterError, + type WorkforcePersonaResolver, + type WorkforcePersonaSendOptions, +} from '../product/generation/workforce-persona-writer.js'; + +export interface DriftFinding { + severity: 'blocker' | 'major' | 'minor'; + axis: string; + description: string; +} + +export interface DriftReport { + /** Path to the .json file on disk (absolute or cwd-relative). */ + filePath: string; + /** Slug or identifier carried by the report (the `slug` field when it is a string; otherwise unset). */ + slug?: string; + verdict: 'PASS' | 'DRIFT'; + findings: DriftFinding[]; + expected?: Record; + actual?: Record; + sources?: Record; + /** Full parsed JSON for the agent prompt. */ + raw: Record; +} + +export interface CodeDriftTarget { + /** Working directory the workflow ran in. */ + cwd: string; + /** Reports with verdict === 'DRIFT' that contain at least one blocker or major finding. 
*/ + reports: DriftReport[]; +} + +export interface CodeDriftRepairOptions { + target: CodeDriftTarget; + attempt: number; + maxAttempts: number; + failedStep?: string; + previousRunId?: string; + timeoutSeconds?: number; + installSkills?: boolean; + installRoot?: string; + tier?: string; + env?: NodeJS.ProcessEnv; + signal?: AbortSignal; + onProgress?: WorkforcePersonaSendOptions['onProgress']; + personaIntentCandidates?: readonly string[]; + resolver?: WorkforcePersonaResolver; +} + +export interface CodeDriftRepairResult { + applied: boolean; + summary: string; + /** The persona/agent run id, when the underlying harness exposes one. */ + runId?: string | null; + /** Slugs whose findings were addressed; informational. */ + fixedSlugs?: string[]; + warnings?: string[]; +} + +/** + * Severity threshold for which findings the repair agent is asked to fix. + * Blockers are always fixed. Majors are included by default; the auto-fix + * loop can override via `severityThreshold` in the future. + */ +const DEFAULT_SEVERITY_THRESHOLD: ReadonlyArray = ['blocker', 'major']; + +const DEFAULT_TIER = 'best-value'; + +function actionableFindings(findings: DriftFinding[]): DriftFinding[] { + const allowed = new Set(DEFAULT_SEVERITY_THRESHOLD); + return findings.filter((f) => allowed.has(f.severity)); +} + +export async function repairCodeFromDriftArtifacts( + options: CodeDriftRepairOptions, +): Promise { + const { target } = options; + if (target.reports.length === 0) { + return { applied: false, summary: 'No actionable drift reports.' }; + } + + const resolver = options.resolver ?? defaultWorkforcePersonaResolver; + const resolved = await resolver( + options.personaIntentCandidates ?? WORKFORCE_PERSONA_INTENT_CANDIDATES, + { + tier: options.tier ?? DEFAULT_TIER, + ...(options.installRoot ? 
{ installRoot: options.installRoot } : {}), + }, + ); + + const task = await buildCodeDriftRepairTask(target, options); + const promptDigest = createHash('sha256').update(task).digest('hex'); + const selection = resolved.context.selection; + + const run = resolved.context.sendMessage(task, { + workingDirectory: target.cwd, + name: `ricky-code-drift-repair-${promptDigest.slice(0, 12)}`, + timeoutSeconds: options.timeoutSeconds ?? selection.runtime.harnessSettings?.timeoutSeconds, + installSkills: options.installSkills, + env: options.env, + signal: options.signal, + onProgress: options.onProgress, + inputs: { + driftReportPaths: target.reports.map((r) => r.filePath).join(','), + reportCount: target.reports.length, + attempt: options.attempt, + maxAttempts: options.maxAttempts, + promptDigest, + }, + }); + + const [result, runId] = await Promise.all([run, run.runId.catch(() => null)]); + if (result.status !== 'completed') { + throw new WorkforcePersonaWriterError( + `Code-drift repair did not complete: ${result.status}.`, + [...resolved.warnings, result.stderr].filter(Boolean), + ); + } + + // The agent edits files in place via its harness. We require a sentinel + // line in the response confirming completion. Only the textual + // CODE_DRIFT_REPAIR_COMPLETE marker is checked; structured responses are + // not parsed here, so output without the marker is treated as a failure. + const completed = isCompletionAcknowledged(result.output); + if (!completed) { + throw new WorkforcePersonaWriterError( + 'Code-drift repair response did not include a CODE_DRIFT_REPAIR_COMPLETE marker.', + ); + } + + return { + applied: true, + summary: extractRepairSummary(result.output) ?? `Applied fixes for ${target.reports.length} drift report(s).`, + runId: result.workflowRunId ?? runId, + fixedSlugs: target.reports.map((r) => r.slug ?? 
r.filePath), + warnings: [...resolved.warnings], + }; +} + +async function buildCodeDriftRepairTask( + target: CodeDriftTarget, + options: CodeDriftRepairOptions, +): Promise { + // Condense each report to the fields the agent needs, keeping only + // actionable (blocker/major) findings. No files are read here; any + // package context is left for the agent to gather in the working directory. + const reportSummaries = target.reports.map((r) => { + const filtered = actionableFindings(r.findings); + return { + filePath: r.filePath, + slug: r.slug ?? null, + verdict: r.verdict, + expected: r.expected, + actual: r.actual, + sources: r.sources, + findings: filtered.map((f) => ({ + severity: f.severity, + axis: f.axis, + description: f.description, + })), + }; + }); + + return [ + 'Repair source code that has drifted from an external reference (provider docs, schema, etc.).', + '', + 'A previous workflow step produced structured drift reports identifying specific issues in target code. Your job is to read each report, locate the offending source files, and apply minimal targeted fixes — then return a completion marker.', + '', + 'Working directory:', + target.cwd, + '', + 'Drift reports to address (paths are relative to the working directory):', + safeJson(reportSummaries), + '', + 'Repair contract:', + '- Address ALL findings whose severity is blocker or major. Skip minor findings unless they are trivial to bundle in.', + '- Edit source files in place using your file-editing tools. Do not produce a patch for the user to apply manually.', + '- For each blocker, the description usually pinpoints the file and the wrong value. Use that as your starting point; verify with a quick grep before editing.', + '- After editing, run package-level type checks and tests where applicable (e.g. `cd packages/ && npx tsc --noEmit -p tsconfig.json && npm test`) to confirm the fix compiles and passes tests. If a test fails, fix it.', + '- Do not modify the workflow file itself. 
Do not modify the drift reports. Do not commit, push, or open PRs.', + '- Do not introduce backward-incompatible API changes if a smaller compatible fix is possible (e.g. rename a constant + add a deprecated alias rather than break callers).', + '- If two findings conflict (e.g. one says use header X, another says use header Y), apply the one with higher severity and document the rejected one in your summary.', + '', + 'On completion, emit the literal sentinel line:', + 'CODE_DRIFT_REPAIR_COMPLETE', + '', + 'Optionally precede the sentinel with a short summary of files changed (one line per file). Example:', + ' packages/zendesk/src/webhook-normalizer.ts: header constant renamed; empty-body fallback added', + ' CODE_DRIFT_REPAIR_COMPLETE', + '', + 'Failure context:', + safeJson({ + failedStep: options.failedStep, + previousRunId: options.previousRunId, + attempt: options.attempt, + maxAttempts: options.maxAttempts, + }), + ].join('\n'); +} + +function isCompletionAcknowledged(output: string | undefined | null): boolean { + if (!output) return false; + return /^\s*CODE_DRIFT_REPAIR_COMPLETE\s*$/m.test(output); +} + +function extractRepairSummary(output: string | undefined | null): string | undefined { + if (!output) return undefined; + // Take the lines immediately preceding the sentinel as the summary. + const lines = output.split(/\r?\n/); + const idx = lines.findIndex((line) => /^\s*CODE_DRIFT_REPAIR_COMPLETE\s*$/.test(line)); + if (idx < 0) return undefined; + const back = lines + .slice(Math.max(0, idx - 10), idx) + .map((line) => line.trim()) + .filter(Boolean); + if (back.length === 0) return undefined; + return back.join(' '); +} + +function safeJson(value: unknown): string { + try { + return JSON.stringify(value, null, 2); + } catch (error) { + return JSON.stringify({ error: error instanceof Error ? 
error.message : String(error) }, null, 2); + } +} + +// --------------------------------------------------------------------------- +// Drift report discovery + schema validation +// --------------------------------------------------------------------------- + +/** + * Recursively scans `.workflow-artifacts/**` under `cwd` for `*-drift.json` files + * whose mtime is at or after `runStartTimeMs`, parses each, and returns the + * subset that: + * 1. Conform to the drift-report schema + * 2. Have verdict === 'DRIFT' + * 3. Contain at least one blocker- or major-severity finding + * + * Returns null if no actionable reports are found, signalling that the + * caller should fall back to the regular workflow-repair path. + */ +export async function discoverDriftReports( + cwd: string, + runStartTimeMs: number, +): Promise { + const root = isAbsolute(cwd) ? cwd : resolve(process.cwd(), cwd); + const artifactsDir = resolve(root, '.workflow-artifacts'); + + let driftFiles: string[]; + try { + driftFiles = await collectDriftJsonFiles(artifactsDir); + } catch { + return null; + } + + const reports: DriftReport[] = []; + for (const filePath of driftFiles) { + try { + const stat = await (await import('node:fs/promises')).stat(filePath); + if (stat.mtimeMs < runStartTimeMs) continue; + + const raw = await readFile(filePath, 'utf8'); + const parsed: unknown = JSON.parse(raw); + if (!isDriftReportShape(parsed)) continue; + + const parsedRecord = parsed as unknown as Record; + const report: DriftReport = { + filePath, + verdict: parsed.verdict, + findings: Array.isArray(parsed.findings) ? 
(parsed.findings as DriftFinding[]) : [], + raw: parsedRecord, + }; + const slugValue = parsedRecord.slug; + if (typeof slugValue === 'string') { + report.slug = slugValue; + } + const expectedValue = parsedRecord.expected; + if (isPlainRecord(expectedValue)) { + report.expected = expectedValue; + } + const actualValue = parsedRecord.actual; + if (isPlainRecord(actualValue)) { + report.actual = actualValue; + } + const sourcesValue = parsedRecord.sources; + if (isPlainRecord(sourcesValue)) { + report.sources = sourcesValue as Record; + } + + const actionable = actionableFindings(report.findings); + if (report.verdict === 'DRIFT' && actionable.length > 0) { + reports.push(report); + } + } catch { + // skip malformed files; do not let one bad file block the path + } + } + + return reports.length > 0 ? reports : null; +} + +async function collectDriftJsonFiles(dir: string): Promise { + const fs = await import('node:fs/promises'); + const out: string[] = []; + + async function walk(current: string): Promise { + let entries: import('node:fs').Dirent[]; + try { + entries = await fs.readdir(current, { withFileTypes: true }); + } catch { + return; + } + for (const entry of entries) { + const full = resolve(current, entry.name); + if (entry.isDirectory()) { + await walk(full); + } else if (entry.isFile() && entry.name.endsWith('-drift.json')) { + out.push(full); + } + } + } + + await walk(dir); + return out; +} + +function isDriftReportShape(value: unknown): value is { + verdict: 'PASS' | 'DRIFT'; + findings: DriftFinding[]; +} { + if (!isPlainRecord(value)) return false; + const verdict = (value as { verdict?: unknown }).verdict; + if (verdict !== 'PASS' && verdict !== 'DRIFT') return false; + const findings = (value as { findings?: unknown }).findings; + if (!Array.isArray(findings)) return false; + return findings.every((f) => { + if (!isPlainRecord(f)) return false; + const sev = (f as { severity?: unknown }).severity; + return sev === 'blocker' || sev === 'major' || 
sev === 'minor'; + }); +} + +function isPlainRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} From f063ee418f57c9cb9496d4f4a7bac625ba9ef1f6 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Thu, 7 May 2026 16:54:31 +0200 Subject: [PATCH 2/4] feat(auto-fix): add per-report opt-out for code-drift repair MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Default remains ON: when ricky's auto-fix loop is enabled and the workflow emits drift-shaped artifacts, ricky dispatches the code-fix repairer. This commit adds a fail-safe per-report opt-out so monitoring/audit workflows that want findings surfaced for human triage (not auto-edited) can declare so. Workflow emits `"autofix": false` in the report JSON; ricky parses the report, records the opt-out, and skips dispatch. - DriftReport interface gains `autofix: boolean` (defaulting to true). - discoverDriftReports treats only the exact boolean `false` as opt-out. Any other value (true, missing, "no", null) is treated as default ON, so a workflow that misspells the field still gets repaired — the default-useful behavior. - Filter applies after parsing so future telemetry can still see the opt-out without needing a separate scan. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/local/code-drift-repairer.ts | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/local/code-drift-repairer.ts b/src/local/code-drift-repairer.ts index f6f0fdba..d7bacd45 100644 --- a/src/local/code-drift-repairer.ts +++ b/src/local/code-drift-repairer.ts @@ -55,6 +55,14 @@ export interface DriftReport { expected?: Record; actual?: Record; sources?: Record; + /** + * Per-report opt-out for auto-fix. Auto-fix is ON by default — when a + * workflow emits a drift report with `"autofix": false`, ricky records + * the drift but does NOT dispatch a code-fix agent. 
Useful for + * monitoring/audit workflows that want findings surfaced for human + * triage rather than automated source edits. + */ + autofix: boolean; /** Full parsed JSON for the agent prompt. */ raw: Record; } @@ -301,10 +309,17 @@ export async function discoverDriftReports( if (!isDriftReportShape(parsed)) continue; const parsedRecord = parsed as unknown as Record; + // Auto-fix is ON by default. The workflow can opt out by emitting + // `"autofix": false` in the drift report. Any value other than the + // exact boolean `false` is treated as "use the default" (ON), which + // matches "fail-safe to the most useful behavior" — a workflow that + // accidentally emits e.g. autofix:"no" still gets repaired. + const autofixOptOut = parsedRecord.autofix === false; const report: DriftReport = { filePath, verdict: parsed.verdict, findings: Array.isArray(parsed.findings) ? (parsed.findings as DriftFinding[]) : [], + autofix: !autofixOptOut, raw: parsedRecord, }; const slugValue = parsedRecord.slug; @@ -325,7 +340,9 @@ export async function discoverDriftReports( } const actionable = actionableFindings(report.findings); - if (report.verdict === 'DRIFT' && actionable.length > 0) { + // Skip reports the workflow opted out of, but only after parsing them + // so that future telemetry/observability can still see the opt-out. 
+ if (report.verdict === 'DRIFT' && actionable.length > 0 && report.autofix) { reports.push(report); } } catch { From 75cb5d07dbf64daa9c645f68f020decef9c08cce Mon Sep 17 00:00:00 2001 From: Khaliq Date: Thu, 7 May 2026 17:14:44 +0200 Subject: [PATCH 3/4] =?UTF-8?q?fix(auto-fix):=20address=20PR=20#69=20revie?= =?UTF-8?q?w=20=E2=80=94=20full=20restart=20on=20code-drift=20retry,=20tig?= =?UTF-8?q?hter=20schema?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Codex P2] code-drift retry must rerun drift-producing steps Previously, after a successful code-drift repair, the retry resumed at the failed step (the aggregation gate that reads *-drift.json). That gate just re-reads the SAME pre-fix drift artifacts and fails again, looping until max attempts and never proving the fix cleared the drift. The verify agent steps that PRODUCE the drift reports must re-run against the patched source. Removed `startFromStep` from the code-drift retry — the workflow restarts from the root. Pays the cost of re-running successful steps (verify agents re-fetch external docs), but correctness wins. The progress message and retry reason both reflect the full restart so observers can see why the cost was paid. Workflow-repair retries (workforce-persona path) keep `startFromStep` unchanged — that path patches the workflow file itself, so resuming at the failed step is the right semantic. [CodeRabbit] strengthen DriftFinding schema validation isDriftReportShape only checked `severity`. Malformed JSON missing axis or description would pass validation, then get cast to DriftFinding[], surfacing `undefined` in the repair prompt — degrading the agent's ability to locate the offending code. Added typeof string + non-empty axis check. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/local/auto-fix-loop.ts | 27 ++++++++++++++++++++++----- src/local/code-drift-repairer.ts | 9 ++++++++- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/local/auto-fix-loop.ts b/src/local/auto-fix-loop.ts index f0ed69e0..da831323 100644 --- a/src/local/auto-fix-loop.ts +++ b/src/local/auto-fix-loop.ts @@ -227,8 +227,26 @@ export async function runWithAutoFix( } else if (!retryOfRunId) { retryOfRunId = runId; } - // Retry the workflow as-is — the agent has edited the source files - // in place. No workflow artifact was rewritten. + // Retry the workflow from the BEGINNING — not from the failed + // step. Verify-style workflows have a structure like: + // + // verify-* (agent steps) → produce *-drift.json + // ↓ + // artifact-* (gates) → validate report shape + // ↓ + // aggregate-drift (gate) → fail if any DRIFT (this fails) + // + // Resuming with `startFromStep: aggregate-drift` after a code + // edit would just re-read the SAME stale drift artifacts from + // before the fix, fail again, and loop until max attempts. The + // verify-* agent steps need to re-run against the patched + // source so they regenerate fresh drift reports. + // + // We pay the cost of re-running successful steps (which is real + // — the verify agents re-fetch external docs), but correctness + // wins. If a future workflow needs cheaper resumption, it can + // declare a resume-anchor step in the drift report; for now, + // the safe default is full restart. currentRequest = { ...retryBaseRequest(currentRequest, response), autoFix: undefined, @@ -236,11 +254,10 @@ export async function runWithAutoFix( attempt: attempt + 1, maxAttempts, ...(runId ? { previousRunId: runId, retryOfRunId: retryOfRunId ?? runId } : {}), - ...(failedStep ? { startFromStep: failedStep } : {}), - reason: `auto-fix retry after code-drift repair (${driftReports.length} report${driftReports.length === 1 ? 
'' : 's'})`, + reason: `auto-fix retry after code-drift repair (${driftReports.length} report${driftReports.length === 1 ? '' : 's'}); restarting from workflow root so drift-producing steps re-run`, }, }; - onProgress?.(`Retrying workflow${failedStep ? ` from ${failedStep}` : ''}...`); + onProgress?.('Retrying workflow from the beginning so drift-producing steps re-run against the patched source...'); continue; } // codeDriftRepairer returned applied=false: no-op, fall through to workflow repair. diff --git a/src/local/code-drift-repairer.ts b/src/local/code-drift-repairer.ts index d7bacd45..e9d9609d 100644 --- a/src/local/code-drift-repairer.ts +++ b/src/local/code-drift-repairer.ts @@ -387,10 +387,17 @@ function isDriftReportShape(value: unknown): value is { if (verdict !== 'PASS' && verdict !== 'DRIFT') return false; const findings = (value as { findings?: unknown }).findings; if (!Array.isArray(findings)) return false; + // Every finding must have a valid severity AND non-empty string axis + + // string description. Without the string checks, malformed JSON could + // pass schema validation, get cast to DriftFinding[], and surface + // `undefined` values in the agent's repair prompt. return findings.every((f) => { if (!isPlainRecord(f)) return false; const sev = (f as { severity?: unknown }).severity; - return sev === 'blocker' || sev === 'major' || sev === 'minor'; + if (sev !== 'blocker' && sev !== 'major' && sev !== 'minor') return false; + const axis = (f as { axis?: unknown }).axis; + const description = (f as { description?: unknown }).description; + return typeof axis === 'string' && axis.length > 0 && typeof description === 'string'; }); } From f2bddec490c2b486f840fd5b5956e41b3dfa447d Mon Sep 17 00:00:00 2001 From: Khaliq Date: Thu, 7 May 2026 17:31:20 +0200 Subject: [PATCH 4/4] fix(auto-fix): per-attempt drift discovery floor + non-empty description Two more PR #69 review findings. 
[CodeRabbit major] Drift artifact freshness keyed to run-level timestamp Previously discoverDriftReports used `runStartTimeMs` (captured once before the attempt loop) as the freshness floor. That means stale drift artifacts produced by attempt N could be re-considered "fresh" on attempt N+1, re-triggering codeDriftRepairer for findings that earlier attempts already addressed (or that belong to a different failure path in the same run). Added `attemptStartTimeMs` per attempt and pass `Math.max(runStartTimeMs, attemptStartTimeMs)` to discoverDriftReports so each attempt only considers artifacts the CURRENT attempt produced. The Math.max guards against backward clock skew between attempts (NTP correction during a long run) which would otherwise cause attemptStartTimeMs to drop below runStartTimeMs. [CodeRabbit minor] description: '' or whitespace passed schema validation isDriftReportShape required `description` to be a string but allowed empty/whitespace values. That weakened the "tightened schema" intent from the previous commit and could feed empty findings into the agent repair prompt. Added trim().length > 0 for both axis and description. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/local/auto-fix-loop.ts | 14 +++++++++++++- src/local/code-drift-repairer.ts | 7 ++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/local/auto-fix-loop.ts b/src/local/auto-fix-loop.ts index da831323..4b65d80f 100644 --- a/src/local/auto-fix-loop.ts +++ b/src/local/auto-fix-loop.ts @@ -122,6 +122,13 @@ export async function runWithAutoFix( let pendingRepairAttempt: Omit | undefined; for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + // Per-attempt timestamp so drift discovery only considers artifacts the + // CURRENT attempt produced. Without this, a later unrelated failure + // could re-trigger code-drift repair on stale reports from a prior + // attempt (e.g. 
attempt 1 produced reports → repair → attempt 2 fails + // for an unrelated reason → discovery sees attempt 1's reports as + // "fresh" because runStartTimeMs hasn't moved). + const attemptStartTimeMs = Date.now(); onProgress?.(`Running workflow (attempt ${attempt}/${maxAttempts})...`); const response = await options.runSingleAttempt(currentRequest); lastResponse = response; @@ -200,7 +207,12 @@ export async function runWithAutoFix( // code (not the workflow itself) is what's wrong. Falls through to // workflow repair if no actionable reports are found. const codeDriftCwd = response.execution?.execution.cwd ?? request.invocationRoot ?? process.cwd(); - const driftReports = await discoverDriftReports(codeDriftCwd, runStartTimeMs); + // Use the more recent of run-start and attempt-start. attemptStartTimeMs + // alone would be ideal, but if the system clock skews backward between + // attempts (NTP correction during a long run) we don't want to silently + // accept artifacts from before this run began. Math.max guards both. + const driftDiscoveryFloor = Math.max(runStartTimeMs, attemptStartTimeMs); + const driftReports = await discoverDriftReports(codeDriftCwd, driftDiscoveryFloor); if (driftReports) { const driftTarget: CodeDriftTarget = { cwd: codeDriftCwd, reports: driftReports }; try { diff --git a/src/local/code-drift-repairer.ts b/src/local/code-drift-repairer.ts index e9d9609d..183691e8 100644 --- a/src/local/code-drift-repairer.ts +++ b/src/local/code-drift-repairer.ts @@ -397,7 +397,12 @@ function isDriftReportShape(value: unknown): value is { if (sev !== 'blocker' && sev !== 'major' && sev !== 'minor') return false; const axis = (f as { axis?: unknown }).axis; const description = (f as { description?: unknown }).description; - return typeof axis === 'string' && axis.length > 0 && typeof description === 'string'; + return ( + typeof axis === 'string' && + axis.trim().length > 0 && + typeof description === 'string' && + description.trim().length > 0 + ); }); }