diff --git a/src/local/auto-fix-loop.ts b/src/local/auto-fix-loop.ts index 14680f80..4b65d80f 100644 --- a/src/local/auto-fix-loop.ts +++ b/src/local/auto-fix-loop.ts @@ -13,6 +13,13 @@ import type { DebuggerResult } from '../product/specialists/debugger/types.js'; import type { WorkflowRunEvidence, WorkflowStepEvidence } from '../shared/models/workflow-evidence.js'; import { repairWorkflowWithWorkforcePersona } from '../product/generation/workforce-persona-repairer.js'; import type { WorkforcePersonaRepairAttempt } from '../product/generation/workforce-persona-repairer.js'; +import { + discoverDriftReports, + repairCodeFromDriftArtifacts, + type CodeDriftRepairOptions, + type CodeDriftRepairResult, + type CodeDriftTarget, +} from './code-drift-repairer.js'; import { localRunStateRoot } from '../shared/state-paths.js'; export interface AutoFixAttemptSummary { @@ -73,6 +80,14 @@ export interface RunWithAutoFixOptions { classification: FailureClassification; }) => DebuggerResult; workflowRepairer?: (input: WorkflowRepairInput) => Promise; + /** + * Optional repairer for "code drift" failures — when the workflow has + * generated structured drift reports under `.workflow-artifacts/**\/*-drift.json` + * indicating that target source code does not match an external reference. + * If unset, defaults to `repairCodeFromDriftArtifacts`. Dispatched before + * the workflow repairer when discoverable drift reports are present. + */ + codeDriftRepairer?: (options: CodeDriftRepairOptions) => Promise; artifactWriter?: (artifactPath: string, content: string, cwd: string) => Promise; repairRunner?: (command: string, cwd: string) => Promise<{ exitCode: number }>; sleep?: (ms: number) => Promise; @@ -89,6 +104,7 @@ export async function runWithAutoFix( const classifyFailure = options.classifyFailure ?? defaultClassifyFailure; const debugWorkflowRun = options.debugWorkflowRun ?? defaultDebugWorkflowRun; const workflowRepairer = options.workflowRepairer ?? 
defaultWorkflowRepairer; + const codeDriftRepairer = options.codeDriftRepairer ?? repairCodeFromDriftArtifacts; const artifactWriter = options.artifactWriter ?? writeWorkflowArtifact; const repairRunner = options.repairRunner ?? runShellCommand; const sleep = options.sleep ?? ((ms: number) => new Promise((resolve) => setTimeout(resolve, ms))); @@ -97,12 +113,22 @@ export async function runWithAutoFix( const previousRepairAttempts: WorkforcePersonaRepairAttempt[] = []; const warnings: string[] = []; const trackingRunId = resolveTrackingRunId(request) ?? `ricky-local-${randomUUID()}`; + // Used to ignore stale drift artifacts from prior runs when scanning for + // code-drift repair targets. Captured before the first attempt fires. + const runStartTimeMs = Date.now(); let currentRequest: LocalInvocationRequest = { ...request, autoFix: undefined }; let lastResponse: LocalResponse | undefined; let retryOfRunId: string | undefined; let pendingRepairAttempt: Omit | undefined; for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + // Per-attempt timestamp so drift discovery only considers artifacts the + // CURRENT attempt produced. Without this, a later unrelated failure + // could re-trigger code-drift repair on stale reports from a prior + // attempt (e.g. attempt 1 produced reports → repair → attempt 2 fails + // for an unrelated reason → discovery sees attempt 1's reports as + // "fresh" because runStartTimeMs hasn't moved). 
+ const attemptStartTimeMs = Date.now(); onProgress?.(`Running workflow (attempt ${attempt}/${maxAttempts})...`); const response = await options.runSingleAttempt(currentRequest); lastResponse = response; @@ -174,6 +200,90 @@ export async function runWithAutoFix( }); pendingRepairAttempt = undefined; } + + // Code-drift dispatch — preferred over workflow repair when the + // workflow has emitted structured drift reports under + // .workflow-artifacts/**\/*-drift.json indicating that target source + // code (not the workflow itself) is what's wrong. Falls through to + // workflow repair if no actionable reports are found. + const codeDriftCwd = response.execution?.execution.cwd ?? request.invocationRoot ?? process.cwd(); + // Use the more recent of run-start and attempt-start. attemptStartTimeMs + // alone would be ideal, but if the system clock skews backward between + // attempts (NTP correction during a long run) we don't want to silently + // accept artifacts from before this run began. Math.max guards both. + const driftDiscoveryFloor = Math.max(runStartTimeMs, attemptStartTimeMs); + const driftReports = await discoverDriftReports(codeDriftCwd, driftDiscoveryFloor); + if (driftReports) { + const driftTarget: CodeDriftTarget = { cwd: codeDriftCwd, reports: driftReports }; + try { + onProgress?.(`Ricky is fixing target code (${driftReports.length} drift report${driftReports.length === 1 ? '' : 's'})...`); + const driftRepair = await codeDriftRepairer({ + target: driftTarget, + attempt, + maxAttempts, + ...(failedStep ? { failedStep } : {}), + ...(runId ? { previousRunId: runId } : {}), + }); + if (driftRepair.applied) { + attemptSummary.applied_fix = { + mode: 'code-drift', + reports: driftReports.map((r) => r.filePath), + summary: driftRepair.summary, + ...(driftRepair.runId ? { persona_run_id: driftRepair.runId } : {}), + }; + warnings.push(...(driftRepair.warnings ?? 
[])); + if (!runId) { + const warning = 'Auto-fix retry could not resolve a previous run id; retrying without step-level resume.'; + attemptSummary.warning = warning; + warnings.push(warning); + } else if (!retryOfRunId) { + retryOfRunId = runId; + } + // Retry the workflow from the BEGINNING — not from the failed + // step. Verify-style workflows have a structure like: + // + // verify-* (agent steps) → produce *-drift.json + // ↓ + // artifact-* (gates) → validate report shape + // ↓ + // aggregate-drift (gate) → fail if any DRIFT (this fails) + // + // Resuming with `startFromStep: aggregate-drift` after a code + // edit would just re-read the SAME stale drift artifacts from + // before the fix, fail again, and loop until max attempts. The + // verify-* agent steps need to re-run against the patched + // source so they regenerate fresh drift reports. + // + // We pay the cost of re-running successful steps (which is real + // — the verify agents re-fetch external docs), but correctness + // wins. If a future workflow needs cheaper resumption, it can + // declare a resume-anchor step in the drift report; for now, + // the safe default is full restart. + currentRequest = { + ...retryBaseRequest(currentRequest, response), + autoFix: undefined, + retry: { + attempt: attempt + 1, + maxAttempts, + ...(runId ? { previousRunId: runId, retryOfRunId: retryOfRunId ?? runId } : {}), + reason: `auto-fix retry after code-drift repair (${driftReports.length} report${driftReports.length === 1 ? '' : 's'}); restarting from workflow root so drift-producing steps re-run`, + }, + }; + onProgress?.('Retrying workflow from the beginning so drift-producing steps re-run against the patched source...'); + continue; + } + // codeDriftRepairer returned applied=false: no-op, fall through to workflow repair. 
+ warnings.push(`Code-drift repairer returned applied=false: ${driftRepair.summary}`); + } catch (error) { + warnings.push(...warningsFromError(error)); + warnings.push( + `Code-drift repair failed; falling back to workflow repair: ${error instanceof Error ? error.message : String(error)}`, + ); + // Fall through to the workflow-repair path below — the failure + // might still be a workflow bug that workforce-persona can fix. + } + } + const repairTarget = await resolveWorkflowRepairTarget(currentRequest, response); if (repairTarget) { diff --git a/src/local/code-drift-repairer.ts b/src/local/code-drift-repairer.ts new file mode 100644 index 00000000..183691e8 --- /dev/null +++ b/src/local/code-drift-repairer.ts @@ -0,0 +1,411 @@ +/** + * code-drift-repairer.ts + * + * Auto-fix path for "drift report" workflow failures — i.e. when a workflow + * has *successfully* generated structured findings showing that target code + * doesn't match an external reference (provider docs, schema, etc.) but the + * workflow's final gate exits non-zero because drift exists. + * + * Distinct from workforce-persona-repairer: + * - workforce-persona-repairer: fixes the *workflow artifact itself* + * (treating the failure as a generation bug) + * - code-drift-repairer (this file): fixes the *target code* the workflow + * was inspecting (treating the failure as expected output that requires + * a follow-up code change before retry) + * + * Drift report schema (any file under .workflow-artifacts/**\/*-drift.json): + * { + * verdict: 'PASS' | 'DRIFT', + * findings: Array<{ severity: 'blocker'|'major'|'minor', axis: string, description: string }>, + * expected?: Record, + * actual?: Record, + * sources?: Record, + * ... + * } + * + * Detection (in resolveCodeDriftTarget): convention-based + schema-validated + * + freshness-checked. See auto-fix-loop.ts → resolveCodeDriftTarget. 
/**
 * Module layout: drift-report types, the persona-backed repair entry point
 * (`repairCodeFromDriftArtifacts`), prompt/response helpers, and drift-report
 * discovery + schema validation (`discoverDriftReports`, which is where the
 * convention / schema / freshness detection described above is implemented).
 */

import { createHash } from 'node:crypto';
import { readdir, readFile, stat } from 'node:fs/promises';
import { isAbsolute, relative, resolve } from 'node:path';

import {
  defaultWorkforcePersonaResolver,
  WORKFORCE_PERSONA_INTENT_CANDIDATES,
  WorkforcePersonaWriterError,
  type WorkforcePersonaResolver,
  type WorkforcePersonaSendOptions,
} from '../product/generation/workforce-persona-writer.js';

/** One issue recorded in a drift report. */
export interface DriftFinding {
  severity: 'blocker' | 'major' | 'minor';
  axis: string;
  description: string;
}

/** A parsed, schema-validated `*-drift.json` report. */
export interface DriftReport {
  /** Path to the .json file on disk (absolute or cwd-relative). */
  filePath: string;
  /** Identifier carried by the report; set only when the JSON has a string `slug`. */
  slug?: string;
  verdict: 'PASS' | 'DRIFT';
  findings: DriftFinding[];
  expected?: Record<string, unknown>;
  actual?: Record<string, unknown>;
  // NOTE(review): the generic arguments of this field were lost in the text
  // under review; `Record<string, string>` is assumed — confirm. Values are
  // NOT validated at runtime (discovery only checks for a plain object).
  sources?: Record<string, string>;
  /**
   * Per-report opt-out for auto-fix. Auto-fix is ON by default — when a
   * workflow emits a drift report with `"autofix": false`, ricky records
   * the drift but does NOT dispatch a code-fix agent. Useful for
   * monitoring/audit workflows that want findings surfaced for human
   * triage rather than automated source edits.
   */
  autofix: boolean;
  /** Full parsed JSON of the report. */
  raw: Record<string, unknown>;
}

export interface CodeDriftTarget {
  /** Working directory the workflow ran in. */
  cwd: string;
  /** Reports with verdict === 'DRIFT' that contain at least one blocker or major finding. */
  reports: DriftReport[];
}

export interface CodeDriftRepairOptions {
  target: CodeDriftTarget;
  attempt: number;
  maxAttempts: number;
  failedStep?: string;
  previousRunId?: string;
  timeoutSeconds?: number;
  installSkills?: boolean;
  installRoot?: string;
  tier?: string;
  env?: NodeJS.ProcessEnv;
  signal?: AbortSignal;
  onProgress?: WorkforcePersonaSendOptions['onProgress'];
  personaIntentCandidates?: readonly string[];
  resolver?: WorkforcePersonaResolver;
}

export interface CodeDriftRepairResult {
  applied: boolean;
  summary: string;
  /** The persona/agent run id, when the underlying harness exposes one. */
  runId?: string | null;
  /** Slugs whose findings were addressed; informational. */
  fixedSlugs?: string[];
  warnings?: string[];
}

/**
 * Severity threshold for which findings the repair agent is asked to fix.
 * Blockers are always fixed. Majors are included by default; the auto-fix
 * loop can override via `severityThreshold` in the future.
 */
const DEFAULT_SEVERITY_THRESHOLD: ReadonlyArray<DriftFinding['severity']> = ['blocker', 'major'];

const DEFAULT_TIER = 'best-value';

/** Literal line the repair agent must emit to confirm it finished. */
const COMPLETION_SENTINEL = 'CODE_DRIFT_REPAIR_COMPLETE';

/** Splits on every ECMAScript line terminator so all sentinel checks agree. */
const LINE_BREAK = /\r\n|[\r\n\u2028\u2029]/;

/** How many lines before the sentinel are folded into the repair summary. */
const SUMMARY_LOOKBACK_LINES = 10;

// Built once: the severity set is constant for the lifetime of the module.
const ACTIONABLE_SEVERITIES: ReadonlySet<DriftFinding['severity']> = new Set(DEFAULT_SEVERITY_THRESHOLD);

/** Returns the findings whose severity is at or above the default threshold. */
function actionableFindings(findings: DriftFinding[]): DriftFinding[] {
  return findings.filter((finding) => ACTIONABLE_SEVERITIES.has(finding.severity));
}

/**
 * Dispatches a workforce persona to patch target source code according to the
 * given drift reports.
 *
 * @param options Repair target plus attempt metadata and harness overrides.
 * @returns `applied: false` when `target.reports` is empty; otherwise
 *   `applied: true` with a summary taken from the agent output.
 * @throws WorkforcePersonaWriterError when the persona run does not finish
 *   with status `completed`, or when its output lacks the completion sentinel.
 */
export async function repairCodeFromDriftArtifacts(
  options: CodeDriftRepairOptions,
): Promise<CodeDriftRepairResult> {
  const { target } = options;
  if (target.reports.length === 0) {
    return { applied: false, summary: 'No actionable drift reports.' };
  }

  const resolver = options.resolver ?? defaultWorkforcePersonaResolver;
  const resolved = await resolver(
    options.personaIntentCandidates ?? WORKFORCE_PERSONA_INTENT_CANDIDATES,
    {
      tier: options.tier ?? DEFAULT_TIER,
      ...(options.installRoot ? { installRoot: options.installRoot } : {}),
    },
  );

  const task = buildCodeDriftRepairTask(target, options);
  // The digest names the run and is forwarded as an input so identical prompts
  // can be correlated across attempts.
  const promptDigest = createHash('sha256').update(task).digest('hex');
  const selection = resolved.context.selection;

  const run = resolved.context.sendMessage(task, {
    workingDirectory: target.cwd,
    name: `ricky-code-drift-repair-${promptDigest.slice(0, 12)}`,
    timeoutSeconds: options.timeoutSeconds ?? selection.runtime.harnessSettings?.timeoutSeconds,
    installSkills: options.installSkills,
    env: options.env,
    signal: options.signal,
    onProgress: options.onProgress,
    inputs: {
      driftReportPaths: target.reports.map((r) => r.filePath).join(','),
      reportCount: target.reports.length,
      attempt: options.attempt,
      maxAttempts: options.maxAttempts,
      promptDigest,
    },
  });

  // A rejected runId promise must not fail the repair; fall back to null.
  const [result, runId] = await Promise.all([run, run.runId.catch(() => null)]);
  if (result.status !== 'completed') {
    throw new WorkforcePersonaWriterError(
      `Code-drift repair did not complete: ${result.status}.`,
      [...resolved.warnings, result.stderr].filter(Boolean),
    );
  }

  // The agent edits files in place via its harness, so the only confirmation
  // available here is the textual sentinel in its output.
  if (!isCompletionAcknowledged(result.output)) {
    throw new WorkforcePersonaWriterError(
      `Code-drift repair response did not include a ${COMPLETION_SENTINEL} marker.`,
      // Keep resolver warnings attached, matching the not-completed error above.
      [...resolved.warnings],
    );
  }

  return {
    applied: true,
    summary: extractRepairSummary(result.output) ?? `Applied fixes for ${target.reports.length} drift report(s).`,
    runId: result.workflowRunId ?? runId,
    fixedSlugs: target.reports.map((r) => r.slug ?? r.filePath),
    warnings: [...resolved.warnings],
  };
}

/**
 * Expresses `filePath` relative to `cwd` so the prompt's "paths are relative to
 * the working directory" statement holds. Paths that are already relative are
 * returned untouched.
 */
function toCwdRelative(cwd: string, filePath: string): string {
  if (!isAbsolute(filePath)) return filePath;
  const rel = relative(resolve(cwd), filePath);
  return rel.length > 0 ? rel : filePath;
}

/** Builds the natural-language task sent to the repair persona. */
function buildCodeDriftRepairTask(target: CodeDriftTarget, options: CodeDriftRepairOptions): string {
  // Only actionable findings are forwarded; minor findings stay in the report
  // file on disk where the agent can still read them.
  const reportSummaries = target.reports.map((r) => ({
    filePath: toCwdRelative(target.cwd, r.filePath),
    slug: r.slug ?? null,
    verdict: r.verdict,
    expected: r.expected,
    actual: r.actual,
    sources: r.sources,
    findings: actionableFindings(r.findings).map((f) => ({
      severity: f.severity,
      axis: f.axis,
      description: f.description,
    })),
  }));

  return [
    'Repair source code that has drifted from an external reference (provider docs, schema, etc.).',
    '',
    'A previous workflow step produced structured drift reports identifying specific issues in target code. Your job is to read each report, locate the offending source files, and apply minimal targeted fixes — then return a completion marker.',
    '',
    'Working directory:',
    target.cwd,
    '',
    'Drift reports to address (paths are relative to the working directory):',
    safeJson(reportSummaries),
    '',
    'Repair contract:',
    '- Address ALL findings whose severity is blocker or major. Skip minor findings unless they are trivial to bundle in.',
    '- Edit source files in place using your file-editing tools. Do not produce a patch for the user to apply manually.',
    '- For each blocker, the description usually pinpoints the file and the wrong value. Use that as your starting point; verify with a quick grep before editing.',
    // NOTE(review): the text under review read `cd packages/ && …`; the package
    // placeholder appears to have been stripped and is restored here — confirm.
    '- After editing, run package-level type checks and tests where applicable (e.g. `cd packages/<name> && npx tsc --noEmit -p tsconfig.json && npm test`) to confirm the fix compiles and passes tests. If a test fails, fix it.',
    '- Do not modify the workflow file itself. Do not modify the drift reports. Do not commit, push, or open PRs.',
    '- Do not introduce backward-incompatible API changes if a smaller compatible fix is possible (e.g. rename a constant + add a deprecated alias rather than break callers).',
    '- If two findings conflict (e.g. one says use header X, another says use header Y), apply the one with higher severity and document the rejected one in your summary.',
    '',
    'On completion, emit the literal sentinel line:',
    COMPLETION_SENTINEL,
    '',
    'Optionally precede the sentinel with a short summary of files changed (one line per file). Example:',
    ' packages/zendesk/src/webhook-normalizer.ts: header constant renamed; empty-body fallback added',
    ` ${COMPLETION_SENTINEL}`,
    '',
    'Failure context:',
    safeJson({
      failedStep: options.failedStep,
      previousRunId: options.previousRunId,
      attempt: options.attempt,
      maxAttempts: options.maxAttempts,
    }),
  ].join('\n');
}

/** True when `line`, ignoring surrounding whitespace, is exactly the sentinel. */
function isSentinelLine(line: string): boolean {
  return line.trim() === COMPLETION_SENTINEL;
}

/** True when some line of `output` consists solely of the completion sentinel. */
function isCompletionAcknowledged(output: string | undefined | null): boolean {
  if (!output) return false;
  return output.split(LINE_BREAK).some(isSentinelLine);
}

/**
 * Joins the non-empty lines (at most `SUMMARY_LOOKBACK_LINES`) immediately
 * preceding the first sentinel line into a one-line summary. Returns
 * `undefined` when there is no sentinel or nothing precedes it.
 */
function extractRepairSummary(output: string | undefined | null): string | undefined {
  if (!output) return undefined;
  const lines = output.split(LINE_BREAK);
  const sentinelIndex = lines.findIndex(isSentinelLine);
  if (sentinelIndex < 0) return undefined;
  const preceding = lines
    .slice(Math.max(0, sentinelIndex - SUMMARY_LOOKBACK_LINES), sentinelIndex)
    .map((line) => line.trim())
    .filter(Boolean);
  return preceding.length > 0 ? preceding.join(' ') : undefined;
}

/** Pretty-prints `value`; serialises the error instead if stringification throws. */
function safeJson(value: unknown): string {
  try {
    return JSON.stringify(value, null, 2);
  } catch (error) {
    return JSON.stringify({ error: error instanceof Error ? error.message : String(error) }, null, 2);
  }
}

// ---------------------------------------------------------------------------
// Drift report discovery + schema validation
// ---------------------------------------------------------------------------

/**
 * Recursively scans `<cwd>/.workflow-artifacts/**` for `*-drift.json` files
 * whose modification time is at or after `runStartTimeMs`, parses each, and
 * returns the subset that:
 *   1. Conform to the drift-report schema
 *   2. Have verdict === 'DRIFT'
 *   3. Contain at least one blocker- or major-severity finding
 *   4. Have not opted out via `"autofix": false`
 *
 * Never throws: unreadable directories and malformed files are skipped.
 *
 * @param cwd Working directory; resolved against `process.cwd()` when relative.
 * @param runStartTimeMs Freshness floor in epoch milliseconds.
 * @returns The actionable reports, or null when there are none, signalling
 *   that the caller should fall back to the regular workflow-repair path.
 */
export async function discoverDriftReports(
  cwd: string,
  runStartTimeMs: number,
): Promise<DriftReport[] | null> {
  const root = isAbsolute(cwd) ? cwd : resolve(process.cwd(), cwd);
  const artifactsDir = resolve(root, '.workflow-artifacts');

  let driftFiles: string[];
  try {
    driftFiles = await collectDriftJsonFiles(artifactsDir);
  } catch {
    return null;
  }

  const reports: DriftReport[] = [];
  for (const filePath of driftFiles) {
    const report = await loadDriftReport(filePath, runStartTimeMs);
    if (report === null) continue;
    // Opted-out reports are still parsed above so the opt-out stays observable;
    // they are only excluded from the repair set here.
    const hasActionable = actionableFindings(report.findings).length > 0;
    if (report.verdict === 'DRIFT' && hasActionable && report.autofix) {
      reports.push(report);
    }
  }

  return reports.length > 0 ? reports : null;
}

/**
 * Reads and validates one drift report. Returns null when the file is older
 * than `freshnessFloorMs`, unreadable, not JSON, or fails schema validation,
 * so a single bad file cannot block discovery.
 */
async function loadDriftReport(filePath: string, freshnessFloorMs: number): Promise<DriftReport | null> {
  try {
    const { mtimeMs } = await stat(filePath);
    if (mtimeMs < freshnessFloorMs) return null;

    const parsed: unknown = JSON.parse(await readFile(filePath, 'utf8'));
    if (!isPlainRecord(parsed) || !isDriftReportShape(parsed)) return null;

    const report: DriftReport = {
      filePath,
      verdict: parsed.verdict,
      findings: parsed.findings,
      // Auto-fix is ON by default; only the exact boolean `false` opts out, so a
      // workflow that accidentally emits e.g. autofix:"no" still gets repaired.
      autofix: parsed.autofix !== false,
      raw: parsed,
    };

    const slugValue = parsed.slug;
    if (typeof slugValue === 'string') report.slug = slugValue;

    const expectedValue = parsed.expected;
    if (isPlainRecord(expectedValue)) report.expected = expectedValue;

    const actualValue = parsed.actual;
    if (isPlainRecord(actualValue)) report.actual = actualValue;

    const sourcesValue = parsed.sources;
    if (isPlainRecord(sourcesValue)) report.sources = sourcesValue as Record<string, string>;

    return report;
  } catch {
    // Skip malformed or unreadable files.
    return null;
  }
}

/**
 * Walks `dir` recursively and returns the absolute paths of every regular file
 * named `*-drift.json`, sorted so downstream prompts and digests are stable
 * regardless of directory enumeration order. Unreadable directories are skipped.
 */
async function collectDriftJsonFiles(dir: string): Promise<string[]> {
  const found: string[] = [];

  async function walk(current: string): Promise<void> {
    let entries: import('node:fs').Dirent[];
    try {
      entries = await readdir(current, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const full = resolve(current, entry.name);
      if (entry.isDirectory()) {
        await walk(full);
      } else if (entry.isFile() && entry.name.endsWith('-drift.json')) {
        found.push(full);
      }
    }
  }

  await walk(dir);
  return found.sort();
}

/**
 * Structural check for the minimal drift-report schema: a valid `verdict` and a
 * `findings` array in which every entry has a known severity plus non-blank
 * string `axis` and `description`. The string checks keep `undefined` values
 * from reaching the repair prompt.
 */
function isDriftReportShape(value: unknown): value is {
  verdict: 'PASS' | 'DRIFT';
  findings: DriftFinding[];
} {
  if (!isPlainRecord(value)) return false;

  const { verdict, findings } = value;
  if (verdict !== 'PASS' && verdict !== 'DRIFT') return false;
  if (!Array.isArray(findings)) return false;

  return findings.every((finding: unknown) => {
    if (!isPlainRecord(finding)) return false;
    const { severity, axis, description } = finding;
    if (severity !== 'blocker' && severity !== 'major' && severity !== 'minor') return false;
    return (
      typeof axis === 'string' &&
      axis.trim().length > 0 &&
      typeof description === 'string' &&
      description.trim().length > 0
    );
  });
}

/** True for non-null, non-array objects. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}