diff --git a/packages/runtime/package.json b/packages/runtime/package.json index 489ff938..52c67855 100644 --- a/packages/runtime/package.json +++ b/packages/runtime/package.json @@ -22,6 +22,10 @@ "types": "./dist/raw.d.ts", "default": "./dist/raw.js" }, + "./simulate": { + "types": "./dist/simulate/index.d.ts", + "default": "./dist/simulate/index.js" + }, "./proactive": { "types": "./dist/proactive.d.ts", "default": "./dist/proactive.js" diff --git a/packages/runtime/src/index.ts b/packages/runtime/src/index.ts index 4a37cda8..a50552d6 100644 --- a/packages/runtime/src/index.ts +++ b/packages/runtime/src/index.ts @@ -61,6 +61,25 @@ export type { LinearIssueReference } from './types.js'; +// Invocation dry-run / simulation (workforce#186): execute a handler against +// fixture envelopes with side effects recorded-not-executed, emitting +// Cloud-compatible run records (`origin: "local_dry_run"`). +export { + simulateInvocation, + createSimulationSubsystems, + deriveSimulatedRunFailureClass, + type CapturedLogLine, + type RecordedSideEffect, + type SimulateInvocationOptions, + type SimulatedRunFailureClass, + type SimulatedRunFailureInput, + type SimulatedRunRecord, + type SimulationResult, + type SimulationSink, + type SimulationSubsystems, + type UnsupportedEnvelope +} from './simulate/index.js'; + // VFS-backed transport helpers. All provider interactions go through these // — no per-provider client code in the runtime. Handlers use listJsonFiles / // readJsonFile / writeJsonFile directly against the provider path conventions diff --git a/packages/runtime/src/simulate/failure-class.ts b/packages/runtime/src/simulate/failure-class.ts new file mode 100644 index 00000000..73f5ddcc --- /dev/null +++ b/packages/runtime/src/simulate/failure-class.ts @@ -0,0 +1,45 @@ +/** + * Failure classification for simulated runs. + * + * The union MIRRORS Cloud's hosted run model verbatim + * (cloud: `packages/web/lib/proactive-runtime/deployment-run-failure-class.ts`, + * shipped in AgentWorkforce/cloud#1788) so a simulated run record is + * type-compatible with hosted run records and Cloud can ingest/display + * `origin: "local_dry_run"` runs without redesign. + * + * A local simulation never provisions a sandbox, installs deps, mounts a + * Relayfile, or stages a bundle, so in practice it only produces `success` + * or `runner_error` — but the full union is kept so the contract stays + * identical across origins. + */ +export type SimulatedRunFailureClass = + | 'success' + | 'bootstrap_failed' + | 'bundle_unavailable' + | 'dep_install_failed' + | 'runner_error' + | 'mount_failure' + | 'cleanup_warning'; + +export interface SimulatedRunFailureInput { + /** Run status using Cloud's vocabulary: `succeeded` | `failed`. */ + status: string; + /** Error message captured from the handler, if it threw. */ + error: string | null; +} + +/** + * Derive the failure class for a simulated run. Mirrors the invariant Cloud + * enforces for hosted runs: a failure-ish status or a present error message + * can never classify as `success`. + */ +export function deriveSimulatedRunFailureClass( + input: SimulatedRunFailureInput +): SimulatedRunFailureClass { + const status = input.status.toLowerCase(); + const hasError = typeof input.error === 'string' && input.error.trim().length > 0; + if ((status === 'succeeded' || status === 'success') && !hasError) { + return 'success'; + } + return 'runner_error'; +} diff --git a/packages/runtime/src/simulate/index.ts b/packages/runtime/src/simulate/index.ts new file mode 100644 index 00000000..d30b6ed0 --- /dev/null +++ b/packages/runtime/src/simulate/index.ts @@ -0,0 +1,26 @@ +// Invocation dry-run / simulation surface (workforce#186). +// +// `simulateInvocation` executes a persona handler against fixture envelopes +// with every external side effect recorded-not-executed, and emits run +// records mirroring Cloud's hosted compact run shape with +// `origin: "local_dry_run"` (cloud#1783 / cloud#1788). + +export { simulateInvocation } from './simulate.js'; +export { + createSimulationSubsystems, + type SimulationSink, + type SimulationSubsystems +} from './subsystems.js'; +export { + deriveSimulatedRunFailureClass, + type SimulatedRunFailureClass, + type SimulatedRunFailureInput +} from './failure-class.js'; +export type { + CapturedLogLine, + RecordedSideEffect, + SimulateInvocationOptions, + SimulatedRunRecord, + SimulationResult, + UnsupportedEnvelope +} from './types.js'; diff --git a/packages/runtime/src/simulate/simulate.test.ts b/packages/runtime/src/simulate/simulate.test.ts new file mode 100644 index 00000000..dc0c1814 --- /dev/null +++ b/packages/runtime/src/simulate/simulate.test.ts @@ -0,0 +1,288 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import type { PersonaSpec } from '@agentworkforce/persona-kit'; +import { handler } from '../handler.js'; +import type { RawGatewayEnvelope } from '../shim.js'; +import { deriveSimulatedRunFailureClass } from './failure-class.js'; +import { simulateInvocation } from './simulate.js'; +import { createSimulationSubsystems } from './subsystems.js'; +import type { SimulationSink } from './subsystems.js'; + +const persona: PersonaSpec = { + id: 'sim-demo', + intent: 'documentation', + tags: ['documentation'], + description: 'simulation test persona', + skills: [], + harness: 'claude', + model: 'anthropic/claude-3-5-sonnet', + systemPrompt: 'be helpful', + harnessSettings: { reasoning: 'medium', timeoutSeconds: 300 }, + cloud: true +}; + +const cronEnvelope: RawGatewayEnvelope = { + id: 'e1', + workspace: 'ws-test', + type: 'cron.tick', + occurredAt: '2026-05-12T09:00:00Z', + name: 'weekly', + cron: '0 9 * * 6' +}; + +const githubEnvelope: RawGatewayEnvelope = { + id: 'e2', + workspace: 'ws-test', + type: 'github.pull_request.opened', + occurredAt: '2026-05-12T10:00:00Z', + resource: { action: 'opened', pull_request: { number: 123 } }, + summary: { title: 'Fix bug' } +}; + +function deterministicIds(): () => string { + let seq = 0; + return () => `sim_run_${++seq}`; +} + +function tickingClock(startMs = Date.parse('2026-06-03T12:00:00Z'), stepMs = 5): () => Date { + let current = startMs; + return () => new Date((current += stepMs)); +} + +test('simulateInvocation: successful run emits a Cloud-compatible record', async () => { + const result = await simulateInvocation({ + persona, + handler: handler(async (ctx, event) => { + ctx.log('info', 'saw event', { id: event.id }); + return 'handled weekly tick' as unknown as void; + }), + envelopes: [cronEnvelope], + runIdFactory: deterministicIds(), + now: tickingClock() + }); + + assert.equal(result.exitCode, 0); + assert.equal(result.origin, 'local_dry_run'); + assert.equal(result.summary.total, 1); + assert.equal(result.summary.succeeded, 1); + assert.equal(result.summary.failed, 0); + + const run = result.runs[0]; + // Core compact shape — field-for-field with Cloud's hosted compactBase + // + detail logs (cloud#1788), origin swapped to local_dry_run. + assert.equal(run.runId, 'sim_run_1'); + assert.equal(run.deploymentId, 'sim-deployment'); + assert.equal(run.agentId, 'sim-agent'); + assert.equal(run.status, 'succeeded'); + assert.equal(run.exitCode, 0); + assert.equal(run.summary, 'handled weekly tick'); + assert.equal(run.error, null); + assert.ok(run.startedAt.endsWith('Z')); + assert.ok(run.endedAt.endsWith('Z')); + assert.ok(run.durationMs >= 0); + assert.deepEqual(run.trigger, { kind: 'clock', eventSource: 'cron' }); + assert.deepEqual(run.sandbox, { id: null, name: 'local-simulation' }); + assert.equal(run.failureClass, 'success'); + assert.equal(run.origin, 'local_dry_run'); + assert.equal(run.logs.mountLogTail, ''); + assert.equal(run.logs.stdoutTruncated, false); + assert.equal(run.logs.stderrTruncated, false); + assert.match(run.logs.stdout, /"message":"saw event"/); + assert.equal(run.logs.stderr, ''); + assert.equal(run.simulation.mode, 'simulate'); +}); + +test('simulateInvocation: handler throw → failed run, replay continues', async () => { + const result = await simulateInvocation({ + persona, + handler: handler(async (_ctx, event) => { + if (event.source === 'cron') throw new Error('boom on cron'); + }), + envelopes: [cronEnvelope, githubEnvelope], + runIdFactory: deterministicIds(), + now: tickingClock() + }); + + assert.equal(result.exitCode, 1); + assert.equal(result.summary.total, 2); + assert.equal(result.summary.succeeded, 1); + assert.equal(result.summary.failed, 1); + + const [failed, succeeded] = result.runs; + assert.equal(failed.status, 'failed'); + assert.equal(failed.exitCode, 1); + assert.equal(failed.error, 'boom on cron'); + assert.equal(failed.summary, 'boom on cron'); // first error message fills summary + assert.equal(failed.failureClass, 'runner_error'); + assert.equal(succeeded.status, 'succeeded'); + assert.deepEqual(succeeded.trigger, { kind: 'inbox', eventSource: 'github' }); +}); + +test('simulateInvocation: every side-effect channel is recorded, not executed', async () => { + const result = await simulateInvocation({ + persona, + handler: handler(async (ctx) => { + await ctx.harness.run({ prompt: 'do the thing' }); + await ctx.llm.complete('quick question'); + await ctx.sandbox.exec('rm -rf / --no-preserve-root'); + await ctx.sandbox.writeFile('/notes.txt', 'written in sim'); + await ctx.sandbox.readFile('/notes.txt'); + await ctx.files.write('/slack/outbox/msg.json', '{"text":"hi"}'); + await ctx.files.read('/slack/outbox/msg.json'); + await ctx.memory.save('remember this', { scope: 'workspace' }); + await ctx.memory.recall('anything?'); + const wf = await ctx.workflow.run('reindex', { full: true }); + await ctx.workflow.status(wf.runId); + await ctx.schedule.at(new Date('2026-06-04T00:00:00Z'), { follow: 'up' }); + await ctx.schedule.cancel('weekly'); + }), + envelopes: [cronEnvelope], + runIdFactory: deterministicIds(), + now: tickingClock() + }); + + assert.equal(result.exitCode, 0); + const kinds = result.runs[0].simulation.sideEffects.map((effect) => effect.kind); + assert.deepEqual(kinds, [ + 'harness.run', + 'llm.complete', + 'sandbox.exec', + 'sandbox.writeFile', + 'sandbox.readFile', + 'files.write', + 'files.read', + 'memory.save', + 'memory.recall', + 'workflow.run', + 'workflow.status', + 'schedule.at', + 'schedule.cancel' + ]); + // The dangerous exec was recorded with an inert simulated result. + const exec = result.runs[0].simulation.sideEffects.find((e) => e.kind === 'sandbox.exec'); + assert.equal((exec?.simulatedResult as { exitCode: number }).exitCode, 0); + assert.match(String((exec?.simulatedResult as { note: string }).note), /not executed/); +}); + +test('simulateInvocation: reads of unseeded paths fail with a seeding hint', async () => { + const result = await simulateInvocation({ + persona, + handler: handler(async (ctx) => { + await ctx.files.read('/slack/channels/_index.json'); + }), + envelopes: [cronEnvelope], + runIdFactory: deterministicIds(), + now: tickingClock() + }); + + assert.equal(result.exitCode, 1); + assert.match(result.runs[0].error ?? '', /never seeded or written/); + assert.match(result.runs[0].error ?? '', /`files` option/); +}); + +test('simulateInvocation: seeded files are readable; VFS persists across envelopes', async () => { + const reads: string[] = []; + const result = await simulateInvocation({ + persona, + handler: handler(async (ctx, event) => { + if (event.source === 'cron') { + reads.push(await ctx.files.read('/seeded.json')); + await ctx.files.write('/from-event-1.txt', 'carried over'); + } else { + reads.push(await ctx.files.read('/from-event-1.txt')); + } + }), + envelopes: [cronEnvelope, githubEnvelope], + files: { '/seeded.json': '{"ok":true}' }, + runIdFactory: deterministicIds(), + now: tickingClock() + }); + + assert.equal(result.exitCode, 0); + assert.deepEqual(reads, ['{"ok":true}', 'carried over']); +}); + +test('simulateInvocation: unsupported envelopes are reported, not dispatched', async () => { + const unknownEnvelope: RawGatewayEnvelope = { + id: 'e3', + workspace: 'ws-test', + type: 'unknownprovider.something', + occurredAt: '2026-05-12T11:00:00Z' + }; + let invocations = 0; + const result = await simulateInvocation({ + persona, + handler: handler(async () => { + invocations += 1; + }), + envelopes: [unknownEnvelope, cronEnvelope], + runIdFactory: deterministicIds(), + now: tickingClock() + }); + + assert.equal(invocations, 1); + assert.equal(result.summary.total, 1); + assert.equal(result.summary.unsupported, 1); + assert.deepEqual(result.unsupported, [{ id: 'e3', type: 'unknownprovider.something' }]); + assert.equal(result.exitCode, 0); // unsupported is not a failure +}); + +test('simulateInvocation: workspaceId falls back to the first envelope workspace', async () => { + let seen: string | null = null; + await simulateInvocation({ + persona, + handler: handler(async (ctx) => { + seen = ctx.workspaceId; + }), + envelopes: [cronEnvelope], + runIdFactory: deterministicIds(), + now: tickingClock() + }); + assert.equal(seen, 'ws-test'); +}); + +test('simulateInvocation: explicit deployment triggerKind overrides derivation', async () => { + const result = await simulateInvocation({ + persona, + handler: handler(async () => {}), + envelopes: [cronEnvelope], + deployment: { triggerKind: 'radio' }, + runIdFactory: deterministicIds(), + now: tickingClock() + }); + assert.equal(result.runs[0].trigger.kind, 'radio'); +}); + +test('simulateInvocation: raw (unbranded) handler is accepted', async () => { + const result = await simulateInvocation({ + persona, + handler: async () => {}, + envelopes: [cronEnvelope], + runIdFactory: deterministicIds(), + now: tickingClock() + }); + assert.equal(result.exitCode, 0); +}); + +test('deriveSimulatedRunFailureClass: failed status can never read as success', () => { + assert.equal(deriveSimulatedRunFailureClass({ status: 'succeeded', error: null }), 'success'); + assert.equal(deriveSimulatedRunFailureClass({ status: 'failed', error: 'x' }), 'runner_error'); + // The cloud#1788 invariant: error present → never success, even with a + // success-ish status. + assert.equal(deriveSimulatedRunFailureClass({ status: 'succeeded', error: 'late error' }), 'runner_error'); +}); + +test('createSimulationSubsystems: sink swap attributes effects per run', async () => { + const subsystems = createSimulationSubsystems({ now: tickingClock() }); + const first: SimulationSink = { sideEffects: [], logs: [] }; + const second: SimulationSink = { sideEffects: [], logs: [] }; + + subsystems.useSink(first); + await subsystems.memory.save('one'); + subsystems.useSink(second); + await subsystems.memory.save('two'); + + assert.equal(first.sideEffects.length, 1); + assert.equal(second.sideEffects.length, 1); + assert.equal(subsystems.vfsSnapshot()['/x'], undefined); +}); diff --git a/packages/runtime/src/simulate/simulate.ts b/packages/runtime/src/simulate/simulate.ts new file mode 100644 index 00000000..ee2ceb9d --- /dev/null +++ b/packages/runtime/src/simulate/simulate.ts @@ -0,0 +1,236 @@ +import { randomUUID } from 'node:crypto'; +import { buildCtx } from '../ctx.js'; +import { isWorkforceHandler } from '../handler.js'; +import { shimEnvelope, type RawGatewayEnvelope } from '../shim.js'; +import type { + WorkforceAgentContext, + WorkforceDeploymentContext, + WorkforceEvent, + WorkforceHandler +} from '../types.js'; +import { deriveSimulatedRunFailureClass } from './failure-class.js'; +import { createSimulationSubsystems, type SimulationSink } from './subsystems.js'; +import type { + CapturedLogLine, + SimulateInvocationOptions, + SimulatedRunRecord, + SimulationResult, + UnsupportedEnvelope +} from './types.js'; + +/** + * TRUE invocation dry-run: execute the persona's handler against fixture + * envelopes with every external side effect recorded-not-executed, and + * emit one Cloud-compatible run record per dispatched envelope + * (`origin: "local_dry_run"`). + * + * This is distinct from the deploy-preflight `--dry-run` (which validates + * persona/config and exits without ever invoking the handler). Dispatch + * semantics mirror `startRunner`: envelopes are shimmed with the same + * `shimEnvelope`, unsupported envelopes are skipped (reported, not fatal), + * handler errors are caught per envelope and never abort the replay. + * + * Envelopes are materialized up front — fixture replay is finite by + * definition; this is not a long-lived stream consumer. + */ +export async function simulateInvocation( + options: SimulateInvocationOptions +): Promise { + const handlerFn = options.handler as WorkforceHandler; + if (typeof handlerFn !== 'function') { + throw new TypeError('simulateInvocation: options.handler must be a function'); + } + if (!isWorkforceHandler(handlerFn)) { + // Same soft acceptance as startRunner: raw functions work, branded + // handlers are preferred. No stderr noise in simulation — the caller + // owns presentation. + } + + const now = options.now ?? (() => new Date()); + const runIdFactory = options.runIdFactory ?? (() => `sim_${randomUUID()}`); + + const envelopes: RawGatewayEnvelope[] = []; + for await (const envelope of toAsyncIterable(options.envelopes)) { + envelopes.push(envelope); + } + + const workspaceId = + options.workspaceId ?? envelopes[0]?.workspace ?? 'ws-simulation'; + + const agent: WorkforceAgentContext & { + inputValues?: Record; + } = { + id: options.agent?.id ?? 'sim-agent', + deployedName: options.agent?.deployedName ?? options.persona.id, + spawnedByAgentId: options.agent?.spawnedByAgentId ?? null, + ...(options.agent?.inputValues ? { inputValues: options.agent.inputValues } : {}) + }; + + const deployment: WorkforceDeploymentContext = { + id: options.deployment?.id ?? 'sim-deployment', + triggerKind: options.deployment?.triggerKind ?? 'inbox', + parentDeploymentId: options.deployment?.parentDeploymentId ?? null + }; + const explicitTriggerKind = options.deployment?.triggerKind; + + const subsystems = createSimulationSubsystems({ + ...(options.files ? { files: options.files } : {}), + now + }); + + // buildCtx may throw for unresolved required persona inputs — that is a + // simulation setup error (seed inputs via `agent.inputValues`), surfaced + // to the caller before any run records exist. + const ctx = buildCtx({ + persona: options.persona, + agent, + deployment, + workspaceId, + sandbox: subsystems.sandbox, + files: subsystems.files, + llm: subsystems.llm, + memory: subsystems.memory, + workflow: subsystems.workflow, + schedule: subsystems.schedule, + log: subsystems.log, + harnessRunner: subsystems.harnessRunner + }); + + const startedAtDate = now(); + const runs: SimulatedRunRecord[] = []; + const unsupported: UnsupportedEnvelope[] = []; + + for (const raw of envelopes) { + const event = shimEnvelope(raw); + if (!event) { + unsupported.push({ id: raw.id ?? '(missing id)', type: raw.type ?? '(missing type)' }); + continue; + } + + const sink: SimulationSink = { sideEffects: [], logs: [] }; + subsystems.useSink(sink); + + const runStarted = now(); + let summary: string | null = null; + let error: string | null = null; + try { + const returned: unknown = await handlerFn(ctx, event); + summary = summaryFromHandlerReturn(returned); + } catch (err) { + error = err instanceof Error ? err.message : String(err); + } + const runEnded = now(); + + const status: 'succeeded' | 'failed' = error === null ? 'succeeded' : 'failed'; + runs.push({ + runId: runIdFactory(), + deploymentId: deployment.id, + agentId: agent.id, + status, + exitCode: error === null ? 0 : 1, + summary: summary ?? (error !== null ? error : null), + error, + startedAt: runStarted.toISOString(), + endedAt: runEnded.toISOString(), + durationMs: runEnded.getTime() - runStarted.getTime(), + trigger: { + kind: explicitTriggerKind ?? triggerKindForEvent(event), + eventSource: event.source + }, + sandbox: { id: null, name: 'local-simulation' }, + failureClass: deriveSimulatedRunFailureClass({ status, error }), + origin: 'local_dry_run', + logs: { + stdout: renderLogLines(sink.logs, ['debug', 'info']), + stderr: renderLogLines(sink.logs, ['warn', 'error']), + mountLogTail: '', + stdoutTruncated: false, + stderrTruncated: false + }, + simulation: { + mode: 'simulate', + sideEffects: sink.sideEffects, + capturedLogs: sink.logs + } + }); + } + + const endedAtDate = now(); + const succeeded = runs.filter((run) => run.status === 'succeeded').length; + const failed = runs.length - succeeded; + + return { + origin: 'local_dry_run', + mode: 'simulate', + startedAt: startedAtDate.toISOString(), + endedAt: endedAtDate.toISOString(), + durationMs: endedAtDate.getTime() - startedAtDate.getTime(), + runs, + unsupported, + summary: { + total: runs.length, + succeeded, + failed, + unsupported: unsupported.length + }, + exitCode: failed > 0 ? 1 : 0 + }; +} + +/** + * Handlers are typed `Promise | void`, but raw handlers may return a + * value. A string return (or an object carrying a string `summary`) becomes + * the run record's `summary` so Cloud's list view shows something + * meaningful; anything else is ignored. + */ +function summaryFromHandlerReturn(returned: unknown): string | null { + if (typeof returned === 'string' && returned.trim().length > 0) { + return returned; + } + if ( + typeof returned === 'object' && + returned !== null && + 'summary' in returned && + typeof (returned as { summary: unknown }).summary === 'string' && + (returned as { summary: string }).summary.trim().length > 0 + ) { + return (returned as { summary: string }).summary; + } + return null; +} + +/** + * Map an event source onto the workforce trigger vocabulary Cloud's + * `trigger_kind` column stores: cron ticks are `clock`, provider events + * are `inbox`. An explicit deployment.triggerKind override wins (handled + * by the caller). + */ +function triggerKindForEvent(event: WorkforceEvent): string { + return event.source === 'cron' ? 'clock' : 'inbox'; +} + +function renderLogLines( + logs: CapturedLogLine[], + levels: ReadonlyArray +): string { + // Mirrors the runner's stream split: debug/info → stdout, warn/error → + // stderr, one JSON line per emission (same shape defaultRunnerLog writes). + return logs + .filter((line) => levels.includes(line.level)) + .map((line) => + JSON.stringify({ t: line.t, level: line.level, message: line.message, ...(line.attrs ?? {}) }) + ) + .join('\n'); +} + +function toAsyncIterable( + value: Iterable | AsyncIterable +): AsyncIterable { + if (Symbol.asyncIterator in (value as AsyncIterable)) { + return value as AsyncIterable; + } + const iterable = value as Iterable; + return (async function* () { + for (const item of iterable) yield item; + })(); +} diff --git a/packages/runtime/src/simulate/subsystems.ts b/packages/runtime/src/simulate/subsystems.ts new file mode 100644 index 00000000..0d3fd476 --- /dev/null +++ b/packages/runtime/src/simulate/subsystems.ts @@ -0,0 +1,206 @@ +import type { + FilesContext, + HarnessRunArgs, + HarnessRunResult, + LlmContext, + MemoryContext, + MemoryItem, + SandboxContext, + ScheduleContext, + WorkflowContext, + WorkforceCtx +} from '../types.js'; +import type { CapturedLogLine, RecordedSideEffect } from './types.js'; + +/** + * Mutable sink the orchestrator swaps per envelope so side effects and + * logs attribute to the run that made them. Handlers run sequentially + * (the runner's for-await contract), so a single swappable sink is safe. + */ +export interface SimulationSink { + sideEffects: RecordedSideEffect[]; + logs: CapturedLogLine[]; +} + +export interface SimulationSubsystems { + sandbox: SandboxContext; + files: FilesContext; + llm: LlmContext; + memory: MemoryContext; + workflow: WorkflowContext; + schedule: ScheduleContext; + log: WorkforceCtx['log']; + harnessRunner: (args: HarnessRunArgs) => Promise; + /** Point subsequent recordings at a new sink (one per envelope). */ + useSink(sink: SimulationSink): void; + /** Read-only view of the simulated VFS (seeded + handler-written files). */ + vfsSnapshot(): Record; +} + +const SIMULATED = '[simulated — not executed]'; + +/** + * Build recording, no-side-effect implementations of every ctx channel a + * handler can reach. Nothing leaves the process: HTTP-backed subsystems + * (memory, workflow, schedule, llm), the harness, and the sandbox shell + * all return inert simulated results and record the call. File writes go + * to an in-memory map shared across the simulation so a handler that + * writes in one event can read it back in the next. + */ +export function createSimulationSubsystems(options?: { + files?: Record; + now?: () => Date; +}): SimulationSubsystems { + const now = options?.now ?? (() => new Date()); + const vfs = new Map(Object.entries(options?.files ?? {})); + let sink: SimulationSink = { sideEffects: [], logs: [] }; + + let memorySeq = 0; + let workflowSeq = 0; + + function record( + kind: RecordedSideEffect['kind'], + args: Record, + simulatedResult?: unknown + ): void { + sink.sideEffects.push({ + kind, + at: now().toISOString(), + args, + ...(simulatedResult !== undefined ? { simulatedResult } : {}) + }); + } + + function readPath(kind: 'sandbox.readFile' | 'files.read', path: string): string { + const contents = vfs.get(path); + if (contents === undefined) { + record(kind, { path }, { error: 'path not seeded' }); + throw new Error( + `simulation: read of path "${path}" that was never seeded or written. ` + + 'Seed it via the simulation `files` option (e.g. provider VFS data the handler expects).' + ); + } + record(kind, { path }, { bytes: contents.length }); + return contents; + } + + const sandbox: SandboxContext = { + cwd: '/simulated', + async exec(cmd, opts) { + const result = { output: '', exitCode: 0 }; + record('sandbox.exec', { cmd, ...(opts?.cwd ? { cwd: opts.cwd } : {}) }, { + ...result, + note: SIMULATED + }); + return result; + }, + async readFile(path) { + return readPath('sandbox.readFile', path); + }, + async writeFile(path, contents) { + vfs.set(path, contents); + record('sandbox.writeFile', { path, bytes: contents.length }); + } + }; + + const files: FilesContext = { + async read(path) { + return readPath('files.read', path); + }, + async write(path, contents) { + vfs.set(path, contents); + record('files.write', { path, bytes: contents.length }); + } + }; + + const llm: LlmContext = { + async complete(prompt, opts) { + record('llm.complete', { + promptChars: prompt.length, + ...(opts?.maxTokens !== undefined ? { maxTokens: opts.maxTokens } : {}) + }, { note: SIMULATED }); + return SIMULATED; + } + }; + + const memory: MemoryContext = { + async save(content, opts) { + const id = `sim-mem-${++memorySeq}`; + record('memory.save', { + contentChars: content.length, + ...(opts?.scope ? { scope: opts.scope } : {}), + ...(opts?.tags ? { tags: opts.tags } : {}) + }, { id }); + return { id }; + }, + async recall(query, opts) { + const items: MemoryItem[] = []; + record('memory.recall', { + query, + ...(opts?.limit !== undefined ? { limit: opts.limit } : {}) + }, { items: 0 }); + return items; + } + }; + + const workflow: WorkflowContext = { + async run(name, args) { + const runId = `sim-wf-${++workflowSeq}`; + record('workflow.run', { name, ...(args ? { args } : {}) }, { runId, note: SIMULATED }); + return { + runId, + async completion() { + return { output: null, status: 'success' as const }; + } + }; + }, + async status(runId) { + record('workflow.status', { runId }, { status: 'success' }); + return { status: 'success' as const }; + } + }; + + const schedule: ScheduleContext = { + async at(when, payload) { + record('schedule.at', { when: when.toISOString(), payload }, { note: SIMULATED }); + }, + async cancel(name) { + record('schedule.cancel', { name }, { note: SIMULATED }); + } + }; + + const log: WorkforceCtx['log'] = (level, message, attrs) => { + sink.logs.push({ + t: now().toISOString(), + level, + message, + ...(attrs ? { attrs } : {}) + }); + }; + + const harnessRunner = async (args: HarnessRunArgs): Promise => { + const result: HarnessRunResult = { output: SIMULATED, exitCode: 0, durationMs: 0 }; + record('harness.run', { + promptChars: args.prompt.length, + ...(args.cwd ? { cwd: args.cwd } : {}) + }, { exitCode: 0, note: SIMULATED }); + return result; + }; + + return { + sandbox, + files, + llm, + memory, + workflow, + schedule, + log, + harnessRunner, + useSink(next) { + sink = next; + }, + vfsSnapshot() { + return Object.fromEntries(vfs.entries()); + } + }; +} diff --git a/packages/runtime/src/simulate/types.ts b/packages/runtime/src/simulate/types.ts new file mode 100644 index 00000000..819eeca4 --- /dev/null +++ b/packages/runtime/src/simulate/types.ts @@ -0,0 +1,155 @@ +import type { PersonaSpec } from '@agentworkforce/persona-kit'; +import type { RawGatewayEnvelope } from '../shim.js'; +import type { + WorkforceAgentContext, + WorkforceDeploymentContext, + WorkforceHandler, + WorkforceHandlerExport +} from '../types.js'; +import type { SimulatedRunFailureClass } from './failure-class.js'; + +/** + * One intercepted subsystem call. Simulation never executes external + * effects — every call a handler makes through ctx is recorded here with + * the simulated result it received instead. + */ +export interface RecordedSideEffect { + /** Which ctx channel the handler called, e.g. `memory.save`. */ + kind: + | 'harness.run' + | 'llm.complete' + | 'sandbox.exec' + | 'sandbox.readFile' + | 'sandbox.writeFile' + | 'files.read' + | 'files.write' + | 'memory.save' + | 'memory.recall' + | 'workflow.run' + | 'workflow.status' + | 'schedule.at' + | 'schedule.cancel'; + /** ISO timestamp the call was made. */ + at: string; + /** Call arguments, normalized to a plain record for serialization. */ + args: Record; + /** Summary of the simulated value returned to the handler. */ + simulatedResult?: unknown; +} + +/** A single captured `ctx.log(...)` emission. */ +export interface CapturedLogLine { + t: string; + level: 'debug' | 'info' | 'warn' | 'error'; + message: string; + attrs?: Record; +} + +/** + * Machine-readable record for one simulated invocation (one envelope). + * + * Field names and types MIRROR Cloud's hosted compact run shape + * (cloud `packages/web/lib/proactive-runtime/deployment-run-observability.ts` + * `compactBase` + detail `logs`, AgentWorkforce/cloud#1788) with + * `origin: "local_dry_run"` — the origin value Cloud reserved for exactly + * this ingestion. Simulation-only data is nested under the additive + * `simulation` key so the core shape stays byte-compatible. + */ +export interface SimulatedRunRecord { + runId: string; + deploymentId: string; + agentId: string; + /** Cloud status vocabulary: `succeeded` | `failed`. */ + status: 'succeeded' | 'failed'; + exitCode: number; + /** + * Populated from the handler's returned value when it returns a string + * (or an object with a string `summary`), so Cloud's list view shows + * something meaningful. Null when the handler returns nothing. + */ + summary: string | null; + error: string | null; + startedAt: string; + endedAt: string; + durationMs: number; + trigger: { + /** Workforce trigger vocabulary (`inbox` | `clock` | `radio`). */ + kind: string; + eventSource: string; + }; + sandbox: { + id: string | null; + name: string | null; + }; + failureClass: SimulatedRunFailureClass; + origin: 'local_dry_run'; + logs: { + stdout: string; + stderr: string; + mountLogTail: string; + stdoutTruncated: boolean; + stderrTruncated: boolean; + }; + /** Simulation-only extension; absent from hosted records. */ + simulation: { + mode: 'simulate'; + sideEffects: RecordedSideEffect[]; + capturedLogs: CapturedLogLine[]; + }; +} + +/** Envelope the simulation could not shim into a dispatchable event. */ +export interface UnsupportedEnvelope { + id: string; + type: string; +} + +export interface SimulationResult { + origin: 'local_dry_run'; + mode: 'simulate'; + startedAt: string; + endedAt: string; + durationMs: number; + runs: SimulatedRunRecord[]; + unsupported: UnsupportedEnvelope[]; + summary: { + total: number; + succeeded: number; + failed: number; + unsupported: number; + }; + /** 0 when every dispatched envelope succeeded; 1 when any failed. */ + exitCode: 0 | 1; +} + +export interface SimulateInvocationOptions { + /** Parsed persona JSON, same shape `startRunner` takes. */ + persona: PersonaSpec; + /** The persona's `onEvent` handler (branded or raw function). */ + handler: WorkforceHandlerExport | WorkforceHandler; + /** Fixture envelopes to replay, in order. */ + envelopes: + | Iterable + | AsyncIterable; + /** Agent row context; a local placeholder is synthesized when omitted. */ + agent?: Partial & { + inputValues?: Record; + }; + /** Deployment context; a local placeholder is synthesized when omitted. */ + deployment?: Partial; + /** + * Workspace id. Falls back to the first envelope's `workspace`, then + * `ws-simulation`. Never read from the environment. + */ + workspaceId?: string; + /** + * Seed contents for the simulated in-memory filesystem, keyed by path. + * Reads of unseeded, unwritten paths fail with a clear message that + * names this option. + */ + files?: Record; + /** Clock override for deterministic tests. */ + now?: () => Date; + /** Run id factory override for deterministic tests. */ + runIdFactory?: () => string; +}