diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index bc639d047..d80c7998d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -12,7 +12,6 @@ on: - main - cli-prerelease - sdk - - workflow-types - sdk-py - brand default: 'all' @@ -687,7 +686,6 @@ jobs: (cd packages/config && npm pack --ignore-scripts --pack-destination "$TARBALLS") (cd packages/github-primitive && npm pack --ignore-scripts --pack-destination "$TARBALLS") (cd packages/slack-primitive && npm pack --ignore-scripts --pack-destination "$TARBALLS") - (cd packages/workflow-types && npm pack --ignore-scripts --pack-destination "$TARBALLS") ls -lh "$TARBALLS" - name: Install tarballs into scratch project @@ -705,11 +703,10 @@ jobs: CONFIG_TGZ=$(ls "$TARBALLS"/agent-relay-config-*.tgz | head -n1) GITHUB_PRIMITIVE_TGZ=$(ls "$TARBALLS"/agent-relay-github-primitive-*.tgz | head -n1) SLACK_PRIMITIVE_TGZ=$(ls "$TARBALLS"/agent-relay-slack-primitive-*.tgz | head -n1) - WORKFLOW_TYPES_TGZ=$(ls "$TARBALLS"/agent-relay-workflow-types-*.tgz | head -n1) - echo "Installing $SDK_TGZ + $BROKER_TGZ + $CLOUD_TGZ + $CONFIG_TGZ + $GITHUB_PRIMITIVE_TGZ + $SLACK_PRIMITIVE_TGZ + $WORKFLOW_TYPES_TGZ" + echo "Installing $SDK_TGZ + $BROKER_TGZ + $CLOUD_TGZ + $CONFIG_TGZ + $GITHUB_PRIMITIVE_TGZ + $SLACK_PRIMITIVE_TGZ" npm install --ignore-scripts --no-audit --no-fund \ "$SDK_TGZ" "$BROKER_TGZ" "$CLOUD_TGZ" "$CONFIG_TGZ" \ - "$GITHUB_PRIMITIVE_TGZ" "$SLACK_PRIMITIVE_TGZ" "$WORKFLOW_TYPES_TGZ" + "$GITHUB_PRIMITIVE_TGZ" "$SLACK_PRIMITIVE_TGZ" ls node_modules/@agent-relay/ - name: Resolver smoke — getBrokerBinaryPath() @@ -763,13 +760,12 @@ jobs: CONFIG_TGZ=$(ls "$TARBALLS"/agent-relay-config-*.tgz | head -n1) GITHUB_PRIMITIVE_TGZ=$(ls "$TARBALLS"/agent-relay-github-primitive-*.tgz | head -n1) SLACK_PRIMITIVE_TGZ=$(ls "$TARBALLS"/agent-relay-slack-primitive-*.tgz | head -n1) - WORKFLOW_TYPES_TGZ=$(ls "$TARBALLS"/agent-relay-workflow-types-*.tgz | head -n1) # Install SDK + every internal required dep whose bumped version # isn't on the registry yet, but skip the broker optional deps # entirely. The resolver should return null and spawn() should # throw the clear error. npm install --ignore-scripts --no-audit --no-fund --no-optional \ - "$SDK_TGZ" "$CLOUD_TGZ" "$CONFIG_TGZ" "$GITHUB_PRIMITIVE_TGZ" "$SLACK_PRIMITIVE_TGZ" "$WORKFLOW_TYPES_TGZ" + "$SDK_TGZ" "$CLOUD_TGZ" "$CONFIG_TGZ" "$GITHUB_PRIMITIVE_TGZ" "$SLACK_PRIMITIVE_TGZ" node --input-type=module -e " import { AgentRelayClient } from '@agent-relay/sdk'; try { @@ -808,7 +804,6 @@ jobs: - events - github-primitive - slack-primitive - - workflow-types steps: - name: Checkout code @@ -847,52 +842,6 @@ jobs: fi npm publish --access public --provenance --tag ${{ github.event.inputs.tag }} --ignore-scripts - # Publish workflow-types only. This is intentionally narrow: it repairs - # already-published SDK versions whose exact-version workflow-types package - # was missing, without attempting to republish @agent-relay/sdk. - publish-workflow-types-only: - name: Publish Workflow Types to NPM - needs: build - runs-on: ubuntu-latest - if: github.event.inputs.package == 'workflow-types' - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '22.14.0' - registry-url: 'https://registry.npmjs.org' - - - name: Download build artifacts - uses: actions/download-artifact@v4 - with: - name: build-output - path: . - - - name: Update npm for OIDC support - run: npm install -g npm@latest - - - name: Dry run check - if: github.event.inputs.dry_run == 'true' - working-directory: packages/workflow-types - run: npm publish --dry-run --access public --tag ${{ github.event.inputs.tag }} --ignore-scripts - - - name: Publish Workflow Types to NPM - if: github.event.inputs.dry_run != 'true' - working-directory: packages/workflow-types - run: | - set -euo pipefail - PKG_NAME=$(node -p "require('./package.json').name") - PKG_VERSION=$(node -p "require('./package.json').version") - if npm view "${PKG_NAME}@${PKG_VERSION}" version >/dev/null 2>&1; then - echo "${PKG_NAME}@${PKG_VERSION} already exists on npm; skipping publish" - exit 0 - fi - npm publish --access public --provenance --tag ${{ github.event.inputs.tag }} --ignore-scripts - # Publish the per-platform broker packages first. @agent-relay/sdk declares # these as exact-version optionalDependencies, so they must exist on the # registry at the matching version before the SDK is published — otherwise @@ -2305,7 +2254,6 @@ jobs: verify-acp-macos, smoke-broker-packages, publish-sdk-internal-deps, - publish-workflow-types-only, publish-broker-packages, publish-packages, publish-brand-only, @@ -2351,7 +2299,6 @@ jobs: echo "| Verify relay-acp (macOS) | ${{ needs.verify-acp-macos.result == 'success' && '✅' || (needs.verify-acp-macos.result == 'skipped' && '⏭️' || '❌') }} ${{ needs.verify-acp-macos.result }} |" >> $GITHUB_STEP_SUMMARY echo "| Smoke Broker Packages | ${{ needs.smoke-broker-packages.result == 'success' && '✅' || (needs.smoke-broker-packages.result == 'skipped' && '⏭️' || '❌') }} ${{ needs.smoke-broker-packages.result }} |" >> $GITHUB_STEP_SUMMARY echo "| Publish SDK Internal Deps | ${{ needs.publish-sdk-internal-deps.result == 'success' && '✅' || (needs.publish-sdk-internal-deps.result == 'skipped' && '⏭️' || '❌') }} ${{ needs.publish-sdk-internal-deps.result }} |" >> $GITHUB_STEP_SUMMARY - echo "| Publish Workflow Types Only | ${{ needs.publish-workflow-types-only.result == 'success' && '✅' || (needs.publish-workflow-types-only.result == 'skipped' && '⏭️' || '❌') }} ${{ needs.publish-workflow-types-only.result }} |" >> $GITHUB_STEP_SUMMARY echo "| Publish Broker Packages | ${{ needs.publish-broker-packages.result == 'success' && '✅' || (needs.publish-broker-packages.result == 'skipped' && '⏭️' || '❌') }} ${{ needs.publish-broker-packages.result }} |" >> $GITHUB_STEP_SUMMARY echo "| Publish Packages | ${{ needs.publish-packages.result == 'success' && '✅' || (needs.publish-packages.result == 'skipped' && '⏭️' || '❌') }} ${{ needs.publish-packages.result }} |" >> $GITHUB_STEP_SUMMARY echo "| Publish Brand | ${{ needs.publish-brand-only.result == 'success' && '✅' || (needs.publish-brand-only.result == 'skipped' && '⏭️' || '❌') }} ${{ needs.publish-brand-only.result }} |" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/relay-cleanroom-hardening.yml b/.github/workflows/relay-cleanroom-hardening.yml deleted file mode 100644 index b7ffd3112..000000000 --- a/.github/workflows/relay-cleanroom-hardening.yml +++ /dev/null @@ -1,82 +0,0 @@ -name: Relay Clean-Room Hardening - -on: - pull_request: - branches: [main] - paths: - - 'install.sh' - - 'packages/**' - - '!packages/sdk-swift/**' - - 'workflows/relay-e2e-meta-workflow.ts' - - 'workflows/relay-clean-room-e2e-validation.ts' - - 'scripts/run-relay-cleanroom-ci.sh' - - '.github/workflows/relay-cleanroom-hardening.yml' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -env: - AGENT_RELAY_TELEMETRY_DISABLED: 1 - -jobs: - cleanroom-hardening: - name: Clean-Room Hardening (${{ matrix.os }}) - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [macos-latest] - timeout-minutes: 20 - - permissions: - contents: read - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '22' - cache: 'npm' - - - name: Install dependencies - run: npm ci - - - name: Build local CLI/runtime - run: npm run build - - - name: Link local CLI - run: cd packages/cli && npm link - - - name: Verify CLI - run: agent-relay --version - - - name: Run clean-room hardening validator - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: bash scripts/run-relay-cleanroom-ci.sh - - - name: Upload clean-room artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: relay-cleanroom-${{ matrix.os }} - path: .e2e-artifacts/ - if-no-files-found: warn - - - name: Append job summary - if: always() - run: | - if [ -f .e2e-artifacts/ci-summary.md ]; then - cat .e2e-artifacts/ci-summary.md >> "$GITHUB_STEP_SUMMARY" - else - echo "No CI summary produced." >> "$GITHUB_STEP_SUMMARY" - fi - if [ -f .e2e-artifacts/verdict.md ]; then - printf '\n## Reviewer Verdict\n\n' >> "$GITHUB_STEP_SUMMARY" - cat .e2e-artifacts/verdict.md >> "$GITHUB_STEP_SUMMARY" - fi diff --git a/.github/workflows/workflow-reliability.yml b/.github/workflows/workflow-reliability.yml deleted file mode 100644 index 5e1f72b76..000000000 --- a/.github/workflows/workflow-reliability.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: Workflow Reliability - -on: - pull_request: - branches: [main] - paths: - - '.github/workflows/workflow-reliability.yml' - - 'packages/sdk/src/workflows/**' - - 'packages/sdk/package.json' - - 'packages/workflow-types/**' - - 'package-lock.json' - - 'package.json' - push: - branches: [main] - paths: - - '.github/workflows/workflow-reliability.yml' - - 'packages/sdk/src/workflows/**' - - 'packages/sdk/package.json' - - 'packages/workflow-types/**' - - 'package-lock.json' - - 'package.json' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - workflow-reliability: - name: SDK Workflow Reliability - runs-on: ubuntu-latest - env: - NPM_CONFIG_FUND: false - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '22' - cache: 'npm' - - - name: Install dependencies - run: npm ci - - - name: Typecheck SDK workflows - run: npm --prefix packages/sdk run check - - - name: Run workflow reliability contract matrix - run: | - npx vitest run --root packages/sdk --config vitest.config.ts \ - src/workflows/__tests__/workflow-reliability-contract.test.ts \ - src/workflows/__tests__/workflow-reliability-e2e.test.ts diff --git a/.github/workflows/workflow-validation.yml b/.github/workflows/workflow-validation.yml deleted file mode 100644 index 5117abaa3..000000000 --- a/.github/workflows/workflow-validation.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: Workflow Validation - -on: - pull_request: - branches: [main] - paths: - - 'workflows/**' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - validate: - name: Validate & Dry Run - runs-on: ubuntu-latest - env: - NPM_CONFIG_FUND: false - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '22' - cache: 'npm' - - - name: Cache node_modules - id: cache-modules - uses: actions/cache@v4 - with: - path: | - node_modules - packages/*/node_modules - key: modules-${{ hashFiles('package-lock.json') }} - - - name: Install dependencies - if: steps.cache-modules.outputs.cache-hit != 'true' - run: npm ci - - - name: Build packages - run: npm run build - - - name: Find changed workflow files - id: changed - env: - BASE_REF: ${{ github.base_ref }} - run: | - files=$(git diff --name-only "origin/$BASE_REF"...HEAD -- 'workflows/**' \ - | grep -E '\.(ts|yaml|yml)$' || true) - echo "files<> "$GITHUB_OUTPUT" - echo "$files" >> "$GITHUB_OUTPUT" - echo "EOF" >> "$GITHUB_OUTPUT" - echo "Found: $files" - - - name: Validate & dry run workflows - if: steps.changed.outputs.files != '' - env: - CHANGED_FILES: ${{ steps.changed.outputs.files }} - run: | - for f in $CHANGED_FILES; do - echo "=== $f ===" - if [[ "$f" == *.yaml || "$f" == *.yml ]]; then - npx tsx packages/sdk/src/workflows/cli.ts "$f" --validate - DRY_RUN=1 npx tsx packages/sdk/src/workflows/cli.ts "$f" - fi - echo "" - done diff --git a/package.json b/package.json index 0cf1b1adf..0d3f2726c 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ "web" ], "scripts": { - "typecheck": "npm run build:config && npm --prefix packages/workflow-types run build && npm --prefix packages/github-primitive run build && npm --prefix packages/slack-primitive run build && npm --prefix packages/cloud run build && npm run build:utils && npm run build:trajectory && npm run build:policy && npm run build:sdk && npm run build:hooks && npm run build:memory && npm run build:telemetry && cd packages/cli && npx tsc --noEmit", + "typecheck": "npm run build:config && npm --prefix packages/github-primitive run build && npm --prefix packages/slack-primitive run build && npm --prefix packages/cloud run build && npm run build:utils && npm run build:trajectory && npm run build:policy && npm run build:sdk && npm run build:hooks && npm run build:memory && npm run build:telemetry && cd packages/cli && npx tsc --noEmit", "build": "npm run clean && npm run build:rust && turbo run build --filter='./packages/*'", "build:packages": "turbo run build --filter='./packages/*'", "build:packages:watch": "turbo run build --filter='./packages/*' --watch", @@ -34,8 +34,7 @@ "build:rust": "if command -v ~/.cargo/bin/cargo >/dev/null 2>&1; then ~/.cargo/bin/cargo build --release --bin agent-relay-broker && mkdir -p packages/sdk/bin && cp target/release/agent-relay-broker packages/sdk/bin/agent-relay-broker.new && mv -f packages/sdk/bin/agent-relay-broker.new packages/sdk/bin/agent-relay-broker && echo '✓ broker binary (agent-relay-broker) built and copied to packages/sdk/bin/'; else echo '⚠ Rust not installed, skipping local broker build; installs use platform optional dependencies'; fi", "start": "node packages/cli/dist/cli/index.js", "pretest": "npm run build", - "test": "vitest run && npm run test:sdk:vitest", - "test:sdk:vitest": "cd packages/sdk && vitest run --config vitest.config.ts src/workflows/__tests__/run-script.test.ts", + "test": "vitest run", "pretest:coverage": "npm run build", "test:coverage": "vitest run --coverage", "test:watch": "vitest", diff --git a/packages/browser-primitive/examples/browser-workflow.ts b/packages/browser-primitive/examples/browser-workflow.ts deleted file mode 100644 index 211a08610..000000000 --- a/packages/browser-primitive/examples/browser-workflow.ts +++ /dev/null @@ -1,138 +0,0 @@ -import { WorkflowRunner, type RelayYamlConfig } from '@agent-relay/sdk/workflows'; - -import { BrowserStepExecutor, createBrowserStep } from '../src/workflow-step.js'; - -const browserExecutor = new BrowserStepExecutor(); - -const config: RelayYamlConfig = { - version: '1.0', - name: 'browser-primitive-workflow', - description: 'Browser primitive workflow with chained actions and captured output.', - swarm: { - pattern: 'pipeline', - }, - agents: [], - workflows: [ - { - name: 'browser-primitive-workflow', - steps: [ - createBrowserStep({ - name: 'inspect-example-page', - sessionId: 'example-page-session', - config: { - browser: 'chromium', - headless: true, - viewport: { width: 1280, height: 720 }, - captureConsole: true, - persistSession: true, - }, - actions: [ - { - action: 'goto', - params: { - url: 'https://example.com', - waitUntil: 'domcontentloaded', - }, - }, - { - action: 'text', - id: 'heading', - params: { - selector: 'h1', - innerText: true, - }, - }, - ], - output: { - mode: 'last', - format: 'text', - }, - }), - createBrowserStep({ - name: 'use-captured-heading', - dependsOn: ['inspect-example-page'], - sessionId: 'example-page-session', - config: { - browser: 'chromium', - headless: true, - persistSession: true, - }, - actions: [ - { - action: 'evaluate', - params: { - script: '() => `Current title from the persisted session: ${document.title}`', - }, - }, - ], - output: { - mode: 'last', - format: 'text', - }, - }), - createBrowserStep({ - name: 'capture-page-report', - dependsOn: ['use-captured-heading'], - sessionId: 'example-page-session', - config: { - browser: 'chromium', - headless: true, - persistSession: true, - }, - actions: [ - { - action: 'evaluate', - id: 'pageFacts', - outputKey: 'pageFacts', - capture: true, - params: { - script: '() => ({ title: document.title, links: document.links.length })', - }, - }, - { - action: 'screenshot', - id: 'screenshot', - outputKey: 'screenshot', - capture: true, - params: { - path: 'artifacts/example-page.png', - fullPage: true, - }, - }, - ], - output: { - mode: 'captures', - includeMetadata: true, - includeSession: true, - pretty: true, - }, - closeSession: true, - }), - ], - }, - ], - errorHandling: { - strategy: 'fail-fast', - }, -}; - -async function main(): Promise { - const runner = new WorkflowRunner({ - cwd: process.cwd(), - executor: browserExecutor, - }); - - const result = await runner.execute(config); - console.log(`Browser workflow completed: ${result.status}`); -} - -if (process.argv[1] === new URL(import.meta.url).pathname) { - main() - .catch((error) => { - console.error(error instanceof Error ? error.stack : error); - process.exitCode = 1; - }) - .finally(async () => { - await browserExecutor.closeAll(); - }); -} diff --git a/packages/browser-primitive/package.json b/packages/browser-primitive/package.json index 6b5ca38ec..8bb5ff5ab 100644 --- a/packages/browser-primitive/package.json +++ b/packages/browser-primitive/package.json @@ -11,11 +11,6 @@ "import": "./dist/index.js", "default": "./dist/index.js" }, - "./workflow-step": { - "types": "./dist/workflow-step.d.ts", - "import": "./dist/workflow-step.js", - "default": "./dist/workflow-step.js" - }, "./mcp-server": { "types": "./dist/mcp-server.d.ts", "import": "./dist/mcp-server.js", diff --git a/packages/browser-primitive/src/index.ts b/packages/browser-primitive/src/index.ts index 076627102..793880e77 100644 --- a/packages/browser-primitive/src/index.ts +++ b/packages/browser-primitive/src/index.ts @@ -1,6 +1,5 @@ export { BrowserClient, type BrowserClientOptions } from './browser-client.js'; export * from './types.js'; -export * from './workflow-step.js'; export * from './mcp-server.js'; export * from './actions/navigation.js'; diff --git a/packages/browser-primitive/src/mcp-server.ts b/packages/browser-primitive/src/mcp-server.ts index 604da45cc..ac186cae4 100644 --- a/packages/browser-primitive/src/mcp-server.ts +++ b/packages/browser-primitive/src/mcp-server.ts @@ -10,12 +10,6 @@ import type { BrowserConfig, BrowserSession, } from './types.js'; -import { - BrowserStepExecutor, - type BrowserStepConfig, - type BrowserStepExecutionResult, -} from './workflow-step.js'; - export type JsonRpcId = string | number | null; export interface JsonRpcRequest { @@ -68,11 +62,6 @@ export interface BrowserActionsExecuteParams { config?: BrowserConfig; } -export interface BrowserStepExecuteParams { - step: BrowserStepConfig; - workspaceId?: string; -} - export class BrowserMcpSessionManager { private readonly clients = new Map(); private defaultSessionId?: string; @@ -161,7 +150,6 @@ export class BrowserMcpServer { private readonly serverName: string; private readonly serverVersion: string; private readonly sessions: BrowserMcpSessionManager; - private readonly stepExecutor: BrowserStepExecutor; private rl?: ReadlineInterface; constructor(options: BrowserMcpServerOptions = {}) { @@ -171,7 +159,6 @@ export class BrowserMcpServer { this.serverName = options.serverName ?? 'agent-relay-browser'; this.serverVersion = options.serverVersion ?? '1.0.0'; this.sessions = new BrowserMcpSessionManager(options.defaultConfig); - this.stepExecutor = new BrowserStepExecutor({ config: options.defaultConfig }); } start(): void { @@ -234,7 +221,6 @@ export class BrowserMcpServer { async shutdown(exitCode?: number): Promise { await this.sessions.closeAll(); - await this.stepExecutor.closeAll(); if (exitCode !== undefined) { process.exit(exitCode); } @@ -284,9 +270,6 @@ export class BrowserMcpServer { case 'browser.actions.execute': return this.executeActions(params); - case 'browser.step.execute': - return this.executeStep(params); - default: throw jsonRpcMethodNotFound(method); } @@ -316,10 +299,6 @@ export class BrowserMcpServer { const result = await this.executeActions(args); return toMcpToolResult(result, hasActionErrors(result)); } - case 'browser_step_execute': { - const result = await this.executeStep(args); - return toMcpToolResult(result, !(result as BrowserStepExecutionResult).success); - } default: throw new Error(`Unknown browser tool: ${name}`); } @@ -375,14 +354,6 @@ export class BrowserMcpServer { return client.executeMany(record.actions as BrowserActionRequest[]); } - private async executeStep(params: unknown): Promise { - const record = requireRecord(params, 'browser.step.execute params'); - const step = requireRecord(record.step, 'step') as unknown as BrowserStepConfig; - const workspaceId = typeof record.workspaceId === 'string' ? record.workspaceId : undefined; - - return this.stepExecutor.execute(step, { workspaceId }); - } - private writeResponse(response: JsonRpcResponse): void { this.stdout.write(`${JSON.stringify(response)}\n`); } @@ -472,18 +443,6 @@ export function getBrowserToolDefinitions(): Array> { required: ['actions'], }, }, - { - name: 'browser_step_execute', - description: 'Execute a BrowserStepConfig using the workflow step executor.', - inputSchema: { - type: 'object', - properties: { - workspaceId: { type: 'string' }, - step: { type: 'object' }, - }, - required: ['step'], - }, - }, ]; } diff --git a/packages/browser-primitive/src/workflow-step.ts b/packages/browser-primitive/src/workflow-step.ts deleted file mode 100644 index b4fcf1fd1..000000000 --- a/packages/browser-primitive/src/workflow-step.ts +++ /dev/null @@ -1,602 +0,0 @@ -import type { RunnerStepExecutor, WorkflowStep } from '@agent-relay/sdk/workflows'; - -import { BrowserClient, type BrowserClientOptions } from './browser-client.js'; -import type { - ActionResult, - BrowserActionName, - BrowserActionParamsMap, - BrowserActionRequest, - BrowserConfig, - BrowserSession, -} from './types.js'; - -export type BrowserStepOutputMode = 'last' | 'all' | 'captures' | 'summary' | 'none'; -export type BrowserStepOutputFormat = 'json' | 'text'; - -export type BrowserStepAction = { - [Name in TName]: { - action: Name; - params: BrowserActionParamsMap[Name]; - /** Stable key for captured output. Defaults to the zero-based action index. */ - id?: string; - /** Include this action in output.mode = "captures". Defaults true for extraction actions. */ - capture?: boolean; - /** Output object key when output.mode = "captures". Defaults to id or action_. */ - outputKey?: string; - /** Continue running later actions if this action fails. Defaults false. */ - continueOnError?: boolean; - }; -}[TName]; - -export interface BrowserStepOutputConfig { - /** Which action results become the workflow step output. Defaults to "last". */ - mode?: BrowserStepOutputMode; - /** Emit JSON for structured chaining or text for simple downstream interpolation. Defaults to "json". */ - format?: BrowserStepOutputFormat; - /** Include per-action metadata such as current URL and timing in JSON output. Defaults false. */ - includeMetadata?: boolean; - /** Include final browser session state in JSON output. Defaults false. */ - includeSession?: boolean; - /** Pretty-print JSON output. Defaults false. */ - pretty?: boolean; -} - -export interface BrowserStepConfig { - /** Unique step name within the workflow. */ - name: string; - /** Dependencies in the Relay workflow DAG. */ - dependsOn?: string[]; - /** Browser settings for this step/session. */ - config?: BrowserConfig; - /** Ordered browser actions to execute in one browser session. */ - actions: BrowserStepAction[]; - /** Controls the string captured as {{steps..output}}. */ - output?: BrowserStepOutputConfig; - /** Reuse a named browser session across browser steps. Defaults to the workspace/config session. */ - sessionId?: string; - /** Close the session after this step, even when persistSession is true. */ - closeSession?: boolean; - /** Workflow step timeout in milliseconds. */ - timeoutMs?: number; - /** Number of retry attempts when the workflow runner retries this integration step. */ - retries?: number; -} - -export interface BrowserStepExecutionContext { - workspaceId?: string; - sessionId?: string; - client?: BrowserClient; -} - -export interface BrowserStepActionRecord { - index: number; - id?: string; - action: BrowserActionName; - success: boolean; - output?: TOutput; - error?: string; - metadata?: ActionResult['metadata']; -} - -export interface BrowserStepExecutionResult { - success: boolean; - output: string; - results: BrowserStepActionRecord[]; - session: BrowserSession; - error?: string; -} - -export interface BrowserIntegrationStepResult { - output: string; - success: boolean; -} - -type ResolvedParams = Record; - -const BROWSER_INTEGRATION = 'browser'; -const BROWSER_RUN_ACTION = 'run'; -const EXTRACTION_ACTIONS = new Set([ - 'text', - 'getText', - 'html', - 'getHTML', - 'attribute', - 'getAttribute', - 'screenshot', - 'elementScreenshot', - 'evaluate', -]); - -/** - * Create a Relay integration step that can be used in relay.yaml or passed to - * WorkflowRunner directly. Complex action/config objects are serialized into - * params so the existing workflow template resolver can interpolate them. - */ -export function createBrowserStep(config: BrowserStepConfig): WorkflowStep { - validateBrowserStepConfig(config); - - const params: Record = { - actions: JSON.stringify(config.actions), - }; - - if (config.config !== undefined) { - params.config = JSON.stringify(config.config); - } - if (config.output !== undefined) { - params.output = JSON.stringify(config.output); - } - if (config.sessionId !== undefined) { - params.sessionId = config.sessionId; - } - if (config.closeSession !== undefined) { - params.closeSession = String(config.closeSession); - } - - const step: WorkflowStep = { - name: config.name, - type: 'integration', - integration: BROWSER_INTEGRATION, - action: BROWSER_RUN_ACTION, - params, - }; - - if (config.dependsOn !== undefined) step.dependsOn = config.dependsOn; - if (config.timeoutMs !== undefined) step.timeoutMs = config.timeoutMs; - if (config.retries !== undefined) step.retries = config.retries; - - return step; -} - -export class BrowserStepExecutor implements RunnerStepExecutor { - private readonly sessions = new Map(); - - constructor(private readonly options: BrowserClientOptions = {}) {} - - async executeAgentStep(): Promise { - throw new Error('BrowserStepExecutor only executes browser integration steps.'); - } - - async execute( - config: BrowserStepConfig, - context: BrowserStepExecutionContext = {} - ): Promise { - validateBrowserStepConfig(config); - - const client = context.client ?? this.getOrCreateClient(config, context); - const records: BrowserStepActionRecord[] = []; - let hardFailure: BrowserStepActionRecord | undefined; - - for (let index = 0; index < config.actions.length; index += 1) { - const action = config.actions[index]; - const started = Date.now(); - const result = await client.executeWorkflowAction(action as BrowserActionRequest); - - const record: BrowserStepActionRecord = { - index, - id: action.id, - action: action.action, - success: result.success, - output: result.output, - error: result.error, - metadata: result.metadata, - }; - records.push(record); - - if (!result.success && !action.continueOnError) { - hardFailure = record; - break; - } - - if (!record.metadata) { - record.metadata = { - action: action.action, - sessionId: client.getSession().id, - currentUrl: client.getCurrentUrl(), - executionTime: Date.now() - started, - }; - } - } - - const session = client.getSession(); - const output = formatStepOutput(config, records, session); - const success = hardFailure === undefined; - - if (config.closeSession || config.config?.persistSession === false) { - if (context.client) { - await context.client.close(); - } else { - await this.closeSession(this.resolveSessionKey(config, context)); - } - } - - return { - success, - output, - results: records, - session, - error: hardFailure?.error, - }; - } - - async executeIntegrationStep( - step: WorkflowStep, - resolvedParams: Record, - context: { workspaceId?: string } = {} - ): Promise { - if (step.integration !== BROWSER_INTEGRATION) { - return { - success: false, - output: `BrowserStepExecutor only handles "${BROWSER_INTEGRATION}" integration steps`, - }; - } - - try { - const config = browserStepConfigFromWorkflowStep(step, resolvedParams); - const result = await this.execute(config, context); - - return { - success: result.success, - output: result.success ? result.output : result.output || result.error || 'Browser step failed', - }; - } catch (error) { - return { - success: false, - output: error instanceof Error ? error.message : String(error), - }; - } - } - - async closeSession(sessionId: string): Promise { - const client = this.sessions.get(sessionId); - if (!client) { - return false; - } - - await client.close(); - this.sessions.delete(sessionId); - return true; - } - - async closeAll(): Promise { - await Promise.all([...this.sessions.values()].map((client) => client.close())); - this.sessions.clear(); - } - - listSessions(): BrowserSession[] { - return [...this.sessions.values()].map((client) => client.getSession()); - } - - private getOrCreateClient(config: BrowserStepConfig, context: BrowserStepExecutionContext): BrowserClient { - const key = this.resolveSessionKey(config, context); - const existing = this.sessions.get(key); - - if (existing) { - return existing; - } - - const client = new BrowserClient({ - ...this.options, - config: { - ...this.options.config, - ...config.config, - }, - }); - this.sessions.set(key, client); - return client; - } - - private resolveSessionKey(config: BrowserStepConfig, context: BrowserStepExecutionContext): string { - if (context.sessionId) return context.sessionId; - if (config.sessionId) return config.sessionId; - - const workspace = context.workspaceId ?? 'default'; - return `${workspace}:${stableStringify(config.config ?? {})}`; - } -} - -export function browserStepConfigFromWorkflowStep( - step: WorkflowStep, - resolvedParams: Record -): BrowserStepConfig { - const params = normalizeResolvedParams(resolvedParams); - const config = readJsonParam(params.config ?? params.browserConfig, 'config') ?? undefined; - const output = readJsonParam(params.output, 'output') ?? undefined; - const closeSession = - params.closeSession === undefined ? undefined : Boolean(coerceScalar(params.closeSession)); - const sessionId = params.sessionId === undefined ? undefined : String(params.sessionId); - - const actions = readActions(step, params); - - return { - name: step.name, - dependsOn: step.dependsOn, - config, - actions, - output, - sessionId, - closeSession, - timeoutMs: step.timeoutMs, - retries: step.retries, - }; -} - -function validateBrowserStepConfig(config: BrowserStepConfig): void { - if (!config.name) { - throw new Error('Browser step requires a non-empty name'); - } - if (!Array.isArray(config.actions) || config.actions.length === 0) { - throw new Error(`Browser step "${config.name}" requires at least one action`); - } - - for (const [index, action] of config.actions.entries()) { - if (!action || typeof action !== 'object') { - throw new Error(`Browser step "${config.name}" action ${index} must be an object`); - } - if (!action.action || typeof action.action !== 'string') { - throw new Error(`Browser step "${config.name}" action ${index} requires an action name`); - } - if (action.params === undefined || typeof action.params !== 'object' || action.params === null) { - throw new Error(`Browser step "${config.name}" action ${index} requires params`); - } - } -} - -function readActions(step: WorkflowStep, params: ResolvedParams): BrowserStepAction[] { - const serializedActions = params.actions; - if (serializedActions !== undefined) { - const parsed = readJsonParam(serializedActions, 'actions'); - if (!Array.isArray(parsed)) { - throw new Error('Browser step params.actions must be a JSON array'); - } - return parsed; - } - - if (!step.action || step.action === BROWSER_RUN_ACTION) { - throw new Error(`Browser step "${step.name}" requires params.actions or a browser action`); - } - - const actionParams: Record = {}; - for (const [key, value] of Object.entries(params)) { - if ( - key === 'config' || - key === 'browserConfig' || - key === 'output' || - key === 'sessionId' || - key === 'closeSession' - ) { - continue; - } - actionParams[key] = value; - } - - return [ - { - action: step.action as BrowserActionName, - params: actionParams as BrowserActionParamsMap[BrowserActionName], - } as BrowserStepAction, - ]; -} - -function formatStepOutput( - config: BrowserStepConfig, - records: BrowserStepActionRecord[], - session: BrowserSession -): string { - const outputConfig = config.output ?? {}; - const mode = outputConfig.mode ?? 'last'; - const format = outputConfig.format ?? 'json'; - - if (mode === 'none') { - return ''; - } - - const projection = buildOutputProjection(mode, config.actions, records, session, outputConfig); - - if (format === 'text') { - return projectionToText(projection); - } - - return JSON.stringify(projection, undefined, outputConfig.pretty ? 2 : undefined); -} - -function buildOutputProjection( - mode: BrowserStepOutputMode, - actions: BrowserStepAction[], - records: BrowserStepActionRecord[], - session: BrowserSession, - outputConfig: BrowserStepOutputConfig -): unknown { - if (mode === 'summary') { - const failed = records.find((record) => !record.success); - return withOptionalSession( - { - success: failed === undefined, - actionCount: records.length, - currentUrl: session.currentUrl, - failedAction: failed - ? { - index: failed.index, - id: failed.id, - action: failed.action, - error: failed.error, - } - : undefined, - }, - session, - outputConfig - ); - } - - if (mode === 'all') { - return withOptionalSession( - { - results: records.map((record) => projectRecord(record, outputConfig.includeMetadata ?? false)), - }, - session, - outputConfig - ); - } - - if (mode === 'captures') { - const captures: Record = {}; - - for (const record of records) { - const action = record.action; - const actionConfig = actions[record.index]; - const capture = actionConfig?.capture ?? (EXTRACTION_ACTIONS.has(action) && record.success); - if (!capture) continue; - - const key = actionConfig?.outputKey ?? actionConfig?.id ?? record.id ?? `action_${record.index}`; - captures[key] = projectRecord(record, outputConfig.includeMetadata ?? false); - } - - return withOptionalSession({ captures }, session, outputConfig); - } - - const last = records.at(-1); - return withOptionalSession( - last ? projectRecord(last, outputConfig.includeMetadata ?? false) : null, - session, - outputConfig - ); -} - -function projectRecord(record: BrowserStepActionRecord, includeMetadata: boolean): unknown { - const projected: BrowserStepActionRecord = { - index: record.index, - action: record.action, - success: record.success, - }; - - if (record.id !== undefined) projected.id = record.id; - if (record.output !== undefined) projected.output = record.output; - if (record.error !== undefined) projected.error = record.error; - if (includeMetadata && record.metadata !== undefined) projected.metadata = record.metadata; - - return projected; -} - -function withOptionalSession( - value: unknown, - session: BrowserSession, - outputConfig: BrowserStepOutputConfig -): unknown { - if (!outputConfig.includeSession) { - return value; - } - - return { - value, - session: { - id: session.id, - active: session.active, - currentUrl: session.currentUrl, - startTime: session.startTime, - config: session.config, - }, - }; -} - -function projectionToText(value: unknown): string { - if (typeof value === 'string') return value; - if (value === null || value === undefined) return ''; - - if (isRecord(value)) { - if ('output' in value) { - return projectionToText(value.output); - } - if ('value' in value) { - return projectionToText(value.value); - } - if ('text' in value) { - return projectionToText(value.text); - } - if ('html' in value) { - return projectionToText(value.html); - } - if ('captures' in value) { - return JSON.stringify(value.captures); - } - } - - if (Array.isArray(value)) { - return value.map((entry) => projectionToText(entry)).join('\n'); - } - - return JSON.stringify(value); -} - -function normalizeResolvedParams(params: Record): ResolvedParams { - const normalized: ResolvedParams = {}; - for (const [key, value] of Object.entries(params)) { - normalized[key] = coerceScalar(value); - } - return normalized; -} - -function coerceScalar(value: unknown): unknown { - if (typeof value !== 'string') { - return value; - } - - const trimmed = value.trim(); - if (trimmed === 'true') return true; - if (trimmed === 'false') return false; - if (trimmed === 'null') return null; - if (/^-?(?:0|[1-9]\d*)(?:\.\d+)?$/.test(trimmed)) return Number(trimmed); - if ( - (trimmed.startsWith('{') && trimmed.endsWith('}')) || - (trimmed.startsWith('[') && trimmed.endsWith(']')) || - (trimmed.startsWith('"') && trimmed.endsWith('"')) - ) { - try { - return JSON.parse(trimmed); - } catch { - return value; - } - } - - return value; -} - -function readJsonParam(value: unknown, name: string): T | undefined { - if (value === undefined) return undefined; - if (typeof value !== 'string') return value as T; - - try { - return JSON.parse(value) as T; - } catch (error) { - throw new Error( - `Browser step params.${name} must be valid JSON: ${error instanceof Error ? error.message : String(error)}` - ); - } -} - -function stableStringify(value: unknown): string { - if (!isRecord(value) && !Array.isArray(value)) { - return JSON.stringify(value); - } - - return JSON.stringify(sortJson(value)); -} - -function sortJson(value: unknown): unknown { - if (Array.isArray(value)) { - return value.map(sortJson); - } - - if (!isRecord(value)) { - return value; - } - - return Object.keys(value) - .sort() - .reduce>((acc, key) => { - acc[key] = sortJson(value[key]); - return acc; - }, {}); -} - -function isRecord(value: unknown): value is Record { - return typeof value === 'object' && value !== null && !Array.isArray(value); -} diff --git a/packages/cli/src/cli/bootstrap.test.ts b/packages/cli/src/cli/bootstrap.test.ts index 2e5cfeed0..39bbc2098 100644 --- a/packages/cli/src/cli/bootstrap.test.ts +++ b/packages/cli/src/cli/bootstrap.test.ts @@ -39,7 +39,6 @@ const expectedLeafCommands = [ 'telemetry', 'on', 'off', - 'run', 'connect', 'view', 'drive', @@ -50,7 +49,6 @@ const expectedLeafCommands = [ 'dlq inspect', 'dlq replay', 'dlq purge', - 'workflows list', 'workspaces create', 'tokens issue', 'cloud login', @@ -136,10 +134,8 @@ describe('bootstrap CLI', () => { 'telemetry', 'on', 'off', - 'run', 'dlq', 'workspaces', - 'workflows', 'tokens', ]) ); diff --git a/packages/cli/src/cli/commands/core.ts b/packages/cli/src/cli/commands/core.ts index 64ea6e58e..414ad6aa1 100644 --- a/packages/cli/src/cli/commands/core.ts +++ b/packages/cli/src/cli/commands/core.ts @@ -123,7 +123,6 @@ export interface CoreDependencies { sleep: (ms: number) => Promise; onSignal: (signal: NodeJS.Signals, handler: () => void | Promise) => void; holdOpen: () => Promise; - resolveTemplatesDir: () => string; isPortInUse: (port: number) => Promise; findBrokerApiPort: () => Promise; log: (...args: unknown[]) => void; @@ -354,19 +353,6 @@ function withDefaults(overrides: Partial = {}): CoreDependenci process.on(signal, () => runSignalHandler(handler)); }, holdOpen: () => new Promise(() => undefined), - resolveTemplatesDir: () => { - // Walk up from __dirname to find the sdk package's builtin-templates dir - const dirname = path.dirname(fileURLToPath(import.meta.url)); - let dir = dirname; - for (let i = 0; i < 8; i++) { - const candidate = path.join(dir, 'packages', 'sdk', 'src', 'workflows', 'builtin-templates'); - if (fs.existsSync(candidate)) return candidate; - const distCandidate = path.join(dir, 'packages', 'sdk', 'dist', 'workflows', 'builtin-templates'); - if (fs.existsSync(distCandidate)) return distCandidate; - dir = path.dirname(dir); - } - return path.join(dirname, 'builtin-templates'); - }, log: (...args: unknown[]) => console.log(...args), error: (...args: unknown[]) => console.error(...args), warn: (...args: unknown[]) => console.warn(...args), @@ -532,26 +518,4 @@ export function registerCoreCommands(program: Command, overrides: Partial { - const templatesDir = deps.resolveTemplatesDir(); - if (!deps.fs.existsSync(templatesDir)) { - deps.log('No built-in templates found.'); - return; - } - const files = deps.fs.readdirSync(templatesDir).filter((f) => f.endsWith('.yaml')); - if (files.length === 0) { - deps.log('No built-in templates found.'); - return; - } - deps.log('Built-in workflow templates:'); - for (const file of files) { - deps.log(` ${file.replace(/\.yaml$/, '')}`); - } - }); } diff --git a/packages/cli/src/cli/commands/on/provision.ts b/packages/cli/src/cli/commands/on/provision.ts index 322826950..7017888b9 100644 --- a/packages/cli/src/cli/commands/on/provision.ts +++ b/packages/cli/src/cli/commands/on/provision.ts @@ -7,8 +7,8 @@ import { discoverAgents as discoverAgentsFromCore, hasDotfiles as hasDotfilesFromCore, } from './dotfiles.js'; -import { mintAgentToken as mintToken } from '@agent-relay/sdk/provisioner/token'; -import type { LocalJwksSigningKey } from '@agent-relay/sdk/provisioner/local-jwks'; +import { mintAgentToken as mintToken } from '@agent-relay/cloud'; +import type { LocalJwksSigningKey } from '@agent-relay/cloud'; interface ProvisionConfig { relayauthRoot: string; diff --git a/packages/cli/src/cli/commands/on/start.test.ts b/packages/cli/src/cli/commands/on/start.test.ts index dbf74388e..534864d1c 100644 --- a/packages/cli/src/cli/commands/on/start.test.ts +++ b/packages/cli/src/cli/commands/on/start.test.ts @@ -3,10 +3,14 @@ import { tmpdir } from 'node:os'; import path from 'node:path'; import { describe, expect, it, vi } from 'vitest'; -vi.mock('@agent-relay/cloud', () => ({ - readStoredAuth: vi.fn().mockResolvedValue(null), - ensureAuthenticated: vi.fn().mockResolvedValue({ accessToken: 'test-token' }), -})); +vi.mock('@agent-relay/cloud', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + readStoredAuth: vi.fn().mockResolvedValue(null), + ensureAuthenticated: vi.fn().mockResolvedValue({ accessToken: 'test-token' }), + }; +}); vi.mock('./dotfiles.js', () => ({ hasDotfiles: () => false, @@ -14,7 +18,7 @@ vi.mock('./dotfiles.js', () => ({ })); import { requestWorkspaceSession } from './start.js'; -import { createLocalJwksKeyPair } from '../../../../../sdk/src/provisioner/local-jwks.js'; +import { createLocalJwksKeyPair } from '@agent-relay/cloud'; function jsonResponse(payload: unknown, status = 200): Response { return new Response(JSON.stringify(payload), { diff --git a/packages/cli/src/cli/commands/on/start.ts b/packages/cli/src/cli/commands/on/start.ts index a942fde22..bea20c035 100644 --- a/packages/cli/src/cli/commands/on/start.ts +++ b/packages/cli/src/cli/commands/on/start.ts @@ -20,8 +20,7 @@ import { compileDotfiles, hasDotfiles } from './dotfiles.js'; import { ensureRelayfileMountBinary } from './relayfile-binary.js'; import { launchOnMount } from '@relayfile/local-mount'; import { mintToken } from './token.js'; -import { seedAclRules } from './workspace.js'; -import { seedWorkspace } from '@agent-relay/sdk/provisioner/seeder'; +import { seedAclRules, seedWorkspace } from './workspace.js'; import { createLocalJwks, exportPrivateKeyPem, @@ -29,7 +28,7 @@ import { RELAYAUTH_JWT_KID_ENV, RELAYAUTH_JWT_PRIVATE_KEY_PEM_ENV, type LocalJwksSigningKey, -} from '@agent-relay/sdk/provisioner/local-jwks'; +} from '@agent-relay/cloud'; import { ensureAuthenticated, readStoredAuth } from '@agent-relay/cloud'; interface OnOptions { diff --git a/packages/cli/src/cli/commands/setup.test.ts b/packages/cli/src/cli/commands/setup.test.ts deleted file mode 100644 index ef27e63d0..000000000 --- a/packages/cli/src/cli/commands/setup.test.ts +++ /dev/null @@ -1,359 +0,0 @@ -import { Command } from 'commander'; -import { describe, expect, it, vi } from 'vitest'; - -import fs from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import { - ensureLocalSdkWorkflowRuntime, - findLocalSdkWorkspace, - formatWorkflowParseError, - parseTsxStderr, - registerSetupCommands, - type SetupDependencies, -} from './setup.js'; - -class ExitSignal extends Error { - constructor(public readonly code: number) { - super(`exit:${code}`); - } -} - -function createHarness(overrides: Partial = {}) { - const exit = vi.fn((code: number) => { - throw new ExitSignal(code); - }) as unknown as SetupDependencies['exit']; - - const deps: SetupDependencies = { - runInit: vi.fn(async () => undefined), - runTelemetry: vi.fn(async () => undefined), - runYamlWorkflow: vi.fn(async () => ({ status: 'completed' })), - runScriptWorkflow: vi.fn(() => undefined), - log: vi.fn(() => undefined), - error: vi.fn(() => undefined), - exit, - ...overrides, - }; - - const program = new Command(); - registerSetupCommands(program, deps); - - return { program, deps }; -} - -async function runCommand(program: Command, args: string[]): Promise { - try { - await program.parseAsync(args, { from: 'user' }); - return undefined; - } catch (err) { - if (err instanceof ExitSignal) { - return err.code; - } - throw err; - } -} - -describe('local SDK workflow runtime bootstrapping', () => { - it('finds the agent-relay workspace root from a nested directory', () => { - const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-workspace-')); - const nestedDir = path.join(tempRoot, 'workflows', 'nested'); - const sdkDir = path.join(tempRoot, 'packages', 'sdk'); - fs.mkdirSync(nestedDir, { recursive: true }); - fs.mkdirSync(sdkDir, { recursive: true }); - fs.writeFileSync(path.join(tempRoot, 'package.json'), JSON.stringify({ name: 'agent-relay' })); - fs.writeFileSync(path.join(sdkDir, 'package.json'), JSON.stringify({ name: '@agent-relay/sdk' })); - - expect(findLocalSdkWorkspace(nestedDir)).toEqual({ rootDir: tempRoot, sdkDir }); - - fs.rmSync(tempRoot, { recursive: true, force: true }); - }); - - it('builds the local sdk when the workflows dist entry is missing', () => { - const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-build-')); - const nestedDir = path.join(tempRoot, 'workflows'); - const sdkDir = path.join(tempRoot, 'packages', 'sdk'); - const workflowsDistDir = path.join(sdkDir, 'dist', 'workflows'); - fs.mkdirSync(nestedDir, { recursive: true }); - fs.mkdirSync(sdkDir, { recursive: true }); - fs.writeFileSync(path.join(tempRoot, 'package.json'), JSON.stringify({ name: 'agent-relay' })); - fs.writeFileSync(path.join(sdkDir, 'package.json'), JSON.stringify({ name: '@agent-relay/sdk' })); - - const execRunner = vi.fn(() => { - fs.mkdirSync(workflowsDistDir, { recursive: true }); - fs.writeFileSync(path.join(workflowsDistDir, 'index.js'), 'export {}\n'); - return Buffer.from(''); - }); - - ensureLocalSdkWorkflowRuntime(nestedDir, execRunner as never); - - expect(execRunner).toHaveBeenCalledTimes(6); - expect(execRunner.mock.calls.map((call) => call[1])).toEqual([ - ['run', 'build:config'], - ['--prefix', 'packages/workflow-types', 'run', 'build'], - ['--prefix', 'packages/github-primitive', 'run', 'build'], - ['--prefix', 'packages/slack-primitive', 'run', 'build'], - ['--prefix', 'packages/cloud', 'run', 'build'], - ['run', 'build:sdk'], - ]); - expect(execRunner).toHaveBeenNthCalledWith( - 1, - 'npm', - ['run', 'build:config'], - expect.objectContaining({ cwd: tempRoot, stdio: 'inherit' }) - ); - expect(execRunner).toHaveBeenNthCalledWith( - 6, - 'npm', - ['run', 'build:sdk'], - expect.objectContaining({ cwd: tempRoot, stdio: 'inherit' }) - ); - fs.rmSync(tempRoot, { recursive: true, force: true }); - }); -}); - -describe('registerSetupCommands', () => { - it('registers setup commands on the program', () => { - const { program } = createHarness(); - const commandNames = program.commands.map((cmd) => cmd.name()); - - expect(commandNames).toEqual(expect.arrayContaining(['init', 'setup', 'telemetry', 'run'])); - }); - - it('routes both init and setup alias to runInit', async () => { - const { program, deps } = createHarness(); - - await runCommand(program, ['init', '--yes', '--skip-broker']); - await runCommand(program, ['setup', '--yes']); - - expect((deps.runInit as unknown as { mock: { calls: unknown[][] } }).mock.calls[0][0]).toMatchObject({ - yes: true, - skipBroker: true, - }); - expect((deps.runInit as unknown as { mock: { calls: unknown[][] } }).mock.calls[1][0]).toMatchObject({ - yes: true, - }); - }); - - it('routes telemetry action', async () => { - const { program, deps } = createHarness(); - - const exitCode = await runCommand(program, ['telemetry', 'enable']); - - expect(exitCode).toBeUndefined(); - expect(deps.runTelemetry).toHaveBeenCalledWith('enable'); - }); - - it('routes run command based on file extension', async () => { - const { program, deps } = createHarness(); - - await runCommand(program, ['run', 'workflow.yaml', '--workflow', 'main']); - await runCommand(program, [ - 'run', - 'workflow.py', - '--resume', - 'run-123', - '--start-from', - 'step-a', - '--previous-run-id', - 'run-122', - ]); - - expect(deps.runYamlWorkflow).toHaveBeenCalledWith('workflow.yaml', { - workflow: 'main', - onEvent: expect.any(Function), - }); - expect(deps.runScriptWorkflow).toHaveBeenCalledWith('workflow.py', { - dryRun: undefined, - resume: 'run-123', - startFrom: 'step-a', - previousRunId: 'run-122', - }); - }); - - it('prints resume hints when a script workflow fails', async () => { - const { program, deps } = createHarness({ - runScriptWorkflow: vi.fn(() => { - throw new Error('script failed'); - }), - }); - - const exitCode = await runCommand(program, ['run', 'workflow.ts']); - - expect(exitCode).toBe(1); - expect(deps.error).toHaveBeenCalledWith('Error: script failed'); - expect(deps.error).toHaveBeenCalledWith( - expect.stringContaining('agent-relay run workflow.ts --resume ') - ); - }); - - it('prints a copy-pasteable resume command when the script error includes a run id', async () => { - const { program, deps } = createHarness({ - runScriptWorkflow: vi.fn(() => { - throw new Error('script failed\nRun ID: run-456'); - }), - }); - - const exitCode = await runCommand(program, ['run', 'workflow.ts']); - - expect(exitCode).toBe(1); - expect(deps.error).toHaveBeenCalledWith( - 'Run ID: run-456 — resume with: agent-relay run workflow.ts --resume run-456' - ); - }); - - it('exits with code 1 for unsupported run file extension', async () => { - const { program } = createHarness(); - - const exitCode = await runCommand(program, ['run', 'workflow.txt']); - - expect(exitCode).toBe(1); - }); -}); - -describe('parseTsxStderr', () => { - it('returns null for empty stderr', () => { - expect(parseTsxStderr('')).toBeNull(); - }); - - it('returns null for runtime errors with no parse signature', () => { - const stderr = [ - 'node:internal/modules/run_main:123', - ' triggerUncaughtException(', - ' ^', - 'Error: something blew up at runtime', - ' at Object. (/path/to/workflow.ts:5:10)', - ].join('\n'); - expect(parseTsxStderr(stderr)).toBeNull(); - }); - - it('parses the inline "file:line:col: ERROR: message" format', () => { - const stderr = [ - 'node:internal/modules/run_main:123', - ' triggerUncaughtException(', - ' ^', - 'Error [TransformError]: Transform failed with 1 error:', - '/path/to/workflow.ts:1073:4: ERROR: Expected "}" but found "npm"', - ' at failureErrorWithLog (... lib/main.js:1748:15)', - ].join('\n'); - - expect(parseTsxStderr(stderr)).toEqual({ - file: '/path/to/workflow.ts', - line: 1073, - column: 4, - message: 'Expected "}" but found "npm"', - }); - }); - - it('parses the pretty-printed ✘ [ERROR] multi-line format', () => { - const stderr = [ - '✘ [ERROR] Unterminated template literal', - '', - ' /path/to/workflow.ts:42:10:', - ' 42 │ command: `echo hello', - ' ╵ ^', - ].join('\n'); - - expect(parseTsxStderr(stderr)).toEqual({ - file: '/path/to/workflow.ts', - line: 42, - column: 10, - message: 'Unterminated template literal', - }); - }); - - it('strips ANSI color codes before matching', () => { - const stderr = [ - '\x1b[31mError [TransformError]: Transform failed with 1 error:\x1b[0m', - '\x1b[1m/path/to/workflow.ts:10:5:\x1b[0m \x1b[31mERROR:\x1b[0m Expected "}" but found "foo"', - ].join('\n'); - - const parsed = parseTsxStderr(stderr); - expect(parsed).not.toBeNull(); - expect(parsed?.line).toBe(10); - expect(parsed?.column).toBe(5); - expect(parsed?.message).toBe('Expected "}" but found "foo"'); - }); - - it('falls back to a loose match on "Transform failed" without inline ERROR:', () => { - const stderr = [ - 'Error [TransformError]: Transform failed with 1 error:', - ' /path/to/workflow.ts:99:7', - ' at failureErrorWithLog', - ].join('\n'); - - const parsed = parseTsxStderr(stderr); - expect(parsed).not.toBeNull(); - expect(parsed?.file).toBe('/path/to/workflow.ts'); - expect(parsed?.line).toBe(99); - expect(parsed?.column).toBe(7); - }); -}); - -describe('formatWorkflowParseError', () => { - it('formats a basic parse error without hints when the message is generic', () => { - const err = formatWorkflowParseError({ - file: '/tmp/wf.ts', - line: 10, - column: 5, - message: 'Some unrelated TypeScript error', - }); - - expect(err.message).toContain('Workflow file failed to parse: /tmp/wf.ts:10:5'); - expect(err.message).toContain('Some unrelated TypeScript error'); - expect(err.message).not.toContain('Hint:'); - expect((err as Error & { code?: string }).code).toBe('WORKFLOW_PARSE_ERROR'); - }); - - it('adds a template-literal hint for Expected "}" but found errors', () => { - const err = formatWorkflowParseError({ - file: '/tmp/wf.ts', - line: 1073, - column: 4, - message: 'Expected "}" but found "npm"', - }); - - expect(err.message).toMatch(/Hint:/); - expect(err.message).toMatch(/template literal/i); - expect(err.message).toMatch(/single quotes/); - }); - - it('adds a template-literal hint for Unterminated template literal errors', () => { - const err = formatWorkflowParseError({ - file: '/tmp/wf.ts', - line: 42, - column: 10, - message: 'Unterminated template literal', - }); - - expect(err.message).toMatch(/Hint:/); - expect(err.message).toMatch(/backticks/i); - }); - - it('adds a dollar-sign hint for Unexpected "$" errors', () => { - const err = formatWorkflowParseError({ - file: '/tmp/wf.ts', - line: 1, - column: 0, - message: 'Unexpected "$"', - }); - - expect(err.message).toMatch(/Hint:/); - expect(err.message).toMatch(/interpolation/); - }); - - it('includes a line-text pointer when lineText is provided', () => { - const err = formatWorkflowParseError({ - file: '/tmp/wf.ts', - line: 10, - column: 12, - message: 'Expected "}" but found "x"', - lineText: ' command: `echo foo`', - }); - - expect(err.message).toContain('| '); - expect(err.message).toContain('echo foo'); - // The ^ pointer should be 12 spaces offset into the indented line - expect(err.message).toMatch(/\|\s+\^/); - }); -}); diff --git a/packages/cli/src/cli/commands/setup.ts b/packages/cli/src/cli/commands/setup.ts index 825e2fd9d..425c6a238 100644 --- a/packages/cli/src/cli/commands/setup.ts +++ b/packages/cli/src/cli/commands/setup.ts @@ -1,74 +1,20 @@ -import path from 'node:path'; import readline from 'node:readline'; import { spawn as spawnProcess } from 'node:child_process'; import { Command } from 'commander'; import { getProjectPaths } from '@agent-relay/config'; import { readBrokerConnection } from '../lib/broker-lifecycle.js'; -import { - enableTelemetry, - disableTelemetry, - getStatus, - isDisabledByEnv, - track, - type WorkflowFileType as TelemetryWorkflowFileType, -} from '@agent-relay/telemetry'; -import { - runWorkflow, - runScriptWorkflow, - ensureLocalSdkWorkflowRuntime, - findLocalSdkWorkspace, - parseTsxStderr, - formatWorkflowParseError, - type ParsedWorkflowError, -} from '@agent-relay/sdk/workflows'; -import type { WorkflowEvent } from '@agent-relay/sdk/workflows'; -import { CliExit, defaultExit } from '../lib/exit.js'; -import { errorClassName } from '../lib/telemetry-helpers.js'; - -export { - ensureLocalSdkWorkflowRuntime, - findLocalSdkWorkspace, - parseTsxStderr, - formatWorkflowParseError, - type ParsedWorkflowError, -}; +import { enableTelemetry, disableTelemetry, getStatus, isDisabledByEnv, track } from '@agent-relay/telemetry'; +import { defaultExit } from '../lib/exit.js'; type ExitFn = (code: number) => never; type RunInitOptions = { yes?: boolean; skipBroker?: boolean; }; -type RunWorkflowOptions = { - workflow?: string; - dryRun?: boolean; - resume?: string; - startFrom?: string; - previousRunId?: string; -}; -type WorkflowRunResult = { - id?: string; - status: string; - error?: string; -}; export interface SetupDependencies { runInit: (options: RunInitOptions) => Promise; runTelemetry: (action?: string) => Promise | void; - runYamlWorkflow: ( - filePath: string, - options: { - workflow?: string; - dryRun?: boolean; - resume?: string; - startFrom?: string; - previousRunId?: string; - onEvent: (event: WorkflowEvent) => void; - } - ) => Promise; - runScriptWorkflow: ( - filePath: string, - options?: { dryRun?: boolean; resume?: string; startFrom?: string; previousRunId?: string } - ) => void | Promise; log: (...args: unknown[]) => void; error: (...args: unknown[]) => void; exit: ExitFn; @@ -86,41 +32,12 @@ function withDefaults(overrides: Partial = {}): SetupDependen return { runInit: overrides.runInit ?? ((options: RunInitOptions) => runInitDefault(options, io)), runTelemetry: overrides.runTelemetry ?? ((action?: string) => runTelemetryDefault(action, io)), - runYamlWorkflow: runYamlWorkflowDefault, - runScriptWorkflow, log, error, exit, ...overrides, }; } -function logWorkflowEvent(event: WorkflowEvent, log: (...args: unknown[]) => void): void { - if (event.type === 'broker:event') return; - const prefix = event.type.startsWith('run:') ? '[run]' : '[step]'; - const name = 'stepName' in event ? `${event.stepName} ` : ''; - const status = event.type.split(':')[1]; - const detail = 'error' in event ? `: ${event.error}` : ''; - log(`${prefix} ${name}${status}${detail}`); -} -async function runYamlWorkflowDefault( - filePath: string, - options: { - workflow?: string; - dryRun?: boolean; - resume?: string; - startFrom?: string; - previousRunId?: string; - onEvent: (event: WorkflowEvent) => void; - } -): Promise { - const result = await runWorkflow(filePath, options); - // DryRunReport has 'valid' instead of 'status' - if ('valid' in result) { - const report = result as unknown as { valid: boolean; errors: string[] }; - return { status: report.valid ? 'dry-run' : 'failed', error: report.errors.join('; ') || undefined }; - } - return result; -} async function runInitDefault(options: RunInitOptions, io: SetupIo): Promise { const prompt = async (question: string, defaultYes = true): Promise => { if (options.yes) return true; @@ -304,136 +221,4 @@ export function registerSetupCommands(program: Command, overrides: Partial { await deps.runTelemetry(action); }); - program - .command('run') - .description('Run a workflow file (YAML, TypeScript, or Python)') - .argument('', 'Path to workflow file (.yaml, .yml, .ts, or .py)') - .option('-w, --workflow ', 'Run a specific workflow by name (default: first, YAML only)') - .option('--dry-run', 'Validate workflow and show execution plan without running') - .option('--resume ', 'Resume a previously failed workflow run from where it left off') - .option('--start-from ', 'Start from a specific step and skip predecessor steps') - .option('--previous-run-id ', 'Use cached outputs from a previous run when starting from a step') - .action(async (filePath: string, options: RunWorkflowOptions) => { - const ext = path.extname(filePath).toLowerCase(); - const isScriptWorkflow = ext === '.ts' || ext === '.tsx' || ext === '.py'; - const fileType: TelemetryWorkflowFileType = - ext === '.yaml' || ext === '.yml' - ? 'yaml' - : ext === '.ts' || ext === '.tsx' - ? 'ts' - : ext === '.py' - ? 'py' - : 'unknown'; - const started = Date.now(); - let tracked = false; - const emit = (result: { success: boolean; errorClass?: string }): void => { - if (tracked) return; - tracked = true; - track('workflow_run', { - file_type: fileType, - is_dry_run: Boolean(options.dryRun), - is_resume: Boolean(options.resume), - is_start_from: Boolean(options.startFrom), - is_script: isScriptWorkflow, - success: result.success, - duration_ms: Date.now() - started, - ...(result.errorClass ? { error_class: result.errorClass } : {}), - }); - }; - - try { - if (ext === '.yaml' || ext === '.yml') { - if (options.resume) { - deps.log(`Resuming workflow run ${options.resume} from ${filePath}...`); - const result = await deps.runYamlWorkflow(filePath, { - workflow: options.workflow, - resume: options.resume, - onEvent: (event: WorkflowEvent) => logWorkflowEvent(event, deps.log), - }); - if (result.status === 'completed') { - deps.log('\nWorkflow resumed and completed successfully.'); - emit({ success: true }); - } else { - deps.error(`\nWorkflow ${result.status}${result.error ? `: ${result.error}` : ''}`); - deps.error( - `Run ID: ${result.id} — resume with: agent-relay run ${filePath} --resume ${result.id}` - ); - emit({ success: false, errorClass: 'WorkflowNotCompleted' }); - deps.exit(1); - } - return; - } - if (options.dryRun) { - deps.log(`Dry run: validating workflow from ${filePath}...`); - } else { - deps.log(`Running workflow from ${filePath}...`); - } - const result = await deps.runYamlWorkflow(filePath, { - workflow: options.workflow, - dryRun: options.dryRun, - resume: options.resume, - startFrom: options.startFrom, - previousRunId: options.previousRunId, - onEvent: (event: WorkflowEvent) => logWorkflowEvent(event, deps.log), - }); - if (options.dryRun) { - // Report was already printed by runWorkflow - emit({ success: true }); - return; - } - if (result.status === 'completed') { - deps.log('\nWorkflow completed successfully.'); - emit({ success: true }); - } else { - deps.error(`\nWorkflow ${result.status}${result.error ? `: ${result.error}` : ''}`); - deps.error( - `Run ID: ${result.id} — resume with: agent-relay run ${filePath} --resume ${result.id}` - ); - emit({ success: false, errorClass: 'WorkflowNotCompleted' }); - deps.exit(1); - } - return; - } - if (ext === '.ts' || ext === '.tsx' || ext === '.py') { - deps.log(`Running workflow script ${filePath}...`); - await deps.runScriptWorkflow(filePath, { - dryRun: options.dryRun, - resume: options.resume, - startFrom: options.startFrom, - previousRunId: options.previousRunId, - }); - emit({ success: true }); - return; - } - deps.error(`Unsupported file type: ${ext}. Use .yaml, .yml, .ts, or .py`); - emit({ success: false, errorClass: 'UnsupportedFileType' }); - deps.exit(1); - } catch (err: any) { - // `deps.exit(1)` above throws `CliExit` in production so runCli can - // flush telemetry — let that bubble straight through instead of - // treating it as an unexpected error (which would print the internal - // "cli-exit:1" message and clobber `error_class` with 'CliExit'). - if (err instanceof CliExit) throw err; - emit({ success: false, errorClass: errorClassName(err) }); - deps.error(`Error: ${err.message}`); - if (isScriptWorkflow) { - const runIdMatch = typeof err?.message === 'string' ? err.message.match(/Run ID:\s*(\S+)/) : null; - if (runIdMatch?.[1]) { - deps.error( - `Run ID: ${runIdMatch[1]} — resume with: agent-relay run ${filePath} --resume ${runIdMatch[1]}` - ); - } - deps.error( - `Script workflows can be retried with: -` + - ` agent-relay run ${filePath} --resume -` + - `or start from a specific step with: -` + - ` agent-relay run ${filePath} --start-from [--previous-run-id ]` - ); - } - deps.exit(1); - } - }); } diff --git a/packages/cloud/package.json b/packages/cloud/package.json index 60fb8a130..41adc986c 100644 --- a/packages/cloud/package.json +++ b/packages/cloud/package.json @@ -10,6 +10,26 @@ "types": "./dist/index.d.ts", "import": "./dist/index.js" }, + "./permissions": { + "types": "./dist/permissions.d.ts", + "import": "./dist/permissions.js" + }, + "./token": { + "types": "./dist/token.d.ts", + "import": "./dist/token.js" + }, + "./local-jwks": { + "types": "./dist/local-jwks.d.ts", + "import": "./dist/local-jwks.js" + }, + "./compiler": { + "types": "./dist/compiler.d.ts", + "import": "./dist/compiler.js" + }, + "./audit": { + "types": "./dist/audit.d.ts", + "import": "./dist/audit.js" + }, "./package.json": "./package.json" }, "files": [ diff --git a/packages/sdk/src/provisioner/__tests__/compiler.test.ts b/packages/cloud/src/__tests__/compiler.test.ts similarity index 99% rename from packages/sdk/src/provisioner/__tests__/compiler.test.ts rename to packages/cloud/src/__tests__/compiler.test.ts index fadac2628..b2a2ca8a7 100644 --- a/packages/sdk/src/provisioner/__tests__/compiler.test.ts +++ b/packages/cloud/src/__tests__/compiler.test.ts @@ -2,7 +2,7 @@ import assert from 'node:assert/strict'; import { mkdtemp, mkdir, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import path from 'node:path'; -import test from 'node:test'; +import { test } from 'vitest'; import { compileAgentScopes, globToScopes } from '../compiler.js'; diff --git a/packages/sdk/src/provisioner/__tests__/presets.test.ts b/packages/cloud/src/__tests__/presets.test.ts similarity index 100% rename from packages/sdk/src/provisioner/__tests__/presets.test.ts rename to packages/cloud/src/__tests__/presets.test.ts diff --git a/packages/sdk/src/provisioner/__tests__/token-factory.test.ts b/packages/cloud/src/__tests__/token-factory.test.ts similarity index 99% rename from packages/sdk/src/provisioner/__tests__/token-factory.test.ts rename to packages/cloud/src/__tests__/token-factory.test.ts index 082f9d47c..62a7fac91 100644 --- a/packages/sdk/src/provisioner/__tests__/token-factory.test.ts +++ b/packages/cloud/src/__tests__/token-factory.test.ts @@ -1,6 +1,6 @@ import assert from 'node:assert/strict'; import { createPublicKey, createVerify } from 'node:crypto'; -import test from 'node:test'; +import { test } from 'vitest'; import { createLocalJwksKeyPair } from '../local-jwks.js'; import { diff --git a/packages/sdk/src/provisioner/__tests__/token.test.ts b/packages/cloud/src/__tests__/token.test.ts similarity index 98% rename from packages/sdk/src/provisioner/__tests__/token.test.ts rename to packages/cloud/src/__tests__/token.test.ts index f79028303..141134846 100644 --- a/packages/sdk/src/provisioner/__tests__/token.test.ts +++ b/packages/cloud/src/__tests__/token.test.ts @@ -1,5 +1,5 @@ import assert from 'node:assert/strict'; -import test from 'node:test'; +import { test } from 'vitest'; import { createLocalJwksKeyPair } from '../local-jwks.js'; import { DEFAULT_WORKFLOW_TOKEN_TTL_SECONDS, mintAgentToken, type TokenClaims } from '../token.js'; diff --git a/packages/sdk/src/provisioner/audit.ts b/packages/cloud/src/audit.ts similarity index 100% rename from packages/sdk/src/provisioner/audit.ts rename to packages/cloud/src/audit.ts diff --git a/packages/sdk/src/provisioner/compiler.ts b/packages/cloud/src/compiler.ts similarity index 98% rename from packages/sdk/src/provisioner/compiler.ts rename to packages/cloud/src/compiler.ts index 33e7d613f..7411d242f 100644 --- a/packages/sdk/src/provisioner/compiler.ts +++ b/packages/cloud/src/compiler.ts @@ -1,9 +1,14 @@ import ignore, { type Ignore } from 'ignore'; import { existsSync, readdirSync, readFileSync } from 'node:fs'; import path from 'node:path'; -import type { AgentPreset } from '../workflows/types.js'; - -import type { AgentPermissions, CompiledAgentPermissions, CompileInput, PermissionSource } from './types.js'; +import type { AgentPreset } from './permissions.js'; + +import type { + AgentPermissions, + CompiledAgentPermissions, + CompileInput, + PermissionSource, +} from './provisioning-types.js'; type FileAction = 'read' | 'write'; diff --git a/packages/cloud/src/index.ts b/packages/cloud/src/index.ts index 3a6da0b6d..ed125b9b9 100644 --- a/packages/cloud/src/index.ts +++ b/packages/cloud/src/index.ts @@ -65,6 +65,42 @@ export { type AuthSshRuntime, } from './lib/ssh-runtime.js'; +// Cross-product identity, permissions, tokens, and audit primitives. +export * from './permissions.js'; +export * from './provisioning-types.js'; +export { + defaultPermissionsForPreset, + expandPreset, + globsToScopes, + compileAgentPermissions, + mergeAcl, + resolveAgentPermissions, + compileAgentScopes, + mergePermissionSources, + expandAccessPreset, + globToScopes, +} from './compiler.js'; +export { + DEFAULT_WORKFLOW_TOKEN_TTL_SECONDS, + DEFAULT_ADMIN_AGENT_NAME, + DEFAULT_ADMIN_SCOPES, + mintAgentToken, + type TokenClaims, +} from './token.js'; +export { + createLocalJwks, + createLocalJwksKeyPair, + exportPrivateKeyPem, + importPrivateKeyPem, + RELAYAUTH_JWKS_URL_ENV, + RELAYAUTH_JWT_KID_ENV, + RELAYAUTH_JWT_PRIVATE_KEY_PEM_ENV, + type LocalJwks, + type LocalJwksKeyPair, + type LocalJwksSigningKey, +} from './local-jwks.js'; +export { PermissionAuditLog, getDefaultPermissionAuditPath } from './audit.js'; + export { type StoredAuth, type WhoAmIResponse, diff --git a/packages/sdk/src/provisioner/local-jwks.ts b/packages/cloud/src/local-jwks.ts similarity index 100% rename from packages/sdk/src/provisioner/local-jwks.ts rename to packages/cloud/src/local-jwks.ts diff --git a/packages/cloud/src/permissions.ts b/packages/cloud/src/permissions.ts new file mode 100644 index 000000000..dd412bbd6 --- /dev/null +++ b/packages/cloud/src/permissions.ts @@ -0,0 +1,154 @@ +/** + * General agent identity, CLI, and permission types. + * + * These are SDK-level primitives — the CLI registry, provisioner, and any + * other broker-adjacent code consume them directly. Workflow-shaped types + * (RelayYamlConfig, WorkflowStep, SwarmPattern, etc.) live in @relayflows/core. + */ + +// ── CLI identity ──────────────────────────────────────────────────────────── + +export type AgentCli = + | 'claude' + | 'codex' + | 'gemini' + | 'aider' + | 'goose' + | 'opencode' + | 'droid' + | 'cursor' + | 'cursor-agent' + | 'agent' + | 'api'; + +// ── Agent shape primitives ────────────────────────────────────────────────── + +export type AgentPreset = 'lead' | 'worker' | 'reviewer' | 'analyst'; + +/** Resource and behavioral constraints for an agent. */ +export interface AgentConstraints { + maxTokens?: number; + timeoutMs?: number; + retries?: number; + model?: string; + /** Silence duration in seconds before the agent is considered idle (0 = disabled, default: 30). */ + idleThresholdSecs?: number; +} + +/** Optional credential settings for an agent. */ +export interface AgentCredentialConfig { + /** Opt the agent into credential proxy mode. */ + proxy?: boolean; + /** Override the provider used for proxy credential resolution. */ + provider?: string; +} + +// ── Permission types ──────────────────────────────────────────────────────── + +/** + * Access preset for role-based permission shortcuts. + * + * readonly → read all non-ignored files, write nothing + * readwrite → read and write all non-ignored files (default behavior) + * restricted → read/write only explicitly listed paths + * full → read and write everything, including normally-ignored files + */ +export type AccessPreset = 'readonly' | 'readwrite' | 'restricted' | 'full'; + +/** Fine-grained network permission with allowlist/denylist. */ +export interface NetworkPermissions { + /** Host:port pairs the agent may connect to (e.g. ['registry.npmjs.org:443']). */ + allow?: string[]; + /** Host:port patterns to block (e.g. ['*'] to deny all except allowed). */ + deny?: string[]; +} + +/** Network permission: boolean to allow/deny all, or object for fine-grained control. */ +export type NetworkPermission = boolean | NetworkPermissions; + +/** Glob-based file permission scopes for an agent. */ +export interface FilePermissions { + /** Glob patterns the agent may read (e.g. ['src/**', 'docs/**']). */ + read?: string[]; + /** Glob patterns the agent may write (e.g. ['src/tests/**']). */ + write?: string[]; + /** Glob patterns the agent must never access (e.g. ['.env', 'secrets/**']). + * Deny rules take precedence over read/write grants. */ + deny?: string[]; +} + +/** Reusable named permission profile shared by one or more agents. */ +export interface PermissionProfileDefinition { + description?: string; + why?: string; + access?: AccessPreset; + inherit?: boolean; + files?: FilePermissions; + scopes?: string[]; + network?: NetworkPermission; + exec?: string[]; +} + +/** + * Permission configuration for an agent. + * + * All fields are optional — omitting `permissions` entirely preserves the + * default behavior (inherit dotfiles, readwrite access). + * + * Resolution order (later overrides earlier): + * 1. Dotfile patterns (.agentignore / .agentreadonly) when `inherit` is true + * 2. `access` preset expands into base file rules + * 3. Explicit `files` globs merge on top + * 4. `deny` patterns always win (applied last) + * 5. `scopes` are appended verbatim to the token + */ +export interface AgentPermissions { + description?: string; + profile?: string; + why?: string; + access?: AccessPreset; + inherit?: boolean; + files?: FilePermissions; + scopes?: string[]; + network?: NetworkPermission; + exec?: string[]; +} + +// ── Compiled permission output ────────────────────────────────────────────── + +/** Identifies where a permission rule originated. */ +export interface PermissionSource { + type: 'dotfile' | 'preset' | 'yaml' | 'scope'; + label: string; + ruleCount: number; +} + +/** + * The result of compiling an agent's permissions. Used to: + * 1. Mint the agent's relayauth token (scopes) + * 2. Configure the relayfile mount (readonlyPaths, readwritePaths, deniedPaths) + * 3. Enforce runtime restrictions (network, exec allowlist) + */ +export interface CompiledAgentPermissions { + agentName: string; + workspace: string; + effectiveAccess: AccessPreset; + inherited: boolean; + sources: PermissionSource[]; + readonlyPatterns: string[]; + readwritePatterns: string[]; + deniedPatterns: string[]; + readonlyPaths: string[]; + readwritePaths: string[]; + deniedPaths: string[]; + scopes: string[]; + network?: NetworkPermission; + exec?: string[]; + acl: Record; + summary: { + readonly: number; + readwrite: number; + denied: number; + customScopes: number; + }; +} diff --git a/packages/cloud/src/provisioning-types.ts b/packages/cloud/src/provisioning-types.ts new file mode 100644 index 000000000..03f221de3 --- /dev/null +++ b/packages/cloud/src/provisioning-types.ts @@ -0,0 +1,39 @@ +import type { + AccessPreset, + AgentPermissions, + CompiledAgentPermissions, + FilePermissions, + PermissionSource, +} from './permissions.js'; + +// ── Shared helper types ───────────────────────────────────────────────────── + +/** Aggregate counts for compiled permissions across provisioned agents. */ +export interface ProvisionSummary { + readonly: number; + readwrite: number; + denied: number; + customScopes: number; +} + +/** Convenience shape for a single agent's compiled scopes. */ +export interface CompiledAgentScopes { + agentName: string; + workspace: string; + scopes: string[]; + acl: Record; + summary: ProvisionSummary; +} + +// ── Compiler ──────────────────────────────────────────────────────────────── + +/** Input to the permission compiler for a single agent. */ +export interface CompileInput { + agentName: string; + workspace: string; + projectDir: string; + permissions: AgentPermissions; +} + +// Re-export upstream types for convenience. +export type { AccessPreset, AgentPermissions, CompiledAgentPermissions, FilePermissions, PermissionSource }; diff --git a/packages/sdk/src/provisioner/token.ts b/packages/cloud/src/token.ts similarity index 100% rename from packages/sdk/src/provisioner/token.ts rename to packages/cloud/src/token.ts diff --git a/packages/credential-proxy/README.md b/packages/credential-proxy/README.md index f54596e7f..18a90bb67 100644 --- a/packages/credential-proxy/README.md +++ b/packages/credential-proxy/README.md @@ -58,7 +58,7 @@ serve({ fetch: app.fetch, port: Number(process.env.PORT ?? 3001) }); ``` For the SDK-side wiring that lets workflow agents use the proxy transparently, -see [`@agent-relay/sdk/workflows`'s proxy-env +see [`@relayflows/core`'s proxy-env module](../sdk/src/workflows/proxy-env.ts) and the `credentialProxy` field on `SwarmConfig`. diff --git a/packages/github-primitive/DESIGN.md b/packages/github-primitive/DESIGN.md index 80fb4a7f5..16eadd775 100644 --- a/packages/github-primitive/DESIGN.md +++ b/packages/github-primitive/DESIGN.md @@ -18,7 +18,6 @@ packages/github-primitive/ │ ├── adapter.ts # Runtime detection, factory, base adapter │ ├── local-runtime.ts # Local gh CLI implementation │ ├── cloud-runtime.ts # Cloud Nango and relay-cloud implementation -│ ├── workflow-step.ts # Workflow step executor │ ├── actions/ # GitHub action implementations │ │ ├── branches.ts # listBranches, createBranch operations │ │ ├── commits.ts # listCommits, createCommit operations diff --git a/packages/github-primitive/README.md b/packages/github-primitive/README.md index 383a0fcba..0efc7a9ce 100644 --- a/packages/github-primitive/README.md +++ b/packages/github-primitive/README.md @@ -35,28 +35,6 @@ The client and workflow step support: - Branches and commits: `listBranches`, `createBranch`, `listCommits`, `createCommit` - Identity: `getUser`, `listOrganizations` -## Workflow Step - -```ts -import { createGitHubStep } from '@agent-relay/github-primitive/workflow-step'; - -createGitHubStep({ - name: 'read-readme', - action: 'readFile', - repo: 'AgentWorkforce/relay', - params: { - path: 'README.md', - }, - output: { - mode: 'data', - format: 'text', - }, -}); -``` - -See `examples/github-step.ts` for a workflow runner example and -`examples/github-client.ts` for a standalone client example. - ## End-to-end PR workflow `examples/end-to-end-pr-workflow.ts` walks the full PR lifecycle against diff --git a/packages/github-primitive/examples/end-to-end-pr-workflow.ts b/packages/github-primitive/examples/end-to-end-pr-workflow.ts deleted file mode 100644 index 59b998c3d..000000000 --- a/packages/github-primitive/examples/end-to-end-pr-workflow.ts +++ /dev/null @@ -1,286 +0,0 @@ -/** - * End-to-end pull-request workflow using the GitHub primitive. - * - * Demonstrates the full PR lifecycle from a single workflow definition: - * 1. Detect + log which runtime will be used (local gh vs Nango vs - * relay-cloud proxy). - * 2. createBranch — branch off the default branch for the change. - * 3. createFile — write a timestamped marker file on the new branch. - * 4. createPR — open the pull request. This is the core interface the - * cloud workflows need to swap to. - * 5. getPR — round-trip verify. - * 6. updatePR — add a description edit (shows mutation). - * 7. listPRs — confirm it appears in the default filter. - * 8. (commented out) mergePR — the last mile. Left off by default so - * running this example does NOT merge anything against real repos. - * - * The same file runs identically in three environments because the - * primitive's runtime selection handles the transport: - * - * - LOCAL gh CLI (needs `gh auth status` to succeed) - * - CLOUD (tenant) Nango — NANGO_GITHUB_CONNECTION_ID + - * NANGO_GITHUB_PROVIDER_CONFIG_KEY + - * NANGO_SECRET_KEY - * - CLOUD (fallback) relay-cloud proxy — RELAY_CLOUD_API_URL + - * RELAY_CLOUD_API_TOKEN + WORKSPACE_ID - * - * Run: - * GITHUB_REPO=AgentWorkforce/scratch npx tsx examples/end-to-end-pr-workflow.ts - * - * Defaults to AgentWorkforce/scratch (a sandbox repo) so no one - * accidentally opens a PR against a real repo. Override via env vars. - */ - -import { WorkflowRunner, type RelayYamlConfig } from '@agent-relay/sdk/workflows'; - -import { GitHubClient } from '../src/client.js'; -import { GitHubStepExecutor, createGitHubStep } from '../src/workflow-step.js'; -import type { GitHubRuntimeConfig } from '../src/types.js'; - -const repo = process.env.GITHUB_REPO ?? 'AgentWorkforce/scratch'; -const baseBranch = process.env.GITHUB_BASE_BRANCH ?? 'main'; -const branchName = process.env.GITHUB_BRANCH_OVERRIDE ?? `examples/github-primitive-${Date.now()}`; -const markerPath = `examples/github-primitive-runs/${Date.now()}.md`; - -const githubConfig: GitHubRuntimeConfig = { - // `auto` prefers cloud credentials when present, otherwise falls back - // to the local `gh` CLI. Override with GITHUB_RUNTIME=local|cloud when - // you want to pin a specific path. - runtime: (process.env.GITHUB_RUNTIME as GitHubRuntimeConfig['runtime']) ?? 'auto', - - // Cloud path A — Nango. Per-tenant GitHub App installation. Cloud - // callers typically resolve this via a connection-resolver helper - // that maps { workspaceId, repo } -> { connectionId, providerConfigKey }. - nango: { - connectionId: process.env.NANGO_GITHUB_CONNECTION_ID, - providerConfigKey: process.env.NANGO_GITHUB_PROVIDER_CONFIG_KEY, - secretKey: process.env.NANGO_SECRET_KEY, - }, - - // Cloud path B — relay-cloud GitHub proxy. Used when Nango isn't - // wired but a relay-cloud bearer token is available. - relayCloud: { - apiUrl: process.env.RELAY_CLOUD_API_URL, - accessToken: process.env.RELAY_CLOUD_API_TOKEN, - workspaceId: process.env.WORKSPACE_ID, - }, -}; - -const githubExecutor = new GitHubStepExecutor(githubConfig); - -const config: RelayYamlConfig = { - version: '1.0', - name: 'end-to-end-pr-workflow', - description: - 'Walk through the full PR lifecycle — branch, commit, open, update, list — using the GitHub primitive.', - swarm: { pattern: 'pipeline' }, - agents: [], - workflows: [ - { - name: 'end-to-end-pr-workflow', - steps: [ - // 1. Resolve the default branch — sanity check the connection - // works before we start making mutations. - createGitHubStep({ - name: 'inspect-repo', - action: 'getRepo', - repo, - output: { - mode: 'summary', - includeRuntime: true, - pretty: true, - }, - }), - - // 2. Create the feature branch off the base branch's HEAD. - // Chains {{steps.inspect-repo.output.data.defaultBranch}} - // when no base override is provided — here we keep it - // explicit for readability. - createGitHubStep({ - name: 'create-branch', - dependsOn: ['inspect-repo'], - action: 'createBranch', - repo, - params: { - branch: branchName, - source: baseBranch, - }, - output: { mode: 'data', format: 'json', path: 'ref' }, - }), - - // 3. Write a marker file on the new branch. createFile handles - // the blob + tree + commit dance for you. - createGitHubStep({ - name: 'write-marker-file', - dependsOn: ['create-branch'], - action: 'createFile', - repo, - params: { - path: markerPath, - branch: branchName, - content: [ - '# GitHub primitive example run', - '', - `- Runtime chosen: see workflow log for inspect-repo detection`, - `- Generated: ${new Date().toISOString()}`, - '', - 'This file is created by', - '`packages/github-primitive/examples/end-to-end-pr-workflow.ts`', - 'to prove the full PR lifecycle works against the configured runtime.', - ].join('\n'), - message: `examples: github-primitive demo run ${new Date().toISOString()}`, - }, - output: { mode: 'data', format: 'json', path: 'commit.sha' }, - }), - - // 4. Open the pull request. This is the core step cloud - // workflows need. title/body/head/base mirror the REST API - // shape — no translation work at the call site. - createGitHubStep({ - name: 'open-pr', - dependsOn: ['write-marker-file'], - action: 'createPR', - repo, - params: { - title: `examples: github-primitive end-to-end demo (${branchName})`, - head: branchName, - base: baseBranch, - body: [ - '## Summary', - '', - 'Automated PR opened by', - '`packages/github-primitive/examples/end-to-end-pr-workflow.ts`', - 'to exercise the GitHub primitive interface end-to-end.', - '', - '## Runtime selection', - '', - 'See the workflow log for the `inspect-repo` step — it logs the', - 'selected runtime (`local`, `nango`, or `relay-cloud`).', - '', - '## Safe to close', - '', - 'This PR is a demonstration. No one should merge it — close it', - 'once you have inspected the end-to-end round-trip.', - ].join('\n'), - draft: true, - }, - output: { - mode: 'data', - format: 'json', - includeRuntime: true, - includeMetadata: true, - pretty: true, - }, - }), - - // 5. Read the PR back to prove the resolver + runtime actually - // persisted the change, and to surface the PR number for - // downstream steps. - createGitHubStep({ - name: 'verify-pr', - dependsOn: ['open-pr'], - action: 'getPR', - repo, - params: { - // The output of `open-pr` is the Pulls REST response; we pull - // the number off it for subsequent mutation. - number: '{{steps.open-pr.output.data.number}}', - }, - output: { - mode: 'summary', - includeRuntime: true, - pretty: true, - }, - }), - - // 6. Update the PR — shows how to use updatePR for edits that - // don't need code changes (body, title, draft state, etc.). - createGitHubStep({ - name: 'edit-pr-body', - dependsOn: ['verify-pr'], - action: 'updatePR', - repo, - params: { - number: '{{steps.open-pr.output.data.number}}', - body: [ - '## Summary', - '', - 'Automated PR opened by the GitHub primitive end-to-end', - 'example. This body was updated by the `edit-pr-body` step', - 'to demonstrate `updatePR` works through the same adapter.', - '', - '## Safe to close', - '', - 'Demo — close, do not merge.', - ].join('\n'), - }, - output: { mode: 'result', format: 'json' }, - }), - - // 7. List open PRs to prove the new one is indexable. - createGitHubStep({ - name: 'list-open-prs', - dependsOn: ['edit-pr-body'], - action: 'listPRs', - repo, - params: { - state: 'open', - perPage: 10, - }, - output: { mode: 'summary', pretty: true }, - }), - - // 8. (Commented) The last mile — merge. Intentionally left off: - // running this example against a real repo should not merge - // anything. Uncomment when exercising a disposable scratch - // repo or CI harness. - // - // createGitHubStep({ - // name: 'merge-pr', - // dependsOn: ['list-open-prs'], - // action: 'mergePR', - // repo, - // params: { - // number: '{{steps.open-pr.output.data.number}}', - // mergeMethod: 'squash', - // commitTitle: 'examples: github-primitive demo (squash)', - // }, - // output: { mode: 'data', format: 'json' }, - // }), - ], - }, - ], - errorHandling: { strategy: 'fail-fast' }, -}; - -async function main(): Promise { - const detection = await GitHubClient.detect(githubConfig); - - console.log('────────────────────────────────────────'); - console.log(`repo: ${repo}`); - console.log(`base branch: ${baseBranch}`); - console.log(`feature branch: ${branchName}`); - console.log(`runtime selected: ${detection.runtime}`); - console.log(`detection source: ${detection.source}`); - console.log(`local gh available: ${detection.local.available}`); - console.log(`cloud available: ${detection.cloud.available}`); - if (detection.reason) { - console.log(`reason: ${detection.reason}`); - } - console.log('────────────────────────────────────────'); - - const runner = new WorkflowRunner({ - cwd: process.cwd(), - executor: githubExecutor, - }); - - const result = await runner.execute(config); - console.log(`\nWorkflow completed: ${result.status}`); -} - -if (process.argv[1] === new URL(import.meta.url).pathname) { - main().catch((error) => { - console.error(error instanceof Error ? error.stack : error); - process.exitCode = 1; - }); -} diff --git a/packages/github-primitive/examples/github-step.ts b/packages/github-primitive/examples/github-step.ts deleted file mode 100644 index b060b7c32..000000000 --- a/packages/github-primitive/examples/github-step.ts +++ /dev/null @@ -1,105 +0,0 @@ -import { WorkflowRunner, type RelayYamlConfig } from '@agent-relay/sdk/workflows'; - -import { GitHubClient } from '../src/client.js'; -import { GitHubStepExecutor, createGitHubStep } from '../src/workflow-step.js'; -import type { GitHubRuntimeConfig } from '../src/types.js'; - -const repo = process.env.GITHUB_REPO ?? 'AgentWorkforce/relay'; - -const githubConfig: GitHubRuntimeConfig = { - runtime: (process.env.GITHUB_RUNTIME as GitHubRuntimeConfig['runtime']) ?? 'auto', - nango: { - connectionId: process.env.NANGO_GITHUB_CONNECTION_ID, - providerConfigKey: process.env.NANGO_GITHUB_PROVIDER_CONFIG_KEY, - }, - relayCloud: { - apiUrl: process.env.RELAY_CLOUD_API_URL, - accessToken: process.env.RELAY_CLOUD_API_TOKEN, - workspaceId: process.env.WORKSPACE_ID, - }, -}; - -const githubExecutor = new GitHubStepExecutor(githubConfig); - -const config: RelayYamlConfig = { - version: '1.0', - name: 'github-primitive-workflow', - description: 'GitHub primitive workflow with runtime auto-detection and chained output.', - swarm: { - pattern: 'pipeline', - }, - agents: [], - workflows: [ - { - name: 'github-primitive-workflow', - steps: [ - createGitHubStep({ - name: 'inspect-repository', - action: 'getRepo', - repo, - output: { - mode: 'summary', - includeRuntime: true, - pretty: true, - }, - }), - createGitHubStep({ - name: 'list-open-issues', - dependsOn: ['inspect-repository'], - action: 'listIssues', - repo, - params: { - state: 'open', - perPage: 5, - }, - output: { - mode: 'summary', - includeRuntime: true, - pretty: true, - }, - }), - createGitHubStep({ - name: 'read-readme', - dependsOn: ['list-open-issues'], - action: 'readFile', - repo, - params: { - path: 'README.md', - }, - output: { - mode: 'data', - format: 'text', - }, - }), - ], - }, - ], - errorHandling: { - strategy: 'fail-fast', - }, -}; - -async function main(): Promise { - const detection = await GitHubClient.detect(githubConfig); - - console.log(`GitHub runtime selected: ${detection.runtime}`); - console.log(`Detection source: ${detection.source}`); - console.log(`Local gh CLI: ${detection.local.available ? 'available' : 'unavailable'}`); - console.log(`Cloud GitHub: ${detection.cloud.available ? 'available' : 'unavailable'}`); - console.log(detection.reason); - - const runner = new WorkflowRunner({ - cwd: process.cwd(), - executor: githubExecutor, - }); - - const result = await runner.execute(config); - console.log(`GitHub workflow completed: ${result.status}`); -} - -if (process.argv[1] === new URL(import.meta.url).pathname) { - main().catch((error) => { - console.error(error instanceof Error ? error.stack : error); - process.exitCode = 1; - }); -} diff --git a/packages/github-primitive/examples/multi-tenant-pr-workflow.ts b/packages/github-primitive/examples/multi-tenant-pr-workflow.ts deleted file mode 100644 index f06b5b91d..000000000 --- a/packages/github-primitive/examples/multi-tenant-pr-workflow.ts +++ /dev/null @@ -1,201 +0,0 @@ -/** - * Multi-tenant pull-request workflow. - * - * Cloud's reality: every workspace (AgentWorkforce, MSD, NightCTO, ...) - * has its own GitHub App installation. The primitive's per-step `config` - * field lets one workflow route different actions through different - * Nango connections — no need for one workflow per tenant. - * - * The usual cloud pattern: - * - * 1. A resolver helper — lives in cloud, NOT in this primitive — maps - * { workspaceId, repo } -> { connectionId, providerConfigKey }. - * Recommended signature: - * - * githubConfigForRepo({ repo, workspaceId }): Promise - * - * It reads the workspace_integrations table, picks the row whose - * provider matches the target repo's app, and returns a ready-to- - * use config object. - * - * 2. Workflow authors call that resolver at step-build time and pass - * the result as `config` to each `createGitHubStep` call. - * - * This example simulates the resolver with a static table so the - * illustration is self-contained — in production, swap it for the DB - * lookup. - * - * Run: - * NANGO_SECRET_KEY=... \ - * AGENTWORKFORCE_CONNECTION_ID=... \ - * MSD_CONNECTION_ID=... \ - * npx tsx examples/multi-tenant-pr-workflow.ts - */ - -import { WorkflowRunner, type RelayYamlConfig } from '@agent-relay/sdk/workflows'; - -import { GitHubStepExecutor, createGitHubStep } from '../src/workflow-step.js'; -import type { GitHubRuntimeConfig, RepositoryRef } from '../src/types.js'; - -// ─── Resolver (stand-in for cloud's real implementation) ─────────────── - -interface TenantConnection { - workspaceId: string; - providerConfigKey: string; // 'github-agentworkforce' | 'github-msd' | 'github-nightcto' - connectionIdEnvVar: string; // env var that carries the Nango connection id -} - -// In cloud, this table is the workspace_integrations DB rows joined to -// the Nango provider registry. Here we keep it inline for illustration. -const TENANTS: Record = { - 'AgentWorkforce/cloud': { - workspaceId: 'rw_agentworkforce', - providerConfigKey: 'github-agentworkforce', - connectionIdEnvVar: 'AGENTWORKFORCE_CONNECTION_ID', - }, - 'AgentWorkforce/sage': { - workspaceId: 'rw_agentworkforce', - providerConfigKey: 'github-agentworkforce', - connectionIdEnvVar: 'AGENTWORKFORCE_CONNECTION_ID', - }, - 'msd-ventures/platform': { - workspaceId: 'rw_msd', - providerConfigKey: 'github-msd', - connectionIdEnvVar: 'MSD_CONNECTION_ID', - }, -}; - -function githubConfigForRepo(opts: { - repo: string | RepositoryRef; - /** Workspace scope. Optional — cloud-owned repos default to the shared app. */ - workspaceId?: string; -}): GitHubRuntimeConfig { - const repoKey = typeof opts.repo === 'string' ? opts.repo : `${opts.repo.owner}/${opts.repo.repo}`; - const tenant = TENANTS[repoKey]; - - if (!tenant) { - throw new Error( - `No GitHub connection mapped for ${repoKey} — register it in the tenants table or workspace_integrations.` - ); - } - - const connectionId = process.env[tenant.connectionIdEnvVar]; - if (!connectionId) { - throw new Error(`Missing ${tenant.connectionIdEnvVar} — set the Nango connection id for ${repoKey}.`); - } - - return { - runtime: 'auto', - nango: { - connectionId, - providerConfigKey: tenant.providerConfigKey, - secretKey: process.env.NANGO_SECRET_KEY, - }, - relayCloud: { - apiUrl: process.env.RELAY_CLOUD_API_URL, - accessToken: process.env.RELAY_CLOUD_API_TOKEN, - workspaceId: opts.workspaceId ?? tenant.workspaceId, - }, - }; -} - -// ─── Workflow ──────────────────────────────────────────────────────────── - -const agentworkforceRepo = 'AgentWorkforce/cloud'; -const msdRepo = 'msd-ventures/platform'; - -const executor = new GitHubStepExecutor({ runtime: 'auto' }); - -const config: RelayYamlConfig = { - version: '1.0', - name: 'multi-tenant-pr-workflow', - description: - 'Open PRs in two tenants — AgentWorkforce/cloud (shared app) and msd-ventures/platform (MSD app) — from one workflow by varying per-step config.', - swarm: { pattern: 'pipeline' }, - agents: [], - workflows: [ - { - name: 'multi-tenant-pr-workflow', - steps: [ - // ─── Tenant A: AgentWorkforce ─────────────────────────────── - createGitHubStep({ - name: 'inspect-agentworkforce-cloud', - action: 'getRepo', - repo: agentworkforceRepo, - config: githubConfigForRepo({ repo: agentworkforceRepo }), - output: { mode: 'summary', includeRuntime: true, pretty: true }, - }), - - createGitHubStep({ - name: 'open-pr-agentworkforce', - dependsOn: ['inspect-agentworkforce-cloud'], - action: 'createPR', - repo: agentworkforceRepo, - params: { - // Pretend we prepared this branch in an earlier workflow step - // (push-branch in the caller workflow). - head: 'feat/typed-webhook-consumers', - base: 'main', - title: 'feat(web): typed webhook-consumer config', - body: "Routes through AgentWorkforce's github-agentworkforce Nango connection.", - draft: true, - }, - config: githubConfigForRepo({ repo: agentworkforceRepo }), - output: { mode: 'data', format: 'json', path: 'data.html_url' }, - }), - - // ─── Tenant B: MSD ────────────────────────────────────────── - // Same workflow, same action verbs, different connection - // resolved by the per-step `config` field. Runs sequentially - // here but could run in parallel — tenants are independent. - createGitHubStep({ - name: 'inspect-msd-platform', - dependsOn: ['open-pr-agentworkforce'], - action: 'getRepo', - repo: msdRepo, - config: githubConfigForRepo({ repo: msdRepo }), - output: { mode: 'summary', includeRuntime: true, pretty: true }, - }), - - createGitHubStep({ - name: 'open-pr-msd', - dependsOn: ['inspect-msd-platform'], - action: 'createPR', - repo: msdRepo, - params: { - head: 'integrations/agent-relay-webhook', - base: 'main', - title: 'feat: wire up Agent Relay webhook receiver', - body: "Routes through MSD's github-msd Nango connection — separate GitHub App install.", - draft: true, - }, - config: githubConfigForRepo({ repo: msdRepo }), - output: { mode: 'data', format: 'json', path: 'data.html_url' }, - }), - ], - }, - ], - errorHandling: { strategy: 'fail-fast' }, -}; - -async function main(): Promise { - console.log('Opening PRs in two tenants via per-step GitHub config overrides:'); - console.log(` ${agentworkforceRepo} → connection ${TENANTS[agentworkforceRepo].providerConfigKey}`); - console.log(` ${msdRepo} → connection ${TENANTS[msdRepo].providerConfigKey}`); - console.log(); - - const runner = new WorkflowRunner({ - cwd: process.cwd(), - executor, - }); - - const result = await runner.execute(config); - console.log(`\nWorkflow completed: ${result.status}`); -} - -if (process.argv[1] === new URL(import.meta.url).pathname) { - main().catch((error) => { - console.error(error instanceof Error ? error.stack : error); - process.exitCode = 1; - }); -} diff --git a/packages/github-primitive/package.json b/packages/github-primitive/package.json index 6a881e551..597bce925 100644 --- a/packages/github-primitive/package.json +++ b/packages/github-primitive/package.json @@ -10,11 +10,6 @@ "types": "./dist/index.d.ts", "import": "./dist/index.js", "default": "./dist/index.js" - }, - "./workflow-step": { - "types": "./dist/workflow-step.d.ts", - "import": "./dist/workflow-step.js", - "default": "./dist/workflow-step.js" } }, "files": [ @@ -31,9 +26,7 @@ "test": "vitest run", "test:watch": "vitest" }, - "dependencies": { - "@agent-relay/workflow-types": "7.1.1" - }, + "dependencies": {}, "devDependencies": { "@types/node": "^22.19.3", "typescript": "^5.9.3", diff --git a/packages/github-primitive/src/index.ts b/packages/github-primitive/src/index.ts index 482276392..3fffc970c 100644 --- a/packages/github-primitive/src/index.ts +++ b/packages/github-primitive/src/index.ts @@ -4,7 +4,6 @@ export * from './adapter.js'; export * from './local-runtime.js'; export * from './cloud-runtime.js'; export * from './client.js'; -export * from './workflow-step.js'; export * from './actions/branches.js'; export * from './actions/commits.js'; export * from './actions/repos.js'; diff --git a/packages/github-primitive/src/workflow-step.ts b/packages/github-primitive/src/workflow-step.ts deleted file mode 100644 index 11c2ca141..000000000 --- a/packages/github-primitive/src/workflow-step.ts +++ /dev/null @@ -1,615 +0,0 @@ -import type { RunnerStepExecutor, WorkflowStep } from '@agent-relay/workflow-types'; - -import { GitHubClient } from './client.js'; -import type { - GitHubActionName, - GitHubActionParamsMap, - GitHubActionResult, - GitHubRuntime, - GitHubRuntimeConfig, - RepositoryRef, -} from './types.js'; -import { GITHUB_ACTIONS } from './types.js'; - -export type GitHubStepOutputMode = 'data' | 'result' | 'summary' | 'raw' | 'none'; -export type GitHubStepOutputFormat = 'json' | 'text'; - -type RepoParamKeys = 'owner' | 'repo'; -type ParamsFor = TName extends keyof GitHubActionParamsMap - ? GitHubActionParamsMap[TName] - : Record; -type StripRepoParams = Omit> & - Partial>>; - -export type GitHubStepParams = [ - NonNullable>, -] extends [never] - ? Record - : StripRepoParams>>; - -export interface GitHubStepOutputConfig { - /** Which action result becomes the workflow step output. Defaults to "data". */ - mode?: GitHubStepOutputMode; - /** Emit JSON for structured chaining or text for simple downstream interpolation. Defaults to "json". */ - format?: GitHubStepOutputFormat; - /** Select a nested field from the projected output, e.g. "number" or "data.htmlUrl". */ - path?: string; - /** Include adapter metadata such as runtime and timing in JSON output. Defaults false. */ - includeMetadata?: boolean; - /** Include the selected runtime in JSON output. Defaults false. */ - includeRuntime?: boolean; - /** Pretty-print JSON output. Defaults false. */ - pretty?: boolean; -} - -export interface GitHubStepConfig { - /** Unique step name within the workflow. */ - name: string; - /** Dependencies in the Relay workflow DAG. */ - dependsOn?: string[]; - /** GitHub action to execute. */ - action: TName; - /** Repository in owner/repo format. Used as owner and repo params for repository-scoped actions. */ - repo?: string | RepositoryRef; - /** Action-specific parameters. Values may include workflow templates such as {{steps.plan.output}}. */ - params?: GitHubStepParams; - /** Runtime settings for local gh CLI, cloud/Nango, or auto detection. */ - config?: GitHubRuntimeConfig; - /** Controls the string captured as {{steps..output}}. */ - output?: GitHubStepOutputConfig; - /** Workflow step timeout in milliseconds. */ - timeoutMs?: number; - /** Number of retry attempts when the workflow runner retries this integration step. */ - retries?: number; -} - -export interface GitHubStepExecutionContext { - workspaceId?: string; - client?: GitHubClient; - config?: GitHubRuntimeConfig; -} - -export interface GitHubStepExecutionResult { - success: boolean; - output: string; - result: GitHubActionResult; - runtime?: GitHubRuntime; - error?: string; -} - -export interface GitHubIntegrationStepResult { - output: string; - success: boolean; -} - -type ResolvedParams = Record; - -const GITHUB_INTEGRATION = 'github'; -const RESERVED_PARAM_KEYS = new Set([ - 'action', - 'config', - 'githubConfig', - 'output', - 'params', - 'actionParams', - 'repository', - 'runtime', - 'ghPath', - 'timeout', - 'retryOnRateLimit', - 'maxRetries', -]); - -/** - * Create a Relay integration step that can be used in TypeScript workflows or - * emitted into .relay YAML. Complex params/config objects are serialized so the - * workflow template resolver can interpolate values before execution. - */ -export function createGitHubStep( - config: GitHubStepConfig -): WorkflowStep { - validateGitHubStepConfig(config); - - const params: Record = {}; - - if (config.repo !== undefined) { - params.repo = repoToString(config.repo); - } - if (config.params !== undefined) { - params.params = JSON.stringify(config.params); - } - if (config.config !== undefined) { - params.config = JSON.stringify(config.config); - } - if (config.output !== undefined) { - params.output = JSON.stringify(config.output); - } - - const step: WorkflowStep = { - name: config.name, - type: 'integration', - integration: GITHUB_INTEGRATION, - action: config.action, - params, - }; - - if (config.dependsOn !== undefined) step.dependsOn = config.dependsOn; - if (config.timeoutMs !== undefined) step.timeoutMs = config.timeoutMs; - if (config.retries !== undefined) step.retries = config.retries; - - return step; -} - -export class GitHubStepExecutor implements RunnerStepExecutor { - constructor(private readonly options: GitHubRuntimeConfig = {}) {} - - async executeAgentStep(): Promise { - throw new Error('GitHubStepExecutor only executes GitHub integration steps.'); - } - - async execute( - config: GitHubStepConfig, - context: GitHubStepExecutionContext = {} - ): Promise> { - validateGitHubStepConfig(config); - - const runtimeConfig = mergeRuntimeConfig(this.options, context.config, config.config); - if (context.workspaceId && !runtimeConfig.relayCloud?.workspaceId) { - runtimeConfig.relayCloud = { - ...runtimeConfig.relayCloud, - workspaceId: context.workspaceId, - }; - } - - const client = context.client ?? new GitHubClient(runtimeConfig); - const actionParams = buildActionParams(config); - const result = await client.executeAction(config.action, actionParams); - const runtime = result.metadata?.runtime ?? (await safeGetRuntime(client)); - const output = formatStepOutput(config, result, runtime); - - return { - success: result.success, - output, - result, - runtime, - error: result.error, - }; - } - - async executeIntegrationStep( - step: WorkflowStep, - resolvedParams: Record, - context: { workspaceId?: string } = {} - ): Promise { - if (step.integration !== GITHUB_INTEGRATION) { - return { - success: false, - output: `GitHubStepExecutor only handles "${GITHUB_INTEGRATION}" integration steps`, - }; - } - - try { - const config = githubStepConfigFromWorkflowStep(step, resolvedParams); - const result = await this.execute(config, context); - - return { - success: result.success, - output: result.success ? result.output : result.output || result.error || 'GitHub step failed', - }; - } catch (error) { - return { - success: false, - output: error instanceof Error ? error.message : String(error), - }; - } - } -} - -export function githubStepConfigFromWorkflowStep( - step: WorkflowStep, - resolvedParams: Record -): GitHubStepConfig { - const params = normalizeResolvedParams(resolvedParams); - const action = step.action as GitHubActionName | undefined; - - if (!action) { - throw new Error(`GitHub step "${step.name}" requires an action`); - } - - const config = - readJsonParam(params.config ?? params.githubConfig, 'config') ?? - runtimeConfigFromParams(params); - const output = readJsonParam(params.output, 'output') ?? undefined; - const repo = readRepositoryParam(params); - const actionParams = readActionParams(params); - - return { - name: step.name, - dependsOn: step.dependsOn, - action, - repo, - params: actionParams, - config, - output, - timeoutMs: step.timeoutMs, - retries: step.retries, - }; -} - -function validateGitHubStepConfig(config: GitHubStepConfig): void { - if (!config.name) { - throw new Error('GitHub step requires a non-empty name'); - } - if (!config.action || typeof config.action !== 'string') { - throw new Error(`GitHub step "${config.name}" requires an action name`); - } - if (!GITHUB_ACTIONS.includes(config.action as never)) { - throw new Error(`GitHub step "${config.name}" uses unsupported action "${config.action}"`); - } - if (config.repo !== undefined) { - parseRepositoryRef(config.repo); - } - if (config.params !== undefined && !isRecord(config.params)) { - throw new Error(`GitHub step "${config.name}" params must be an object`); - } -} - -function buildActionParams(config: GitHubStepConfig): unknown { - const repo = config.repo === undefined ? undefined : parseRepositoryRef(config.repo); - const params = config.params ? ({ ...config.params } as Record) : {}; - const merged = repo ? { ...repo, ...params } : params; - - return Object.keys(merged).length === 0 ? undefined : merged; -} - -function readActionParams(params: ResolvedParams): Record { - const serializedParams = params.params ?? params.actionParams; - if (serializedParams !== undefined) { - const parsed = readJsonParam>(serializedParams, 'params'); - if (parsed === undefined) return {}; - if (!isRecord(parsed)) { - throw new Error('GitHub step params.params must be a JSON object'); - } - return parsed; - } - - const actionParams: Record = {}; - const repoValue = params.repo; - const repoIsRepositoryRef = - params.owner === undefined && typeof repoValue === 'string' && repoValue.includes('/'); - - for (const [key, value] of Object.entries(params)) { - if (RESERVED_PARAM_KEYS.has(key)) continue; - if (key === 'repo' && repoIsRepositoryRef) continue; - actionParams[key] = value; - } - - return actionParams; -} - -function readRepositoryParam(params: ResolvedParams): string | RepositoryRef | undefined { - const value = params.repository ?? (params.owner === undefined ? params.repo : undefined); - if (value === undefined) return undefined; - if (typeof value === 'string' || isRecord(value)) { - return parseRepositoryRef(value); - } - throw new Error('GitHub step repo must be in owner/repo format'); -} - -function runtimeConfigFromParams(params: ResolvedParams): GitHubRuntimeConfig | undefined { - const config: GitHubRuntimeConfig = {}; - - if (typeof params.runtime === 'string') { - config.runtime = params.runtime as GitHubRuntimeConfig['runtime']; - } - if (typeof params.ghPath === 'string') { - config.ghPath = params.ghPath; - } - if (typeof params.timeout === 'number') { - config.timeout = params.timeout; - } - if (typeof params.retryOnRateLimit === 'boolean') { - config.retryOnRateLimit = params.retryOnRateLimit; - } - if (typeof params.maxRetries === 'number') { - config.maxRetries = params.maxRetries; - } - - return Object.keys(config).length === 0 ? undefined : config; -} - -function mergeRuntimeConfig(...configs: Array): GitHubRuntimeConfig { - const merged: GitHubRuntimeConfig = {}; - - for (const config of configs) { - if (!config) continue; - - const { nango, relayCloud, env, ...flatConfig } = config; - Object.assign(merged, flatConfig); - if (nango) { - merged.nango = { - ...merged.nango, - ...nango, - }; - } - if (relayCloud) { - merged.relayCloud = { - ...merged.relayCloud, - ...relayCloud, - }; - } - if (env) { - merged.env = { - ...merged.env, - ...env, - }; - } - } - - return merged; -} - -function formatStepOutput( - config: GitHubStepConfig, - result: GitHubActionResult, - runtime?: GitHubRuntime -): string { - const outputConfig = config.output ?? {}; - const mode = outputConfig.mode ?? 'data'; - const format = outputConfig.format ?? 'json'; - - if (mode === 'none') { - return ''; - } - - let projection = buildOutputProjection(mode, result, runtime, outputConfig); - - if (outputConfig.path) { - projection = resolvePath(projection, outputConfig.path); - } - - if (format === 'text') { - return projectionToText(projection); - } - - return JSON.stringify(projection, undefined, outputConfig.pretty ? 2 : undefined); -} - -function buildOutputProjection( - mode: GitHubStepOutputMode, - result: GitHubActionResult, - runtime: GitHubRuntime | undefined, - outputConfig: GitHubStepOutputConfig -): unknown { - if (mode === 'raw') { - return result.output; - } - - if (mode === 'summary') { - return withOptionalMetadata(summarizeResult(result, runtime), result, runtime, outputConfig); - } - - if (mode === 'result') { - const projected: Record = { - success: result.success, - output: result.output, - }; - if (result.data !== undefined) projected.data = result.data; - if (result.error !== undefined) projected.error = result.error; - - return withOptionalMetadata(projected, result, runtime, outputConfig); - } - - const data = result.data ?? (result.output ? result.output : null); - return withOptionalMetadata(data, result, runtime, outputConfig); -} - -function summarizeResult( - result: GitHubActionResult, - runtime?: GitHubRuntime -): Record { - if (!result.success) { - return { - success: false, - error: result.error ?? 'GitHub action failed', - runtime, - }; - } - - const data = result.data; - if (Array.isArray(data)) { - return { - success: true, - count: data.length, - items: data.slice(0, 10).map(summarizeValue), - runtime, - }; - } - - return { - success: true, - value: summarizeValue(data ?? result.output), - runtime, - }; -} - -function summarizeValue(value: unknown): unknown { - if (!isRecord(value)) { - return value; - } - - const summary: Record = {}; - for (const key of [ - 'fullName', - 'name', - 'number', - 'title', - 'state', - 'path', - 'sha', - 'type', - 'defaultBranch', - 'visibility', - 'private', - 'createdAt', - 'updatedAt', - 'htmlUrl', - 'url', - ]) { - if (value[key] !== undefined) { - summary[key] = value[key]; - } - } - - return Object.keys(summary).length > 0 ? summary : value; -} - -function withOptionalMetadata( - value: unknown, - result: GitHubActionResult, - runtime: GitHubRuntime | undefined, - outputConfig: GitHubStepOutputConfig -): unknown { - if (!outputConfig.includeMetadata && !outputConfig.includeRuntime) { - return value; - } - - const metadata: Record = {}; - if (outputConfig.includeRuntime && runtime !== undefined) metadata.runtime = runtime; - if (outputConfig.includeMetadata && result.metadata !== undefined) { - Object.assign(metadata, result.metadata); - } - - return { value, metadata }; -} - -function projectionToText(value: unknown): string { - if (typeof value === 'string') return value; - if (value === null || value === undefined) return ''; - - if (Array.isArray(value)) { - return value.map((entry) => projectionToText(entry)).join('\n'); - } - - if (isRecord(value)) { - if ('output' in value) return projectionToText(value.output); - if ('value' in value) return projectionToText(value.value); - if ('data' in value) return projectionToText(value.data); - if ('content' in value) return projectionToText(value.content); - if ('body' in value) return projectionToText(value.body); - if ('title' in value) return projectionToText(value.title); - if ('fullName' in value) return projectionToText(value.fullName); - if ('path' in value) return projectionToText(value.path); - if ('url' in value) return projectionToText(value.url); - } - - return JSON.stringify(value); -} - -function resolvePath(value: unknown, path: string): unknown { - if (!path) return value; - - let current = value; - for (const segment of path.split('.')) { - if (Array.isArray(current) && /^\d+$/.test(segment)) { - current = current[Number(segment)]; - continue; - } - if (isRecord(current)) { - current = current[segment]; - continue; - } - return undefined; - } - - return current; -} - -function parseRepositoryRef(repo: string | RepositoryRef | Record): RepositoryRef { - if (typeof repo === 'string') { - const [owner, name, ...rest] = repo.split('/'); - if (!owner || !name || rest.length > 0) { - throw new Error(`GitHub repo must be in owner/repo format: ${repo}`); - } - - return { - owner, - repo: name, - fullName: `${owner}/${name}`, - }; - } - - const owner = typeof repo.owner === 'string' ? repo.owner : undefined; - const name = typeof repo.repo === 'string' ? repo.repo : undefined; - if (!owner || !name) { - throw new Error('GitHub repo object requires owner and repo'); - } - - return { - owner, - repo: name, - fullName: typeof repo.fullName === 'string' ? repo.fullName : `${owner}/${name}`, - }; -} - -function repoToString(repo: string | RepositoryRef): string { - return typeof repo === 'string' ? repo : `${repo.owner}/${repo.repo}`; -} - -async function safeGetRuntime(client: GitHubClient): Promise { - try { - return await client.getRuntime(); - } catch { - return undefined; - } -} - -function normalizeResolvedParams(params: Record): ResolvedParams { - const normalized: ResolvedParams = {}; - for (const [key, value] of Object.entries(params)) { - normalized[key] = coerceScalar(value); - } - return normalized; -} - -function coerceScalar(value: unknown): unknown { - if (typeof value !== 'string') { - return value; - } - - const trimmed = value.trim(); - if (trimmed === 'true') return true; - if (trimmed === 'false') return false; - if (trimmed === 'null') return null; - if (/^-?(?:0|[1-9]\d*)(?:\.\d+)?$/.test(trimmed)) return Number(trimmed); - if ( - (trimmed.startsWith('{') && trimmed.endsWith('}')) || - (trimmed.startsWith('[') && trimmed.endsWith(']')) || - (trimmed.startsWith('"') && trimmed.endsWith('"')) - ) { - try { - return JSON.parse(trimmed); - } catch { - return value; - } - } - - return value; -} - -function readJsonParam(value: unknown, name: string): T | undefined { - if (value === undefined) return undefined; - if (typeof value !== 'string') return value as T; - - try { - return JSON.parse(value) as T; - } catch (error) { - throw new Error( - `GitHub step params.${name} must be valid JSON: ${error instanceof Error ? error.message : String(error)}` - ); - } -} - -function isRecord(value: unknown): value is Record { - return typeof value === 'object' && value !== null && !Array.isArray(value); -} diff --git a/packages/personas/personas/agent-relay-workflow.json b/packages/personas/personas/agent-relay-workflow.json deleted file mode 100644 index a65398d33..000000000 --- a/packages/personas/personas/agent-relay-workflow.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "id": "agent-relay-workflow", - "intent": "agent-relay-workflow", - "tags": ["implementation", "documentation"], - "description": "Authors complete, runnable agent-relay workflow artifacts. Applies workflow skills as source material, preserves Ricky's artifact contract, and includes GitHub primitive PR shipping steps for implementation workflows.", - "skills": [ - { - "id": "skill.sh/writing-agent-relay-workflows", - "source": "https://github.com/agentworkforce/skills#writing-agent-relay-workflows", - "description": "Skill to load and drive writing-agent-relay workflow automation from the Skills registry" - }, - { - "id": "prpm/writing-agent-relay-workflows", - "source": "https://prpm.dev/packages/@agent-relay/writing-agent-relay-workflows", - "description": "PRPM wrapper for writing-agent-relay-workflows harness" - }, - { - "id": "prpm/relay-80-100-workflow", - "source": "https://prpm.dev/packages/@agent-relay/relay-80-100-workflow", - "description": "PRPM-based provisioning for agent-relay/relay-80-100-workflow" - }, - { - "id": "prpm/choosing-swarm-patterns", - "source": "https://prpm.dev/packages/@agent-relay/choosing-swarm-patterns", - "description": "PRPM-based provisioning for agent-relay/choosing-swarm-patterns" - } - ], - "harness": "opencode", - "model": "opencode/gpt-5-nano", - "systemPrompt": "You are an agent-relay workflow artifact author. Produce complete, runnable TypeScript workflow source plus metadata for the caller's requested artifact path; do not stop at a plan or example. Read the normalized spec, matched skill context, target mode, and response schema. Write a workflow with the Agent Relay workflow builder, a dedicated channel, explicit agents, deterministic preflight/context, bounded implementation steps, review, fix loop, final review, hard validation, regression evidence, and final signoff. Preserve declared targets, non-goals, acceptance gates, environment preflights, and tool fallbacks. When the workflow can change repository files or must ship a bug fix/feature, include GitHub primitive shipping steps in the generated workflow: import GitHubStepExecutor and createGitHubStep from @agent-relay/github-primitive, create or update a branch, commit changed files, open a pull request, and capture the PR URL. Omit PR steps only when the normalized spec explicitly says planning-only, no PR, or PR creation is out of scope. Never perform branch, commit, or pull-request side effects during persona generation itself; generate workflow source that does them later when executed. Keep runtime-agent prompts model-agnostic. Output contract: return only structured JSON or a fenced TypeScript artifact plus metadata, with artifact.content containing the complete workflow source.", - "harnessSettings": { - "reasoning": "medium", - "timeoutSeconds": 3600 - } -} diff --git a/packages/personas/personas/opencode-workflow-specialist.json b/packages/personas/personas/opencode-workflow-specialist.json deleted file mode 100644 index 39f6fb5c5..000000000 --- a/packages/personas/personas/opencode-workflow-specialist.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "id": "opencode-workflow-specialist", - "intent": "opencode-workflow-correctness", - "tags": ["debugging"], - "description": "Diagnoses and repairs opencode-based agent-relay workflow failures across SDK, broker, cloud bootstrap, and CLI layers", - "harness": "opencode", - "model": "opencode/gpt-5-nano", - "systemPrompt": "You are the opencode workflow specialist in efficient mode. Keep the same quality bar as top tier; reduce only depth and verbosity. Own the full opencode workflow surface area: SDK spawn dispatch and transport selection, opencode session collection, the Rust headless worker, cloud bootstrap extraction/fallback, Daytona snapshot and launcher provisioning, and opencode CLI auth/model/mode quirks. Reproduce first, isolate the broken layer, fix the root cause in the correct layer, and verify with repeat runs across the failing opencode case plus nearby shared paths when relevant. Priorities remain end-to-end correctness, local test fidelity, observability, cleanup, then speed. Avoid interactive: false workarounds, env-var hacks, SDK-bypassing spawn paths, and untested fixes that may regress other providers. Output contract: brief repro status, broken layer, reproduction recipe, root cause, minimal fix, and multi-scenario evidence.", - "harnessSettings": { - "reasoning": "medium", - "timeoutSeconds": 1100 - } -} diff --git a/packages/sdk-py/tests/test_workflow_templates.py b/packages/sdk-py/tests/test_workflow_templates.py deleted file mode 100644 index 375c4c563..000000000 --- a/packages/sdk-py/tests/test_workflow_templates.py +++ /dev/null @@ -1,450 +0,0 @@ -"""Tests for workflow templates including review-loop pattern. - -These tests verify that workflow configurations are properly structured -and can be serialized to valid YAML for the relay runtime. -""" - -import yaml -import pytest -from agent_relay import ( - workflow, - dag, - fan_out, - pipeline, - PipelineStage, - TemplateAgent, - TemplateStep, - VerificationCheck, -) - - -class TestReviewLoopPattern: - """Tests for the review-loop workflow pattern.""" - - def test_basic_review_loop_structure(self): - """Test basic review-loop workflow with implementer and reviewers.""" - config = ( - workflow("review-loop-test") - .description("Test review loop workflow") - .pattern("review-loop") - .agent("implementer", cli="claude", role="Senior developer implementing the task") - .agent("reviewer-diff", cli="codex", role="Code quality reviewer", interactive=False) - .agent("reviewer-arch", cli="claude", role="Architecture reviewer", interactive=False) - .agent("reviewer-security", cli="codex", role="Security reviewer", interactive=False) - .step("implement", agent="implementer", task="Implement the feature") - .step("review-diff", agent="reviewer-diff", task="Review code quality", depends_on=["implement"]) - .step("review-arch", agent="reviewer-arch", task="Review architecture", depends_on=["implement"]) - .step("review-security", agent="reviewer-security", task="Security review", depends_on=["implement"]) - .step( - "consolidate", - agent="implementer", - task="Consolidate review feedback", - depends_on=["review-diff", "review-arch", "review-security"], - ) - .step("address-feedback", agent="implementer", task="Address issues", depends_on=["consolidate"]) - .to_config() - ) - - assert config["swarm"]["pattern"] == "review-loop" - assert len(config["agents"]) == 4 - assert len(config["workflows"][0]["steps"]) == 6 - - # Check implementer is interactive, reviewers are not - agents = {a["name"]: a for a in config["agents"]} - assert agents["implementer"].get("interactive", True) is True - assert agents["reviewer-diff"]["interactive"] is False - assert agents["reviewer-arch"]["interactive"] is False - assert agents["reviewer-security"]["interactive"] is False - - def test_review_loop_with_verification(self): - """Test review-loop with verification checks.""" - config = ( - workflow("review-loop-verified") - .pattern("review-loop") - .agent("implementer", cli="claude") - .agent("reviewer", cli="codex", interactive=False) - .step( - "implement", - agent="implementer", - task="Implement feature", - verification=VerificationCheck(type="output_contains", value="IMPLEMENTATION COMPLETE"), - ) - .step( - "review", - agent="reviewer", - task="Review implementation", - depends_on=["implement"], - verification=VerificationCheck(type="output_contains", value="REVIEW:"), - ) - .step( - "address", - agent="implementer", - task="Address feedback", - depends_on=["review"], - verification=VerificationCheck(type="output_contains", value="ADDRESSED"), - ) - .to_config() - ) - - steps = config["workflows"][0]["steps"] - assert steps[0]["verification"]["value"] == "IMPLEMENTATION COMPLETE" - assert steps[1]["verification"]["value"] == "REVIEW:" - assert steps[2]["verification"]["value"] == "ADDRESSED" - - def test_review_loop_with_coordination(self): - """Test review-loop with barriers for synchronization.""" - config = ( - workflow("review-loop-coordinated") - .pattern("review-loop") - .coordination( - barriers=[{"name": "reviews-complete", "waitFor": ["review-1", "review-2"]}], - consensus_strategy="majority", - ) - .agent("implementer", cli="claude") - .agent("reviewer-1", cli="codex", interactive=False) - .agent("reviewer-2", cli="claude", interactive=False) - .step("implement", agent="implementer", task="Do work") - .step("review-1", agent="reviewer-1", task="Review 1", depends_on=["implement"]) - .step("review-2", agent="reviewer-2", task="Review 2", depends_on=["implement"]) - .step("consolidate", agent="implementer", task="Merge", depends_on=["review-1", "review-2"]) - .to_config() - ) - - assert config["coordination"]["consensusStrategy"] == "majority" - assert len(config["coordination"]["barriers"]) == 1 - assert config["coordination"]["barriers"][0]["waitFor"] == ["review-1", "review-2"] - - def test_review_loop_yaml_roundtrip(self): - """Test that review-loop config survives YAML roundtrip.""" - builder = ( - workflow("review-loop-yaml") - .pattern("review-loop") - .agent("impl", cli="claude") - .agent("rev", cli="codex", interactive=False) - .step("do", agent="impl", task="Do it") - .step("check", agent="rev", task="Check it", depends_on=["do"]) - ) - - yaml_str = builder.to_yaml() - parsed = yaml.safe_load(yaml_str) - - assert parsed["swarm"]["pattern"] == "review-loop" - assert len(parsed["agents"]) == 2 - assert parsed["agents"][1]["interactive"] is False - - -class TestHubSpokePattern: - """Tests for hub-spoke workflow pattern.""" - - def test_basic_hub_spoke(self): - """Test basic hub-spoke with lead and workers.""" - config = ( - workflow("hub-spoke-test") - .pattern("hub-spoke") - .agent("lead", cli="claude", role="lead") - .agent("worker-1", cli="codex") - .agent("worker-2", cli="codex") - .agent("worker-3", cli="codex") - .step("plan", agent="lead", task="Create plan") - .step("work-1", agent="worker-1", task="Task 1", depends_on=["plan"]) - .step("work-2", agent="worker-2", task="Task 2", depends_on=["plan"]) - .step("work-3", agent="worker-3", task="Task 3", depends_on=["plan"]) - .step("consolidate", agent="lead", task="Merge work", depends_on=["work-1", "work-2", "work-3"]) - .to_config() - ) - - assert config["swarm"]["pattern"] == "hub-spoke" - assert len(config["agents"]) == 4 - assert config["agents"][0]["role"] == "lead" - - -class TestPipelinePattern: - """Tests for pipeline workflow pattern.""" - - def test_pipeline_with_stages(self): - """Test pipeline with multiple stages.""" - config = pipeline( - "pipeline-test", - stages=[ - PipelineStage(name="stage-1", task="First stage"), - PipelineStage(name="stage-2", task="Second stage"), - PipelineStage(name="stage-3", task="Third stage"), - ], - ).to_config() - - assert config["swarm"]["pattern"] == "pipeline" - steps = config["workflows"][0]["steps"] - assert len(steps) == 3 - assert steps[1]["dependsOn"] == ["stage-1"] - assert steps[2]["dependsOn"] == ["stage-2"] - - -class TestDAGPattern: - """Tests for DAG workflow pattern.""" - - def test_dag_with_dependencies(self): - """Test DAG with complex dependencies.""" - config = dag( - "dag-test", - agents=[ - TemplateAgent(name="frontend", cli="claude"), - TemplateAgent(name="backend", cli="codex"), - TemplateAgent(name="tester", cli="claude"), - ], - steps=[ - TemplateStep(name="design", agent="frontend", task="Design UI"), - TemplateStep(name="api", agent="backend", task="Build API"), - TemplateStep(name="integrate", agent="frontend", task="Integrate", depends_on=["design", "api"]), - TemplateStep(name="test", agent="tester", task="Test all", depends_on=["integrate"]), - ], - ).to_config() - - assert config["swarm"]["pattern"] == "dag" - steps = config["workflows"][0]["steps"] - assert steps[2]["dependsOn"] == ["design", "api"] - assert steps[3]["dependsOn"] == ["integrate"] - - -class TestFanOutPattern: - """Tests for fan-out workflow pattern.""" - - def test_fan_out_with_synthesis(self): - """Test fan-out with parallel tasks and synthesis.""" - config = fan_out( - "fan-out-test", - tasks=["Analyze module A", "Analyze module B", "Analyze module C"], - synthesis_task="Combine all analyses into report", - ).to_config() - - assert config["swarm"]["pattern"] == "fan-out" - steps = config["workflows"][0]["steps"] - assert len(steps) == 4 # 3 tasks + 1 synthesis - # Synthesis depends on all tasks - assert steps[3]["dependsOn"] == ["task-1", "task-2", "task-3"] - - -class TestMultiCLIWorkflows: - """Tests for workflows with multiple CLI types.""" - - def test_mixed_cli_workflow(self): - """Test workflow with Claude, Codex, and other CLIs.""" - config = ( - workflow("multi-cli") - .pattern("dag") - .agent("planner", cli="claude", role="Planning and coordination") - .agent("coder", cli="codex", role="Implementation") - .agent("reviewer", cli="gemini", role="Code review") - .agent("tester", cli="aider", role="Test writing") - .step("plan", agent="planner", task="Create implementation plan") - .step("code", agent="coder", task="Implement feature", depends_on=["plan"]) - .step("review", agent="reviewer", task="Review code", depends_on=["code"]) - .step("test", agent="tester", task="Write tests", depends_on=["code"]) - .step("finalize", agent="planner", task="Final review", depends_on=["review", "test"]) - .to_config() - ) - - clis = [a["cli"] for a in config["agents"]] - assert "claude" in clis - assert "codex" in clis - assert "gemini" in clis - assert "aider" in clis - - -class TestWorkflowValidation: - """Tests for workflow validation.""" - - def test_dependency_cycle_detection(self): - """Test that circular dependencies are detected (runtime validation).""" - # Note: Current builder doesn't validate cycles at build time, - # but this documents expected behavior - config = ( - workflow("cycle-test") - .pattern("dag") - .agent("a", cli="claude") - .step("s1", agent="a", task="First", depends_on=["s2"]) - .step("s2", agent="a", task="Second", depends_on=["s1"]) - .to_config() - ) - - # Config builds but would fail at runtime - # This test documents the current behavior - steps = config["workflows"][0]["steps"] - assert steps[0]["dependsOn"] == ["s2"] - assert steps[1]["dependsOn"] == ["s1"] - - def test_unique_agent_names(self): - """Test that duplicate agent names are handled.""" - # Builder allows duplicates (latest wins or both appear) - # This documents expected behavior - config = ( - workflow("dup-test") - .pattern("dag") - .agent("worker", cli="claude") - .agent("worker", cli="codex") # Same name, different CLI - .step("s1", agent="worker", task="Do work") - .to_config() - ) - - # Current behavior: both agents appear - assert len(config["agents"]) == 2 - - def test_step_references_existing_agent(self): - """Test step references an agent that exists.""" - config = ( - workflow("ref-test") - .pattern("dag") - .agent("worker", cli="claude") - .step("s1", agent="worker", task="Valid reference") - .to_config() - ) - - # Step agent matches defined agent - assert config["workflows"][0]["steps"][0]["agent"] == "worker" - assert config["agents"][0]["name"] == "worker" - - -class TestWorkflowConfiguration: - """Tests for advanced workflow configuration.""" - - def test_error_handling_config(self): - """Test error handling configuration.""" - config = ( - workflow("error-test") - .pattern("dag") - .on_error("retry", max_retries=3, retry_delay_ms=5000, notify_channel="errors") - .agent("a", cli="claude") - .step("s1", agent="a", task="May fail") - .to_config() - ) - - assert config["errorHandling"]["strategy"] == "retry" - assert config["errorHandling"]["maxRetries"] == 3 - assert config["errorHandling"]["retryDelayMs"] == 5000 - assert config["errorHandling"]["notifyChannel"] == "errors" - - def test_idle_nudge_config(self): - """Test idle agent detection configuration.""" - config = ( - workflow("idle-test") - .pattern("dag") - .idle_nudge(nudge_after_ms=60000, escalate_after_ms=120000, max_nudges=2) - .agent("a", cli="claude") - .step("s1", agent="a", task="Long running task") - .to_config() - ) - - nudge = config["swarm"]["idleNudge"] - assert nudge["nudgeAfterMs"] == 60000 - assert nudge["escalateAfterMs"] == 120000 - assert nudge["maxNudges"] == 2 - - def test_state_config(self): - """Test state management configuration.""" - config = ( - workflow("state-test") - .pattern("dag") - .state("redis", ttl_ms=3600000, namespace="myapp") - .agent("a", cli="claude") - .step("s1", agent="a", task="Stateful task") - .to_config() - ) - - assert config["state"]["backend"] == "redis" - assert config["state"]["ttlMs"] == 3600000 - assert config["state"]["namespace"] == "myapp" - - def test_trajectory_config(self): - """Test trajectory recording configuration.""" - config = ( - workflow("trajectory-test") - .pattern("dag") - .trajectories(enabled=True, reflect_on_barriers=True, auto_decisions=True) - .agent("a", cli="claude") - .step("s1", agent="a", task="Tracked task") - .to_config() - ) - - traj = config["trajectories"] - assert traj["enabled"] is True - assert traj["reflectOnBarriers"] is True - assert traj["autoDecisions"] is True - - def test_agent_constraints(self): - """Test agent resource constraints.""" - config = ( - workflow("constraints-test") - .pattern("dag") - .agent( - "constrained", - cli="claude", - model="claude-opus", - max_tokens=8000, - timeout_ms=300000, - retries=2, - idle_threshold_secs=30, - ) - .step("s1", agent="constrained", task="Constrained task") - .to_config() - ) - - constraints = config["agents"][0]["constraints"] - assert constraints["model"] == "claude-opus" - assert constraints["maxTokens"] == 8000 - assert constraints["timeoutMs"] == 300000 - assert constraints["retries"] == 2 - assert constraints["idleThresholdSecs"] == 30 - - -class TestYAMLGeneration: - """Tests for YAML output generation.""" - - def test_yaml_output_is_valid(self): - """Test that generated YAML is parseable.""" - builder = ( - workflow("yaml-valid") - .pattern("dag") - .agent("a", cli="claude") - .step("s1", agent="a", task="Test task") - ) - - yaml_str = builder.to_yaml() - - # Should parse without error - parsed = yaml.safe_load(yaml_str) - assert parsed is not None - assert isinstance(parsed, dict) - - def test_yaml_preserves_multiline_tasks(self): - """Test that multiline tasks are preserved in YAML.""" - multiline_task = """Do the following: -1. First step -2. Second step -3. Third step""" - - builder = ( - workflow("multiline") - .pattern("dag") - .agent("a", cli="claude") - .step("s1", agent="a", task=multiline_task) - ) - - yaml_str = builder.to_yaml() - parsed = yaml.safe_load(yaml_str) - - assert "1. First step" in parsed["workflows"][0]["steps"][0]["task"] - assert "3. Third step" in parsed["workflows"][0]["steps"][0]["task"] - - def test_yaml_special_characters(self): - """Test that special characters are properly escaped.""" - config = ( - workflow("special-chars") - .description("Test: 'quotes' and \"double quotes\"") - .pattern("dag") - .agent("a", cli="claude") - .step("s1", agent="a", task="Use {{variable}} syntax") - .to_config() - ) - - # Should handle special characters - assert "quotes" in config["description"] - assert "{{variable}}" in config["workflows"][0]["steps"][0]["task"] diff --git a/packages/sdk/README.md b/packages/sdk/README.md index 754093fc6..92ee91b6c 100644 --- a/packages/sdk/README.md +++ b/packages/sdk/README.md @@ -15,7 +15,7 @@ npm install @agent-relay/sdk The workflow builder is the primary way to define and run multi-agent workflows: ```ts -import { workflow } from '@agent-relay/sdk/workflows'; +import { workflow } from '@relayflows/core'; const result = await workflow('my-feature') .pattern('dag') @@ -179,7 +179,7 @@ Notes: ```ts import { AgentRelayClient } from '@agent-relay/sdk/client'; -import { workflow, WorkflowBuilder } from '@agent-relay/sdk/workflows'; +import { workflow, WorkflowBuilder } from '@relayflows/core'; import { ConsensusCoordinator } from '@agent-relay/sdk/consensus'; import { ShadowCoordinator } from '@agent-relay/sdk/shadow'; ``` @@ -189,7 +189,7 @@ import { ShadowCoordinator } from '@agent-relay/sdk/shadow'; Built-in templates for common patterns: ```ts -import { fanOut, pipeline, dag } from '@agent-relay/sdk/workflows'; +import { fanOut, pipeline, dag } from '@relayflows/core'; // Fan-out: parallel execution with synthesis const builder = fanOut('analysis', { diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 380fc60c1..04393a101 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -52,11 +52,6 @@ "import": "./dist/browser.js", "default": "./dist/browser.js" }, - "./workflows": { - "types": "./dist/workflows/index.d.ts", - "import": "./dist/workflows/index.js", - "default": "./dist/workflows/index.js" - }, "./communicate": { "types": "./dist/communicate/index.d.ts", "import": "./dist/communicate/index.js", @@ -121,21 +116,6 @@ "types": "./dist/slack.d.ts", "import": "./dist/slack.js", "default": "./dist/slack.js" - }, - "./provisioner/seeder": { - "types": "./dist/provisioner/seeder.d.ts", - "import": "./dist/provisioner/seeder.js", - "default": "./dist/provisioner/seeder.js" - }, - "./provisioner/local-jwks": { - "types": "./dist/provisioner/local-jwks.d.ts", - "import": "./dist/provisioner/local-jwks.js", - "default": "./dist/provisioner/local-jwks.js" - }, - "./provisioner/token": { - "types": "./dist/provisioner/token.d.ts", - "import": "./dist/provisioner/token.js", - "default": "./dist/provisioner/token.js" } }, "files": [ @@ -150,13 +130,12 @@ "directory": "packages/sdk" }, "scripts": { - "prebuild": "npm --prefix ../workflow-types run build && npm --prefix ../github-primitive run build && npm --prefix ../slack-primitive run build && npm --prefix ../config run build && npm --prefix ../cloud run build", + "prebuild": "npm --prefix ../github-primitive run build && npm --prefix ../slack-primitive run build && npm --prefix ../config run build && npm --prefix ../cloud run build", "build": "npx tsc -p tsconfig.build.json", "build:full": "tsc -p tsconfig.json && npm run bundle:binary", "bundle:binary": "node ./scripts/bundle-agent-relay.mjs", "check": "tsc -p tsconfig.json --noEmit", - "test": "npm run build && node --test dist/__tests__/integration.test.js && npm run test:vitest", - "test:vitest": "vitest run --config vitest.config.ts src/workflows/__tests__/run-script.test.ts", + "test": "npm run build && node --test dist/__tests__/integration.test.js", "test:quickstart": "node --test dist/__tests__/quickstart.test.js", "quickstart": "node dist/examples/quickstart.js", "demo": "node dist/examples/demo.js", @@ -174,7 +153,6 @@ "@agent-relay/config": "7.1.1", "@agent-relay/github-primitive": "7.1.1", "@agent-relay/slack-primitive": "7.1.1", - "@agent-relay/workflow-types": "7.1.1", "@agentworkforce/persona-kit": "^3.0.20", "@relaycast/sdk": "^1.1.0", "@relayfile/sdk": ">=0.1.2 <1", diff --git a/packages/sdk/src/__tests__/builder-deterministic.test.ts b/packages/sdk/src/__tests__/builder-deterministic.test.ts deleted file mode 100644 index 5651534ed..000000000 --- a/packages/sdk/src/__tests__/builder-deterministic.test.ts +++ /dev/null @@ -1,192 +0,0 @@ -/** - * Tests for deterministic and worktree step support in WorkflowBuilder. - */ -import { afterEach, describe, it, expect, vi } from 'vitest'; -import { workflow } from '../workflows/builder.js'; - -describe('deterministic/worktree steps in builder', () => { - afterEach(() => { - vi.restoreAllMocks(); - }); - - it('deterministic step emits correct config', () => { - const config = workflow('test') - .agent('worker', { cli: 'claude' }) - .step('read-files', { - type: 'deterministic', - command: 'cat src/index.ts', - verification: { type: 'exit_code', value: '0' }, - }) - .step('build', { agent: 'worker', task: 'Build the project' }) - .toConfig(); - - const steps = config.workflows![0].steps; - expect(steps).toHaveLength(2); - - // Deterministic step - expect(steps[0].name).toBe('read-files'); - expect(steps[0].type).toBe('deterministic'); - expect(steps[0].command).toBe('cat src/index.ts'); - expect(steps[0].agent).toBeUndefined(); - expect(steps[0].task).toBeUndefined(); - expect(steps[0].verification).toEqual({ type: 'exit_code', value: '0' }); - - // Agent step - expect(steps[1].name).toBe('build'); - expect(steps[1].agent).toBe('worker'); - expect(steps[1].task).toBe('Build the project'); - expect(steps[1].type).toBeUndefined(); - }); - - it('deterministic step with all options', () => { - const config = workflow('test') - .agent('worker', { cli: 'claude' }) - .step('run-cmd', { - type: 'deterministic', - command: 'npm test', - captureOutput: true, - failOnError: false, - dependsOn: ['build'], - timeoutMs: 30000, - }) - .step('final', { agent: 'worker', task: 'Finalize' }) - .toConfig(); - - const step = config.workflows![0].steps[0]; - expect(step.captureOutput).toBe(true); - expect(step.failOnError).toBe(false); - expect(step.dependsOn).toEqual(['build']); - expect(step.timeoutMs).toBe(30000); - }); - - it('worktree step emits correct config', () => { - const config = workflow('test') - .agent('worker', { cli: 'claude' }) - .step('setup-worktree', { - type: 'worktree', - branch: 'feature/new', - baseBranch: 'main', - path: '.worktrees/feature-new', - createBranch: true, - }) - .step('work', { agent: 'worker', task: 'Do work', dependsOn: ['setup-worktree'] }) - .toConfig(); - - const step = config.workflows![0].steps[0]; - expect(step.type).toBe('worktree'); - expect(step.branch).toBe('feature/new'); - expect(step.baseBranch).toBe('main'); - expect(step.path).toBe('.worktrees/feature-new'); - expect(step.createBranch).toBe(true); - expect(step.agent).toBeUndefined(); - expect(step.command).toBeUndefined(); - }); - - it('deterministic-only workflow does not require agents', () => { - const config = workflow('infra') - .step('lint', { type: 'deterministic', command: 'npm run lint' }) - .step('test', { - type: 'deterministic', - command: 'npm test', - dependsOn: ['lint'], - }) - .toConfig(); - - expect(config.agents).toHaveLength(0); - expect(config.workflows![0].steps).toHaveLength(2); - }); - - it('deterministic step without command throws', () => { - expect(() => { - workflow('test').step('bad', { type: 'deterministic' } as any); - }).toThrow('deterministic steps must have a command'); - }); - - it('deterministic step with agent throws', () => { - expect(() => { - workflow('test').step('bad', { type: 'deterministic', command: 'ls', agent: 'x', task: 'y' } as any); - }).toThrow('deterministic steps must not have agent or task'); - }); - - it('agent step without agent/task throws', () => { - expect(() => { - workflow('test').step('bad', {} as any); - }).toThrow('Agent steps must have both agent and task'); - }); - - it('agent steps without any agent definition throws', () => { - expect(() => { - workflow('test').step('work', { agent: 'worker', task: 'Do work' }).toConfig(); - }).toThrow('Workflow must have at least one agent when using agent steps'); - }); - - it('toYaml includes deterministic steps', () => { - const yamlStr = workflow('test').step('check', { type: 'deterministic', command: 'echo hello' }).toYaml(); - - expect(yamlStr).toContain('type: deterministic'); - expect(yamlStr).toContain('command: echo hello'); - }); - - it('preserves diagnosticAgent in agent step verification', () => { - const config = workflow('traceback') - .agent('generator', { cli: 'claude' }) - .agent('reviewer', { cli: 'claude' }) - .step('generate', { - agent: 'generator', - task: 'Implement the change', - verification: { - type: 'custom', - value: 'npx nango compile', - diagnosticAgent: 'reviewer', - }, - retries: 2, - }) - .toConfig(); - - expect(config.workflows?.[0].steps[0].verification).toEqual({ - type: 'custom', - value: 'npx nango compile', - diagnosticAgent: 'reviewer', - }); - }); - - it('throws when diagnosticAgent is not in the agents list', () => { - expect(() => { - workflow('traceback') - .agent('generator', { cli: 'claude' }) - .step('generate', { - agent: 'generator', - task: 'Implement the change', - verification: { - type: 'custom', - value: 'npx nango compile', - diagnosticAgent: 'reviewer', - }, - retries: 2, - }) - .toConfig(); - }).toThrow('Step "generate" references unknown diagnosticAgent "reviewer"'); - }); - - it('warns when diagnosticAgent is configured without step retries', () => { - const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); - - workflow('traceback') - .agent('generator', { cli: 'claude' }) - .agent('reviewer', { cli: 'claude' }) - .step('generate', { - agent: 'generator', - task: 'Implement the change', - verification: { - type: 'custom', - value: 'npx nango compile', - diagnosticAgent: 'reviewer', - }, - }) - .toConfig(); - - expect(warnSpy).toHaveBeenCalledWith( - 'Step "generate": diagnosticAgent configured but no retries — diagnostic will never run' - ); - }); -}); diff --git a/packages/sdk/src/__tests__/completion-pipeline.test.ts b/packages/sdk/src/__tests__/completion-pipeline.test.ts deleted file mode 100644 index c6bbcd1d8..000000000 --- a/packages/sdk/src/__tests__/completion-pipeline.test.ts +++ /dev/null @@ -1,1895 +0,0 @@ -/** - * Completion Pipeline tests for Point-Person-Led Completion spec. - * - * Validates: - * 1. Evidence-based completion (verification passes without marker) - * 2. Owner decision parsing (OWNER_DECISION: COMPLETE/INCOMPLETE_RETRY/INCOMPLETE_FAIL) - * 3. Tolerant review parsing (accepts semantic equivalents) - * 4. Channel evidence contributions (WORKER_DONE signals) - * 5. Backward compatibility with marker-based workflows - * 6. Codex/Gemini/Supervisor pattern compatibility - * 7. Map-reduce workflows remain unaffected - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import type { WorkflowDb } from '../workflows/runner.js'; -import type { - RelayYamlConfig, - WorkflowRunRow, - WorkflowStepRow, - WorkflowStepCompletionReason, - StepCompletionEvidence, - StepCompletionDecision, -} from '../workflows/types.js'; - -// ── Mock fetch to prevent real HTTP calls (Relaycast provisioning) ─────────── - -const mockFetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ data: { api_key: 'rk_live_test', workspace_id: 'ws-test' } }), - text: () => Promise.resolve(''), -}); -vi.stubGlobal('fetch', mockFetch); - -// ── Mock RelayCast SDK ─────────────────────────────────────────────────────── - -const mockRelaycastAgent = { - send: vi.fn().mockResolvedValue(undefined), - heartbeat: vi.fn().mockResolvedValue(undefined), - channels: { - create: vi.fn().mockResolvedValue(undefined), - join: vi.fn().mockResolvedValue(undefined), - invite: vi.fn().mockResolvedValue(undefined), - }, -}; - -const mockRelaycast = { - agents: { - register: vi.fn().mockResolvedValue({ token: 'token-1' }), - }, - as: vi.fn().mockReturnValue(mockRelaycastAgent), -}; - -class MockRelayError extends Error { - code: string; - constructor(code: string, message: string, status = 400) { - super(message); - this.code = code; - this.name = 'RelayError'; - (this as any).status = status; - } -} - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn().mockImplementation(() => mockRelaycast), - RelayError: MockRelayError, -})); - -// ── Mock AgentRelay ────────────────────────────────────────────────────────── - -let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>; -let waitForIdleFn: (ms?: number) => Promise<'idle' | 'timeout' | 'exited'>; -let mockSpawnOutputs: string[] = []; - -vi.mock('node:child_process', async () => { - const actual = await vi.importActual('node:child_process'); - const { EventEmitter } = await import('node:events'); - - return { - ...actual, - spawn: vi.fn().mockImplementation(() => { - const child = new EventEmitter() as any; - child.pid = 4242; - child.kill = vi.fn(); - child.stdout = new EventEmitter(); - child.stderr = new EventEmitter(); - - const output = mockSpawnOutputs.shift() ?? ''; - queueMicrotask(() => { - if (output) child.stdout.emit('data', Buffer.from(output)); - child.emit('close', 0, null); - }); - - return child; - }), - }; -}); - -const mockAgent = { - name: 'test-agent-abc', - get waitForExit() { - return waitForExitFn; - }, - get waitForIdle() { - return waitForIdleFn; - }, - release: vi.fn().mockResolvedValue(undefined), -}; - -const mockHuman = { - name: 'WorkflowRunner', - sendMessage: vi.fn().mockResolvedValue(undefined), -}; - -function never(): Promise { - return new Promise(() => {}); -} - -const defaultSpawnPtyImplementation = async ({ name, task }: { name: string; task?: string }) => { - const queued = mockSpawnOutputs.shift(); - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim(); - const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT'); - const output = - queued ?? - (isReview - ? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n' - : stepComplete - ? `STEP_COMPLETE:${stepComplete}\n` - : 'STEP_COMPLETE:unknown\n'); - - queueMicrotask(() => { - emitRelayEvent('workerOutput', { name, chunk: output }); - }); - - return { ...mockAgent, name }; -}; - -const relayListeners = new Map void>>(); - -function emitRelayEvent(event: string, payload: any) { - const set = relayListeners.get(event); - if (!set) return; - for (const fn of set) fn(payload); -} - -const mockRelayInstance = { - spawnPty: vi.fn().mockImplementation(defaultSpawnPtyImplementation), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - addListener: vi.fn((event: string, fn: (...args: any[]) => void) => { - let set = relayListeners.get(event); - if (!set) { - set = new Set(); - relayListeners.set(event, set); - } - set.add(fn); - return () => { - set!.delete(fn); - }; - }), - listAgents: vi.fn().mockResolvedValue([]), - listAgentsRaw: vi.fn().mockResolvedValue([]), -}; - -let relayEventCounter = 0; - -function emitRelayChannelMessage(message: { from: string; to: string; text: string }) { - setTimeout(() => { - emitRelayEvent('messageReceived', { - eventId: `evt-${++relayEventCounter}`, - from: message.from, - to: message.to, - text: message.text, - threadId: undefined, - }); - }, 0); -} - -vi.mock('../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -// Import after mocking -const { WorkflowRunner } = await import('../workflows/runner.js'); - -// ── Test fixtures ──────────────────────────────────────────────────────────── - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((s) => s.runId === runId); - }), - }; -} - -function makeConfig(overrides: Partial = {}): RelayYamlConfig { - return { - version: '1', - name: 'completion-pipeline-test', - swarm: { pattern: 'dag' }, - agents: [ - { name: 'agent-a', cli: 'claude' }, - { name: 'agent-b', cli: 'claude' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { name: 'step-1', agent: 'agent-a', task: 'Do step 1' }, - { name: 'step-2', agent: 'agent-b', task: 'Do step 2', dependsOn: ['step-1'] }, - ], - }, - ], - trajectories: false, - ...overrides, - }; -} - -type WorkflowStepOverride = Partial[number]['steps'][number]>; - -function makeSupervisedConfig(stepOverrides: WorkflowStepOverride = {}): RelayYamlConfig { - return makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'team-lead', cli: 'claude', role: 'lead coordinator' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'step-1', - agent: 'specialist', - task: 'Implement the requested change', - ...stepOverrides, - }, - ], - }, - ], - }); -} - -function makeTwoStepSupervisedConfig(): RelayYamlConfig { - return makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist-a', cli: 'claude', role: 'engineer' }, - { name: 'specialist-b', cli: 'claude', role: 'engineer' }, - { name: 'team-lead', cli: 'claude', role: 'lead coordinator' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { name: 'step-1', agent: 'specialist-a', task: 'Do step 1' }, - { name: 'step-2', agent: 'specialist-b', task: 'Do step 2', dependsOn: ['step-1'] }, - ], - }, - ], - }); -} - -function makeChannelSupervisedConfig( - channel: string, - stepOverrides: WorkflowStepOverride = {} -): RelayYamlConfig { - const config = makeSupervisedConfig(stepOverrides); - config.swarm = { ...config.swarm, channel }; - return config; -} - -async function getStepRow( - db: WorkflowDb, - runId: string, - stepName: string -): Promise { - const steps = await db.getStepsByRunId(runId); - return steps.find((step) => step.stepName === stepName); -} - -// ── Tests ──────────────────────────────────────────────────────────────────── - -describe('Completion Pipeline', () => { - let db: WorkflowDb; - let runner: InstanceType; - - beforeEach(() => { - vi.clearAllMocks(); - relayEventCounter = 0; - waitForExitFn = vi.fn().mockResolvedValue('exited'); - waitForIdleFn = vi.fn().mockImplementation(() => never()); - mockSpawnOutputs = []; - mockAgent.release.mockResolvedValue(undefined); - mockRelayInstance.spawnPty.mockImplementation(defaultSpawnPtyImplementation); - relayListeners.clear(); - db = makeDb(); - runner = new WorkflowRunner({ db, workspaceId: 'ws-test' }); - }); - - // ── Unit Test 1: Verification passes without marker ─────────────────── - - describe('evidence-based completion without marker', () => { - it('should complete step when verification passes but STEP_COMPLETE marker is missing', async () => { - // Worker output contains the verification target but no STEP_COMPLETE marker - mockSpawnOutputs = [ - 'worker output with expected content\n', - 'Owner observed the work is done\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: verified\n', - ]; - - const config = makeSupervisedConfig({ - verification: { type: 'output_contains', value: 'expected content' }, - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - }, 15000); - - it('should complete self-owned step when verification passes without marker', async () => { - // Agent output has verified content but no STEP_COMPLETE marker - // With the completion pipeline, verification passing should be sufficient - mockSpawnOutputs = [ - 'All tests passed\nBuild successful\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: tests pass\n', - ]; - - const config = makeConfig({ - workflows: [ - { - name: 'default', - steps: [ - { - name: 'step-1', - agent: 'agent-a', - task: 'Run tests', - verification: { type: 'output_contains', value: 'All tests passed' }, - }, - ], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - }, 15000); - }); - - // ── Unit Test 2: Owner approves despite malformed worker marker ──────── - - describe('owner decision overrides malformed markers', () => { - it('should complete step when owner approves despite malformed worker marker', async () => { - // Worker outputs a malformed marker, but owner's STEP_COMPLETE is correct - mockSpawnOutputs = [ - 'STEP_COMPLET:step-1\n', // typo in worker marker - 'Checked worker output, work is done\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: owner confirmed\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - }, 15000); - - it('should complete when owner provides OWNER_DECISION: COMPLETE', async () => { - // Owner uses the structured decision format - mockSpawnOutputs = [ - 'worker finished work\n', - 'OWNER_DECISION: COMPLETE\nREASON: verified artifacts\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: owner confirmed\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - }, 15000); - }); - - // ── Unit Test 3: Owner requests retry via OWNER_DECISION ────────────── - - describe('owner decision retry', () => { - it('should fail with a clear error when owner requests INCOMPLETE_RETRY and retries are disabled', async () => { - mockSpawnOutputs = [ - 'worker first attempt\n', - 'OWNER_DECISION: INCOMPLETE_RETRY\nREASON: missing error handling\n', - ]; - - const run = await runner.execute(makeSupervisedConfig({ retries: 0 }), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('no retries are configured (maxRetries=0)'); - expect(run.error).toContain('OWNER_DECISION: INCOMPLETE_RETRY'); - - const steps = await db.getStepsByRunId(run.id); - expect(steps).toHaveLength(1); - expect(steps[0]?.status).toBe('failed'); - expect(steps[0]?.completionReason).toBe('retry_requested_by_owner'); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(2); - }, 15000); - - it('should retry and complete when owner requests INCOMPLETE_RETRY and retries remain', async () => { - const retryEvents: Array<{ type: string; stepName: string }> = []; - runner.on((event) => { - if (event.type === 'step:retrying') { - retryEvents.push({ type: event.type, stepName: event.stepName }); - } - }); - - // First attempt: owner requests retry - // Second attempt: owner approves - mockSpawnOutputs = [ - 'worker first attempt\n', - 'OWNER_DECISION: INCOMPLETE_RETRY\nREASON: missing error handling\n', - 'worker second attempt with error handling\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: retry succeeded\n', - ]; - - const config = makeSupervisedConfig({ retries: 1 }); - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - expect(retryEvents).toEqual([{ type: 'step:retrying', stepName: 'step-1' }]); - - const steps = await db.getStepsByRunId(run.id); - expect(steps).toHaveLength(1); - expect(steps[0]?.status).toBe('completed'); - expect(steps[0]?.retryCount).toBe(1); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(5); - }, 15000); - - it('should fail after retries are exhausted when owner keeps requesting INCOMPLETE_RETRY', async () => { - mockSpawnOutputs = [ - 'worker first attempt\n', - 'OWNER_DECISION: INCOMPLETE_RETRY\nREASON: missing tests\n', - 'worker second attempt\n', - 'OWNER_DECISION: INCOMPLETE_RETRY\nREASON: still missing tests\n', - ]; - - const run = await runner.execute(makeSupervisedConfig({ retries: 1 }), 'default'); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('retry budget is exhausted (maxRetries=1)'); - expect(run.error).toContain('after 2 total attempts'); - - const steps = await db.getStepsByRunId(run.id); - expect(steps).toHaveLength(1); - expect(steps[0]?.status).toBe('failed'); - expect(steps[0]?.completionReason).toBe('retry_requested_by_owner'); - expect(steps[0]?.retryCount).toBe(1); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(4); - }, 15000); - - it('should honor INCOMPLETE_RETRY from a non-interactive reviewer step', async () => { - const localDb = makeDb(); - runner = new WorkflowRunner({ db: localDb, workspaceId: 'ws-test' }); - mockSpawnOutputs = ['OWNER_DECISION: INCOMPLETE_RETRY\nREASON: explicit retry requested\n']; - - const run = await runner.execute( - makeConfig({ - agents: [{ name: 'reviewer', cli: 'claude', preset: 'reviewer' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'review-step', - agent: 'reviewer', - task: 'Review the artifact and decide whether to retry.', - verification: { type: 'output_contains', value: 'OWNER_DECISION: INCOMPLETE_RETRY' }, - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('owner requested another attempt'); - - const steps = await localDb.getStepsByRunId(run.id); - expect(steps).toHaveLength(1); - expect(steps[0]?.status).toBe('failed'); - expect(steps[0]?.completionReason).toBe('retry_requested_by_owner'); - }, 15000); - - it('should not complete a self-owned step when INCOMPLETE_RETRY conflicts with success signals', async () => { - mockSpawnOutputs = [ - [ - 'OWNER_DECISION: INCOMPLETE_RETRY', - 'REASON: owner wants another verification pass', - 'STEP_COMPLETE:step-1', - 'expected content', - 'verified locally', - ].join('\n'), - ]; - - const run = await runner.execute( - makeConfig({ - workflows: [ - { - name: 'default', - steps: [ - { - name: 'step-1', - agent: 'agent-a', - task: 'Run tests', - retries: 0, - verification: { type: 'output_contains', value: 'expected content' }, - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('no retries are configured (maxRetries=0)'); - - const steps = await db.getStepsByRunId(run.id); - expect(steps).toHaveLength(1); - expect(steps[0]?.status).toBe('failed'); - expect(steps[0]?.completionReason).toBe('retry_requested_by_owner'); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(1); - }, 15000); - - it('should not let passing verification override INCOMPLETE_RETRY', async () => { - mockSpawnOutputs = [ - 'worker output with expected content\n', - [ - 'OWNER_DECISION: INCOMPLETE_RETRY', - 'REASON: missing WORKER_DONE marker', - 'verified artifacts after inspecting output', - 'worker finished implementation', - ].join('\n'), - ]; - - const run = await runner.execute( - makeSupervisedConfig({ - verification: { type: 'output_contains', value: 'expected content' }, - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(2); - }, 15000); - - it('should not let passing verification override NEEDS_CLARIFICATION', async () => { - mockSpawnOutputs = [ - 'worker output with expected content\n', - [ - 'OWNER_DECISION: NEEDS_CLARIFICATION', - 'REASON: owner needs proof of the channel handoff', - 'verified artifacts after inspecting output', - ].join('\n'), - ]; - - const run = await runner.execute( - makeSupervisedConfig({ - verification: { type: 'output_contains', value: 'expected content' }, - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(2); - }, 15000); - }); - - // ── Unit Test 4: Owner rejects AND verification fails ───────────────── - - describe('double failure: owner reject + verification fail', () => { - it('should fail step when owner rejects AND verification also fails', async () => { - mockSpawnOutputs = [ - 'worker output without expected content\n', - 'OWNER_DECISION: INCOMPLETE_FAIL\nREASON: work is wrong\n', - ]; - - const config = makeSupervisedConfig({ - verification: { type: 'output_contains', value: 'expected output' }, - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('failed'); - }, 15000); - - it('should fail when owner rejects even if verification passes', async () => { - mockSpawnOutputs = [ - 'worker output with expected content\n', - [ - 'OWNER_DECISION: INCOMPLETE_FAIL', - 'REASON: work is incomplete without WORKER_DONE proof', - 'artifacts verified locally', - 'worker finished implementation', - ].join('\n'), - ]; - - const run = await runner.execute( - makeSupervisedConfig({ - verification: { type: 'output_contains', value: 'expected content' }, - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(2); - }, 15000); - - it('should mark the run failed even with errorHandling.strategy=continue when a step fails', async () => { - // Regression: previously `allCompleted` counted failed steps as success - // whenever continueOnError was true, so the summary table would render - // "FAILED 1 passed, 1 failed" while run.status landed on 'completed'. - // Any wrapper that keys off run.status (e.g. the cloud orchestrator's - // bootstrap) would then propagate a false success. - mockSpawnOutputs = [ - 'worker output\n', - 'OWNER_DECISION: INCOMPLETE_FAIL\nREASON: relaycast unavailable\n', - ]; - - const config: RelayYamlConfig = { - ...makeSupervisedConfig({}), - errorHandling: { strategy: 'continue' }, - }; - - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('failed'); - const steps = await db.getStepsByRunId(run.id); - expect(steps[0]?.status).toBe('failed'); - expect(steps[0]?.completionReason).toBe('failed_owner_decision'); - }, 15000); - - it('should still complete by owner decision when COMPLETE and verification both pass', async () => { - mockSpawnOutputs = [ - 'worker output with expected content\n', - 'OWNER_DECISION: COMPLETE\nREASON: verified artifacts\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: owner confirmed\n', - ]; - - const run = await runner.execute( - makeSupervisedConfig({ - verification: { type: 'output_contains', value: 'expected content' }, - }), - 'default' - ); - - expect(run.status).toBe('completed'); - const [step] = await db.getStepsByRunId(run.id); - expect(step?.completionReason).toBe('completed_by_owner_decision'); - }, 15000); - - it('should fail verification before accepting OWNER_DECISION COMPLETE', async () => { - mockSpawnOutputs = [ - 'worker output without the required token\n', - 'OWNER_DECISION: COMPLETE\nREASON: verified artifacts\n', - ]; - - const run = await runner.execute( - makeSupervisedConfig({ - verification: { type: 'output_contains', value: 'expected content' }, - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(2); - }, 15000); - - it('should still complete as verified when no owner decision is provided and verification passes', async () => { - mockSpawnOutputs = [ - 'worker output with expected content\n', - 'Owner checked the output and left no structured decision.\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: verification passed\n', - ]; - - const run = await runner.execute( - makeSupervisedConfig({ - verification: { type: 'output_contains', value: 'expected content' }, - }), - 'default' - ); - - expect(run.status).toBe('completed'); - const [step] = await db.getStepsByRunId(run.id); - expect(step?.completionReason).toBe('completed_verified'); - }, 15000); - }); - - // ── Unit Test 5: Tolerant review parser ──────────────────────────────── - - describe('tolerant review parsing', () => { - it('should accept standard REVIEW_DECISION: APPROVE format', async () => { - const events: Array<{ type: string; decision?: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - events.push({ type: event.type, decision: event.decision }); - } - }); - - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: all good\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - expect(events).toContainEqual({ type: 'step:review-completed', decision: 'approved' }); - }, 15000); - - it('should accept standard REVIEW_DECISION: REJECT format', async () => { - const events: Array<{ type: string; decision?: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - events.push({ type: event.type, decision: event.decision }); - } - }); - - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: REJECT\nREVIEW_REASON: needs work\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review rejected'); - expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' }); - }, 15000); - - // These tests validate the tolerant parser once it's implemented. - // The tolerant parser should accept semantic equivalents. - - it('should still fail on review output with no usable approval or rejection signal', async () => { - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'I need more context before deciding.\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review response malformed'); - }, 15000); - }); - - // ── Unit Test 6: Channel evidence ───────────────────────────────────── - - describe('channel evidence for completion', () => { - it('should capture WORKER_DONE signals from channel messages', async () => { - // Worker posts done signal, owner observes and confirms - mockSpawnOutputs = [ - 'WORKER_DONE: all tasks completed\n', - 'Worker reported done on channel, verified artifacts\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: channel evidence confirms\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - - // Verify the channel received the worker done signal - const channelMessages = (mockRelaycastAgent.send as any).mock.calls.map( - ([, text]: [string, string]) => text - ); - expect(channelMessages.some((text: string) => text.includes('WORKER_DONE'))).toBe(true); - - const evidence = runner.getStepCompletionEvidence('step-1'); - const workerDoneSignals = - evidence?.coordinationSignals.filter( - (signal) => signal.kind === 'worker_done' && signal.source === 'channel' - ) ?? []; - expect(workerDoneSignals.some((signal) => signal.sender === 'specialist')).toBe(true); - }, 15000); - - it('should forward worker channel evidence to the owner prompt', async () => { - mockSpawnOutputs = [ - 'implementation complete\nWORKER_DONE: finished feature\n', - 'Observed WORKER_DONE on channel\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - }, 15000); - - it('should not count lead-authored WORKER_DONE channel posts as worker completion evidence', async () => { - waitForExitFn = vi.fn().mockImplementation(async () => { - await new Promise((resolve) => setTimeout(resolve, 5)); - return 'exited'; - }); - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - const agent = await defaultSpawnPtyImplementation({ name, task }); - if (task?.includes('You are the step owner/supervisor for step "step-1".')) { - emitRelayChannelMessage({ - from: agent.name, - to: 'completion-provenance', - text: 'WORKER_DONE: lead summarized the handoff', - }); - } - return agent; - } - ); - mockSpawnOutputs = [ - 'worker progress update only\n', - 'Owner observed the channel but left no decision.\n', - ]; - - const config = makeSupervisedConfig(); - config.swarm = { ...config.swarm, channel: 'completion-provenance' }; - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('owner completion decision missing'); - await new Promise((resolve) => setTimeout(resolve, 0)); - - const evidence = runner.getStepCompletionEvidence('step-1'); - const spoofedPosts = - evidence?.channelPosts.filter( - (post) => post.sender === 'team-lead' && post.text.includes('WORKER_DONE') - ) ?? []; - expect(spoofedPosts.length).toBeGreaterThan(0); - expect( - evidence?.coordinationSignals.filter((signal) => signal.kind === 'worker_done') ?? [] - ).toHaveLength(0); - const spoofedPost = evidence?.channelPosts.find( - (post) => post.sender === 'team-lead' && post.text.includes('WORKER_DONE') - ); - expect(spoofedPost?.signals.some((signal) => signal.kind === 'worker_done') ?? false).toBe(false); - }, 15000); - - it('should filter wrong-agent coordination signals from the evidence view', async () => { - mockSpawnOutputs = [ - 'LEAD_DONE: worker cannot declare lead completion\nWORKER_DONE: all tasks completed\n', - 'Owner confirmed\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: verified\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - - const evidence = runner.getStepCompletionEvidence('step-1'); - expect(evidence?.coordinationSignals.filter((signal) => signal.kind === 'lead_done')).toHaveLength(0); - expect( - evidence?.coordinationSignals.some( - (signal) => signal.kind === 'worker_done' && signal.sender === 'specialist' - ) - ).toBe(true); - }, 15000); - }); - - describe('happy-path lead-worker workflow proof', () => { - it('should complete by evidence when the worker posts WORKER_DONE on the channel', async () => { - const channel = 'happy-path-worker-done'; - waitForExitFn = vi.fn().mockImplementation(async () => { - await new Promise((resolve) => setTimeout(resolve, 5)); - return 'exited'; - }); - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - const agent = await defaultSpawnPtyImplementation({ name, task }); - if (name.includes('step-1-worker')) { - emitRelayChannelMessage({ - from: agent.name, - to: channel, - text: 'WORKER_DONE: implementation shipped', - }); - } - return agent; - } - ); - mockSpawnOutputs = [ - 'artifact bundle ready\n', - 'Lead verified the worker handoff is complete and safe.\n', - ]; - - const run = await runner.execute(makeChannelSupervisedConfig(channel), 'default'); - - expect(run.status).toBe('completed'); - const step = await getStepRow(db, run.id, 'step-1'); - expect(step?.completionReason).toBe('completed_by_evidence'); - - const evidence = runner.getStepCompletionEvidence('step-1'); - expect( - evidence?.coordinationSignals.some( - (signal) => - signal.kind === 'worker_done' && signal.source === 'channel' && signal.sender === 'specialist' - ) - ).toBe(true); - expect(evidence?.coordinationSignals.some((signal) => signal.kind === 'step_complete')).toBe(false); - }, 15000); - - it('should capture WORKER_DONE plus LEAD_DONE and complete cleanly', async () => { - const channel = 'happy-path-lead-worker-done'; - waitForExitFn = vi.fn().mockImplementation(async () => { - await new Promise((resolve) => setTimeout(resolve, 5)); - return 'exited'; - }); - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - const agent = await defaultSpawnPtyImplementation({ name, task }); - if (name.includes('step-1-worker')) { - emitRelayChannelMessage({ - from: agent.name, - to: channel, - text: 'WORKER_DONE: handoff package posted', - }); - } - if (name.includes('step-1-owner')) { - emitRelayChannelMessage({ - from: agent.name, - to: channel, - text: 'LEAD_DONE: lead confirmed the worker handoff', - }); - } - return agent; - } - ); - mockSpawnOutputs = [ - 'artifact bundle ready\n', - 'Lead confirmed the handoff is complete and safe for review.\n', - ]; - - const run = await runner.execute(makeChannelSupervisedConfig(channel), 'default'); - - expect(run.status).toBe('completed'); - const step = await getStepRow(db, run.id, 'step-1'); - expect(step?.completionReason).toBe('completed_by_evidence'); - - const evidence = runner.getStepCompletionEvidence('step-1'); - expect( - evidence?.coordinationSignals.some( - (signal) => - signal.kind === 'worker_done' && signal.source === 'channel' && signal.sender === 'specialist' - ) - ).toBe(true); - expect( - evidence?.coordinationSignals.some( - (signal) => - signal.kind === 'lead_done' && signal.source === 'channel' && signal.sender === 'team-lead' - ) - ).toBe(true); - }, 15000); - - it('should complete as verified when lead-worker verification passes without coordination markers', async () => { - mockSpawnOutputs = [ - 'worker output with expected content\n', - 'Lead checked the implementation and found it correct.\n', - ]; - - const run = await runner.execute( - makeSupervisedConfig({ verification: { type: 'output_contains', value: 'expected content' } }), - 'default' - ); - - expect(run.status).toBe('completed'); - const step = await getStepRow(db, run.id, 'step-1'); - expect(step?.completionReason).toBe('completed_verified'); - - const evidence = runner.getStepCompletionEvidence('step-1'); - expect(evidence?.coordinationSignals.some((signal) => signal.kind === 'worker_done')).toBe(false); - expect(evidence?.coordinationSignals.some((signal) => signal.kind === 'lead_done')).toBe(false); - }, 15000); - - it('should complete multiple supervised workers in sequence for a map-reduce style flow', async () => { - const channel = 'happy-path-map-reduce'; - waitForExitFn = vi.fn().mockImplementation(async () => { - await new Promise((resolve) => setTimeout(resolve, 5)); - return 'exited'; - }); - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT'); - const output = isReview - ? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: map-reduce happy path verified\n' - : name.includes('map-1-worker') - ? 'map artifact A ready\n' - : name.includes('map-1-owner') - ? 'Lead verified shard A is complete and safe.\n' - : name.includes('map-2-worker') - ? 'map artifact B ready\n' - : name.includes('map-2-owner') - ? 'Lead verified shard B is complete and safe.\n' - : name.includes('reduce-worker') - ? 'reduce artifact ready\n' - : name.includes('reduce-owner') - ? 'Lead verified the reduction is complete and safe.\n' - : 'STEP_COMPLETE:unknown\n'; - - queueMicrotask(() => { - emitRelayEvent('workerOutput', { name, chunk: output }); - }); - - const agent = { ...mockAgent, name }; - if (name.includes('map-1-worker')) { - emitRelayChannelMessage({ - from: agent.name, - to: channel, - text: 'WORKER_DONE: map shard A complete', - }); - } - if (name.includes('map-2-worker')) { - emitRelayChannelMessage({ - from: agent.name, - to: channel, - text: 'WORKER_DONE: map shard B complete', - }); - } - if (name.includes('reduce-worker')) { - emitRelayChannelMessage({ - from: agent.name, - to: channel, - text: 'WORKER_DONE: reduce pass complete', - }); - } - return agent; - } - ); - - const config = makeConfig({ - swarm: { pattern: 'map-reduce', channel }, - agents: [ - { name: 'mapper-1', cli: 'claude', role: 'engineer' }, - { name: 'mapper-2', cli: 'claude', role: 'engineer' }, - { name: 'reducer', cli: 'claude', role: 'engineer' }, - { name: 'team-lead', cli: 'claude', role: 'lead coordinator' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { name: 'map-1', agent: 'mapper-1', task: 'Process shard A' }, - { name: 'map-2', agent: 'mapper-2', task: 'Process shard B' }, - { - name: 'reduce', - agent: 'reducer', - task: 'Combine mapped results', - dependsOn: ['map-1', 'map-2'], - }, - ], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - const steps = await db.getStepsByRunId(run.id); - expect(steps.map((step) => step.stepName)).toEqual(['map-1', 'map-2', 'reduce']); - expect(steps.map((step) => step.status)).toEqual(['completed', 'completed', 'completed']); - expect(steps.map((step) => step.completionReason)).toEqual([ - 'completed_by_evidence', - 'completed_by_evidence', - 'completed_by_evidence', - ]); - expect( - runner - .getStepCompletionEvidence('reduce') - ?.coordinationSignals.some( - (signal) => - signal.kind === 'worker_done' && signal.source === 'channel' && signal.sender === 'reducer' - ) - ).toBe(true); - }, 15000); - - it('should still complete when WORKER_DONE lands after the lead checks the work', async () => { - const channel = 'happy-path-delayed-worker-done'; - const observedOrder: string[] = []; - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - const agent = await defaultSpawnPtyImplementation({ name, task }); - - if (name.includes('step-1-worker')) { - setTimeout(() => { - observedOrder.push('worker-done-message'); - emitRelayChannelMessage({ - from: agent.name, - to: channel, - text: 'WORKER_DONE: delayed handoff posted', - }); - }, 10); - return { - ...agent, - waitForExit: vi.fn().mockImplementation(async () => { - await new Promise((resolve) => setTimeout(resolve, 15)); - return 'exited' as const; - }), - }; - } - - if (name.includes('step-1-owner')) { - return { - ...agent, - waitForExit: vi.fn().mockImplementation(async () => { - observedOrder.push('owner-finished-check'); - return 'exited' as const; - }), - }; - } - - return agent; - } - ); - mockSpawnOutputs = [ - 'artifact bundle ready but handoff signal is delayed\n', - 'Lead checked the artifacts early and the work still looks complete and safe.\n', - ]; - - const run = await runner.execute(makeChannelSupervisedConfig(channel), 'default'); - - expect(run.status).toBe('completed'); - expect(observedOrder).toEqual(['owner-finished-check', 'worker-done-message']); - - const step = await getStepRow(db, run.id, 'step-1'); - expect(step?.completionReason).toBe('completed_by_evidence'); - expect( - runner - .getStepCompletionEvidence('step-1') - ?.coordinationSignals.some( - (signal) => - signal.kind === 'worker_done' && - signal.source === 'channel' && - signal.value === 'delayed handoff posted' - ) - ).toBe(true); - }, 15000); - }); - - // ── Integration Test 1: Codex lead/worker without marker ────────────── - - describe('Codex lead/worker completion', () => { - it('should complete when codex lead omits STEP_COMPLETE but owner logic still completes', async () => { - // Codex agents use `codex exec` and may not emit the exact marker. - // With a verification gate, the step should still complete. - mockSpawnOutputs = [ - 'worker: implemented the feature\n', - 'Lead verified: all changes look correct\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: verified\n', - ]; - - const config = makeSupervisedConfig(); - // Override to codex CLI - config.agents = [ - { name: 'specialist', cli: 'codex', role: 'engineer' }, - { name: 'team-lead', cli: 'codex', role: 'lead coordinator' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ]; - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - expect( - mockRelayInstance.spawnPty.mock.calls.some( - ([input]) => - input.cli === 'codex' && - Array.isArray(input.args) && - input.args.includes('--dangerously-bypass-approvals-and-sandbox') - ) - ).toBe(true); - }, 15000); - }); - - // ── Integration Test 2: Gemini lead/worker with channel completion ──── - - describe('Gemini lead/worker with channel completion', () => { - it('should complete when gemini worker posts channel completion and owner finalizes', async () => { - mockSpawnOutputs = [ - 'Worker output: feature implemented\nWORKER_DONE: task complete\n', - 'Observed worker completion on channel\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: channel evidence\n', - ]; - - const config = makeSupervisedConfig(); - config.agents = [ - { name: 'specialist', cli: 'gemini', role: 'engineer' }, - { name: 'team-lead', cli: 'gemini', role: 'lead coordinator' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ]; - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - }, 15000); - }); - - // ── Integration Test 3: Supervisor without exact review sentinel ─────── - - describe('Supervisor workflow completion', () => { - it('should complete supervised step with standard review flow', async () => { - mockSpawnOutputs = [ - 'worker built the feature\n', - 'Verified: code passes tests\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: correct implementation\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - }, 15000); - }); - - // ── Integration Test 4: Map-reduce workflow remains unaffected ───────── - - describe('Map-reduce workflow backward compatibility', () => { - it('should complete map-reduce workflow with standard markers', async () => { - const config = makeConfig({ - swarm: { pattern: 'map-reduce' }, - agents: [ - { name: 'mapper-1', cli: 'claude' }, - { name: 'mapper-2', cli: 'claude' }, - { name: 'reducer', cli: 'claude' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { name: 'map-1', agent: 'mapper-1', task: 'Process chunk A' }, - { name: 'map-2', agent: 'mapper-2', task: 'Process chunk B' }, - { name: 'reduce', agent: 'reducer', task: 'Combine results', dependsOn: ['map-1', 'map-2'] }, - ], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - }, 15000); - }); - - // ── Integration Test 5: Legacy marker-based workflows ───────────────── - - describe('Legacy marker-based workflows', () => { - it('should still complete with explicit STEP_COMPLETE marker (backward compat)', async () => { - // The classic marker-based flow should continue to work unchanged - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('completed'); - }, 15000); - - it('should still fail when marker, owner decision, and evidence are all missing', async () => { - mockSpawnOutputs = ['Did the work but no marker\n']; - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('owner completion decision missing'); - }, 15000); - - it('should still support explicit REVIEW_DECISION: APPROVE flow', async () => { - mockSpawnOutputs = [ - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: standard approval\n', - ]; - - const events: Array<{ type: string; decision?: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - events.push({ type: event.type, decision: event.decision }); - } - }); - - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: legacy approval\n', - 'worker finished step 2\n', - 'STEP_COMPLETE:step-2\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: legacy approval step 2\n', - ]; - - const run = await runner.execute(makeTwoStepSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - expect(events).toContainEqual({ type: 'step:review-completed', decision: 'approved' }); - }, 15000); - - it('should still support explicit REVIEW_DECISION: REJECT flow', async () => { - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: REJECT\nREVIEW_REASON: standard rejection\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review rejected'); - }, 15000); - - it('should still fail closed on malformed review output', async () => { - mockSpawnOutputs = ['worker finished\n', 'STEP_COMPLETE:step-1\n', 'I think this looks ok\n']; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review response malformed'); - }, 15000); - - it('should preserve owner/specialist separation in supervised workflows', async () => { - mockSpawnOutputs = [ - 'worker finished\n', - 'Owner verified\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: good\n', - ]; - - const ownerAssignments: Array<{ owner: string; specialist: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') { - ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName }); - } - }); - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - expect(ownerAssignments).toHaveLength(1); - expect(ownerAssignments[0].owner).toBe('team-lead'); - expect(ownerAssignments[0].specialist).toBe('specialist'); - }, 15000); - }); - - // ── Backward compat: event emission ─────────────────────────────────── - - describe('backward compatibility: event emission', () => { - it('should emit run:started and run:completed events', async () => { - const events: string[] = []; - runner.on((event) => events.push(event.type)); - - await runner.execute(makeConfig(), 'default'); - - expect(events).toContain('run:started'); - expect(events).toContain('run:completed'); - }, 15000); - - it('should emit step:started and step:completed events in order', async () => { - const stepEvents: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if (event.type.startsWith('step:')) { - stepEvents.push({ - type: event.type, - stepName: 'stepName' in event ? event.stepName : undefined, - }); - } - }); - - await runner.execute(makeConfig(), 'default'); - - const startedSteps = stepEvents.filter((e) => e.type === 'step:started'); - const completedSteps = stepEvents.filter((e) => e.type === 'step:completed'); - expect(startedSteps).toHaveLength(2); - expect(completedSteps).toHaveLength(2); - }, 15000); - - it('should emit owner-assigned events for all steps', async () => { - const ownerEvents: string[] = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') { - ownerEvents.push(event.stepName); - } - }); - - await runner.execute(makeConfig(), 'default'); - expect(ownerEvents).toHaveLength(2); - }, 15000); - - it('should emit review-completed events for all interactive steps', async () => { - const reviewEvents: string[] = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - reviewEvents.push(event.stepName); - } - }); - - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n', - 'worker finished step 2\n', - 'STEP_COMPLETE:step-2\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n', - ]; - - await runner.execute(makeTwoStepSupervisedConfig(), 'default'); - expect(reviewEvents).toHaveLength(2); - }, 15000); - }); - - // ── Backward compat: DAG execution ordering ─────────────────────────── - - describe('backward compatibility: DAG execution', () => { - it('should execute steps in dependency order', async () => { - const completedSteps: string[] = []; - runner.on((event) => { - if (event.type === 'step:completed') { - completedSteps.push(event.stepName); - } - }); - - await runner.execute(makeConfig(), 'default'); - - const idx1 = completedSteps.indexOf('step-1'); - const idx2 = completedSteps.indexOf('step-2'); - expect(idx1).toBeLessThan(idx2); - }, 15000); - - it('should run parallel steps concurrently', async () => { - const startTimes: Record = {}; - runner.on((event) => { - if (event.type === 'step:started') { - startTimes[event.stepName] = Date.now(); - } - }); - - const config = makeConfig({ - workflows: [ - { - name: 'default', - steps: [ - { name: 'a', agent: 'agent-a', task: 'Do A' }, - { name: 'b', agent: 'agent-b', task: 'Do B' }, - { name: 'c', agent: 'agent-a', task: 'Do C', dependsOn: ['a', 'b'] }, - ], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - - // a and b should start nearly simultaneously (within 100ms) - const diff = Math.abs((startTimes['a'] ?? 0) - (startTimes['b'] ?? 0)); - expect(diff).toBeLessThan(1000); - }, 15000); - }); - - // ── Backward compat: CLI command building ───────────────────────────── - - describe('backward compatibility: CLI command building', () => { - it('should build claude command correctly', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('claude', 'Task'); - expect(cmd).toBe('claude'); - expect(args).toContain('-p'); - }); - - it('should build codex command correctly', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('codex', 'Task'); - expect(cmd).toBe('codex'); - expect(args).toContain('exec'); - }); - - it('should build gemini command correctly', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('gemini', 'Task'); - expect(cmd).toBe('gemini'); - expect(args).toContain('-p'); - }); - }); - - // ── Backward compat: variable resolution ────────────────────────────── - - describe('backward compatibility: variable resolution', () => { - it('should resolve {{var}} in step tasks', async () => { - const config = makeConfig(); - config.workflows![0].steps[0].task = 'Build {{feature}}'; - const run = await runner.execute(config, 'default', { feature: 'auth' }); - expect(run.status, run.error).toBe('completed'); - }, 15000); - - it('should throw on unresolved variables', () => { - const config = makeConfig({ - agents: [{ name: 'a', cli: 'claude', task: 'Fix {{unknown}}' }], - }); - expect(() => runner.resolveVariables(config, {})).toThrow('Unresolved variable: {{unknown}}'); - }); - }); - - // ── Backward compat: review PTY echo handling ───────────────────────── - - describe('backward compatibility: review PTY echo handling', () => { - it('should parse last REVIEW_DECISION when PTY echoes prompt', async () => { - const events: Array<{ type: string; decision?: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - events.push({ type: event.type, decision: event.decision }); - } - }); - - const echoedPrompt = - 'Return exactly:\nREVIEW_DECISION: APPROVE or REJECT\nREVIEW_REASON: \n'; - const actualResponse = 'REVIEW_DECISION: REJECT\nREVIEW_REASON: code has bugs\n'; - mockSpawnOutputs = ['worker finished\n', 'STEP_COMPLETE:step-1\n', echoedPrompt + actualResponse]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' }); - }, 15000); - }); - - // ── Backward compat: timeout handling ───────────────────────────────── - - describe('backward compatibility: timeout handling', () => { - it('should emit step:owner-timeout on timeout', async () => { - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-timeout') { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - waitForExitFn = vi.fn().mockResolvedValue('timeout'); - waitForIdleFn = vi.fn().mockResolvedValue('timeout'); - - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(events).toContainEqual({ type: 'step:owner-timeout', stepName: 'step-1' }); - }, 15000); - }); - - // ── Phase 1 compatibility mode ──────────────────────────────────────── - - describe('Phase 1 compatibility mode', () => { - it('should keep markers as fast-path for completion', async () => { - // When the marker is present, it should complete immediately without - // needing to evaluate the full evidence pipeline - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('completed'); - }, 15000); - - it('should accept both old marker format and new OWNER_DECISION format', async () => { - // Old format still works - mockSpawnOutputs = ['STEP_COMPLETE:step-1\n']; - const run1 = await runner.execute( - makeConfig({ - workflows: [{ name: 'default', steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do it' }] }], - }), - 'default' - ); - expect(run1.status).toBe('completed'); - }, 15000); - }); - - // ── Evidence interface tests ────────────────────────────────────────── - - describe('evidence collection interface', () => { - it('should expose getStepCompletionEvidence() on runner', () => { - expect(typeof runner.getStepCompletionEvidence).toBe('function'); - }); - - it('should return undefined for unknown step names', () => { - const evidence = runner.getStepCompletionEvidence('nonexistent-step'); - expect(evidence).toBeUndefined(); - }); - - it('should return evidence with correct shape after step execution', async () => { - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('completed'); - - const evidence = runner.getStepCompletionEvidence('step-1'); - if (evidence) { - // Verify the evidence structure matches StepCompletionEvidence - expect(evidence.stepName).toBe('step-1'); - expect(evidence).toHaveProperty('channelPosts'); - expect(evidence).toHaveProperty('files'); - expect(evidence).toHaveProperty('process'); - expect(evidence).toHaveProperty('toolSideEffects'); - expect(evidence).toHaveProperty('coordinationSignals'); - expect(Array.isArray(evidence.channelPosts)).toBe(true); - expect(Array.isArray(evidence.files)).toBe(true); - expect(Array.isArray(evidence.toolSideEffects)).toBe(true); - expect(Array.isArray(evidence.coordinationSignals)).toBe(true); - } - }, 15000); - - it('should collect evidence for supervised steps', async () => { - mockSpawnOutputs = [ - 'worker completed the implementation\n', - 'Owner verified work\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: good\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - - const evidence = runner.getStepCompletionEvidence('step-1'); - if (evidence) { - expect(evidence.stepName).toBe('step-1'); - // Supervised steps should have channel posts from worker output forwarding - expect(evidence.channelPosts.length).toBeGreaterThanOrEqual(0); - } - }, 15000); - - it('should capture WORKER_DONE as a coordination signal', async () => { - mockSpawnOutputs = [ - 'WORKER_DONE: all tasks completed\n', - 'Owner confirmed\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: verified\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - - const evidence = runner.getStepCompletionEvidence('step-1'); - if (evidence) { - const workerDoneSignals = evidence.coordinationSignals.filter((s) => s.kind === 'worker_done'); - // If the evidence collector detected the WORKER_DONE signal, it should be present - if (workerDoneSignals.length > 0) { - expect(workerDoneSignals[0].kind).toBe('worker_done'); - } - } - }, 15000); - - it('should return a defensive copy (not a live reference)', async () => { - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('completed'); - - const evidence1 = runner.getStepCompletionEvidence('step-1'); - const evidence2 = runner.getStepCompletionEvidence('step-1'); - if (evidence1 && evidence2) { - expect(evidence1).not.toBe(evidence2); // structuredClone should return a new object - expect(evidence1).toEqual(evidence2); // but with the same content - } - }, 15000); - }); - - // ── completionReason field on step rows ─────────────────────────────── - - describe('completionReason on step rows', () => { - it('should set completionReason on completed steps', async () => { - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('completed'); - - const steps = await db.getStepsByRunId(run.id); - const completedSteps = steps.filter((s) => s.status === 'completed'); - expect(completedSteps.length).toBeGreaterThan(0); - - for (const step of completedSteps) { - if (step.completionReason) { - // completionReason should be a valid value - const validReasons: WorkflowStepCompletionReason[] = [ - 'completed_verified', - 'completed_by_owner_decision', - 'completed_by_evidence', - 'completed_by_process_exit', - 'retry_requested_by_owner', - 'failed_verification', - 'failed_owner_decision', - 'failed_no_evidence', - ]; - expect(validReasons).toContain(step.completionReason); - } - } - }, 15000); - }); - - describe('process-exit fallback (compliance reduction)', () => { - it('should complete step via process exit code 0 when no coordination signal is posted', async () => { - // Agent exits cleanly (code 0) but doesn't post STEP_COMPLETE or OWNER_DECISION. - // With verification configured (exit_code), the runner should infer completion. - const config = makeConfig({ - swarm: { pattern: 'dag', completionGracePeriodMs: 5000 }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'silent-worker', - agent: 'agent-a', - task: 'Do some work silently', - verification: { type: 'exit_code', value: '0' }, - }, - ], - }, - ], - }); - - // Output has no STEP_COMPLETE, no OWNER_DECISION — just normal work output - mockSpawnOutputs = ['Implemented the auth module. All tests pass.']; - - const localDb = makeDb(); - runner = new WorkflowRunner({ db: localDb, workspaceId: 'ws-test' }); - const events: any[] = []; - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - const steps = await localDb.getStepsByRunId(run.id); - const step = steps.find((s: any) => s.stepName === 'silent-worker'); - expect(step?.status).toBe('completed'); - // Should be completed_by_process_exit or completed_verified (exit_code verification) - expect(step?.completionReason).toBeDefined(); - }, 15000); - - it('should fail when process exits with non-zero code and no signal', async () => { - // Agent exits with non-zero and no coordination signal — should fail - const config = makeConfig({ - swarm: { pattern: 'dag', completionGracePeriodMs: 5000 }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'failing-worker', - agent: 'agent-a', - task: 'Try something', - }, - ], - }, - ], - }); - - // No STEP_COMPLETE, no OWNER_DECISION, and we'll simulate a non-clean exit - // by having the output lack any positive signals - mockSpawnOutputs = ['Error: something went wrong']; - - const localDb = makeDb(); - runner = new WorkflowRunner({ db: localDb, workspaceId: 'ws-test' }); - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('failed'); - }, 15000); - - it('should respect completionGracePeriodMs: 0 to disable fallback', async () => { - // With grace period disabled, missing signals should always fail - const config = makeConfig({ - swarm: { pattern: 'dag', completionGracePeriodMs: 0 }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'strict-worker', - agent: 'agent-a', - task: 'Do work with strict compliance required', - }, - ], - }, - ], - }); - - // Output has no signals at all - mockSpawnOutputs = ['Work completed but no signal posted.']; - - const localDb = makeDb(); - runner = new WorkflowRunner({ db: localDb, workspaceId: 'ws-test' }); - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('failed'); - }, 15000); - - it('should complete via evidence when process exits 0 and owner output has positive conclusion', async () => { - // Agent posts no explicit signal but says "done" + exit code 0 is captured as evidence - const config = makeConfig({ - swarm: { pattern: 'dag' }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'wordy-worker', - agent: 'agent-a', - task: 'Implement the feature', - verification: { type: 'exit_code', value: '0' }, - }, - ], - }, - ], - }); - - // Output contains positive conclusion words but no explicit marker - mockSpawnOutputs = ['Feature implemented and verified. All artifacts are correct and complete.']; - - const localDb = makeDb(); - runner = new WorkflowRunner({ db: localDb, workspaceId: 'ws-test' }); - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - }, 15000); - }); - - describe('template re-quoting regression (parseOwnerDecision)', () => { - it('should not pick COMPLETE from re-quoted template when agent said INCOMPLETE_RETRY', async () => { - // Bug repro: agent says INCOMPLETE_RETRY then re-quotes the template format, - // causing the last-match heuristic to pick COMPLETE from the template line. - mockSpawnOutputs = [ - 'worker did the task\n', - [ - 'STEP OWNER CONTRACT:', - '- Preferred final decision format:', - ' OWNER_DECISION: COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION', - ' REASON: ', - '', - 'OWNER_DECISION: INCOMPLETE_RETRY', - 'REASON: Tests are still failing', - '', - 'I chose INCOMPLETE_RETRY as per the options OWNER_DECISION: COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION', - ].join('\n'), - ]; - - const run = await runner.execute(makeSupervisedConfig({ retries: 0 }), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('INCOMPLETE_RETRY'); - - const steps = await db.getStepsByRunId(run.id); - expect(steps[0]?.completionReason).toBe('retry_requested_by_owner'); - }, 15000); - - it('should correctly parse COMPLETE when it is the real decision, not just template text', async () => { - // Ensure the fix doesn't break the happy path — agent says COMPLETE after echoed template - mockSpawnOutputs = [ - 'worker did the task\n', - [ - 'STEP OWNER CONTRACT:', - '- Preferred final decision format:', - ' OWNER_DECISION: COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION', - '', - 'OWNER_DECISION: COMPLETE', - 'REASON: Worker finished the task successfully', - ].join('\n'), - ]; - - const run = await runner.execute(makeSupervisedConfig({ retries: 0 }), 'default'); - expect(run.status).toBe('completed'); - - const steps = await db.getStepsByRunId(run.id); - expect(steps[0]?.completionReason).toBe('completed_by_owner_decision'); - }, 15000); - }); - - describe('fallback guards against explicit retry signals', () => { - it('should not complete via evidence fallback when output contains INCOMPLETE_RETRY', async () => { - // Bug repro: parseOwnerDecision returns null (garbled PTY), but raw output - // contains INCOMPLETE_RETRY. judgeOwnerCompletionByEvidence should refuse - // to infer completion. - mockSpawnOutputs = [ - 'worker completed locally\n', - [ - 'I reviewed the worker output. The task looks done but tests are failing.', - 'OW NER_DECISION: INCOMPLETE_RETRY', // garbled by PTY line wrap - 'REASON: tests failing', - 'The worker completed the implementation but verification failed.', - 'OWNER_DECISION: INCOMPLETE_RETRY', // clear signal in raw output - ].join('\n'), - ]; - - const run = await runner.execute(makeSupervisedConfig({ retries: 0 }), 'default'); - expect(run.status).toBe('failed'); - }, 15000); - - it('should not complete via process-exit fallback when output contains INCOMPLETE_RETRY', async () => { - const config = makeConfig({ - swarm: { pattern: 'dag', completionGracePeriodMs: 5000 }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'retried-worker', - agent: 'agent-a', - task: 'Do work', - verification: { type: 'exit_code', value: '0' }, - }, - ], - }, - ], - }); - - // Agent exits code 0 and verification passes, BUT output contains INCOMPLETE_RETRY - mockSpawnOutputs = [ - 'Implemented the feature.\nOWNER_DECISION: INCOMPLETE_RETRY\nREASON: needs more tests\n', - ]; - - const localDb = makeDb(); - runner = new WorkflowRunner({ db: localDb, workspaceId: 'ws-test' }); - const run = await runner.execute(config, 'default'); - - // Should NOT complete — the explicit retry signal should prevent fallback - expect(run.status).toBe('failed'); - }, 15000); - }); -}); diff --git a/packages/sdk/src/__tests__/e2e-owner-review.test.ts b/packages/sdk/src/__tests__/e2e-owner-review.test.ts deleted file mode 100644 index da47d9c95..000000000 --- a/packages/sdk/src/__tests__/e2e-owner-review.test.ts +++ /dev/null @@ -1,758 +0,0 @@ -/** - * E2E test harness for PR #511: auto step owner + per-step review gating. - * - * Validates: - * 1. Hub-role agent auto-assigned as owner (lead matches) - * 2. "github-integration" agent NOT matched as hub (word-boundary) - * 3. Review gating — approval flow - * 4. Review gating — rejection flow (PTY echo handling) - * 5. Review timeout budgeting - * 6. Owner timeout emission - * 7. Lead + workers team with owner assignment - * 8. YAML workflow parsing of e2e-owner-review.yaml - * 9. Owner completion marker validation - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { readFileSync } from 'node:fs'; -import { resolve } from 'node:path'; -import type { WorkflowDb } from '../workflows/runner.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js'; - -// ── Mock fetch ────────────────────────────────────────────────────────────── - -const mockFetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ data: { api_key: 'rk_live_test', workspace_id: 'ws-test' } }), - text: () => Promise.resolve(''), -}); -vi.stubGlobal('fetch', mockFetch); - -// ── Mock RelayCast SDK ────────────────────────────────────────────────────── - -const mockRelaycastAgent = { - send: vi.fn().mockResolvedValue(undefined), - heartbeat: vi.fn().mockResolvedValue(undefined), - channels: { - create: vi.fn().mockResolvedValue(undefined), - join: vi.fn().mockResolvedValue(undefined), - invite: vi.fn().mockResolvedValue(undefined), - }, -}; - -const mockRelaycast = { - agents: { register: vi.fn().mockResolvedValue({ token: 'token-1' }) }, - as: vi.fn().mockReturnValue(mockRelaycastAgent), -}; - -class MockRelayError extends Error { - code: string; - constructor(code: string, message: string, status = 400) { - super(message); - this.code = code; - this.name = 'RelayError'; - (this as any).status = status; - } -} - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn().mockImplementation(() => mockRelaycast), - RelayError: MockRelayError, -})); - -// ── Mock AgentRelay ───────────────────────────────────────────────────────── - -let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>; -let waitForIdleFn: (ms?: number) => Promise<'idle' | 'timeout' | 'exited'>; -let mockSpawnOutputs: string[] = []; - -const mockAgent = { - name: 'test-agent-abc', - get waitForExit() { - return waitForExitFn; - }, - get waitForIdle() { - return waitForIdleFn; - }, - release: vi.fn().mockResolvedValue(undefined), -}; - -const mockHuman = { - name: 'WorkflowRunner', - sendMessage: vi.fn().mockResolvedValue(undefined), -}; - -const defaultSpawnPtyImplementation = async ({ name, task }: { name: string; task?: string }) => { - const queued = mockSpawnOutputs.shift(); - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim(); - const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT'); - const output = - queued ?? - (isReview - ? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n' - : stepComplete - ? `STEP_COMPLETE:${stepComplete}\n` - : 'STEP_COMPLETE:unknown\n'); - - queueMicrotask(() => emitRelayEvent('workerOutput', { name, chunk: output })); - - return { ...mockAgent, name }; -}; - -// Listener registry for the AgentRelay mock — the production AgentRelay -// uses addListener('eventName', handler), so the mock captures handlers -// here keyed by event name. Tests fire events via `emitRelayEvent`. -const relayListeners = new Map void>>(); -function emitRelayEvent(event: string, payload: unknown): void { - for (const handler of relayListeners.get(event) ?? []) { - handler(payload); - } -} - -const mockRelayInstance = { - spawnPty: vi.fn().mockImplementation(defaultSpawnPtyImplementation), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - addListener: vi.fn((event: string, handler: (...args: unknown[]) => void) => { - let set = relayListeners.get(event); - if (!set) { - set = new Set(); - relayListeners.set(event, set); - } - set.add(handler); - return () => set!.delete(handler); - }), - listAgentsRaw: vi.fn().mockResolvedValue([]), -}; - -vi.mock('../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -// Import after mocking -const { WorkflowRunner } = await import('../workflows/runner.js'); - -// ── Helpers ───────────────────────────────────────────────────────────────── - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - return { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((s) => s.runId === runId); - }), - }; -} - -function makeConfig(overrides: Partial = {}): RelayYamlConfig { - return { - version: '1', - name: 'e2e-owner-review-test', - swarm: { pattern: 'dag' }, - agents: [ - { name: 'agent-a', cli: 'claude' }, - { name: 'agent-b', cli: 'claude' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { name: 'step-1', agent: 'agent-a', task: 'Do step 1' }, - { name: 'step-2', agent: 'agent-b', task: 'Do step 2', dependsOn: ['step-1'] }, - ], - }, - ], - trajectories: false, - ...overrides, - }; -} - -function never(): Promise { - return new Promise(() => {}); -} - -type WorkflowStepOverride = Partial[number]['steps'][number]>; - -function makeSupervisedConfig(stepOverrides: WorkflowStepOverride = {}): RelayYamlConfig { - return makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'team-lead', cli: 'claude', role: 'Lead coordinator for the workflow' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { name: 'step-1', agent: 'specialist', task: 'Implement the requested change', ...stepOverrides }, - ], - }, - ], - }); -} - -// ── E2E Scenarios ─────────────────────────────────────────────────────────── - -describe('PR #511 E2E: Auto Step Owner + Review Gating', () => { - let db: WorkflowDb; - let runner: InstanceType; - - beforeEach(() => { - vi.clearAllMocks(); - waitForExitFn = vi.fn().mockResolvedValue('exited'); - waitForIdleFn = vi.fn().mockImplementation(() => never()); - mockSpawnOutputs = []; - mockAgent.release.mockResolvedValue(undefined); - mockRelayInstance.spawnPty.mockImplementation(defaultSpawnPtyImplementation); - relayListeners.clear(); - db = makeDb(); - runner = new WorkflowRunner({ db, workspaceId: 'ws-test' }); - }); - - // ── Scenario 1: Hub-role agent auto-assigned as owner ─────────────────── - - describe('Scenario 1: Hub-role auto-ownership', () => { - it('should auto-assign lead agent as owner for specialist steps', async () => { - const ownerAssignments: Array<{ owner: string; specialist: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') { - ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName }); - } - }); - - const config = makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'impl-worker', cli: 'claude', role: 'implementer' }, - { name: 'team-lead', cli: 'claude', role: 'Lead coordinator for the workflow' }, - { name: 'quality-reviewer', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [{ name: 'hub-owner-test', agent: 'impl-worker', task: 'List 3 benefits' }], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - expect(ownerAssignments).toHaveLength(1); - expect(ownerAssignments[0].owner).toBe('team-lead'); - expect(ownerAssignments[0].specialist).toBe('impl-worker'); - }, 15000); - - it('should prioritize lead over coordinator in owner resolution', async () => { - const ownerAssignments: string[] = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') ownerAssignments.push(event.ownerName); - }); - - const config = makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'coord-bot', cli: 'claude', role: 'coordinator' }, - { name: 'lead-bot', cli: 'claude', role: 'lead' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'specialist', task: 'Do work' }], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - expect(ownerAssignments[0]).toBe('lead-bot'); - }, 15000); - - it('should spawn a separate worker and supervisor for dedicated owner steps', async () => { - mockSpawnOutputs = [ - 'worker finished\n', - 'Observed progress on channel\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - - const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls; - expect(spawnCalls[0][0].name).toContain('step-1-worker'); - expect(spawnCalls[1][0].name).toContain('step-1-owner'); - expect(spawnCalls[0][0].task).not.toContain('STEP_COMPLETE:step-1'); - expect(spawnCalls[1][0].task).toContain('You are the step owner/supervisor for step "step-1".'); - }, 15000); - }); - - // ── Scenario 2: github-integration NOT matched as hub ─────────────────── - - describe('Scenario 2: Hub word-boundary matching', () => { - it('should NOT match "github-integration" as hub-role agent', async () => { - const ownerAssignments: Array<{ owner: string; specialist: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') { - ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName }); - } - }); - - const config = makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'github-integration', cli: 'claude', role: 'GitHub integration agent' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [{ name: 'github-no-hub', agent: 'specialist', task: 'Test word boundary' }], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - expect(ownerAssignments[0].owner).not.toBe('github-integration'); - expect(ownerAssignments[0].owner).toBe('specialist'); - }, 15000); - - it('should NOT match "github-bot" with role "github integration" as hub', async () => { - const ownerAssignments: Array<{ owner: string; specialist: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') { - ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName }); - } - }); - - const config = makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'github-bot', cli: 'claude', role: 'github integration' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'specialist', task: 'Do work' }], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - expect(ownerAssignments[0].owner).not.toBe('github-bot'); - expect(ownerAssignments[0].owner).toBe('specialist'); - }, 15000); - }); - - // ── Scenario 3: Review gating — approval flow ────────────────────────── - - describe('Scenario 3: Review gating approval', () => { - it('should emit step:review-completed with approved decision', async () => { - const reviewEvents: Array<{ decision: string; reviewerName: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - reviewEvents.push({ decision: event.decision, reviewerName: event.reviewerName }); - } - }); - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - expect(reviewEvents.length).toBeGreaterThanOrEqual(1); - expect(reviewEvents[0].decision).toBe('approved'); - }, 15000); - - it('should gate step completion on review approval', async () => { - const stepEvents: string[] = []; - runner.on((event) => { - if (event.type === 'step:completed' || event.type === 'step:review-completed') { - stepEvents.push(event.type); - } - }); - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - const reviewIdx = stepEvents.indexOf('step:review-completed'); - const completedIdx = stepEvents.indexOf('step:completed'); - expect(reviewIdx).toBeGreaterThanOrEqual(0); - expect(reviewIdx).toBeLessThan(completedIdx); - }, 15000); - - it('should complete review from streamed REVIEW_DECISION before normal exit', async () => { - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT'); - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim(); - const output = isReview - ? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: streamed completion\n' - : stepComplete - ? `STEP_COMPLETE:${stepComplete}\n` - : 'STEP_COMPLETE:unknown\n'; - - queueMicrotask(() => emitRelayEvent('workerOutput', { name, chunk: output })); - - if (!isReview) { - return { ...mockAgent, name }; - } - - let released = false; - let resolveExit: ((result: 'released') => void) | undefined; - const waitForExit = vi.fn().mockImplementation(() => { - if (released) { - return Promise.resolve<'released'>('released'); - } - return new Promise<'released'>((resolve) => { - resolveExit = resolve; - }); - }); - const release = vi.fn().mockImplementation(async () => { - released = true; - resolveExit?.('released'); - }); - - return { - name, - waitForExit, - waitForIdle: vi.fn().mockImplementation(() => never()), - release, - }; - } - ); - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - - expect(run.status).toBe('completed'); - const spawnResults = (mockRelayInstance.spawnPty as any).mock.results; - const reviewAgent = await spawnResults[spawnResults.length - 1].value; - expect(reviewAgent.name).toContain('step-1-review'); - expect(reviewAgent.release).toHaveBeenCalledTimes(1); - }, 15000); - - it('should mirror worker output to the channel for owner observation', async () => { - mockSpawnOutputs = [ - 'worker progress update\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n', - ]; - - const run = await runner.execute( - makeSupervisedConfig({ verification: { type: 'output_contains', value: 'worker progress update' } }), - 'default' - ); - expect(run.status).toBe('completed'); - - const channelMessages = (mockRelaycastAgent.send as any).mock.calls.map( - ([, text]: [string, string]) => text - ); - expect(channelMessages.some((text: string) => text.includes('worker progress update'))).toBe(true); - expect(channelMessages.some((text: string) => text.includes('Worker `step-1-worker'))).toBe(true); - }, 15000); - }); - - // ── Scenario 4: Review gating — rejection flow ───────────────────────── - - describe('Scenario 4: Review gating rejection', () => { - it('should fail the step when reviewer rejects', async () => { - const events: Array<{ type: string; decision?: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - events.push({ type: event.type, decision: event.decision }); - } - }); - - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: REJECT\nREVIEW_REASON: output is incomplete\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review rejected'); - expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' }); - }, 15000); - - it('should fail closed when review output is malformed (no REVIEW_DECISION)', async () => { - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_REASON: this is missing the decision line\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review response malformed'); - }, 15000); - - it('should use last REVIEW_DECISION match when PTY echoes prompt (reject)', async () => { - const events: Array<{ type: string; decision?: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - events.push({ type: event.type, decision: event.decision }); - } - }); - - const echoedPrompt = - 'Return exactly:\nREVIEW_DECISION: APPROVE or REJECT\nREVIEW_REASON: \n'; - const actualResponse = 'REVIEW_DECISION: REJECT\nREVIEW_REASON: code has critical bugs\n'; - mockSpawnOutputs = ['worker finished\n', 'STEP_COMPLETE:step-1\n', echoedPrompt + actualResponse]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' }); - }, 15000); - }); - - // ── Scenario 5: Review timeout budgeting ─────────────────────────────── - - describe('Scenario 5: Review timeout budgeting', () => { - it('should use the full remaining step timeout as the review safety backstop', async () => { - const config = makeSupervisedConfig({ timeoutMs: 90_000 }); - - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT'); - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim(); - const output = isReview - ? '' - : stepComplete - ? `STEP_COMPLETE:${stepComplete}\n` - : 'worker finished\n'; - - if (output) { - queueMicrotask(() => emitRelayEvent('workerOutput', { name, chunk: output })); - } - - return { - name, - waitForExit: vi.fn().mockResolvedValue(isReview ? 'timeout' : 'exited'), - waitForIdle: vi.fn().mockImplementation(() => never()), - release: vi.fn().mockResolvedValue(undefined), - }; - } - ); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review safety backstop timed out'); - - const spawnResults = (mockRelayInstance.spawnPty as any).mock.results; - const reviewAgent = await spawnResults[spawnResults.length - 1].value; - const reviewTimeout = reviewAgent.waitForExit.mock.calls[0][0]; - expect(reviewTimeout).toBeGreaterThan(60_000); - expect(reviewTimeout).toBeLessThanOrEqual(90_000); - }, 15000); - - it('should default the review safety backstop to 10 minutes when no step timeout is set', async () => { - const config = makeSupervisedConfig(); - - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT'); - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim(); - const output = isReview - ? '' - : stepComplete - ? `STEP_COMPLETE:${stepComplete}\n` - : 'worker finished\n'; - - if (output) { - queueMicrotask(() => emitRelayEvent('workerOutput', { name, chunk: output })); - } - - return { - name, - waitForExit: vi.fn().mockResolvedValue(isReview ? 'timeout' : 'exited'), - waitForIdle: vi.fn().mockImplementation(() => never()), - release: vi.fn().mockResolvedValue(undefined), - }; - } - ); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review safety backstop timed out after 600000ms'); - - const spawnResults = (mockRelayInstance.spawnPty as any).mock.results; - const reviewAgent = await spawnResults[spawnResults.length - 1].value; - expect(reviewAgent.waitForExit).toHaveBeenCalledWith(600_000); - }, 15000); - }); - - // ── Scenario 6: Owner timeout emission ───────────────────────────────── - - describe('Scenario 6: Owner timeout events', () => { - it('should emit step:owner-timeout when owner exceeds time limit', async () => { - const events: Array<{ type: string; stepName?: string; ownerName?: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-timeout') { - events.push({ type: event.type, stepName: event.stepName, ownerName: event.ownerName }); - } - }); - - waitForExitFn = vi.fn().mockResolvedValue('timeout'); - waitForIdleFn = vi.fn().mockResolvedValue('timeout'); - - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('timed out'); - expect(events.length).toBeGreaterThanOrEqual(1); - expect(events[0].type).toBe('step:owner-timeout'); - expect(events[0].stepName).toBe('step-1'); - }, 15000); - - it('should NOT emit step:owner-timeout for successful reviews', async () => { - const ownerTimeouts: string[] = []; - runner.on((event) => { - if (event.type === 'step:owner-timeout') ownerTimeouts.push(event.stepName); - }); - - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('completed'); - expect(ownerTimeouts).toHaveLength(0); - }, 15000); - }); - - // ── Scenario 7: Multi-agent team with owner assignment ───────────────── - - describe('Scenario 7: Lead + workers team pattern', () => { - it('should assign lead as owner for worker steps in a team', async () => { - const ownerAssignments: Array<{ owner: string; specialist: string; step: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') { - ownerAssignments.push({ - owner: event.ownerName, - specialist: event.specialistName, - step: event.stepName, - }); - } - }); - - const config = makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'team-lead', cli: 'claude', role: 'Lead coordinator' }, - { name: 'worker-1', cli: 'claude', role: 'implementer' }, - { name: 'worker-2', cli: 'claude', role: 'implementer' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { name: 'work-1', agent: 'worker-1', task: 'Do task A' }, - { name: 'work-2', agent: 'worker-2', task: 'Do task B' }, - { - name: 'lead-coord', - agent: 'team-lead', - task: 'Coordinate workers', - dependsOn: ['work-1', 'work-2'], - }, - ], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - expect(ownerAssignments.length).toBeGreaterThanOrEqual(3); - - const worker1Owner = ownerAssignments.find((a) => a.step === 'work-1'); - const worker2Owner = ownerAssignments.find((a) => a.step === 'work-2'); - expect(worker1Owner?.owner).toBe('team-lead'); - expect(worker2Owner?.owner).toBe('team-lead'); - - const leadOwner = ownerAssignments.find((a) => a.step === 'lead-coord'); - expect(leadOwner?.owner).toBe('team-lead'); - }, 30000); - }); - - // ── Scenario 8: YAML workflow parsing ────────────────────────────────── - - describe('Scenario 8: E2E workflow YAML validation', () => { - it('should parse the e2e-owner-review.yaml without errors', () => { - const yamlPath = resolve(__dirname, '../../../../tests/workflows/e2e-owner-review.yaml'); - const yamlContent = readFileSync(yamlPath, 'utf-8'); - - // parseYamlString is an instance method - const config = runner.parseYamlString(yamlContent); - expect(config.name).toBe('e2e-owner-review'); - expect(config.agents).toHaveLength(5); - expect(config.workflows).toHaveLength(1); - - const agentNames = config.agents!.map((a: any) => a.name); - expect(agentNames).toContain('team-lead'); - expect(agentNames).toContain('github-integration'); - expect(agentNames).toContain('impl-worker'); - expect(agentNames).toContain('quality-reviewer'); - expect(agentNames).toContain('coordinator-bot'); - - const steps = config.workflows![0].steps; - const stepNames = steps.map((s: any) => s.name); - expect(stepNames).toContain('hub-owner-test'); - expect(stepNames).toContain('github-no-hub-match'); - expect(stepNames).toContain('review-approval-gate'); - expect(stepNames).toContain('deliberate-bad-output'); - expect(stepNames).toContain('tight-timeout-step'); - expect(stepNames).toContain('team-lead-coord'); - expect(stepNames).toContain('merge-results'); - }); - - it('should detect hub-role agents correctly from YAML', () => { - const yamlPath = resolve(__dirname, '../../../../tests/workflows/e2e-owner-review.yaml'); - const yamlContent = readFileSync(yamlPath, 'utf-8'); - const config = runner.parseYamlString(yamlContent); - - const teamLead = config.agents!.find((a: any) => a.name === 'team-lead'); - expect(teamLead?.role).toMatch(/\blead\b/i); - - const githubAgent = config.agents!.find((a: any) => a.name === 'github-integration'); - expect(githubAgent?.role).not.toMatch(/\bhub\b/i); - expect(githubAgent?.name).not.toMatch(/\bhub\b/i); - - const coordBot = config.agents!.find((a: any) => a.name === 'coordinator-bot'); - expect(coordBot?.role).toMatch(/\bcoordinator\b/i); - }); - }); - - // ── Scenario 9: Owner completion marker validation ───────────────────── - - describe('Scenario 9: Owner completion marker', () => { - it('should fail when owner does not provide a marker, decision, or evidence', async () => { - mockSpawnOutputs = ['The work is done but I forgot the sentinel.\n']; - - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('owner completion decision missing'); - }, 15000); - - it('should succeed when owner produces correct STEP_COMPLETE:step-name', async () => { - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('completed'); - }, 15000); - }); -}); diff --git a/packages/sdk/src/__tests__/error-scenarios.test.ts b/packages/sdk/src/__tests__/error-scenarios.test.ts deleted file mode 100644 index 531b6ce0d..000000000 --- a/packages/sdk/src/__tests__/error-scenarios.test.ts +++ /dev/null @@ -1,693 +0,0 @@ -/** - * Error scenario tests across all swarm workflow services. - * - * Tests failure modes, edge cases, and error propagation in - * StateStore, BarrierManager, SwarmCoordinator, and WorkflowRunner. - */ - -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { StateStore } from '../workflows/state.js'; -import { BarrierManager } from '../workflows/barrier.js'; -import { SwarmCoordinator } from '../workflows/coordinator.js'; -import type { DbClient } from '../workflows/coordinator.js'; -import type { BarrierRow } from '../workflows/barrier.js'; -import type { StateEntry } from '../workflows/state.js'; - -// ── Helpers ────────────────────────────────────────────────────────────────── - -function makeDb(): DbClient { - return { - query: vi.fn().mockResolvedValue({ rows: [] }), - }; -} - -// ── StateStore error scenarios ─────────────────────────────────────────────── - -describe('StateStore error scenarios', () => { - let db: DbClient; - let store: StateStore; - - beforeEach(() => { - db = makeDb(); - store = new StateStore(db); - }); - - describe('consensus gating', () => { - it('should reject writes when consensus gate returns false', async () => { - store.setConsensusGate(async () => false); - - await expect(store.set('run_1', 'key', 'value', 'agent-1')).rejects.toThrow( - 'rejected by consensus gate' - ); - }); - - it('should emit state:gated event on rejection', async () => { - const spy = vi.fn(); - store.on('state:gated', spy); - store.setConsensusGate(async () => false); - - await store.set('run_1', 'key', 'value', 'agent-1').catch(() => {}); - - expect(spy).toHaveBeenCalledWith('run_1', 'key', 'agent-1'); - }); - - it('should allow writes when consensus gate returns true', async () => { - const entry: StateEntry = { - id: 'st_1', - runId: 'run_1', - namespace: 'default', - key: 'key', - value: 'value', - expiresAt: null, - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString(), - }; - vi.mocked(db.query).mockResolvedValueOnce({ rows: [entry] }); - store.setConsensusGate(async () => true); - - const result = await store.set('run_1', 'key', 'value', 'agent-1'); - expect(result).toEqual(entry); - }); - - it('should clear consensus gate', async () => { - store.setConsensusGate(async () => false); - store.clearConsensusGate(); - - const entry: StateEntry = { - id: 'st_1', - runId: 'run_1', - namespace: 'default', - key: 'key', - value: 'value', - expiresAt: null, - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString(), - }; - vi.mocked(db.query).mockResolvedValueOnce({ rows: [entry] }); - - await expect(store.set('run_1', 'key', 'value', 'agent-1')).resolves.toBeDefined(); - }); - }); - - describe('DB failures', () => { - it('should propagate DB errors on set', async () => { - vi.mocked(db.query).mockRejectedValueOnce(new Error('connection lost')); - await expect(store.set('run_1', 'key', 'v', 'agent')).rejects.toThrow('connection lost'); - }); - - it('should propagate DB errors on get', async () => { - vi.mocked(db.query).mockRejectedValueOnce(new Error('timeout')); - await expect(store.get('run_1', 'key')).rejects.toThrow('timeout'); - }); - - it('should propagate DB errors on delete', async () => { - vi.mocked(db.query).mockRejectedValueOnce(new Error('disk full')); - await expect(store.delete('run_1', 'key')).rejects.toThrow('disk full'); - }); - }); - - describe('namespace isolation', () => { - it('should use custom namespace when provided', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await store.get('run_1', 'key', { namespace: 'custom' }); - expect(db.query).toHaveBeenCalledWith(expect.any(String), ['run_1', 'custom', 'key']); - }); - - it('should use default namespace when not provided', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await store.get('run_1', 'key'); - expect(db.query).toHaveBeenCalledWith(expect.any(String), ['run_1', 'default', 'key']); - }); - }); - - describe('TTL', () => { - it('should set expiresAt when ttlMs provided', async () => { - const entry: StateEntry = { - id: 'st_1', - runId: 'run_1', - namespace: 'default', - key: 'key', - value: 'v', - expiresAt: new Date(Date.now() + 5000).toISOString(), - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString(), - }; - vi.mocked(db.query).mockResolvedValueOnce({ rows: [entry] }); - - const result = await store.set('run_1', 'key', 'v', 'agent', { ttlMs: 5000 }); - expect(result.expiresAt).not.toBeNull(); - }); - }); - - describe('event emission', () => { - it('should emit state:set on successful write', async () => { - const entry: StateEntry = { - id: 'st_1', - runId: 'run_1', - namespace: 'default', - key: 'key', - value: 'v', - expiresAt: null, - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString(), - }; - vi.mocked(db.query).mockResolvedValueOnce({ rows: [entry] }); - - const spy = vi.fn(); - store.on('state:set', spy); - - await store.set('run_1', 'key', 'v', 'agent'); - expect(spy).toHaveBeenCalledWith(entry); - }); - - it('should emit state:deleted on successful delete', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [{ id: 'st_1' }] }); - - const spy = vi.fn(); - store.on('state:deleted', spy); - - await store.delete('run_1', 'key'); - expect(spy).toHaveBeenCalledWith('run_1', 'key', 'default'); - }); - - it('should not emit state:deleted when key not found', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - - const spy = vi.fn(); - store.on('state:deleted', spy); - - await store.delete('run_1', 'key'); - expect(spy).not.toHaveBeenCalled(); - }); - }); - - describe('snapshot', () => { - it('should return empty object for no entries', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - const snapshot = await store.snapshot('run_1'); - expect(snapshot).toEqual({}); - }); - - it('should build key-value map from entries', async () => { - const entries: StateEntry[] = [ - { - id: '1', - runId: 'run_1', - namespace: 'default', - key: 'a', - value: 1, - expiresAt: null, - createdAt: '', - updatedAt: '', - }, - { - id: '2', - runId: 'run_1', - namespace: 'default', - key: 'b', - value: 'hello', - expiresAt: null, - createdAt: '', - updatedAt: '', - }, - ]; - vi.mocked(db.query).mockResolvedValueOnce({ rows: entries }); - - const snapshot = await store.snapshot('run_1'); - expect(snapshot).toEqual({ a: 1, b: 'hello' }); - }); - }); -}); - -// ── BarrierManager error scenarios ─────────────────────────────────────────── - -describe('BarrierManager error scenarios', () => { - let db: DbClient; - let manager: BarrierManager; - - beforeEach(() => { - db = makeDb(); - manager = new BarrierManager(db); - }); - - afterEach(() => { - manager.cleanup(); - }); - - describe('barrier creation', () => { - it('should create barrier and emit barrier:created', async () => { - const barrier: BarrierRow = { - id: 'bar_1', - runId: 'run_1', - barrierName: 'test-barrier', - waitFor: ['agent-a', 'agent-b'], - resolved: [], - isSatisfied: false, - timeoutMs: null, - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString(), - }; - vi.mocked(db.query).mockResolvedValueOnce({ rows: [barrier] }); - - const spy = vi.fn(); - manager.on('barrier:created', spy); - - const result = await manager.createBarrier('run_1', { - name: 'test-barrier', - waitFor: ['agent-a', 'agent-b'], - }); - - expect(result.barrierName).toBe('test-barrier'); - expect(spy).toHaveBeenCalledWith(barrier); - }); - - it('should create multiple barriers in batch', async () => { - const barrier: BarrierRow = { - id: 'bar_1', - runId: 'run_1', - barrierName: 'b1', - waitFor: ['a'], - resolved: [], - isSatisfied: false, - timeoutMs: null, - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString(), - }; - vi.mocked(db.query).mockResolvedValue({ rows: [barrier] }); - - const results = await manager.createBarriers('run_1', [ - { name: 'b1', waitFor: ['a'] }, - { name: 'b2', waitFor: ['b'] }, - ]); - - expect(results).toHaveLength(2); - }); - }); - - describe('barrier resolution', () => { - it('should resolve barrier and check satisfaction (all mode)', async () => { - const barrier: BarrierRow = { - id: 'bar_1', - runId: 'run_1', - barrierName: 'b1', - waitFor: ['agent-a', 'agent-b'], - resolved: ['agent-a'], - isSatisfied: false, - timeoutMs: null, - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString(), - }; - - // First, create the barrier to set the mode - vi.mocked(db.query).mockResolvedValueOnce({ rows: [barrier] }); - await manager.createBarrier('run_1', { - name: 'b1', - waitFor: ['agent-a', 'agent-b'], - mode: 'all', - }); - - // Now resolve with partial (not satisfied yet) - vi.mocked(db.query).mockResolvedValueOnce({ rows: [barrier] }); - const result = await manager.resolve('run_1', 'b1', 'agent-a'); - expect(result.satisfied).toBe(false); - }); - - it('should satisfy barrier in any mode with single resolution', async () => { - const barrier: BarrierRow = { - id: 'bar_1', - runId: 'run_1', - barrierName: 'b1', - waitFor: ['agent-a', 'agent-b'], - resolved: ['agent-a'], - isSatisfied: false, - timeoutMs: null, - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString(), - }; - - // Create barrier in "any" mode - vi.mocked(db.query).mockResolvedValueOnce({ rows: [barrier] }); - await manager.createBarrier('run_1', { - name: 'b1', - waitFor: ['agent-a', 'agent-b'], - mode: 'any', - }); - - // Resolve — should satisfy immediately since mode is "any" - vi.mocked(db.query) - .mockResolvedValueOnce({ rows: [barrier] }) // resolve UPDATE - .mockResolvedValueOnce({ rows: [{ ...barrier, isSatisfied: true }] }); // markSatisfied UPDATE - - const satisfiedSpy = vi.fn(); - manager.on('barrier:satisfied', satisfiedSpy); - - const result = await manager.resolve('run_1', 'b1', 'agent-a'); - expect(result.satisfied).toBe(true); - expect(satisfiedSpy).toHaveBeenCalled(); - }); - - it('should throw when barrier not found during resolve', async () => { - // resolve UPDATE returns empty - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - // getBarrier also returns empty - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - - await expect(manager.resolve('run_1', 'nonexistent', 'agent-a')).rejects.toThrow('not found'); - }); - - it('should return existing state when barrier already satisfied', async () => { - const barrier: BarrierRow = { - id: 'bar_1', - runId: 'run_1', - barrierName: 'b1', - waitFor: ['a'], - resolved: ['a'], - isSatisfied: true, - timeoutMs: null, - createdAt: '', - updatedAt: '', - }; - - // resolve UPDATE returns empty (already satisfied, WHERE is_satisfied=FALSE doesn't match) - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - // getBarrier returns the already-satisfied barrier - vi.mocked(db.query).mockResolvedValueOnce({ rows: [barrier] }); - - const result = await manager.resolve('run_1', 'b1', 'a'); - expect(result.satisfied).toBe(true); - }); - }); - - describe('barrier timeout', () => { - it('should schedule timeout and emit barrier:timeout', async () => { - vi.useFakeTimers(); - - const barrier: BarrierRow = { - id: 'bar_1', - runId: 'run_1', - barrierName: 'b1', - waitFor: ['a'], - resolved: [], - isSatisfied: false, - timeoutMs: 1000, - createdAt: '', - updatedAt: '', - }; - - vi.mocked(db.query).mockResolvedValue({ rows: [barrier] }); - - const timeoutSpy = vi.fn(); - manager.on('barrier:timeout', timeoutSpy); - - await manager.createBarrier('run_1', { - name: 'b1', - waitFor: ['a'], - timeoutMs: 1000, - }); - - await vi.advanceTimersByTimeAsync(1100); - - expect(timeoutSpy).toHaveBeenCalledWith(barrier); - - vi.useRealTimers(); - }); - }); - - describe('cleanup', () => { - it('should clear all timeout timers', async () => { - const barrier: BarrierRow = { - id: 'bar_1', - runId: 'run_1', - barrierName: 'b1', - waitFor: ['a'], - resolved: [], - isSatisfied: false, - timeoutMs: 60000, - createdAt: '', - updatedAt: '', - }; - vi.mocked(db.query).mockResolvedValueOnce({ rows: [barrier] }); - - await manager.createBarrier('run_1', { - name: 'b1', - waitFor: ['a'], - timeoutMs: 60000, - }); - - expect(() => manager.cleanup()).not.toThrow(); - }); - }); - - describe('queries', () => { - it('getBarrier should return null for missing barrier', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - const result = await manager.getBarrier('run_1', 'nonexistent'); - expect(result).toBeNull(); - }); - - it('isSatisfied should return false when barrier does not exist', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - const result = await manager.isSatisfied('run_1', 'missing'); - expect(result).toBe(false); - }); - - it('getUnsatisfiedBarriers should query with is_satisfied = FALSE', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await manager.getUnsatisfiedBarriers('run_1'); - expect(db.query).toHaveBeenCalledWith(expect.stringContaining('is_satisfied = FALSE'), ['run_1']); - }); - }); -}); - -// ── SwarmCoordinator error scenarios ───────────────────────────────────────── - -describe('SwarmCoordinator error scenarios', () => { - let db: DbClient; - let coordinator: SwarmCoordinator; - - beforeEach(() => { - db = makeDb(); - coordinator = new SwarmCoordinator(db); - }); - - describe('run lifecycle errors', () => { - it('should throw when starting a non-pending run', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.startRun('run_1')).rejects.toThrow('not found or not in pending'); - }); - - it('should throw when completing a non-existent run', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.completeRun('bad')).rejects.toThrow('not found'); - }); - - it('should throw when failing a non-existent run', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.failRun('bad', 'error')).rejects.toThrow('not found'); - }); - - it('should throw when cancelling a non-existent run', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.cancelRun('bad')).rejects.toThrow('not found'); - }); - }); - - describe('step lifecycle errors', () => { - it('should throw when starting a non-pending step', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.startStep('step_bad')).rejects.toThrow('not in pending state'); - }); - - it('should throw when completing a non-running step', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.completeStep('step_bad')).rejects.toThrow('not in running state'); - }); - - it('should throw when failing a non-running step', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.failStep('step_bad', 'err')).rejects.toThrow('not in running state'); - }); - - it('should throw when skipping a non-existent step', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.skipStep('step_bad')).rejects.toThrow('not found'); - }); - }); - - describe('DB propagation', () => { - it('should propagate DB errors from createRun', async () => { - vi.mocked(db.query).mockRejectedValueOnce(new Error('connection refused')); - await expect( - coordinator.createRun('ws-1', { - version: '1', - name: 'test', - swarm: { pattern: 'fan-out' }, - agents: [{ name: 'a', cli: 'claude' }], - }) - ).rejects.toThrow('connection refused'); - }); - - it('should propagate DB errors from getSteps', async () => { - vi.mocked(db.query).mockRejectedValueOnce(new Error('query timeout')); - await expect(coordinator.getSteps('run_1')).rejects.toThrow('query timeout'); - }); - }); -}); - -// ── WorkflowRunner error scenarios ─────────────────────────────────────────── - -describe('WorkflowRunner error scenarios', () => { - // Mock AgentRelay for runner tests - const mockAgent = { - name: 'test-agent', - waitForExit: vi.fn().mockResolvedValue(0), - release: vi.fn(), - }; - - vi.mock('@agent-relay/sdk/relay', () => ({ - AgentRelay: vi.fn().mockImplementation(() => ({ - spawnPty: vi.fn().mockResolvedValue(mockAgent), - human: vi.fn().mockReturnValue({ sendMessage: vi.fn() }), - shutdown: vi.fn(), - })), - })); - - let WorkflowRunner: any; - let db: any; - let runner: any; - - beforeEach(async () => { - const mod = await import('../workflows/runner.js'); - WorkflowRunner = mod.WorkflowRunner; - - const runs = new Map(); - const steps = new Map(); - - db = { - insertRun: vi.fn(async (run: any) => runs.set(run.id, { ...run })), - updateRun: vi.fn(async (id: string, patch: any) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => runs.get(id) ?? null), - insertStep: vi.fn(async (step: any) => steps.set(step.id, { ...step })), - updateStep: vi.fn(async (id: string, patch: any) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((s: any) => s.runId === runId); - }), - }; - - runner = new WorkflowRunner({ db, workspaceId: 'ws-test' }); - }); - - describe('validation errors', () => { - it('should reject non-object config', () => { - expect(() => runner.validateConfig('string')).toThrow('non-null object'); - expect(() => runner.validateConfig(42)).toThrow('non-null object'); - expect(() => runner.validateConfig(undefined)).toThrow('non-null object'); - }); - - it('should reject config without swarm', () => { - expect(() => - runner.validateConfig({ version: '1', name: 'x', agents: [{ name: 'a', cli: 'claude' }] }) - ).toThrow('missing required field "swarm"'); - }); - - it('should reject config with null swarm', () => { - expect(() => - runner.validateConfig({ - version: '1', - name: 'x', - swarm: null, - agents: [{ name: 'a', cli: 'claude' }], - }) - ).toThrow('missing required field "swarm"'); - }); - - it('should reject workflows with non-object steps', () => { - expect(() => - runner.validateConfig({ - version: '1', - name: 'x', - swarm: { pattern: 'dag' }, - agents: [{ name: 'a', cli: 'claude' }], - workflows: [{ name: 'wf', steps: ['not-an-object'] }], - }) - ).toThrow('each step must be an object'); - }); - - it('should reject step missing required fields', () => { - expect(() => - runner.validateConfig({ - version: '1', - name: 'x', - swarm: { pattern: 'dag' }, - agents: [{ name: 'a', cli: 'claude' }], - workflows: [{ name: 'wf', steps: [{ name: 's1', agent: 'a' }] }], - }) - ).toThrow('must have "agent" and "task" string fields'); - }); - }); - - describe('variable resolution errors', () => { - it('should throw on unresolved variable in agent task', () => { - const config = { - version: '1', - name: 'test', - swarm: { pattern: 'dag' as const }, - agents: [{ name: 'a', cli: 'claude' as const, task: 'Fix {{bug}}' }], - }; - expect(() => runner.resolveVariables(config, {})).toThrow('Unresolved variable: {{bug}}'); - }); - - it('should throw on unresolved variable in workflow step task', () => { - const config = { - version: '1', - name: 'test', - swarm: { pattern: 'dag' as const }, - agents: [{ name: 'a', cli: 'claude' as const }], - workflows: [ - { - name: 'wf', - steps: [{ name: 's1', agent: 'a', task: 'Deploy to {{env}}' }], - }, - ], - }; - expect(() => runner.resolveVariables(config, {})).toThrow('Unresolved variable: {{env}}'); - }); - }); - - describe('execution errors', () => { - it('should fail run when workflow not found by name', async () => { - const config = { - version: '1', - name: 'test', - swarm: { pattern: 'dag' as const }, - agents: [{ name: 'a', cli: 'claude' as const }], - workflows: [{ name: 'wf1', steps: [{ name: 's1', agent: 'a', task: 'x' }] }], - }; - - await expect(runner.execute(config, 'nonexistent')).rejects.toThrow('not found'); - }); - - it('should fail run when config has no workflows', async () => { - const config = { - version: '1', - name: 'test', - swarm: { pattern: 'dag' as const }, - agents: [{ name: 'a', cli: 'claude' as const }], - }; - - await expect(runner.execute(config)).rejects.toThrow('No workflows defined'); - }); - }); - - describe('resume errors', () => { - it('should throw when resuming non-existent run', async () => { - await expect(runner.resume('bad_id')).rejects.toThrow('not found'); - }); - }); -}); diff --git a/packages/sdk/src/__tests__/file-db.test.ts b/packages/sdk/src/__tests__/file-db.test.ts deleted file mode 100644 index 7e3de6259..000000000 --- a/packages/sdk/src/__tests__/file-db.test.ts +++ /dev/null @@ -1,266 +0,0 @@ -/** - * Tests for JsonFileWorkflowDb — in-memory cache is authoritative. - * - * Regression: before this file existed, `getRun` re-read the jsonl from - * disk on every call. If a write failed (EACCES in cloud, ENOSPC, etc.) - * the cache-less implementation would return stale data, which in turn - * caused `WorkflowRunner.execute()` to report a completed run as - * `status: 'running'` to callers. - */ - -import { afterEach, beforeEach, describe, expect, it } from 'vitest'; -import { chmodSync, existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import { JsonFileWorkflowDb } from '../workflows/file-db.js'; -import type { WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js'; - -function makeRun(overrides: Partial = {}): WorkflowRunRow { - const now = new Date().toISOString(); - return { - id: 'run_test', - workflowName: 'test', - status: 'pending', - createdAt: now, - updatedAt: now, - ...overrides, - }; -} - -function makeStep(overrides: Partial = {}): WorkflowStepRow { - const now = new Date().toISOString(); - return { - id: 'step_test', - runId: 'run_test', - stepName: 'test-step', - status: 'pending', - attempts: 0, - createdAt: now, - updatedAt: now, - ...overrides, - }; -} - -describe('JsonFileWorkflowDb', () => { - let tmpDir: string; - - beforeEach(() => { - tmpDir = mkdtempSync(path.join(os.tmpdir(), 'filedb-test-')); - }); - - afterEach(() => { - try { - // Restore perms in case a test made the dir read-only. - chmodSync(tmpDir, 0o755); - } catch { - /* no-op */ - } - rmSync(tmpDir, { recursive: true, force: true }); - }); - - it('round-trips a run through cache without re-reading disk', async () => { - const dbPath = path.join(tmpDir, 'workflow-runs.jsonl'); - const db = new JsonFileWorkflowDb(dbPath); - expect(db.isWritable()).toBe(true); - - await db.insertRun(makeRun({ id: 'run_1', status: 'running' })); - await db.updateRun('run_1', { status: 'completed' }); - - const run = await db.getRun('run_1'); - expect(run?.status).toBe('completed'); - - // The new write should also be durable. - const raw = readFileSync(dbPath, 'utf8'); - expect(raw).toContain('"status":"completed"'); - }); - - it('returns the latest run status even when the disk write silently fails', async () => { - // Deny writes to the storage directory so appendFileSync throws EACCES. - // On directories the mode controls whether new entries can be added — - // existing files inside become effectively read-only for append. - const dbPath = path.join(tmpDir, 'workflow-runs.jsonl'); - const db = new JsonFileWorkflowDb(dbPath); - await db.insertRun(makeRun({ id: 'run_1', status: 'running' })); - - // Revoke directory write permission AFTER the initial insert so the - // next append fails while the cache should still track the update. - chmodSync(tmpDir, 0o555); - - await db.updateRun('run_1', { status: 'completed' }); - - // The in-memory mirror must reflect the update regardless of disk state. - const run = await db.getRun('run_1'); - expect(run?.status).toBe('completed'); - }); - - it('keeps cache state authoritative when disk writes lazy-fail (default, no fallback)', async () => { - // With homeFallback default (false), the constructor is optimistic about - // an unwritable directory — writable=true, first append() throws lazily. - // The key invariant: cache state is NOT lost even when the durable write - // never lands. This is the regression guard for the "workflow passes but - // reports status: running" bug. - const unwritableDir = path.join(tmpDir, 'unwritable'); - mkdirSync(unwritableDir, { recursive: true }); - chmodSync(unwritableDir, 0o555); - const blockedPath = path.join(unwritableDir, 'workflow-runs.jsonl'); - - const db = new JsonFileWorkflowDb(blockedPath); // default homeFallback: false - expect(db.isWritable()).toBe(true); - - await db.insertRun(makeRun({ id: 'run_mem', status: 'running' })); - await db.updateRun('run_mem', { status: 'completed' }); - - const run = await db.getRun('run_mem'); - expect(run?.status).toBe('completed'); - // The jsonl was never created — disk writes all failed. - expect(existsSync(blockedPath)).toBe(false); - }); - - it('opt-in homeFallback: true → unwritable path routes to $HOME/.agent-relay', () => { - const unwritableDir = path.join(tmpDir, 'unwritable'); - mkdirSync(unwritableDir, { recursive: true }); - chmodSync(unwritableDir, 0o555); - const blockedPath = path.join(unwritableDir, 'workflow-runs.jsonl'); - - const db = new JsonFileWorkflowDb({ - filePath: blockedPath, - homeFallback: true, - }); - - const resolved = db.getStoragePath(); - expect(db.isWritable()).toBe(true); - expect(resolved.startsWith(os.homedir())).toBe(true); - expect(resolved).toContain(path.join('.agent-relay', 'workflow-runs-workflow-runs.jsonl')); - }); - - // Regression for PR #757 Codex review feedback: the primary path's - // directory can be writable while the jsonl file itself is read-only - // (relayfile-mount chmods synced files to 0o444 while leaving the - // parent dir at 0o755). The old dir-only probe would accept the - // primary path, every append would lazy-fail, and homeFallback - // would never kick in despite the caller explicitly opting in. - it('opt-in homeFallback: true → read-only file with writable dir still falls back', () => { - const writableDir = path.join(tmpDir, 'project'); - mkdirSync(writableDir, { recursive: true }); - const primaryPath = path.join(writableDir, 'workflow-runs.jsonl'); - writeFileSync(primaryPath, ''); // create the file so chmod targets it - chmodSync(primaryPath, 0o444); // file read-only; dir still 0o755 - - const db = new JsonFileWorkflowDb({ - filePath: primaryPath, - homeFallback: true, - }); - - const resolved = db.getStoragePath(); - expect(db.isWritable()).toBe(true); - expect(resolved.startsWith(os.homedir())).toBe(true); - expect(resolved).not.toBe(primaryPath); - }); - - it('notifies onWriteFailure on every failed append', async () => { - const dbPath = path.join(tmpDir, 'workflow-runs.jsonl'); - const failures: Array<{ err: unknown; filePath: string }> = []; - const db = new JsonFileWorkflowDb({ - filePath: dbPath, - homeFallback: false, - onWriteFailure: (err, filePath) => failures.push({ err, filePath }), - }); - - await db.insertRun(makeRun({ id: 'run_1', status: 'running' })); - - // Making the file itself read-only forces appendFileSync to throw. - // (Directory chmod alone is insufficient because appending to an - // already-open inode doesn't require directory write.) - chmodSync(dbPath, 0o444); - - await db.updateRun('run_1', { status: 'completed' }); - await db.updateRun('run_1', { status: 'completed' }); // second failure — listener should fire again - - expect(failures.length).toBeGreaterThanOrEqual(2); - expect(failures[0].filePath).toBe(dbPath); - - // The cache still reflects the latest state regardless of the write failure. - const run = await db.getRun('run_1'); - expect(run?.status).toBe('completed'); - }); - - it('replays existing jsonl on construction (--resume path)', async () => { - const dbPath = path.join(tmpDir, 'workflow-runs.jsonl'); - - { - const db = new JsonFileWorkflowDb(dbPath); - await db.insertRun(makeRun({ id: 'run_replay', status: 'running' })); - await db.insertStep(makeStep({ id: 'step_1', runId: 'run_replay', status: 'pending' })); - await db.updateStep('step_1', { status: 'completed' }); - await db.updateRun('run_replay', { status: 'completed' }); - } - - // Fresh instance should see the replayed state. - const reloaded = new JsonFileWorkflowDb(dbPath); - const run = await reloaded.getRun('run_replay'); - expect(run?.status).toBe('completed'); - - const steps = await reloaded.getStepsByRunId('run_replay'); - expect(steps).toHaveLength(1); - expect(steps[0].status).toBe('completed'); - }); - - // Regression for PR #757 Devin review: InMemoryWorkflowDb shallow-copies - // on insert, JsonFileWorkflowDb previously stored the caller's object by - // reference. The runner inserts a row and also keeps it in its own map, - // then mutates state.row.status directly before calling updateStep/Run — - // if the cache held the same reference, those mutations would silently - // bypass updateStep's append + timestamp handling. - it('insertRun/insertStep do not alias the caller object into the cache', async () => { - const dbPath = path.join(tmpDir, 'workflow-runs.jsonl'); - const db = new JsonFileWorkflowDb(dbPath); - - const run = makeRun({ id: 'run_alias', status: 'running' }); - await db.insertRun(run); - - // Mutate the caller's object post-insert — shouldn't reach the cache. - run.status = 'failed'; - run.error = 'direct mutation should not leak into the db'; - - const cached = await db.getRun('run_alias'); - expect(cached?.status).toBe('running'); - expect(cached?.error).toBeUndefined(); - - const step = makeStep({ id: 'step_alias', runId: 'run_alias', status: 'pending' }); - await db.insertStep(step); - step.status = 'failed'; - step.error = 'same hazard'; - - const cachedSteps = await db.getStepsByRunId('run_alias'); - expect(cachedSteps).toHaveLength(1); - expect(cachedSteps[0].status).toBe('pending'); - expect(cachedSteps[0].error).toBeUndefined(); - }); - - it('cache insert/update is visible to getStepsByRunId without a disk round-trip', async () => { - const dbPath = path.join(tmpDir, 'workflow-runs.jsonl'); - const db = new JsonFileWorkflowDb(dbPath); - - await db.insertStep(makeStep({ id: 's1', runId: 'r1', stepName: 'a', status: 'pending' })); - await db.insertStep(makeStep({ id: 's2', runId: 'r1', stepName: 'b', status: 'pending' })); - await db.updateStep('s1', { status: 'completed' }); - - const steps = await db.getStepsByRunId('r1'); - expect(steps.map((s) => `${s.stepName}=${s.status}`).sort()).toEqual(['a=completed', 'b=pending']); - }); - - it('hasStepOutputs still works relative to the resolved storage path', () => { - const dbPath = path.join(tmpDir, 'workflow-runs.jsonl'); - const db = new JsonFileWorkflowDb(dbPath); - - const outputsDir = path.join(tmpDir, 'step-outputs', 'run_x'); - mkdirSync(outputsDir, { recursive: true }); - // Drop a file so readdirSync reports length > 0. - writeFileSync(path.join(outputsDir, 'out.txt'), 'hi'); - - expect(db.hasStepOutputs('run_x')).toBe(true); - expect(db.hasStepOutputs('run_y')).toBe(false); - expect(existsSync(dbPath)).toBe(false); // no writes happened yet - }); -}); diff --git a/packages/sdk/src/__tests__/idle-nudge.test.ts b/packages/sdk/src/__tests__/idle-nudge.test.ts deleted file mode 100644 index 86b85bedd..000000000 --- a/packages/sdk/src/__tests__/idle-nudge.test.ts +++ /dev/null @@ -1,458 +0,0 @@ -/** - * Idle nudge detection and escalation tests. - * - * Covers both modes: - * - No idleNudge config: idle is treated as completion. - * - idleNudge config enabled: waitForExit timeout drives nudges/escalation. - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import type { WorkflowDb } from '../workflows/runner.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js'; - -// ── Mock fetch to prevent real HTTP calls (Relaycast provisioning) ─────────── - -const mockFetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ data: { api_key: 'rk_live_test', workspace_id: 'ws-test' } }), - text: () => Promise.resolve(''), -}); -vi.stubGlobal('fetch', mockFetch); - -// ── Mock RelayCast SDK ──────────────────────────────────────────────────────── - -const mockRelaycastAgent = { - send: vi.fn().mockResolvedValue(undefined), - heartbeat: vi.fn().mockResolvedValue(undefined), - channels: { - create: vi.fn().mockResolvedValue(undefined), - join: vi.fn().mockResolvedValue(undefined), - invite: vi.fn().mockResolvedValue(undefined), - }, -}; - -const mockRelaycast = { - agents: { - register: vi.fn().mockResolvedValue({ token: 'token-1' }), - }, - as: vi.fn().mockReturnValue(mockRelaycastAgent), -}; - -class MockRelayError extends Error { - code: string; - constructor(code: string, message: string, status = 400) { - super(message); - this.code = code; - this.name = 'RelayError'; - (this as any).status = status; - } -} - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn().mockImplementation(() => mockRelaycast), - RelayError: MockRelayError, -})); - -// ── Mock AgentRelay ─────────────────────────────────────────────────────────── - -let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>; -let waitForIdleFn: (ms?: number) => Promise<'idle' | 'timeout' | 'exited'>; - -const mockSendMessage = vi.fn().mockResolvedValue(undefined); -const mockRelease = vi.fn().mockResolvedValue(undefined); - -const mockAgent = { - name: 'test-agent-abc', - exitCode: 0, - exitSignal: undefined, - get waitForExit() { - return waitForExitFn; - }, - get waitForIdle() { - return waitForIdleFn; - }, - release: mockRelease, - sendMessage: mockSendMessage, -}; - -const mockHumanSendMessage = vi.fn().mockResolvedValue(undefined); -const mockHuman = { - name: 'workflow-runner', - sendMessage: mockHumanSendMessage, -}; - -vi.mock('../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => ({ - spawnPty: vi.fn().mockResolvedValue(mockAgent), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - addListener: vi.fn(() => () => {}), - listAgentsRaw: vi.fn().mockResolvedValue([]), - })), -})); - -const { WorkflowRunner } = await import('../workflows/runner.js'); - -// ── Test fixtures ───────────────────────────────────────────────────────────── - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((s) => s.runId === runId); - }), - }; -} - -function makeConfig(overrides: Partial = {}): RelayYamlConfig { - return { - version: '1', - name: 'test-workflow', - swarm: { pattern: 'dag' }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do step 1' }], - }, - ], - trajectories: false, - ...overrides, - }; -} - -function never(): Promise { - return new Promise(() => {}); -} - -describe('Idle Nudge Detection', () => { - let db: WorkflowDb; - let runner: InstanceType; - - beforeEach(() => { - vi.clearAllMocks(); - db = makeDb(); - runner = new WorkflowRunner({ db, workspaceId: 'ws-test' }); - - waitForExitFn = vi.fn().mockResolvedValue('exited'); - waitForIdleFn = vi.fn().mockResolvedValue('timeout'); - }); - - describe('idleNudge enabled', () => { - it('sends direct nudge then completes when exit follows', async () => { - let exitCallCount = 0; - waitForExitFn = vi.fn().mockImplementation(() => { - exitCallCount++; - return Promise.resolve(exitCallCount === 1 ? 'timeout' : 'exited'); - }); - - const run = await runner.execute( - makeConfig({ - swarm: { - pattern: 'mesh', - idleNudge: { nudgeAfterMs: 100, escalateAfterMs: 100, maxNudges: 1 }, - }, - }), - 'default' - ); - - expect(run.status).toBe('completed'); - expect(mockHumanSendMessage).toHaveBeenCalledTimes(1); - expect(mockHumanSendMessage).toHaveBeenCalledWith( - expect.objectContaining({ - to: 'test-agent-abc', - text: expect.stringContaining('/exit'), - }) - ); - expect(mockRelease).not.toHaveBeenCalled(); - expect(waitForIdleFn).not.toHaveBeenCalled(); - }); - - it('uses hub fallback behavior without failing when hub is not active', async () => { - let exitCallCount = 0; - waitForExitFn = vi.fn().mockImplementation(() => { - exitCallCount++; - return Promise.resolve(exitCallCount === 1 ? 'timeout' : 'exited'); - }); - - const config = makeConfig({ - swarm: { - pattern: 'hub-spoke', - idleNudge: { nudgeAfterMs: 100, escalateAfterMs: 100, maxNudges: 1 }, - }, - agents: [ - { name: 'lead', cli: 'claude', role: 'Lead coordinator' }, - { name: 'worker', cli: 'claude' }, - ], - }); - const step = { name: 'step-1', agent: 'worker', task: 'Do work' }; - const agentDef = { name: 'worker', cli: 'claude' }; - - (runner as any).currentConfig = config; - (runner as any).relay = { human: vi.fn().mockReturnValue(mockHuman) }; - const result = await (runner as any).waitForExitWithIdleNudging(mockAgent, agentDef, step, 500); - - expect(result).toBe('exited'); - expect(mockHumanSendMessage).toHaveBeenCalledTimes(1); - }); - - it('force-releases after maxNudges is exceeded', async () => { - waitForExitFn = vi.fn().mockResolvedValue('timeout'); - - const run = await runner.execute( - makeConfig({ - swarm: { - pattern: 'dag', - idleNudge: { nudgeAfterMs: 50, escalateAfterMs: 50, maxNudges: 1 }, - }, - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('force-released'); - expect(mockHumanSendMessage).toHaveBeenCalledTimes(1); - expect(mockRelease).toHaveBeenCalledTimes(1); - expect(waitForIdleFn).not.toHaveBeenCalled(); - }); - - it('force-releases after multiple nudges', async () => { - waitForExitFn = vi.fn().mockResolvedValue('timeout'); - - const run = await runner.execute( - makeConfig({ - swarm: { - pattern: 'dag', - idleNudge: { nudgeAfterMs: 50, escalateAfterMs: 50, maxNudges: 3 }, - }, - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('force-released'); - expect(mockHumanSendMessage).toHaveBeenCalledTimes(3); - expect(mockRelease).toHaveBeenCalledTimes(1); - }); - - it('emits step:nudged event', async () => { - let exitCallCount = 0; - waitForExitFn = vi.fn().mockImplementation(() => { - exitCallCount++; - return Promise.resolve(exitCallCount === 1 ? 'timeout' : 'exited'); - }); - - const events: Array<{ type: string }> = []; - runner.on((event) => events.push(event)); - - await runner.execute( - makeConfig({ - swarm: { - pattern: 'dag', - idleNudge: { nudgeAfterMs: 50, escalateAfterMs: 50, maxNudges: 1 }, - }, - }), - 'default' - ); - - expect(events.filter((e) => e.type === 'step:nudged')).toHaveLength(1); - }); - - it('emits step:force-released event on escalation', async () => { - waitForExitFn = vi.fn().mockResolvedValue('timeout'); - - const events: Array<{ type: string }> = []; - runner.on((event) => events.push(event)); - - await runner.execute( - makeConfig({ - swarm: { - pattern: 'dag', - idleNudge: { nudgeAfterMs: 50, escalateAfterMs: 50, maxNudges: 1 }, - }, - }), - 'default' - ); - - expect(events.filter((e) => e.type === 'step:force-released')).toHaveLength(1); - }); - - it('uses defaults when idleNudge is empty object', async () => { - waitForExitFn = vi.fn().mockResolvedValue('timeout'); - - const run = await runner.execute( - makeConfig({ - swarm: { - pattern: 'dag', - idleNudge: {}, - }, - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('force-released'); - // default maxNudges is 1 - expect(mockHumanSendMessage).toHaveBeenCalledTimes(1); - expect(mockRelease).toHaveBeenCalledTimes(1); - }); - - it('respects overall timeout during nudge loop', async () => { - // Each waitForExit call takes 100ms (real timer), but the overall timeout - // is only 80ms. After the first call (~100ms elapsed), the loop detects - // that remaining time is exhausted and returns 'timeout'. - waitForExitFn = vi - .fn() - .mockImplementation( - () => new Promise<'timeout'>((resolve) => setTimeout(() => resolve('timeout'), 100)) - ); - - const run = await runner.execute( - makeConfig({ - swarm: { - pattern: 'dag', - idleNudge: { nudgeAfterMs: 10, escalateAfterMs: 10, maxNudges: 10 }, - }, - agents: [{ name: 'agent-a', cli: 'claude', constraints: { timeoutMs: 80 } }], - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('timed out'); - }); - - it('keeps a supervising lead alive after idle nudges are exhausted', async () => { - let exitCallCount = 0; - waitForExitFn = vi.fn().mockImplementation(() => { - exitCallCount++; - return Promise.resolve(exitCallCount < 3 ? 'timeout' : 'exited'); - }); - - const config = makeConfig({ - swarm: { - pattern: 'hub-spoke', - idleNudge: { nudgeAfterMs: 50, escalateAfterMs: 50, maxNudges: 1 }, - channel: 'lead-supervision', - }, - }); - const agentDef = { name: 'team-lead', cli: 'claude', role: 'Lead coordinator' }; - const step = { - name: 'step-1', - agent: 'team-lead', - task: 'Monitor #lead-supervision for WORKER_DONE, wait for the handoff, then exit.', - }; - - (runner as any).currentConfig = config; - expect((runner as any).shouldPreserveIdleSupervisor(agentDef, step)).toBe(true); - - const result = await (runner as any).waitForExitWithIdleNudging( - mockAgent, - agentDef, - step, - 500, - undefined, - true - ); - - expect(result).toBe('exited'); - expect(waitForExitFn).toHaveBeenCalledTimes(3); - expect(mockRelease).not.toHaveBeenCalled(); - }); - }); - - describe('Idle = done (no idleNudge config)', () => { - it('idle fires first: releases agent and completes step', async () => { - waitForIdleFn = vi.fn().mockResolvedValue('idle'); - waitForExitFn = vi.fn().mockImplementation(() => never()); - - const run = await runner.execute(makeConfig(), 'default'); - const steps = await db.getStepsByRunId(run.id); - - expect(run.status).toBe('completed'); - expect(steps).toHaveLength(1); - expect(steps[0]?.status).toBe('completed'); - expect(mockRelease).toHaveBeenCalledTimes(1); - }); - - it('exit fires first: completes without idle-based release', async () => { - waitForExitFn = vi.fn().mockResolvedValue('exited'); - waitForIdleFn = vi.fn().mockResolvedValue('timeout'); - - const run = await runner.execute(makeConfig(), 'default'); - const steps = await db.getStepsByRunId(run.id); - - expect(run.status).toBe('completed'); - expect(steps).toHaveLength(1); - expect(steps[0]?.status).toBe('completed'); - expect(mockRelease).not.toHaveBeenCalled(); - }); - - it('does not treat supervisory lead idleness as completion', async () => { - waitForExitFn = vi.fn().mockResolvedValue('exited'); - waitForIdleFn = vi.fn().mockResolvedValue('idle'); - - const config = makeConfig({ - swarm: { pattern: 'hub-spoke', channel: 'lead-supervision' }, - }); - const agentDef = { name: 'team-lead', cli: 'claude', role: 'Lead coordinator' }; - const step = { - name: 'step-1', - agent: 'team-lead', - task: 'Wait on #lead-supervision for WORKER_DONE before handing off.', - }; - - (runner as any).currentConfig = config; - expect((runner as any).shouldPreserveIdleSupervisor(agentDef, step)).toBe(true); - - const result = await (runner as any).waitForExitWithIdleNudging( - mockAgent, - agentDef, - step, - 500, - undefined, - true - ); - - expect(result).toBe('exited'); - expect(waitForExitFn).toHaveBeenCalledTimes(1); - expect(waitForIdleFn).not.toHaveBeenCalled(); - expect(mockRelease).not.toHaveBeenCalled(); - }); - - it('both timeout: fails step with timeout error', async () => { - waitForExitFn = vi.fn().mockResolvedValue('timeout'); - waitForIdleFn = vi.fn().mockResolvedValue('timeout'); - - const run = await runner.execute(makeConfig(), 'default'); - const steps = await db.getStepsByRunId(run.id); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('timed out'); - expect(steps).toHaveLength(1); - expect(steps[0]?.status).toBe('failed'); - expect(steps[0]?.error).toContain('timed out'); - }); - }); -}); diff --git a/packages/sdk/src/__tests__/provisioner-mount.test.ts b/packages/sdk/src/__tests__/provisioner-mount.test.ts deleted file mode 100644 index 92d1ccebd..000000000 --- a/packages/sdk/src/__tests__/provisioner-mount.test.ts +++ /dev/null @@ -1,126 +0,0 @@ -import { existsSync } from 'node:fs'; -import { mkdtemp, mkdir, rm, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; - -import { afterEach, describe, expect, it } from 'vitest'; - -import { ensureRelayfileMount } from '../provisioner/mount.js'; -import { createLocalJwksKeyPair, provisionWorkflowAgents } from '../provisioner/index.js'; - -const tempDirs: string[] = []; - -async function makeTempDir(prefix: string): Promise { - const dir = await mkdtemp(path.join(tmpdir(), prefix)); - tempDirs.push(dir); - return dir; -} - -async function createFakeMountBinary(): Promise { - const root = await makeTempDir('relayfile-mount-bin-'); - const binaryPath = path.join(root, 'relayfile-mount'); - await writeFile( - binaryPath, - [ - '#!/bin/sh', - 'LOCAL_DIR=""', - 'ONCE=0', - 'while [ "$#" -gt 0 ]; do', - ' case "$1" in', - ' --local-dir)', - ' LOCAL_DIR="$2"', - ' shift 2', - ' ;;', - ' --once)', - ' ONCE=1', - ' shift', - ' ;;', - ' *)', - ' shift', - ' ;;', - ' esac', - 'done', - 'mkdir -p "$LOCAL_DIR"', - 'if [ "$ONCE" -eq 1 ]; then', - ' printf "seeded\\n" > "$LOCAL_DIR/seeded.txt"', - ' exit 0', - 'fi', - 'printf "live\\n" > "$LOCAL_DIR/live.txt"', - 'trap "exit 0" TERM INT', - 'while :; do sleep 1; done', - '', - ].join('\n'), - { mode: 0o755 } - ); - return binaryPath; -} - -async function waitForPath(filePath: string, timeoutMs = 1000): Promise { - const deadline = Date.now() + timeoutMs; - while (Date.now() < deadline) { - if (existsSync(filePath)) { - return true; - } - await new Promise((resolve) => setTimeout(resolve, 25)); - } - return existsSync(filePath); -} - -afterEach(async () => { - for (const dir of tempDirs.splice(0)) { - await rm(dir, { recursive: true, force: true }); - } -}); - -describe('ensureRelayfileMount', () => { - it('runs initial sync, starts the watcher, and removes the mount on stop', async () => { - const binaryPath = await createFakeMountBinary(); - const mountPoint = path.join(await makeTempDir('relayfile-mount-target-'), 'workspace'); - - const mount = await ensureRelayfileMount({ - binaryPath, - relayfileUrl: 'http://127.0.0.1:8080', - workspace: 'rw_test', - token: 'test-token', - mountPoint, - }); - - expect(mount.pid).toBeGreaterThan(0); - expect(existsSync(path.join(mountPoint, 'seeded.txt'))).toBe(true); - expect(await waitForPath(path.join(mountPoint, 'live.txt'))).toBe(true); - - await mount.stop(); - - expect(existsSync(mountPoint)).toBe(false); - }); -}); - -describe('provisionWorkflowAgents mount integration', () => { - it('starts a per-agent mount and exposes its mount point in the result', async () => { - const projectDir = await makeTempDir('relay-provisioner-project-'); - const binaryPath = await createFakeMountBinary(); - await mkdir(path.join(projectDir, 'src'), { recursive: true }); - await writeFile(path.join(projectDir, 'src', 'index.ts'), 'export const value = 1;\n'); - - const result = await provisionWorkflowAgents({ - tokenSigningKey: createLocalJwksKeyPair(), - workspace: 'rw_workspace', - projectDir, - relayfileBaseUrl: 'http://127.0.0.1:8080', - agents: { - worker: { - access: 'readonly', - }, - }, - skipSeeding: true, - mountBinaryPath: binaryPath, - }); - - const mount = result.mounts.get('worker'); - expect(mount).toBeDefined(); - expect(result.agents.worker.mountPoint).toBe(mount?.mountPoint); - expect(existsSync(path.join(mount!.mountPoint, 'seeded.txt'))).toBe(true); - - await mount?.stop(); - }); -}); diff --git a/packages/sdk/src/__tests__/resume-fallback.test.ts b/packages/sdk/src/__tests__/resume-fallback.test.ts deleted file mode 100644 index dd02769cd..000000000 --- a/packages/sdk/src/__tests__/resume-fallback.test.ts +++ /dev/null @@ -1,429 +0,0 @@ -/** - * Tests for resuming workflow execution from cached step outputs when the JSONL - * run database is missing or unavailable. - */ - -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { chmodSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import type { WorkflowDb } from '../workflows/runner.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js'; - -// ── Mock fetch ─────────────────────────────────────────────────────────────── - -const mockFetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ data: { api_key: 'rk_live_test', workspace_id: 'ws-test' } }), - text: () => Promise.resolve(''), -}); -vi.stubGlobal('fetch', mockFetch); - -// ── Mock RelayCast SDK ─────────────────────────────────────────────────────── - -const mockRelaycastAgent = { - send: vi.fn().mockResolvedValue(undefined), - heartbeat: vi.fn().mockResolvedValue(undefined), - channels: { - create: vi.fn().mockResolvedValue(undefined), - join: vi.fn().mockResolvedValue(undefined), - invite: vi.fn().mockResolvedValue(undefined), - }, -}; - -const mockRelaycast = { - agents: { - register: vi.fn().mockResolvedValue({ token: 'token-1' }), - }, - as: vi.fn().mockReturnValue(mockRelaycastAgent), -}; - -class MockRelayError extends Error { - code: string; - constructor(code: string, message: string, status = 400) { - super(message); - this.code = code; - this.name = 'RelayError'; - (this as any).status = status; - } -} - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn().mockImplementation(() => mockRelaycast), - RelayError: MockRelayError, -})); - -// ── Mock AgentRelay ────────────────────────────────────────────────────────── - -let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>; - -const mockAgent = { - name: 'test-agent-abc', - get waitForExit() { - return waitForExitFn; - }, - get waitForIdle() { - return vi.fn().mockImplementation(() => new Promise(() => {})); - }, - release: vi.fn().mockResolvedValue(undefined), -}; - -const mockHuman = { - name: 'WorkflowRunner', - sendMessage: vi.fn().mockResolvedValue(undefined), -}; - -const mockListeners = new Map void>>(); -function emitMockEvent(event: string, ...args: any[]): void { - const set = mockListeners.get(event); - if (set) for (const cb of set) cb(...args); -} - -const mockRelayInstance = { - spawnPty: vi.fn().mockImplementation(async ({ name, task }: { name: string; task?: string }) => { - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim(); - const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT'); - const output = isReview - ? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n' - : stepComplete - ? `STEP_COMPLETE:${stepComplete}\n` - : 'STEP_COMPLETE:unknown\n'; - - queueMicrotask(() => { - emitMockEvent('workerOutput', { name, chunk: output }); - }); - - return { ...mockAgent, name }; - }), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - addListener: vi.fn((event: string, cb: (...args: any[]) => void) => { - let set = mockListeners.get(event); - if (!set) { - set = new Set(); - mockListeners.set(event, set); - } - set.add(cb); - return () => set!.delete(cb); - }), - listAgentsRaw: vi.fn().mockResolvedValue([]), -}; - -vi.mock('../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -// Import after mocking -const { WorkflowRunner } = await import('../workflows/runner.js'); -const { JsonFileWorkflowDb } = await import('../workflows/file-db.js'); - -// ── Helpers ────────────────────────────────────────────────────────────────── - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((s) => s.runId === runId); - }), - }; -} - -function makeResumeConfig(): RelayYamlConfig { - return { - version: '1', - name: 'test-resume-fallback', - swarm: { pattern: 'dag' }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [ - { name: 'step-a', agent: 'agent-a', task: 'Do step A' }, - { name: 'step-b', agent: 'agent-a', task: 'Do step B', dependsOn: ['step-a'] }, - { name: 'step-c', agent: 'agent-a', task: 'Do step C', dependsOn: ['step-b'] }, - ], - }, - ], - trajectories: false, - }; -} - -function makeTemplateConfig(): RelayYamlConfig { - return { - version: '1', - name: 'test-resume-template', - swarm: { pattern: 'dag' }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [ - { name: 'step-a', agent: 'agent-a', task: 'Generate input' }, - { - name: 'step-b', - agent: 'agent-a', - task: 'Use cached value: {{steps.step-a.output}}', - dependsOn: ['step-a'], - }, - ], - }, - ], - trajectories: false, - }; -} - -function makeRunRow( - runId: string, - config: RelayYamlConfig, - status: WorkflowRunRow['status'] = 'failed' -): WorkflowRunRow { - const now = new Date().toISOString(); - return { - id: runId, - workspaceId: 'ws-test', - workflowName: 'default', - pattern: config.swarm.pattern, - status, - config, - startedAt: now, - createdAt: now, - updatedAt: now, - }; -} - -function makeStepRow( - runId: string, - stepName: string, - task: string, - dependsOn: string[] = [], - status: WorkflowStepRow['status'] = 'pending', - output?: string -): WorkflowStepRow { - const now = new Date().toISOString(); - return { - id: `${runId}-${stepName}`, - runId, - stepName, - agentName: 'agent-a', - stepType: 'agent', - status, - task, - dependsOn, - output, - retryCount: 0, - createdAt: now, - updatedAt: now, - startedAt: status !== 'pending' ? now : undefined, - completedAt: status === 'completed' ? now : undefined, - }; -} - -function writeCachedOutput(tmpDir: string, runId: string, stepName: string, output: string): void { - const outputDir = path.join(tmpDir, '.agent-relay', 'step-outputs', runId); - mkdirSync(outputDir, { recursive: true }); - writeFileSync(path.join(outputDir, `${stepName}.md`), output); -} - -// ── Tests ──────────────────────────────────────────────────────────────────── - -describe('resume fallback to step-output cache', () => { - let db: WorkflowDb; - let runner: InstanceType; - let tmpDir: string; - - beforeEach(() => { - vi.clearAllMocks(); - waitForExitFn = vi.fn().mockResolvedValue('exited'); - mockListeners.clear(); - tmpDir = mkdtempSync(path.join(os.tmpdir(), 'resume-fallback-')); - db = makeDb(); - runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir }); - }); - - afterEach(() => { - try { - rmSync(tmpDir, { recursive: true, force: true }); - } catch { - /* noop */ - } - }); - - it('should reconstruct run from step-output cache when JSONL missing', async () => { - const runId = 'resume-cache-run'; - const config = makeResumeConfig(); - writeCachedOutput(tmpDir, runId, 'step-a', 'cached-a'); - writeCachedOutput(tmpDir, runId, 'step-b', 'cached-b'); - - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if ('stepName' in event) { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - const run = await (runner as any).resume(runId, undefined, config); - expect(run.status, run.error).toBe('completed'); - - const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName); - expect(startedSteps).not.toContain('step-a'); - expect(startedSteps).not.toContain('step-b'); - expect(startedSteps).toContain('step-c'); - }); - - it('should throw "not found" when neither JSONL nor cache exists', async () => { - const config = makeResumeConfig(); - - await expect((runner as any).resume('nonexistent-id', undefined, config)).rejects.toThrow('not found'); - }); - - it('should prefer JSONL database over step-output cache', async () => { - const runId = 'resume-db-run'; - const config = makeResumeConfig(); - const dbPath = path.join(tmpDir, '.agent-relay', 'workflow-runs.jsonl'); - const fileDb = new JsonFileWorkflowDb(dbPath); - const dbRunner = new WorkflowRunner({ db: fileDb, workspaceId: 'ws-test', cwd: tmpDir }); - - await fileDb.insertRun(makeRunRow(runId, config)); - await fileDb.insertStep(makeStepRow(runId, 'step-a', 'Do step A', [], 'failed')); - await fileDb.insertStep(makeStepRow(runId, 'step-b', 'Do step B', ['step-a'], 'pending')); - await fileDb.insertStep(makeStepRow(runId, 'step-c', 'Do step C', ['step-b'], 'pending')); - - writeCachedOutput(tmpDir, runId, 'step-a', 'cached-a-from-fallback'); - - const events: Array<{ type: string; stepName?: string }> = []; - dbRunner.on((event) => { - if ('stepName' in event) { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - const run = await dbRunner.resume(runId); - expect(run.status, run.error).toBe('completed'); - - const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName); - expect(startedSteps).toContain('step-a'); - expect(startedSteps).toContain('step-b'); - expect(startedSteps).toContain('step-c'); - }); - - it('should handle empty step-output directory gracefully', async () => { - const runId = 'resume-empty-cache'; - const config = makeResumeConfig(); - mkdirSync(path.join(tmpDir, '.agent-relay', 'step-outputs', runId), { recursive: true }); - - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if ('stepName' in event) { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - const run = await (runner as any).resume(runId, undefined, config); - expect(run.status, run.error).toBe('completed'); - - const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName); - expect(startedSteps).toContain('step-a'); - expect(startedSteps).toContain('step-b'); - expect(startedSteps).toContain('step-c'); - }); - - it('should load cached output into step template variables', async () => { - const runId = 'resume-template-cache'; - const config = makeTemplateConfig(); - writeCachedOutput(tmpDir, runId, 'step-a', 'hello world'); - - const run = await (runner as any).resume(runId, undefined, config); - expect(run.status, run.error).toBe('completed'); - - const spawnedTasks = mockRelayInstance.spawnPty.mock.calls.map( - ([args]) => (args as { task?: string }).task ?? '' - ); - expect(spawnedTasks.some((task) => task.includes('Use cached value: hello world'))).toBe(true); - }); - - it('should skip .report.json files when scanning step outputs', async () => { - const runId = 'resume-report-cache'; - const config = makeResumeConfig(); - const outputDir = path.join(tmpDir, '.agent-relay', 'step-outputs', runId); - mkdirSync(outputDir, { recursive: true }); - writeFileSync(path.join(outputDir, 'step-a.md'), 'cached-a'); - writeFileSync(path.join(outputDir, 'step-a.report.json'), '{"summary":"done"}'); - writeFileSync(path.join(outputDir, 'step-b.report.json'), '{"summary":"metadata only"}'); - - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if ('stepName' in event) { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - const run = await (runner as any).resume(runId, undefined, config); - expect(run.status, run.error).toBe('completed'); - - const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName); - expect(startedSteps).not.toContain('step-a'); - expect(startedSteps).toContain('step-b'); - expect(startedSteps).toContain('step-c'); - }); -}); - -describe('file-db append diagnostics', () => { - let tmpDir: string; - - beforeEach(() => { - vi.clearAllMocks(); - tmpDir = mkdtempSync(path.join(os.tmpdir(), 'file-db-warn-')); - }); - - afterEach(() => { - try { - chmodSync(path.join(tmpDir, 'readonly'), 0o755); - } catch { - /* noop */ - } - try { - rmSync(tmpDir, { recursive: true, force: true }); - } catch { - /* noop */ - } - }); - - it('should warn once when append fails', async () => { - const readonlyDir = path.join(tmpDir, 'readonly'); - mkdirSync(readonlyDir, { recursive: true }); - chmodSync(readonlyDir, 0o555); - - const dbPath = path.join(readonlyDir, 'workflow-runs.jsonl'); - const fileDb = new JsonFileWorkflowDb(dbPath); - const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); - const config = makeResumeConfig(); - - await fileDb.insertRun(makeRunRow('warn-run-1', config)); - await fileDb.insertRun(makeRunRow('warn-run-2', config)); - - expect(warnSpy).toHaveBeenCalledTimes(1); - - warnSpy.mockRestore(); - }); -}); diff --git a/packages/sdk/src/__tests__/start-from.test.ts b/packages/sdk/src/__tests__/start-from.test.ts deleted file mode 100644 index 657ff327d..000000000 --- a/packages/sdk/src/__tests__/start-from.test.ts +++ /dev/null @@ -1,357 +0,0 @@ -/** - * Tests for the startFrom workflow execution feature. - * - * Validates that callers can start a workflow from a specific step, - * skipping all predecessor steps and loading cached outputs when available. - */ - -import { afterEach, describe, it, expect, vi, beforeEach } from 'vitest'; -import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import type { WorkflowDb } from '../workflows/runner.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js'; - -// ── Mock fetch ─────────────────────────────────────────────────────────────── - -const mockFetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ data: { api_key: 'rk_live_test', workspace_id: 'ws-test' } }), - text: () => Promise.resolve(''), -}); -vi.stubGlobal('fetch', mockFetch); - -// ── Mock RelayCast SDK ─────────────────────────────────────────────────────── - -const mockRelaycastAgent = { - send: vi.fn().mockResolvedValue(undefined), - heartbeat: vi.fn().mockResolvedValue(undefined), - channels: { - create: vi.fn().mockResolvedValue(undefined), - join: vi.fn().mockResolvedValue(undefined), - invite: vi.fn().mockResolvedValue(undefined), - }, -}; - -const mockRelaycast = { - agents: { - register: vi.fn().mockResolvedValue({ token: 'token-1' }), - }, - as: vi.fn().mockReturnValue(mockRelaycastAgent), -}; - -class MockRelayError extends Error { - code: string; - constructor(code: string, message: string, status = 400) { - super(message); - this.code = code; - this.name = 'RelayError'; - (this as any).status = status; - } -} - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn().mockImplementation(() => mockRelaycast), - RelayError: MockRelayError, -})); - -// ── Mock AgentRelay ────────────────────────────────────────────────────────── - -let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>; - -const mockAgent = { - name: 'test-agent-abc', - get waitForExit() { - return waitForExitFn; - }, - get waitForIdle() { - return vi.fn().mockImplementation(() => new Promise(() => {})); - }, - release: vi.fn().mockResolvedValue(undefined), -}; - -const mockHuman = { - name: 'WorkflowRunner', - sendMessage: vi.fn().mockResolvedValue(undefined), -}; - -// Listener registry for the AgentRelay mock — production AgentRelay uses -// addListener('eventName', handler). Tests fire events via emitRelayEvent. -const relayListeners = new Map void>>(); -function emitRelayEvent(event: string, payload: unknown): void { - for (const handler of relayListeners.get(event) ?? []) { - handler(payload); - } -} - -const mockRelayInstance = { - spawnPty: vi.fn().mockImplementation(async ({ name, task }: { name: string; task?: string }) => { - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim(); - const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT'); - const output = isReview - ? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n' - : stepComplete - ? `STEP_COMPLETE:${stepComplete}\n` - : 'STEP_COMPLETE:unknown\n'; - - queueMicrotask(() => emitRelayEvent('workerOutput', { name, chunk: output })); - - return { ...mockAgent, name }; - }), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - addListener: vi.fn((event: string, handler: (...args: unknown[]) => void) => { - let set = relayListeners.get(event); - if (!set) { - set = new Set(); - relayListeners.set(event, set); - } - set.add(handler); - return () => set!.delete(handler); - }), - listAgentsRaw: vi.fn().mockResolvedValue([]), -}; - -vi.mock('../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -// Import after mocking -const { WorkflowRunner } = await import('../workflows/runner.js'); -const { workflow } = await import('../workflows/builder.js'); - -// ── Helpers ────────────────────────────────────────────────────────────────── - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((s) => s.runId === runId); - }), - }; -} - -function makeLinearConfig(): RelayYamlConfig { - return { - version: '1', - name: 'test-start-from', - swarm: { pattern: 'dag' }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [ - { name: 'step-1', agent: 'agent-a', task: 'Do step 1' }, - { name: 'step-2', agent: 'agent-a', task: 'Do step 2', dependsOn: ['step-1'] }, - { name: 'step-3', agent: 'agent-a', task: 'Do step 3', dependsOn: ['step-2'] }, - ], - }, - ], - trajectories: false, - }; -} - -function makeDiamondConfig(): RelayYamlConfig { - return { - version: '1', - name: 'test-diamond', - swarm: { pattern: 'dag' }, - agents: [{ name: 'agent-a', cli: 'claude' }], - workflows: [ - { - name: 'default', - steps: [ - { name: 'root', agent: 'agent-a', task: 'Root step' }, - { name: 'left', agent: 'agent-a', task: 'Left branch', dependsOn: ['root'] }, - { name: 'right', agent: 'agent-a', task: 'Right branch', dependsOn: ['root'] }, - { name: 'merge', agent: 'agent-a', task: 'Merge', dependsOn: ['left', 'right'] }, - ], - }, - ], - trajectories: false, - }; -} - -// ── Tests ──────────────────────────────────────────────────────────────────── - -describe('startFrom', () => { - let db: WorkflowDb; - let runner: InstanceType; - let tmpDir: string; - - beforeEach(() => { - vi.clearAllMocks(); - waitForExitFn = vi.fn().mockResolvedValue('exited'); - relayListeners.clear(); - tmpDir = mkdtempSync(path.join(os.tmpdir(), 'start-from-')); - db = makeDb(); - runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir }); - }); - - it('should throw when startFrom step does not exist', async () => { - const config = makeLinearConfig(); - await expect(runner.execute(config, 'default', undefined, { startFrom: 'nonexistent' })).rejects.toThrow( - 'startFrom step "nonexistent" not found in workflow' - ); - }); - - it('should skip predecessor steps in a linear chain', async () => { - const config = makeLinearConfig(); - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if ('stepName' in event) { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - const run = await runner.execute(config, 'default', undefined, { startFrom: 'step-3' }); - expect(run.status, run.error).toBe('completed'); - - // step-1 and step-2 should NOT have step:started events (they were pre-completed) - const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName); - expect(startedSteps).not.toContain('step-1'); - expect(startedSteps).not.toContain('step-2'); - expect(startedSteps).toContain('step-3'); - }); - - it('should skip all transitive deps in a diamond DAG', async () => { - const config = makeDiamondConfig(); - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if ('stepName' in event) { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - const run = await runner.execute(config, 'default', undefined, { startFrom: 'merge' }); - expect(run.status, run.error).toBe('completed'); - - const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName); - expect(startedSteps).not.toContain('root'); - expect(startedSteps).not.toContain('left'); - expect(startedSteps).not.toContain('right'); - expect(startedSteps).toContain('merge'); - }); - - it('should load cached output from disk for skipped steps', async () => { - const config = makeLinearConfig(); - - // Pre-create cached output for step-1 (simulating a prior run) - // We need to intercept the runId to write to the correct path. - // Instead, we'll verify updateStep was called with expected output. - const run = await runner.execute(config, 'default', undefined, { startFrom: 'step-2' }); - expect(run.status, run.error).toBe('completed'); - - // step-1 should have been marked completed with empty string (no cached output) - const updateCalls = (db.updateStep as any).mock.calls as Array<[string, Partial]>; - const step1Completion = updateCalls.find( - ([_, patch]) => patch.status === 'completed' && patch.output === '' - ); - expect(step1Completion).toBeDefined(); - }); - - it('should load cached output when available on disk via previousRunId', async () => { - const config = makeLinearConfig(); - - // Write cached output for step-1 under a known previous run ID - const prevRunId = 'prev-run-abc123'; - const outputDir = path.join(tmpDir, '.agent-relay', 'step-outputs', prevRunId); - mkdirSync(outputDir, { recursive: true }); - writeFileSync(path.join(outputDir, 'step-1.md'), 'cached-output-from-step-1'); - - const run = await runner.execute(config, 'default', undefined, { - startFrom: 'step-2', - previousRunId: prevRunId, - }); - expect(run.status, run.error).toBe('completed'); - - // Verify step-1 was marked completed with the cached output - const updateCalls = (db.updateStep as any).mock.calls as Array<[string, Partial]>; - const step1WithCachedOutput = updateCalls.find( - ([_, patch]) => patch.status === 'completed' && patch.output === 'cached-output-from-step-1' - ); - expect(step1WithCachedOutput).toBeDefined(); - }); - - it('should work when startFrom targets the first step (no deps to skip)', async () => { - const config = makeLinearConfig(); - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if ('stepName' in event) { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - const run = await runner.execute(config, 'default', undefined, { startFrom: 'step-1' }); - expect(run.status, run.error).toBe('completed'); - - // All 3 steps should start since step-1 has no deps - const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName); - expect(startedSteps).toContain('step-1'); - expect(startedSteps).toContain('step-2'); - expect(startedSteps).toContain('step-3'); - }); - - it('should work with builder .startFrom() chainable method', () => { - const config = workflow('test') - .agent('worker', { cli: 'claude' }) - .step('build', { agent: 'worker', task: 'Build' }) - .step('test', { agent: 'worker', task: 'Test', dependsOn: ['build'] }) - .step('deploy', { agent: 'worker', task: 'Deploy', dependsOn: ['test'] }) - .startFrom('deploy') - .toConfig(); - - // toConfig() should still produce valid config — startFrom is a runtime option - expect(config.workflows![0].steps).toHaveLength(3); - expect(config.agents).toHaveLength(1); - }); - - it('should pass startFrom from WorkflowRunOptions', async () => { - const config = makeLinearConfig(); - const events: Array<{ type: string; stepName?: string }> = []; - - // Test via runner.execute directly with options - runner.on((event) => { - if ('stepName' in event) { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - const run = await runner.execute(config, 'default', undefined, { startFrom: 'step-2' }); - expect(run.status, run.error).toBe('completed'); - - const startedSteps = events.filter((e) => e.type === 'step:started').map((e) => e.stepName); - expect(startedSteps).not.toContain('step-1'); - expect(startedSteps).toContain('step-2'); - expect(startedSteps).toContain('step-3'); - }); - - afterEach(() => { - try { - rmSync(tmpDir, { recursive: true, force: true }); - } catch { - /* noop */ - } - }); -}); diff --git a/packages/sdk/src/__tests__/swarm-coordinator.test.ts b/packages/sdk/src/__tests__/swarm-coordinator.test.ts deleted file mode 100644 index b066524ba..000000000 --- a/packages/sdk/src/__tests__/swarm-coordinator.test.ts +++ /dev/null @@ -1,904 +0,0 @@ -/** - * SwarmCoordinator integration tests. - * - * Tests pattern selection, topology resolution, run lifecycle, - * and step management with a mocked DbClient. - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { SwarmCoordinator } from '../workflows/coordinator.js'; -import type { DbClient } from '../workflows/coordinator.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js'; - -// ── Helpers ────────────────────────────────────────────────────────────────── - -function makeDb(): DbClient { - return { - query: vi.fn().mockResolvedValue({ rows: [] }), - }; -} - -function makeConfig(overrides: Partial = {}): RelayYamlConfig { - return { - version: '1', - name: 'test-workflow', - swarm: { pattern: 'fan-out' }, - agents: [ - { name: 'leader', cli: 'claude', role: 'lead' }, - { name: 'worker-1', cli: 'claude' }, - { name: 'worker-2', cli: 'codex' }, - ], - trajectories: false, - ...overrides, - }; -} - -function makeRunRow(overrides: Partial = {}): WorkflowRunRow { - const now = new Date().toISOString(); - return { - id: 'run_test_1', - workspaceId: 'ws-1', - workflowName: 'test-workflow', - pattern: 'fan-out', - status: 'pending', - config: makeConfig(), - startedAt: now, - createdAt: now, - updatedAt: now, - ...overrides, - }; -} - -function makeStepRow(overrides: Partial = {}): WorkflowStepRow { - const now = new Date().toISOString(); - return { - id: 'step_test_1', - runId: 'run_test_1', - stepName: 'step-1', - agentName: 'worker-1', - status: 'pending', - task: 'Do something', - dependsOn: [], - retryCount: 0, - createdAt: now, - updatedAt: now, - ...overrides, - }; -} - -// ── Tests ──────────────────────────────────────────────────────────────────── - -describe('SwarmCoordinator', () => { - let db: DbClient; - let coordinator: SwarmCoordinator; - - beforeEach(() => { - vi.clearAllMocks(); - db = makeDb(); - coordinator = new SwarmCoordinator(db); - }); - - // ── Pattern selection ────────────────────────────────────────────────── - - describe('selectPattern', () => { - it('should return explicit pattern from config', () => { - expect(coordinator.selectPattern(makeConfig({ swarm: { pattern: 'pipeline' } }))).toBe('pipeline'); - }); - - it('should auto-select dag when steps have dependencies', () => { - const config = makeConfig({ - swarm: { pattern: undefined as unknown as string } as any, - workflows: [ - { - name: 'wf', - steps: [ - { name: 's1', agent: 'worker-1', task: 'x' }, - { name: 's2', agent: 'worker-2', task: 'y', dependsOn: ['s1'] }, - ], - }, - ], - }); - // With pattern set explicitly, it returns it; with undefined it falls through heuristics - // Since config.swarm.pattern is undefined (truthy check fails), heuristics kick in - config.swarm.pattern = '' as any; - const pattern = coordinator.selectPattern(config); - expect(pattern).toBe('dag'); - }); - - it('should auto-select consensus when consensusStrategy is set', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - coordination: { consensusStrategy: 'majority' }, - }); - expect(coordinator.selectPattern(config)).toBe('consensus'); - }); - - // ── Auto-selection heuristic tests ────────────────────────────────── - - it('should auto-select map-reduce when mapper and reducer roles present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'mapper', cli: 'claude', role: 'mapper' }, - { name: 'reducer', cli: 'claude', role: 'reducer' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('map-reduce'); - }); - - it('should auto-select red-team when attacker and defender roles present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'attacker', cli: 'claude', role: 'attacker' }, - { name: 'defender', cli: 'claude', role: 'defender' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('red-team'); - }); - - it('should auto-select reflection when critic role present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'producer', cli: 'claude' }, - { name: 'critic', cli: 'claude', role: 'critic' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('reflection'); - }); - - it('should auto-select escalation when tier-N roles present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 't1', cli: 'claude', role: 'tier-1' }, - { name: 't2', cli: 'claude', role: 'tier-2' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('escalation'); - }); - - it('should auto-select auction when auctioneer role present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'auctioneer', cli: 'claude', role: 'auctioneer' }, - { name: 'bidder', cli: 'claude' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('auction'); - }); - - it('should auto-select supervisor when supervisor role present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'supervisor', cli: 'claude', role: 'supervisor' }, - { name: 'worker', cli: 'claude' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('supervisor'); - }); - - it('should auto-select verifier when verifier role present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'producer', cli: 'claude' }, - { name: 'verifier', cli: 'claude', role: 'verifier' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('verifier'); - }); - - it('should auto-select swarm when hive-mind role present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'hive', cli: 'claude', role: 'hive-mind' }, - { name: 'drone', cli: 'claude' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('swarm'); - }); - - it('should auto-select circuit-breaker when fallback role present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'primary', cli: 'claude', role: 'primary' }, - { name: 'fallback', cli: 'claude', role: 'fallback' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('circuit-breaker'); - }); - - it('should auto-select review-loop when implementer and multiple reviewers present', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'implementer', cli: 'claude', role: 'Senior developer implementing the task' }, - { name: 'reviewer-diff', cli: 'codex', role: 'Code quality reviewer' }, - { name: 'reviewer-arch', cli: 'claude', role: 'Architecture reviewer' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('review-loop'); - }); - - it('should auto-select review-loop when agent names contain implementer and reviewer', () => { - const config = makeConfig({ - swarm: { pattern: '' as any }, - agents: [ - { name: 'implementer', cli: 'claude' }, - { name: 'reviewer-1', cli: 'codex' }, - { name: 'reviewer-2', cli: 'claude' }, - ], - }); - expect(coordinator.selectPattern(config)).toBe('review-loop'); - }); - }); - - // ── Topology resolution ──────────────────────────────────────────────── - - describe('resolveTopology', () => { - it('should build fan-out topology with hub', () => { - const topology = coordinator.resolveTopology(makeConfig()); - expect(topology.pattern).toBe('fan-out'); - expect(topology.hub).toBe('leader'); - expect(topology.edges.get('leader')).toEqual(['worker-1', 'worker-2']); - expect(topology.edges.get('worker-1')).toEqual(['leader']); - }); - - it('should build pipeline topology in step order', () => { - const config = makeConfig({ - swarm: { pattern: 'pipeline' }, - workflows: [ - { - name: 'wf', - steps: [ - { name: 's1', agent: 'worker-1', task: 'step 1' }, - { name: 's2', agent: 'worker-2', task: 'step 2' }, - { name: 's3', agent: 'leader', task: 'step 3' }, - ], - }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('pipeline'); - expect(topology.pipelineOrder).toEqual(['worker-1', 'worker-2', 'leader']); - expect(topology.edges.get('worker-1')).toEqual(['worker-2']); - expect(topology.edges.get('leader')).toEqual([]); - }); - - it('should build hub-spoke topology', () => { - const config = makeConfig({ swarm: { pattern: 'hub-spoke' } }); - const topology = coordinator.resolveTopology(config); - expect(topology.hub).toBe('leader'); - expect(topology.edges.get('leader')).toContain('worker-1'); - expect(topology.edges.get('worker-1')).toEqual(['leader']); - }); - - it('should build mesh topology for consensus', () => { - const config = makeConfig({ swarm: { pattern: 'consensus' } }); - const topology = coordinator.resolveTopology(config); - expect(topology.edges.get('leader')).toContain('worker-1'); - expect(topology.edges.get('leader')).toContain('worker-2'); - expect(topology.edges.get('worker-1')).toContain('leader'); - }); - - it('should build DAG topology from step dependencies', () => { - const config = makeConfig({ - swarm: { pattern: 'dag' }, - workflows: [ - { - name: 'wf', - steps: [ - { name: 's1', agent: 'worker-1', task: 'x' }, - { name: 's2', agent: 'worker-2', task: 'y', dependsOn: ['s1'] }, - ], - }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('dag'); - expect(topology.edges.get('worker-1')).toContain('worker-2'); - }); - - it('should build hierarchical topology', () => { - const config = makeConfig({ swarm: { pattern: 'hierarchical' } }); - const topology = coordinator.resolveTopology(config); - expect(topology.hub).toBe('leader'); - expect(topology.edges.get('leader')).toContain('worker-1'); - }); - - it('should build cascade topology', () => { - const config = makeConfig({ swarm: { pattern: 'cascade' } }); - const topology = coordinator.resolveTopology(config); - expect(topology.pipelineOrder).toEqual(['leader', 'worker-1', 'worker-2']); - }); - - // ── Additional pattern tests ──────────────────────────────────────── - - it('should build map-reduce topology', () => { - const config = makeConfig({ - swarm: { pattern: 'map-reduce' }, - agents: [ - { name: 'coordinator', cli: 'claude', role: 'lead' }, - { name: 'mapper-1', cli: 'claude', role: 'mapper' }, - { name: 'mapper-2', cli: 'claude', role: 'mapper' }, - { name: 'reducer', cli: 'claude', role: 'reducer' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('map-reduce'); - expect(topology.hub).toBe('coordinator'); - expect(topology.edges.get('coordinator')).toContain('mapper-1'); - expect(topology.edges.get('mapper-1')).toContain('reducer'); - expect(topology.edges.get('reducer')).toContain('coordinator'); - }); - - it('should build scatter-gather topology', () => { - const config = makeConfig({ swarm: { pattern: 'scatter-gather' } }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('scatter-gather'); - expect(topology.hub).toBe('leader'); - expect(topology.edges.get('leader')).toContain('worker-1'); - expect(topology.edges.get('worker-1')).toEqual(['leader']); - }); - - it('should build supervisor topology', () => { - const config = makeConfig({ - swarm: { pattern: 'supervisor' }, - agents: [ - { name: 'supervisor', cli: 'claude', role: 'supervisor' }, - { name: 'worker-1', cli: 'claude' }, - { name: 'worker-2', cli: 'codex' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('supervisor'); - expect(topology.hub).toBe('supervisor'); - expect(topology.edges.get('supervisor')).toContain('worker-1'); - expect(topology.edges.get('worker-1')).toEqual(['supervisor']); - }); - - it('should build reflection topology', () => { - const config = makeConfig({ - swarm: { pattern: 'reflection' }, - agents: [ - { name: 'producer', cli: 'claude' }, - { name: 'critic', cli: 'claude', role: 'critic' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('reflection'); - expect(topology.edges.get('producer')).toContain('critic'); - expect(topology.edges.get('critic')).toContain('producer'); - }); - - it('should build red-team topology', () => { - const config = makeConfig({ - swarm: { pattern: 'red-team' }, - agents: [ - { name: 'attacker', cli: 'claude', role: 'attacker' }, - { name: 'defender', cli: 'claude', role: 'defender' }, - { name: 'judge', cli: 'claude', role: 'judge' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('red-team'); - expect(topology.edges.get('attacker')).toContain('defender'); - expect(topology.edges.get('defender')).toContain('attacker'); - expect(topology.edges.get('attacker')).toContain('judge'); - }); - - it('should build verifier topology', () => { - const config = makeConfig({ - swarm: { pattern: 'verifier' }, - agents: [ - { name: 'producer', cli: 'claude' }, - { name: 'verifier', cli: 'claude', role: 'verifier' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('verifier'); - expect(topology.edges.get('producer')).toContain('verifier'); - expect(topology.edges.get('verifier')).toContain('producer'); - }); - - it('should build auction topology', () => { - const config = makeConfig({ - swarm: { pattern: 'auction' }, - agents: [ - { name: 'auctioneer', cli: 'claude', role: 'auctioneer' }, - { name: 'bidder-1', cli: 'claude' }, - { name: 'bidder-2', cli: 'codex' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('auction'); - expect(topology.hub).toBe('auctioneer'); - expect(topology.edges.get('auctioneer')).toContain('bidder-1'); - expect(topology.edges.get('bidder-1')).toEqual(['auctioneer']); - }); - - it('should build escalation topology', () => { - const config = makeConfig({ - swarm: { pattern: 'escalation' }, - agents: [ - { name: 'tier1', cli: 'claude', role: 'tier-1' }, - { name: 'tier2', cli: 'claude', role: 'tier-2' }, - { name: 'tier3', cli: 'claude', role: 'tier-3' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('escalation'); - expect(topology.pipelineOrder).toEqual(['tier1', 'tier2', 'tier3']); - expect(topology.edges.get('tier1')).toContain('tier2'); - expect(topology.edges.get('tier2')).toContain('tier3'); - }); - - it('should build saga topology', () => { - const config = makeConfig({ swarm: { pattern: 'saga' } }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('saga'); - expect(topology.hub).toBe('leader'); - expect(topology.edges.get('leader')).toContain('worker-1'); - expect(topology.edges.get('worker-1')).toEqual(['leader']); - }); - - it('should build circuit-breaker topology', () => { - const config = makeConfig({ swarm: { pattern: 'circuit-breaker' } }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('circuit-breaker'); - expect(topology.pipelineOrder).toEqual(['leader', 'worker-1', 'worker-2']); - expect(topology.edges.get('leader')).toEqual(['worker-1']); - expect(topology.edges.get('worker-2')).toEqual([]); - }); - - it('should build blackboard topology', () => { - const config = makeConfig({ swarm: { pattern: 'blackboard' } }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('blackboard'); - // Full mesh for blackboard - expect(topology.edges.get('leader')).toContain('worker-1'); - expect(topology.edges.get('worker-1')).toContain('leader'); - }); - - it('should build swarm topology with neighbor communication', () => { - const config = makeConfig({ swarm: { pattern: 'swarm' } }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('swarm'); - // Middle agent should have two neighbors - expect(topology.edges.get('worker-1')).toContain('leader'); - expect(topology.edges.get('worker-1')).toContain('worker-2'); - }); - - // ── Edge case tests ───────────────────────────────────────────────── - - it('should handle map-reduce with no reducers (fallback to coordinator)', () => { - const config = makeConfig({ - swarm: { pattern: 'map-reduce' }, - agents: [ - { name: 'coordinator', cli: 'claude', role: 'lead' }, - { name: 'mapper-1', cli: 'claude', role: 'mapper' }, - { name: 'mapper-2', cli: 'claude', role: 'mapper' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('map-reduce'); - // Mappers should fallback to coordinator when no reducers - expect(topology.edges.get('mapper-1')).toContain('coordinator'); - }); - - it('should handle verifier with no verifiers (empty edges)', () => { - const config = makeConfig({ - swarm: { pattern: 'verifier' }, - agents: [ - { name: 'producer-1', cli: 'claude' }, - { name: 'producer-2', cli: 'claude' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('verifier'); - // Producers have no one to send to - expect(topology.edges.get('producer-1')).toEqual([]); - }); - - it('should handle escalation with no tier roles (use agent order)', () => { - const config = makeConfig({ - swarm: { pattern: 'escalation' }, - agents: [ - { name: 'agent-1', cli: 'claude' }, - { name: 'agent-2', cli: 'claude' }, - { name: 'agent-3', cli: 'claude' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('escalation'); - expect(topology.pipelineOrder).toEqual(['agent-1', 'agent-2', 'agent-3']); - }); - - it('should handle reflection with no critic (fallback to mesh)', () => { - const config = makeConfig({ - swarm: { pattern: 'reflection' }, - agents: [ - { name: 'agent-1', cli: 'claude' }, - { name: 'agent-2', cli: 'claude' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('reflection'); - // Falls back to full mesh when no critic - expect(topology.edges.get('agent-1')).toContain('agent-2'); - expect(topology.edges.get('agent-2')).toContain('agent-1'); - }); - - it('should handle swarm with hive-mind role', () => { - const config = makeConfig({ - swarm: { pattern: 'swarm' }, - agents: [ - { name: 'hive', cli: 'claude', role: 'hive-mind' }, - { name: 'drone-1', cli: 'claude' }, - { name: 'drone-2', cli: 'claude' }, - { name: 'drone-3', cli: 'claude' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('swarm'); - expect(topology.hub).toBe('hive'); - // All drones should connect to hive mind - expect(topology.edges.get('drone-1')).toContain('hive'); - expect(topology.edges.get('drone-2')).toContain('hive'); - }); - - it('should exclude non-interactive agents from message edges', () => { - const config = makeConfig({ - swarm: { pattern: 'fan-out' }, - agents: [ - { name: 'leader', cli: 'claude', role: 'lead' }, - { name: 'worker-1', cli: 'codex', interactive: false }, - { name: 'worker-2', cli: 'claude' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('fan-out'); - // leader should only message worker-2 (not worker-1 which is non-interactive) - expect(topology.edges.get('leader')).toEqual(['worker-2']); - // worker-1 should have empty edges (non-interactive) - expect(topology.edges.get('worker-1')).toEqual([]); - // worker-2 should only message leader - expect(topology.edges.get('worker-2')).toEqual(['leader']); - // All agents should still be in the topology - expect(topology.agents).toHaveLength(3); - }); - - it('should exclude non-interactive agents from DAG topology edges', () => { - const config = makeConfig({ - swarm: { pattern: 'dag' }, - agents: [ - { name: 'leader', cli: 'claude', role: 'lead' }, - { name: 'worker-1', cli: 'codex', interactive: false }, - { name: 'worker-2', cli: 'claude' }, - ], - workflows: [ - { - name: 'wf', - steps: [ - { name: 's1', agent: 'worker-1', task: 'x' }, - { name: 's2', agent: 'worker-2', task: 'y', dependsOn: ['s1'] }, - { name: 's3', agent: 'leader', task: 'z', dependsOn: ['s2'] }, - ], - }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('dag'); - // worker-1 is non-interactive — should have empty edges even though s2 depends on s1 - expect(topology.edges.get('worker-1')).toEqual([]); - // worker-2 should NOT have worker-1 as a target (non-interactive) - const worker2Targets = topology.edges.get('worker-2') ?? []; - expect(worker2Targets).not.toContain('worker-1'); - // worker-2 should still point to leader - expect(worker2Targets).toContain('leader'); - }); - - it('should handle all non-interactive agents gracefully', () => { - const config = makeConfig({ - swarm: { pattern: 'fan-out' }, - agents: [ - { name: 'leader', cli: 'claude', role: 'lead' }, - { name: 'worker-1', cli: 'codex', interactive: false }, - { name: 'worker-2', cli: 'codex', interactive: false }, - ], - }); - const topology = coordinator.resolveTopology(config); - // leader is the only interactive agent, so it fans out to no one - expect(topology.edges.get('leader')).toEqual([]); - expect(topology.edges.get('worker-1')).toEqual([]); - expect(topology.edges.get('worker-2')).toEqual([]); - }); - - it('should handle red-team with multiple attackers and defenders', () => { - const config = makeConfig({ - swarm: { pattern: 'red-team' }, - agents: [ - { name: 'attacker-1', cli: 'claude', role: 'attacker' }, - { name: 'attacker-2', cli: 'claude', role: 'attacker' }, - { name: 'defender-1', cli: 'claude', role: 'defender' }, - { name: 'defender-2', cli: 'claude', role: 'defender' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('red-team'); - // Attackers should reach all defenders - expect(topology.edges.get('attacker-1')).toContain('defender-1'); - expect(topology.edges.get('attacker-1')).toContain('defender-2'); - // Defenders should reach all attackers - expect(topology.edges.get('defender-1')).toContain('attacker-1'); - expect(topology.edges.get('defender-1')).toContain('attacker-2'); - }); - - it('should build review-loop topology with implementer as hub and reviewer collaboration', () => { - const config = makeConfig({ - swarm: { pattern: 'review-loop' }, - agents: [ - { name: 'implementer', cli: 'claude', role: 'Senior developer implementing the task' }, - { name: 'reviewer-diff', cli: 'codex', role: 'Code quality reviewer' }, - { name: 'reviewer-arch', cli: 'claude', role: 'Architecture reviewer' }, - { name: 'reviewer-security', cli: 'codex', role: 'Security reviewer' }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('review-loop'); - expect(topology.hub).toBe('implementer'); - // Implementer can message all reviewers - expect(topology.edges.get('implementer')).toContain('reviewer-diff'); - expect(topology.edges.get('implementer')).toContain('reviewer-arch'); - expect(topology.edges.get('implementer')).toContain('reviewer-security'); - // Reviewers can message implementer AND other reviewers (collaborative review) - expect(topology.edges.get('reviewer-diff')).toContain('implementer'); - expect(topology.edges.get('reviewer-diff')).toContain('reviewer-arch'); - expect(topology.edges.get('reviewer-diff')).toContain('reviewer-security'); - expect(topology.edges.get('reviewer-arch')).toContain('implementer'); - expect(topology.edges.get('reviewer-arch')).toContain('reviewer-diff'); - expect(topology.edges.get('reviewer-security')).toContain('implementer'); - expect(topology.edges.get('reviewer-security')).toContain('reviewer-diff'); - }); - - it('should build review-loop topology with non-interactive reviewers', () => { - const config = makeConfig({ - swarm: { pattern: 'review-loop' }, - agents: [ - { name: 'implementer', cli: 'claude', role: 'Senior developer implementing the task' }, - { name: 'reviewer-diff', cli: 'codex', role: 'Code quality reviewer', interactive: false }, - { name: 'reviewer-arch', cli: 'claude', role: 'Architecture reviewer', interactive: false }, - ], - }); - const topology = coordinator.resolveTopology(config); - expect(topology.pattern).toBe('review-loop'); - expect(topology.hub).toBe('implementer'); - // Non-interactive reviewers have empty edges - expect(topology.edges.get('reviewer-diff')).toEqual([]); - expect(topology.edges.get('reviewer-arch')).toEqual([]); - // Implementer should not have non-interactive agents in edges - expect(topology.edges.get('implementer')).toEqual([]); - }); - }); - - // ── Run lifecycle ────────────────────────────────────────────────────── - - describe('createRun', () => { - it('should insert a run and emit run:created', async () => { - const run = makeRunRow(); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [run] }); - - const spy = vi.fn(); - coordinator.on('run:created', spy); - - const result = await coordinator.createRun('ws-1', makeConfig()); - expect(result).toEqual(run); - expect(spy).toHaveBeenCalledWith(run); - expect(db.query).toHaveBeenCalledOnce(); - }); - }); - - describe('startRun', () => { - it('should transition pending run to running', async () => { - const run = makeRunRow({ status: 'running' }); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [run] }); - - const spy = vi.fn(); - coordinator.on('run:started', spy); - - const result = await coordinator.startRun('run_test_1'); - expect(result.status).toBe('running'); - expect(spy).toHaveBeenCalledWith(run); - }); - - it('should throw when run not found', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.startRun('nonexistent')).rejects.toThrow('not found or not in pending state'); - }); - }); - - describe('completeRun', () => { - it('should transition run to completed and emit event', async () => { - const run = makeRunRow({ status: 'completed' }); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [run] }); - - const spy = vi.fn(); - coordinator.on('run:completed', spy); - - const result = await coordinator.completeRun('run_test_1', { result: 'ok' }); - expect(result.status).toBe('completed'); - expect(spy).toHaveBeenCalledWith(run); - }); - - it('should throw when run not found', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.completeRun('nonexistent')).rejects.toThrow('not found'); - }); - }); - - describe('failRun', () => { - it('should transition run to failed with error', async () => { - const run = makeRunRow({ status: 'failed', error: 'boom' }); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [run] }); - - const spy = vi.fn(); - coordinator.on('run:failed', spy); - - await coordinator.failRun('run_test_1', 'boom'); - expect(spy).toHaveBeenCalledWith(run); - }); - }); - - describe('cancelRun', () => { - it('should transition run to cancelled', async () => { - const run = makeRunRow({ status: 'cancelled' }); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [run] }); - - const spy = vi.fn(); - coordinator.on('run:cancelled', spy); - - const result = await coordinator.cancelRun('run_test_1'); - expect(result.status).toBe('cancelled'); - }); - }); - - // ── Step management ──────────────────────────────────────────────────── - - describe('createSteps', () => { - it('should create steps from workflow config', async () => { - const step = makeStepRow(); - vi.mocked(db.query).mockResolvedValue({ rows: [step] }); - - const config = makeConfig({ - workflows: [ - { - name: 'wf', - steps: [ - { name: 's1', agent: 'worker-1', task: 'x' }, - { name: 's2', agent: 'worker-2', task: 'y' }, - ], - }, - ], - }); - - const steps = await coordinator.createSteps('run_1', config); - expect(steps).toHaveLength(2); - expect(db.query).toHaveBeenCalledTimes(2); - }); - }); - - describe('startStep', () => { - it('should transition step to running and emit event', async () => { - const step = makeStepRow({ status: 'running' }); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [step] }); - - const spy = vi.fn(); - coordinator.on('step:started', spy); - - const result = await coordinator.startStep('step_1'); - expect(result.status).toBe('running'); - expect(spy).toHaveBeenCalledWith(step); - }); - - it('should throw for non-pending step', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await expect(coordinator.startStep('bad')).rejects.toThrow('not found or not in pending state'); - }); - }); - - describe('completeStep', () => { - it('should transition step to completed with output', async () => { - const step = makeStepRow({ status: 'completed', output: 'result data' }); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [step] }); - - const spy = vi.fn(); - coordinator.on('step:completed', spy); - - const result = await coordinator.completeStep('step_1', 'result data'); - expect(result.output).toBe('result data'); - expect(spy).toHaveBeenCalledWith(step); - }); - }); - - describe('failStep', () => { - it('should transition step to failed with error', async () => { - const step = makeStepRow({ status: 'failed', error: 'timeout' }); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [step] }); - - const spy = vi.fn(); - coordinator.on('step:failed', spy); - - const result = await coordinator.failStep('step_1', 'timeout'); - expect(result.error).toBe('timeout'); - }); - }); - - describe('skipStep', () => { - it('should mark step as skipped', async () => { - const step = makeStepRow({ status: 'skipped' }); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [step] }); - - const result = await coordinator.skipStep('step_1'); - expect(result.status).toBe('skipped'); - }); - }); - - // ── Queries ──────────────────────────────────────────────────────────── - - describe('getRun', () => { - it('should return run or null', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - expect(await coordinator.getRun('nonexistent')).toBeNull(); - - const run = makeRunRow(); - vi.mocked(db.query).mockResolvedValueOnce({ rows: [run] }); - expect(await coordinator.getRun('run_test_1')).toEqual(run); - }); - }); - - describe('getReadySteps', () => { - it('should return pending steps with all dependencies completed', async () => { - const steps: WorkflowStepRow[] = [ - makeStepRow({ id: 's1', stepName: 'step-1', status: 'completed', dependsOn: [] }), - makeStepRow({ id: 's2', stepName: 'step-2', status: 'pending', dependsOn: ['step-1'] }), - makeStepRow({ id: 's3', stepName: 'step-3', status: 'pending', dependsOn: ['step-2'] }), - ]; - vi.mocked(db.query).mockResolvedValueOnce({ rows: steps }); - - const ready = await coordinator.getReadySteps('run_test_1'); - expect(ready).toHaveLength(1); - expect(ready[0].stepName).toBe('step-2'); - }); - - it('should return all pending steps with no dependencies', async () => { - const steps: WorkflowStepRow[] = [ - makeStepRow({ id: 's1', stepName: 'a', status: 'pending', dependsOn: [] }), - makeStepRow({ id: 's2', stepName: 'b', status: 'pending', dependsOn: [] }), - ]; - vi.mocked(db.query).mockResolvedValueOnce({ rows: steps }); - - const ready = await coordinator.getReadySteps('run_test_1'); - expect(ready).toHaveLength(2); - }); - }); - - describe('getRunsByWorkspace', () => { - it('should query by workspace with optional status filter', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await coordinator.getRunsByWorkspace('ws-1', 'running'); - expect(db.query).toHaveBeenCalledWith(expect.stringContaining('status = $2'), ['ws-1', 'running']); - }); - - it('should query without status filter', async () => { - vi.mocked(db.query).mockResolvedValueOnce({ rows: [] }); - await coordinator.getRunsByWorkspace('ws-1'); - expect(db.query).toHaveBeenCalledWith(expect.not.stringContaining('status ='), ['ws-1']); - }); - }); -}); diff --git a/packages/sdk/src/__tests__/workflow-runner.test.ts b/packages/sdk/src/__tests__/workflow-runner.test.ts deleted file mode 100644 index 4444fb7d8..000000000 --- a/packages/sdk/src/__tests__/workflow-runner.test.ts +++ /dev/null @@ -1,1344 +0,0 @@ -/** - * WorkflowRunner integration tests. - * - * Tests parsing, validation, variable resolution, and DAG execution - * with a mocked DB adapter and mocked AgentRelay. - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { - existsSync, - mkdirSync, - mkdtempSync, - readFileSync, - readdirSync, - rmSync, - writeFileSync, -} from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import type { WorkflowDb } from '../workflows/runner.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js'; - -// ── Mock fetch to prevent real HTTP calls (Relaycast provisioning) ─────────── - -const mockFetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ data: { api_key: 'rk_live_test', workspace_id: 'ws-test' } }), - text: () => Promise.resolve(''), -}); -vi.stubGlobal('fetch', mockFetch); - -// ── Mock RelayCast SDK ─────────────────────────────────────────────────────── - -const mockRelaycastAgent = { - send: vi.fn().mockResolvedValue(undefined), - heartbeat: vi.fn().mockResolvedValue(undefined), - channels: { - create: vi.fn().mockResolvedValue(undefined), - join: vi.fn().mockResolvedValue(undefined), - invite: vi.fn().mockResolvedValue(undefined), - }, -}; - -const mockRelaycast = { - agents: { - register: vi.fn().mockResolvedValue({ token: 'token-1' }), - }, - as: vi.fn().mockReturnValue(mockRelaycastAgent), -}; - -class MockRelayError extends Error { - code: string; - constructor(code: string, message: string, status = 400) { - super(message); - this.code = code; - this.name = 'RelayError'; - (this as any).status = status; - } -} - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn().mockImplementation(() => mockRelaycast), - RelayError: MockRelayError, -})); - -// ── Mock AgentRelay ────────────────────────────────────────────────────────── - -let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>; -let waitForIdleFn: (ms?: number) => Promise<'idle' | 'timeout' | 'exited'>; -let mockSpawnOutputs: string[] = []; - -const mockAgent = { - name: 'test-agent-abc', - get waitForExit() { - return waitForExitFn; - }, - get waitForIdle() { - return waitForIdleFn; - }, - release: vi.fn().mockResolvedValue(undefined), -}; - -const mockHuman = { - name: 'WorkflowRunner', - sendMessage: vi.fn().mockResolvedValue(undefined), -}; - -const mockListeners = new Map void>>(); -function emitMockEvent(event: string, ...args: any[]): void { - const set = mockListeners.get(event); - if (set) for (const cb of set) cb(...args); -} - -const defaultSpawnPtyImplementation = async ({ name, task }: { name: string; task?: string }) => { - const queued = mockSpawnOutputs.shift(); - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim(); - const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT'); - const output = - queued ?? - (isReview - ? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n' - : stepComplete - ? `STEP_COMPLETE:${stepComplete}\n` - : 'STEP_COMPLETE:unknown\n'); - - queueMicrotask(() => { - emitMockEvent('workerOutput', { name, chunk: output }); - }); - - return { ...mockAgent, name }; -}; - -const mockRelayInstance = { - spawnPty: vi.fn().mockImplementation(defaultSpawnPtyImplementation), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - addListener: vi.fn((event: string, cb: (...args: any[]) => void) => { - let set = mockListeners.get(event); - if (!set) { - set = new Set(); - mockListeners.set(event, set); - } - set.add(cb); - return () => set!.delete(cb); - }), - listAgentsRaw: vi.fn().mockResolvedValue([]), -}; - -vi.mock('../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -// Import after mocking -const { WorkflowRunner } = await import('../workflows/runner.js'); - -// ── Test fixtures ──────────────────────────────────────────────────────────── - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((s) => s.runId === runId); - }), - }; -} - -function makeConfig(overrides: Partial = {}): RelayYamlConfig { - return { - version: '1', - name: 'test-workflow', - swarm: { pattern: 'dag' }, - agents: [ - { name: 'agent-a', cli: 'claude' }, - { name: 'agent-b', cli: 'claude' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { name: 'step-1', agent: 'agent-a', task: 'Do step 1' }, - { name: 'step-2', agent: 'agent-b', task: 'Do step 2', dependsOn: ['step-1'] }, - ], - }, - ], - trajectories: false, - ...overrides, - }; -} - -function never(): Promise { - return new Promise(() => {}); -} - -type WorkflowStepOverride = Partial[number]['steps'][number]>; - -function makeSupervisedConfig(stepOverrides: WorkflowStepOverride = {}): RelayYamlConfig { - return makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'team-lead', cli: 'claude', role: 'lead coordinator' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'step-1', - agent: 'specialist', - task: 'Implement the requested change', - ...stepOverrides, - }, - ], - }, - ], - }); -} - -function readCompletedTrajectoryFile(dir: string): any { - const completedDir = path.join(dir, '.trajectories', 'completed'); - if (!existsSync(completedDir)) return null; - - const jsonFile = readdirSync(completedDir).find((file) => file.endsWith('.json')); - if (!jsonFile) return null; - - return JSON.parse(readFileSync(path.join(completedDir, jsonFile), 'utf-8')); -} - -// ── Tests ──────────────────────────────────────────────────────────────────── - -describe('WorkflowRunner', () => { - let db: WorkflowDb; - let runner: InstanceType; - - beforeEach(() => { - vi.clearAllMocks(); - waitForExitFn = vi.fn().mockResolvedValue('exited'); - waitForIdleFn = vi.fn().mockImplementation(() => never()); - mockSpawnOutputs = []; - mockAgent.release.mockResolvedValue(undefined); - mockRelayInstance.spawnPty.mockImplementation(defaultSpawnPtyImplementation); - mockListeners.clear(); - db = makeDb(); - runner = new WorkflowRunner({ db, workspaceId: 'ws-test' }); - }); - - // ── Parsing & validation ─────────────────────────────────────────────── - - describe('parseYamlString', () => { - it('should parse valid YAML config', () => { - const yaml = ` -version: "1" -name: test -swarm: - pattern: fan-out -agents: - - name: a1 - cli: claude -`; - const config = runner.parseYamlString(yaml); - expect(config.name).toBe('test'); - expect(config.swarm.pattern).toBe('fan-out'); - expect(config.agents).toHaveLength(1); - }); - - it('should throw on null YAML', () => { - expect(() => runner.parseYamlString('null')).toThrow('config must be a non-null object'); - }); - - it('should throw on invalid YAML syntax', () => { - expect(() => runner.parseYamlString('not: valid: yaml: []')).toThrow(); - }); - }); - - describe('validateConfig', () => { - it('should accept valid config', () => { - expect(() => runner.validateConfig(makeConfig())).not.toThrow(); - }); - - it('should reject null config', () => { - expect(() => runner.validateConfig(null)).toThrow('non-null object'); - }); - - it('should reject missing version', () => { - expect(() => - runner.validateConfig({ - name: 'x', - swarm: { pattern: 'dag' }, - agents: [{ name: 'a', cli: 'claude' }], - }) - ).toThrow('missing required field "version"'); - }); - - it('should reject missing name', () => { - expect(() => - runner.validateConfig({ - version: '1', - swarm: { pattern: 'dag' }, - agents: [{ name: 'a', cli: 'claude' }], - }) - ).toThrow('missing required field "name"'); - }); - - it('should reject empty agents array', () => { - expect(() => - runner.validateConfig({ version: '1', name: 'x', swarm: { pattern: 'dag' }, agents: [] }) - ).not.toThrow(); - }); - - it('should reject agent without cli', () => { - expect(() => - runner.validateConfig({ - version: '1', - name: 'x', - swarm: { pattern: 'dag' }, - agents: [{ name: 'a' }], - }) - ).toThrow('each agent must have a string "cli"'); - }); - - it('should detect unknown dependencies in workflows', () => { - const config = makeConfig({ - workflows: [ - { - name: 'wf', - steps: [{ name: 's1', agent: 'agent-a', task: 'do', dependsOn: ['nonexistent'] }], - }, - ], - }); - expect(() => runner.validateConfig(config)).toThrow('depends on unknown step "nonexistent"'); - }); - - it('should detect dependency cycles', () => { - const config = makeConfig({ - workflows: [ - { - name: 'wf', - steps: [ - { name: 's1', agent: 'agent-a', task: 'do', dependsOn: ['s2'] }, - { name: 's2', agent: 'agent-b', task: 'do', dependsOn: ['s1'] }, - ], - }, - ], - }); - expect(() => runner.validateConfig(config)).toThrow('dependency cycle'); - }); - }); - - // ── Variable resolution ──────────────────────────────────────────────── - - describe('resolveVariables', () => { - it('should replace {{var}} in agent tasks', () => { - const config = makeConfig({ - agents: [{ name: 'a', cli: 'claude', task: 'Fix bug {{bugId}}' }], - }); - const resolved = runner.resolveVariables(config, { bugId: '42' }); - expect(resolved.agents[0].task).toBe('Fix bug 42'); - }); - - it('should replace {{var}} in workflow step tasks', () => { - const config = makeConfig(); - config.workflows![0].steps[0].task = 'Process {{item}}'; - const resolved = runner.resolveVariables(config, { item: 'test-item' }); - expect(resolved.workflows![0].steps[0].task).toBe('Process test-item'); - }); - - it('should throw on unresolved variables', () => { - const config = makeConfig({ - agents: [{ name: 'a', cli: 'claude', task: 'Fix {{unknown}}' }], - }); - expect(() => runner.resolveVariables(config, {})).toThrow('Unresolved variable: {{unknown}}'); - }); - - it('should not mutate original config', () => { - const config = makeConfig({ - agents: [{ name: 'a', cli: 'claude', task: 'Fix {{id}}' }], - }); - runner.resolveVariables(config, { id: '1' }); - expect(config.agents[0].task).toBe('Fix {{id}}'); - }); - }); - - // ── Execution ────────────────────────────────────────────────────────── - - describe('execute', () => { - it('should create run and steps in DB', async () => { - const config = makeConfig(); - const run = await runner.execute(config, 'default'); - - expect(db.insertRun).toHaveBeenCalledTimes(1); - expect(db.insertStep).toHaveBeenCalledTimes(2); - expect(run.status, run.error).toBe('completed'); - }); - - it('should throw when workflow not found', async () => { - const config = makeConfig(); - await expect(runner.execute(config, 'nonexistent')).rejects.toThrow('Workflow "nonexistent" not found'); - }); - - it('should throw when no workflows defined', async () => { - const config = makeConfig({ workflows: undefined }); - await expect(runner.execute(config)).rejects.toThrow('No workflows defined'); - }); - - it('should emit run:started and run:completed events', async () => { - const events: string[] = []; - runner.on((event) => events.push(event.type)); - - await runner.execute(makeConfig(), 'default'); - - expect(events).toContain('run:started'); - expect(events).toContain('run:completed'); - }); - - it('should emit step events in order', async () => { - const stepEvents: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if (event.type.startsWith('step:')) { - stepEvents.push({ - type: event.type, - stepName: 'stepName' in event ? event.stepName : undefined, - }); - } - }); - - await runner.execute(makeConfig(), 'default'); - - const startedSteps = stepEvents.filter((e) => e.type === 'step:started'); - expect(startedSteps).toHaveLength(2); - }); - - it('should emit owner assignment and review completion events for interactive steps', async () => { - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => - events.push({ type: event.type, stepName: 'stepName' in event ? event.stepName : undefined }) - ); - - await runner.execute(makeSupervisedConfig(), 'default'); - - const ownerAssigned = events.filter((e) => e.type === 'step:owner-assigned'); - const reviewCompleted = events.filter((e) => e.type === 'step:review-completed'); - expect(ownerAssigned).toHaveLength(1); - expect(reviewCompleted).toHaveLength(1); - }); - - it('should prioritize lead owner when multiple hub-role candidates exist', async () => { - const ownerAssignments: string[] = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') ownerAssignments.push(event.ownerName); - }); - - const config = makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'coord-1', cli: 'claude', role: 'coordinator' }, - { name: 'lead-1', cli: 'claude', role: 'lead' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - expect(ownerAssignments).toEqual(['lead-1']); - }, 15000); - - it('should not treat github role text as hub owner signal', async () => { - const ownerAssignments: string[] = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') ownerAssignments.push(event.ownerName); - }); - - const config = makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'github-agent', cli: 'claude', role: 'github actions agent' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - expect(ownerAssignments).toEqual(['specialist']); - }); - - it('should not elect github-role agent as owner (hub word-boundary)', async () => { - const ownerAssignments: Array<{ owner: string; specialist: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') { - ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName }); - } - }); - - const config = makeConfig({ - swarm: { pattern: 'hub-spoke' }, - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'github-bot', cli: 'claude', role: 'github integration' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - // github-bot should NOT be elected as owner (role contains "hub" substring but not word) - expect(ownerAssignments[0].owner).not.toBe('github-bot'); - // specialist should be its own owner since no hub-role agent exists - expect(ownerAssignments[0].owner).toBe('specialist'); - }, 15000); - - it('should parse REJECT from PTY-echoed review output', async () => { - const events: Array<{ type: string; decision?: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - events.push({ type: event.type, decision: event.decision }); - } - }); - - // Simulate PTY output that echoes the review prompt before the actual response - const echoedPrompt = - 'Return exactly:\nREVIEW_DECISION: APPROVE or REJECT\nREVIEW_REASON: \n'; - const actualResponse = 'REVIEW_DECISION: REJECT\nREVIEW_REASON: code has bugs\n'; - mockSpawnOutputs = ['worker finished\n', 'STEP_COMPLETE:step-1\n', echoedPrompt + actualResponse]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review rejected'); - // Should parse REJECT from actual response, not APPROVE from echoed instruction - expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' }); - }, 15000); - - it('should resolve variables during execution', async () => { - const config = makeConfig(); - config.workflows![0].steps[0].task = 'Build {{feature}}'; - const run = await runner.execute(config, 'default', { feature: 'auth' }); - expect(run.status, run.error).toBe('completed'); - }); - - it('repairs a failed deterministic gate with a workflow agent before retrying', async () => { - const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-deterministic-repair-')); - const stepDir = path.join(tmpDir, 'step-cwd'); - mkdirSync(stepDir); - const artifactPath = path.join(stepDir, 'repaired.txt'); - const repairAgent = vi.fn(async (step) => { - writeFileSync(path.join(step.cwd!, 'repaired.txt'), 'fixed\n', 'utf-8'); - return 'wrote repaired.txt'; - }); - runner = new WorkflowRunner({ - db, - workspaceId: 'ws-test', - cwd: tmpDir, - executor: { - executeAgentStep: repairAgent, - }, - }); - - try { - const run = await runner.execute( - makeConfig({ - errorHandling: { strategy: 'retry', repairRetries: 1, retryDelayMs: 1 }, - agents: [{ name: 'fixer', cli: 'claude', role: 'implementation engineer' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'verify-artifact', - type: 'deterministic', - cwd: 'step-cwd', - command: `node -e "require('node:fs').accessSync('repaired.txt')"`, - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(repairAgent).toHaveBeenCalledTimes(1); - expect(repairAgent.mock.calls[0][0]).toMatchObject({ cwd: stepDir, workdir: undefined }); - expect(repairAgent.mock.calls[0][2]).toContain('A deterministic workflow gate failed'); - expect(repairAgent.mock.calls[0][2]).toContain('verify-artifact'); - expect(readFileSync(artifactPath, 'utf-8')).toBe('fixed\n'); - } finally { - rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('does not spawn deterministic repair agents unless repair retries are explicitly enabled', async () => { - const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-deterministic-no-implicit-repair-')); - const repairAgent = vi.fn(async () => 'unexpected repair'); - runner = new WorkflowRunner({ - db, - workspaceId: 'ws-test', - cwd: tmpDir, - executor: { - executeAgentStep: repairAgent, - }, - }); - - try { - const run = await runner.execute( - makeConfig({ - agents: [{ name: 'fixer', cli: 'claude', role: 'implementation engineer' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'verify-artifact', - type: 'deterministic', - command: `node -e "require('node:fs').accessSync('missing.txt')"`, - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(repairAgent).not.toHaveBeenCalled(); - } finally { - rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('should fail when owner response provides no decision, marker, or evidence', async () => { - mockSpawnOutputs = ['Owner completed work but forgot sentinel\n']; - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('owner completion decision missing'); - }); - - it('should run specialist work in a separate process and mirror worker output to the channel', async () => { - mockSpawnOutputs = [ - 'worker progress update\nworker finished\n', - 'Observed worker progress on the channel\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - - expect(run.status).toBe('completed'); - const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls; - expect(spawnCalls[0][0].name).toContain('step-1-worker'); - expect(spawnCalls[1][0].name).toContain('step-1-owner'); - expect(spawnCalls[0][0].task).not.toContain('STEP_COMPLETE:step-1'); - expect(spawnCalls[0][0].task).toContain('WORKER COMPLETION CONTRACT'); - expect(spawnCalls[0][0].task).toContain('WORKER_DONE: '); - expect(spawnCalls[1][0].task).toContain('You are the step owner/supervisor for step "step-1".'); - expect(spawnCalls[1][0].task).toContain('runtime: step-1-worker'); - expect(spawnCalls[1][0].task).toContain('LEAD_DONE: '); - - const channelMessages = (mockRelaycastAgent.send as any).mock.calls.map( - ([, text]: [string, string]) => text - ); - expect(channelMessages.some((text: string) => text.includes('Worker `step-1-worker'))).toBe(true); - expect(channelMessages.some((text: string) => text.includes('worker finished'))).toBe(true); - }); - - it('should apply verification fallback for self-owned interactive steps', async () => { - mockSpawnOutputs = ['LEAD_DONE\n', 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: verified\n']; - - const run = await runner.execute( - makeConfig({ - agents: [{ name: 'team-lead', cli: 'claude', role: 'Lead coordinator' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'lead-step', - agent: 'team-lead', - task: 'Output exactly:\nLEAD_DONE\n/exit', - verification: { type: 'exit_code', value: 0 }, - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - const steps = await db.getStepsByRunId(run.id); - expect(steps[0]?.completionReason).toBe('completed_verified'); - }); - - it('should keep explicit interactive workers self-owned without extra supervisor/reviewer spawns', async () => { - const ownerAssignments: Array<{ owner: string; specialist: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-assigned') { - ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName }); - } - }); - - mockSpawnOutputs = ['STEP_COMPLETE:worker-step\nWORKER_DONE_LOCAL\n']; - - const run = await runner.execute( - makeConfig({ - agents: [ - { name: 'team-lead', cli: 'claude', role: 'Lead coordinator', preset: 'lead' }, - { name: 'relay-worker', cli: 'codex', preset: 'worker', interactive: true }, - ], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'worker-step', - agent: 'relay-worker', - task: 'Output exactly:\nWORKER_DONE_LOCAL\n/exit', - verification: { type: 'output_contains', value: 'WORKER_DONE_LOCAL' }, - }, - ], - }, - ], - }), - 'default' - ); - - expect(ownerAssignments).toContainEqual({ owner: 'relay-worker', specialist: 'relay-worker' }); - expect(run.status, run.error).toBe('completed'); - - const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls; - expect(spawnCalls).toHaveLength(1); - expect(spawnCalls[0][0].task).toContain('STEP OWNER CONTRACT'); - expect(spawnCalls[0][0].name).not.toContain('-owner-'); - expect(spawnCalls[0][0].name).not.toContain('-review-'); - }); - - it('should spill oversized interactive tasks to a temp file before PTY spawn', async () => { - const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-pty-task-')); - const oversizedBytes = WorkflowRunner.PTY_TASK_ARG_SIZE_LIMIT + 1024; - let spawnedTask = ''; - let taskFilePath = ''; - let taskFileContents = ''; - runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir }); - - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - spawnedTask = task ?? ''; - const match = spawnedTask.match(/TASK_FILE:(.+)\n/); - if (match) { - taskFilePath = match[1].trim(); - taskFileContents = readFileSync(taskFilePath, 'utf-8'); - } - - const output = mockSpawnOutputs.shift() ?? 'LEAD_DONE\n'; - queueMicrotask(() => { - emitMockEvent('workerOutput', { name, chunk: output }); - }); - - return { ...mockAgent, name }; - } - ); - - try { - mockSpawnOutputs = ['LEAD_DONE\n']; - - const run = await runner.execute( - makeConfig({ - agents: [{ name: 'team-lead', cli: 'claude', role: 'Lead coordinator' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'prepare', - type: 'deterministic', - command: `node -e "process.stdout.write('A'.repeat(${oversizedBytes}))"`, - }, - { - name: 'lead-step', - agent: 'team-lead', - dependsOn: ['prepare'], - task: 'Review the injected context below and then print LEAD_DONE:\n{{steps.prepare.output}}\n/exit', - verification: { type: 'exit_code', value: 0 }, - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(spawnedTask).toContain('TASK_FILE:'); - expect(spawnedTask).not.toContain('{{steps.prepare.output}}'); - expect(Buffer.byteLength(spawnedTask, 'utf8')).toBeLessThan(2048); - expect(taskFilePath).toBeTruthy(); - expect(Buffer.byteLength(taskFileContents, 'utf8')).toBeGreaterThan( - WorkflowRunner.PTY_TASK_ARG_SIZE_LIMIT - ); - expect(taskFileContents).toContain('Review the injected context below'); - expect(existsSync(taskFilePath)).toBe(false); - } finally { - rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('should pass canonical bypass args to interactive codex PTY spawns', async () => { - mockSpawnOutputs = ['LEAD_DONE\n', 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: verified\n']; - - const run = await runner.execute( - makeConfig({ - agents: [{ name: 'lead', cli: 'codex', role: 'Lead coordinator' }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'lead-step', - agent: 'lead', - task: 'Output exactly:\nLEAD_DONE\n/exit', - verification: { type: 'exit_code', value: 0 }, - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls; - expect(spawnCalls[0][0].args).toEqual(['--dangerously-bypass-approvals-and-sandbox']); - }); - - it('should let the owner complete after checking file-based artifacts', async () => { - const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-owner-file-')); - const artifact = path.join(tmpDir, 'artifact.txt'); - writeFileSync(artifact, 'done\n', 'utf-8'); - runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir }); - - try { - mockSpawnOutputs = [ - 'worker wrote artifact\n', - 'Bash(git diff --stat)\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: artifact verified\n', - ]; - - const run = await runner.execute( - makeSupervisedConfig({ verification: { type: 'file_exists', value: 'artifact.txt' } }), - 'default' - ); - - expect(run.status).toBe('completed'); - const ownerTask = (mockRelayInstance.spawnPty as any).mock.calls[1][0].task as string; - expect(ownerTask).toContain('Verification gate: confirm the file exists at "artifact.txt"'); - } finally { - rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('should keep specialist output for chaining even when the owner signals later', async () => { - mockSpawnOutputs = [ - 'specialist deliverable\n', - 'Worker already exited; artifacts look correct\nSTEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: handoff is safe\n', - ]; - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('completed'); - - const stepRows = await db.getStepsByRunId(run.id); - expect(stepRows[0].output).toContain('specialist deliverable'); - expect(stepRows[0].output).not.toContain('Worker already exited; artifacts look correct'); - }); - - it('should fail when review response lacks any usable decision signal', async () => { - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'I need more context before deciding.\n', - ]; - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review response malformed'); - }); - - it('should fail when review explicitly rejects step output', async () => { - const events: Array<{ type: string; decision?: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - events.push({ - type: event.type, - decision: event.decision, - }); - } - }); - - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: REJECT\nREVIEW_REASON: missing checks\n', - ]; - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review rejected'); - expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' }); - }); - - it('should parse final review decision when PTY output echoes review instructions', async () => { - const events: Array<{ type: string; decision?: string }> = []; - runner.on((event) => { - if (event.type === 'step:review-completed') { - events.push({ - type: event.type, - decision: event.decision, - }); - } - }); - - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'Return exactly:\nREVIEW_DECISION: APPROVE or REJECT\nREVIEW_REASON: \nREVIEW_DECISION: REJECT\nREVIEW_REASON: insufficient evidence\n', - ]; - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('review rejected'); - expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' }); - }); - - it('should record review completion in trajectory with decision and reason', async () => { - const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-review-traj-')); - runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir }); - - try { - mockSpawnOutputs = [ - 'worker finished\n', - 'STEP_COMPLETE:step-1\n', - 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: durable review record\n', - ]; - - const config = makeSupervisedConfig(); - config.trajectories = {}; - const run = await runner.execute(config, 'default'); - expect(run.status).toBe('completed'); - - const trajectory = readCompletedTrajectoryFile(tmpDir); - const events = trajectory.chapters.flatMap((chapter: any) => chapter.events); - const reviewEvent = events.find((event: any) => event.type === 'review-completed'); - - expect(reviewEvent).toBeTruthy(); - expect(reviewEvent.raw).toMatchObject({ - stepName: 'step-1', - reviewer: 'reviewer-1', - decision: 'approved', - reason: 'durable review record', - }); - } finally { - rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('should not double release the worker when the owner fails after worker completion', async () => { - const workerRelease = vi.fn().mockResolvedValue(undefined); - const ownerRelease = vi.fn().mockResolvedValue(undefined); - - mockRelayInstance.spawnPty.mockImplementation( - async ({ name, task }: { name: string; task?: string }) => { - const isOwner = name.includes('-owner-'); - const output = isOwner ? 'owner checking\n' : 'worker finished\n'; - - queueMicrotask(() => { - emitMockEvent('workerOutput', { name, chunk: output }); - }); - - if (isOwner) { - return { - name, - waitForExit: vi.fn().mockImplementation(async () => { - await Promise.resolve(); - return 'timeout'; - }), - waitForIdle: vi.fn().mockResolvedValue('timeout'), - release: ownerRelease, - }; - } - - return { - name, - waitForExit: vi.fn().mockImplementation(async () => { - await workerRelease(); - return 'released'; - }), - waitForIdle: vi.fn().mockImplementation(() => never()), - release: workerRelease, - }; - } - ); - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('owner timed out'); - expect(workerRelease).toHaveBeenCalledTimes(1); - expect(ownerRelease).toHaveBeenCalledTimes(1); - }); - - it('should emit owner-timeout when owner times out', async () => { - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-timeout') { - events.push({ - type: event.type, - stepName: event.stepName, - }); - } - }); - - waitForExitFn = vi.fn().mockResolvedValue('timeout'); - waitForIdleFn = vi.fn().mockResolvedValue('timeout'); - - const run = await runner.execute(makeConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('timed out'); - expect(events).toContainEqual({ type: 'step:owner-timeout', stepName: 'step-1' }); - }); - - it('should emit owner-timeout for a dedicated supervisor when the worker is stuck', async () => { - const events: Array<{ type: string; stepName?: string }> = []; - runner.on((event) => { - if (event.type === 'step:owner-timeout') { - events.push({ type: event.type, stepName: event.stepName }); - } - }); - - waitForExitFn = vi.fn().mockResolvedValue('timeout'); - waitForIdleFn = vi.fn().mockResolvedValue('timeout'); - - const run = await runner.execute(makeSupervisedConfig(), 'default'); - expect(run.status).toBe('failed'); - expect(run.error).toContain('owner timed out'); - expect(events).toContainEqual({ type: 'step:owner-timeout', stepName: 'step-1' }); - }); - - it('should preserve self-completion when no dedicated owner is available', async () => { - mockSpawnOutputs = ['STEP_COMPLETE:step-1\n', 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n']; - - const config = makeConfig({ - agents: [ - { name: 'specialist', cli: 'claude', role: 'engineer' }, - { name: 'reviewer-1', cli: 'claude', role: 'reviewer' }, - ], - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }], - }, - ], - }); - - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls; - expect(spawnCalls[0][0].name).toContain('step-1-'); - expect(spawnCalls[0][0].name).not.toContain('worker'); - expect(spawnCalls[0][0].task).toContain('STEP OWNER CONTRACT'); - expect(spawnCalls[0][0].task).toContain('STEP_COMPLETE:step-1'); - }); - - it('should use the full remaining timeout as the review safety backstop', async () => { - const config = makeSupervisedConfig({ timeoutMs: 90_000 }); - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - const waitCalls = (waitForExitFn as any).mock?.calls ?? []; - expect(waitCalls.length).toBeGreaterThanOrEqual(2); - const reviewWaitMs = waitCalls[waitCalls.length - 1][0]; - expect(reviewWaitMs).toBeGreaterThan(60_000); - expect(reviewWaitMs).toBeLessThanOrEqual(90_000); - }); - }); - - // ── Event subscription ───────────────────────────────────────────────── - - describe('on / event subscription', () => { - it('should return unsubscribe function', async () => { - const events: string[] = []; - const unsub = runner.on((event) => events.push(event.type)); - - await runner.execute(makeConfig(), 'default'); - const count = events.length; - - unsub(); - // Events after unsubscribe are not captured (no second execute needed to prove this, - // just verify the unsub function works without error) - expect(count).toBeGreaterThan(0); - }); - }); - - // ── Pause / abort ────────────────────────────────────────────────────── - - describe('pause and abort', () => { - it('should support pause/unpause without error', () => { - expect(() => runner.pause()).not.toThrow(); - expect(() => runner.unpause()).not.toThrow(); - }); - - it('should support abort without error', () => { - expect(() => runner.abort()).not.toThrow(); - }); - }); - - // ── Resume ───────────────────────────────────────────────────────────── - - describe('resume', () => { - it('should throw when run not found', async () => { - await expect(runner.resume('nonexistent')).rejects.toThrow('Run "nonexistent" not found'); - }); - }); - - // ── Non-interactive command builder ──────────────────────────────────── - - describe('buildNonInteractiveCommand', () => { - it('should build claude command with -p flag', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('claude', 'Do the thing'); - expect(cmd).toBe('claude'); - expect(args).toEqual(['-p', '--dangerously-skip-permissions', 'Do the thing']); - }); - - it('should build codex command with exec subcommand and bypass flag', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('codex', 'Build it'); - expect(cmd).toBe('codex'); - expect(args).toEqual(['exec', '--dangerously-bypass-approvals-and-sandbox', 'Build it']); - }); - - it('should build gemini command with -p flag', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('gemini', 'Analyze'); - expect(cmd).toBe('gemini'); - expect(args).toEqual(['-p', 'Analyze']); - }); - - it('should build opencode command with run subcommand', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('opencode', 'Fix bug'); - expect(cmd).toBe('opencode'); - expect(args).toEqual(['run', 'Fix bug']); - }); - - it('should build droid command with exec subcommand', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('droid', 'Deploy'); - expect(cmd).toBe('droid'); - expect(args).toEqual(['exec', 'Deploy']); - }); - - it('should build aider command with --message and safety flags', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('aider', 'Refactor'); - expect(cmd).toBe('aider'); - expect(args).toEqual(['--message', 'Refactor', '--yes-always', '--no-git']); - }); - - it('should build goose command with run subcommand', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('goose', 'Test it'); - expect(cmd).toBe('goose'); - expect(args).toEqual(['run', '--text', 'Test it', '--no-session']); - }); - - it('should append extra args after CLI-specific args', () => { - const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand('claude', 'Task', ['--model', 'opus']); - expect(cmd).toBe('claude'); - expect(args).toEqual(['-p', '--dangerously-skip-permissions', 'Task', '--model', 'opus']); - }); - }); - - // ── Dry run ───────────────────────────────────────────────────────────── - - describe('dryRun', () => { - it('should compute correct waves for a simple DAG', () => { - const config = makeConfig(); - const report = runner.dryRun(config); - - expect(report.valid).toBe(true); - expect(report.errors).toHaveLength(0); - expect(report.totalSteps).toBe(2); - expect(report.estimatedWaves).toBe(2); - expect(report.waves[0].wave).toBe(1); - expect(report.waves[0].steps).toHaveLength(1); - expect(report.waves[0].steps[0].name).toBe('step-1'); - expect(report.waves[1].wave).toBe(2); - expect(report.waves[1].steps).toHaveLength(1); - expect(report.waves[1].steps[0].name).toBe('step-2'); - }); - - it('should compute parallel steps in the same wave', () => { - const config = makeConfig({ - workflows: [ - { - name: 'parallel', - steps: [ - { name: 'a', agent: 'agent-a', task: 'Do A' }, - { name: 'b', agent: 'agent-b', task: 'Do B' }, - { name: 'c', agent: 'agent-a', task: 'Do C', dependsOn: ['a', 'b'] }, - ], - }, - ], - }); - - const report = runner.dryRun(config, 'parallel'); - - expect(report.valid).toBe(true); - expect(report.estimatedWaves).toBe(2); - expect(report.waves[0].steps).toHaveLength(2); - expect(report.waves[0].steps.map((s) => s.name).sort()).toEqual(['a', 'b']); - expect(report.waves[1].steps).toHaveLength(1); - expect(report.waves[1].steps[0].name).toBe('c'); - }); - - it('should report agent step counts', () => { - const config = makeConfig(); - const report = runner.dryRun(config); - - const agentA = report.agents.find((a) => a.name === 'agent-a'); - const agentB = report.agents.find((a) => a.name === 'agent-b'); - expect(agentA?.stepCount).toBe(1); - expect(agentB?.stepCount).toBe(1); - }); - - it('should include resolved permissions without provisioning tokens', () => { - const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'workflow-dry-run-perms-')); - try { - writeFileSync(path.join(tmpDir, 'readme.md'), '# readme\n'); - writeFileSync(path.join(tmpDir, 'notes.txt'), 'notes\n'); - writeFileSync(path.join(tmpDir, '.agentreadonly'), 'readme.md\n'); - - const dryRunRunner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir }); - const report = dryRunRunner.dryRun( - makeConfig({ - agents: [ - { - name: 'agent-a', - cli: 'claude', - permissions: { - access: 'readonly', - files: { - write: ['notes.txt'], - }, - scopes: ['relay:custom'], - }, - }, - { name: 'agent-b', cli: 'claude' }, - ], - }) - ); - - const agentA = report.permissions?.find((entry) => entry.agent === 'agent-a'); - const agentB = report.permissions?.find((entry) => entry.agent === 'agent-b'); - - expect(agentA?.agent).toBe('agent-a'); - expect(agentA?.access).toBe('readonly'); - expect(agentA?.writePaths).toBe(1); - expect(agentA?.denyPaths).toBe(0); - expect(agentA?.readPaths).toBeGreaterThanOrEqual(1); - expect(agentA?.source).toBe('yaml'); - expect(agentA?.scopes).toBeGreaterThan(1); - - expect(agentB).toMatchObject({ - agent: 'agent-b', - access: 'readwrite', - source: 'preset', - }); - } finally { - rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('should warn when step references unknown agent', () => { - const config = makeConfig({ - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'nonexistent', task: 'Do stuff' }], - }, - ], - }); - - const report = runner.dryRun(config); - - expect(report.valid).toBe(true); - expect(report.warnings.some((w) => w.includes('nonexistent'))).toBe(true); - }); - - it('should warn when wave exceeds maxConcurrency', () => { - const config = makeConfig({ - swarm: { pattern: 'dag', maxConcurrency: 1 }, - workflows: [ - { - name: 'default', - steps: [ - { name: 'a', agent: 'agent-a', task: 'Do A' }, - { name: 'b', agent: 'agent-b', task: 'Do B' }, - ], - }, - ], - }); - - const report = runner.dryRun(config); - - expect(report.valid).toBe(true); - expect(report.warnings.some((w) => w.includes('maxConcurrency'))).toBe(true); - }); - - it('should return errors for invalid config', () => { - const report = runner.dryRun({} as any); - - expect(report.valid).toBe(false); - expect(report.errors.length).toBeGreaterThan(0); - }); - - it('should return error when workflow not found', () => { - const config = makeConfig(); - const report = runner.dryRun(config, 'nonexistent'); - - expect(report.valid).toBe(false); - expect(report.errors[0]).toContain('nonexistent'); - }); - }); -}); diff --git a/packages/sdk/src/__tests__/workflow-trajectory.test.ts b/packages/sdk/src/__tests__/workflow-trajectory.test.ts deleted file mode 100644 index 061ea12cc..000000000 --- a/packages/sdk/src/__tests__/workflow-trajectory.test.ts +++ /dev/null @@ -1,466 +0,0 @@ -/** - * WorkflowTrajectory unit tests. - * - * Tests trajectory recording, chapter management, reflections, decisions, - * confidence computation, and the disabled/enabled toggle. - */ - -import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; -import { existsSync, readFileSync, readdirSync, rmSync, mkdirSync } from 'node:fs'; -import path from 'node:path'; -import os from 'node:os'; -import { WorkflowTrajectory, type StepOutcome } from '../workflows/trajectory.js'; - -// ── Test helpers ───────────────────────────────────────────────────────────── - -let tmpDir: string; -let originalTrajectoriesDataDir: string | undefined; -let originalWorkflowId: string | undefined; - -function makeTmpDir(): string { - const dir = path.join(os.tmpdir(), `wf-traj-test-${Date.now()}-${Math.random().toString(36).slice(2)}`); - mkdirSync(dir, { recursive: true }); - return dir; -} - -function findFirstJsonFile(dir: string): string | null { - if (!existsSync(dir)) return null; - for (const entry of readdirSync(dir, { withFileTypes: true })) { - const entryPath = path.join(dir, entry.name); - if (entry.isDirectory()) { - const nested = findFirstJsonFile(entryPath); - if (nested) return nested; - } - if (entry.isFile() && entry.name.endsWith('.json')) return entryPath; - } - return null; -} - -function readTrajectoryFile(dir: string): any { - const file = findFirstJsonFile(path.join(dir, '.trajectories', 'active')); - return file ? JSON.parse(readFileSync(file, 'utf-8')) : null; -} - -function readCompletedTrajectoryFile(dir: string): any { - const file = findFirstJsonFile(path.join(dir, '.trajectories', 'completed')); - return file ? JSON.parse(readFileSync(file, 'utf-8')) : null; -} - -function readTrajectoryFileAt(dataDir: string, status: 'active' | 'completed'): any { - const file = findFirstJsonFile(path.join(dataDir, status)); - return file ? JSON.parse(readFileSync(file, 'utf-8')) : null; -} - -// ── Tests ──────────────────────────────────────────────────────────────────── - -describe('WorkflowTrajectory', () => { - beforeEach(() => { - originalTrajectoriesDataDir = process.env.TRAJECTORIES_DATA_DIR; - originalWorkflowId = process.env.TRAJECTORIES_WORKFLOW_ID; - delete process.env.TRAJECTORIES_DATA_DIR; - delete process.env.TRAJECTORIES_WORKFLOW_ID; - tmpDir = makeTmpDir(); - }); - - afterEach(() => { - if (originalTrajectoriesDataDir === undefined) { - delete process.env.TRAJECTORIES_DATA_DIR; - } else { - process.env.TRAJECTORIES_DATA_DIR = originalTrajectoriesDataDir; - } - if (originalWorkflowId === undefined) { - delete process.env.TRAJECTORIES_WORKFLOW_ID; - } else { - process.env.TRAJECTORIES_WORKFLOW_ID = originalWorkflowId; - } - try { - rmSync(tmpDir, { recursive: true, force: true }); - } catch { - // cleanup best-effort - } - }); - - // ── Disabled mode ────────────────────────────────────────────────────── - - describe('disabled', () => { - it('should not create files when trajectories is false', async () => { - const traj = new WorkflowTrajectory(false, 'run-1', tmpDir); - await traj.start('test-workflow', 3); - - expect(traj.isEnabled()).toBe(false); - expect(traj.getTrajectoryId()).toBeNull(); - expect(existsSync(path.join(tmpDir, '.trajectories'))).toBe(false); - }); - - it('should not create files when enabled is false', async () => { - const traj = new WorkflowTrajectory({ enabled: false }, 'run-1', tmpDir); - await traj.start('test-workflow', 3); - - expect(traj.isEnabled()).toBe(false); - }); - - it('should be enabled by default', () => { - const traj = new WorkflowTrajectory(undefined, 'run-1', tmpDir); - expect(traj.isEnabled()).toBe(true); - }); - }); - - // ── Lifecycle ────────────────────────────────────────────────────────── - - describe('lifecycle', () => { - it('should create a trajectory file on start', async () => { - const traj = new WorkflowTrajectory({}, 'run-abc', tmpDir); - await traj.start('my-workflow', 5); - - expect(traj.getTrajectoryId()).toBeTruthy(); - expect(traj.getTrajectoryId()!.startsWith('traj_')).toBe(true); - - const data = readTrajectoryFile(tmpDir); - expect(data).toBeTruthy(); - expect(data.status).toBe('active'); - expect(data.task.title).toContain('my-workflow'); - expect(data.agents).toHaveLength(1); - expect(data.agents[0].name).toBe('orchestrator'); - expect(data.commits).toEqual([]); - expect(data.filesChanged).toEqual([]); - expect(data.tags).toEqual([]); - }); - - it('should create Planning chapter on start', async () => { - const traj = new WorkflowTrajectory({}, 'run-abc', tmpDir); - await traj.start('my-workflow', 3, '3 parallel tracks, 2 barriers'); - - const data = readTrajectoryFile(tmpDir); - expect(data.chapters).toHaveLength(1); - expect(data.chapters[0].title).toBe('Planning'); - expect(data.chapters[0].events.length).toBeGreaterThanOrEqual(1); - }); - - it('should complete trajectory and move to completed dir', async () => { - const traj = new WorkflowTrajectory({}, 'run-abc', tmpDir); - await traj.start('my-workflow', 2); - await traj.complete('All done', 0.95); - - const active = readTrajectoryFile(tmpDir); - expect(active).toBeNull(); // Moved out of active - - const completed = readCompletedTrajectoryFile(tmpDir); - expect(completed).toBeTruthy(); - expect(completed.status).toBe('completed'); - expect(completed.retrospective.summary).toBe('All done'); - expect(completed.retrospective.confidence).toBe(0.95); - }); - - it('should abandon trajectory and move to completed dir', async () => { - const traj = new WorkflowTrajectory({}, 'run-abc', tmpDir); - await traj.start('my-workflow', 2); - await traj.abandon('Something went wrong'); - - const completed = readCompletedTrajectoryFile(tmpDir); - expect(completed).toBeTruthy(); - expect(completed.status).toBe('abandoned'); - }); - - it('should stamp workflowId from TRAJECTORIES_WORKFLOW_ID on start', async () => { - process.env.TRAJECTORIES_WORKFLOW_ID = 'wf-env-123'; - const traj = new WorkflowTrajectory({}, 'run-abc', tmpDir); - await traj.start('my-workflow', 1); - - const data = readTrajectoryFile(tmpDir); - expect(data.workflowId).toBe('wf-env-123'); - }); - - it('should write to TRAJECTORIES_DATA_DIR when set', async () => { - const customDataDir = path.join(tmpDir, 'custom-root', '.trajectories'); - process.env.TRAJECTORIES_DATA_DIR = customDataDir; - - const traj = new WorkflowTrajectory({}, 'run-abc', tmpDir); - await traj.start('my-workflow', 1); - - expect(readTrajectoryFile(tmpDir)).toBeNull(); - expect(readTrajectoryFileAt(customDataDir, 'active')).toBeTruthy(); - }); - }); - - // ── Step events ──────────────────────────────────────────────────────── - - describe('step events', () => { - it('should record step started', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 2); - await traj.stepStarted({ name: 'build', agent: 'builder', task: 'Build it' }, 'builder-agent'); - - const data = readTrajectoryFile(tmpDir); - expect(data.agents).toHaveLength(2); // orchestrator + builder-agent - const events = data.chapters.flatMap((c: any) => c.events); - expect(events.some((e: any) => e.content.includes('build'))).toBe(true); - }); - - it('should record step completed', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 1); - await traj.stepCompleted({ name: 'test', agent: 'tester', task: 'Run tests' }, 'All tests passing', 1); - - const data = readTrajectoryFile(tmpDir); - const events = data.chapters.flatMap((c: any) => c.events); - expect(events.some((e: any) => e.type === 'finding')).toBe(true); - }); - - it('should record step failed', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 1); - await traj.stepFailed( - { name: 'deploy', agent: 'deployer', task: 'Deploy' }, - 'Connection refused', - 1, - 3 - ); - - const data = readTrajectoryFile(tmpDir); - const events = data.chapters.flatMap((c: any) => c.events); - expect(events.some((e: any) => e.type === 'error')).toBe(true); - }); - - it('should record step skipped', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 2); - await traj.stepSkipped({ name: 'integration', agent: 'tester', task: 'Test' }, 'Upstream failed'); - - const data = readTrajectoryFile(tmpDir); - const events = data.chapters.flatMap((c: any) => c.events); - expect(events.some((e: any) => e.content.includes('skipped'))).toBe(true); - }); - }); - - // ── Chapters ─────────────────────────────────────────────────────────── - - describe('chapters', () => { - it('should create track chapters', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 3); - await traj.beginTrack('backend'); - - const data = readTrajectoryFile(tmpDir); - expect(data.chapters.length).toBeGreaterThanOrEqual(2); - expect(data.chapters.some((c: any) => c.title === 'Execution: backend')).toBe(true); - }); - - it('should create convergence chapters', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 3); - await traj.beginConvergence('all-tracks-done'); - - const data = readTrajectoryFile(tmpDir); - expect(data.chapters.some((c: any) => c.title === 'Convergence: all-tracks-done')).toBe(true); - }); - - it('should close previous chapter when opening new one', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 3); - await traj.beginTrack('track-a'); - await traj.beginTrack('track-b'); - - const data = readTrajectoryFile(tmpDir); - // Planning chapter should have endedAt - expect(data.chapters[0].endedAt).toBeTruthy(); - // First track chapter should have endedAt - expect(data.chapters[1].endedAt).toBeTruthy(); - }); - }); - - // ── Reflections ──────────────────────────────────────────────────────── - - describe('reflections', () => { - it('should record reflect events', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 2); - await traj.reflect('All parallel tracks complete', 0.85, ['step-a: completed', 'step-b: completed']); - - const data = readTrajectoryFile(tmpDir); - const events = data.chapters.flatMap((c: any) => c.events); - const reflection = events.find((e: any) => e.type === 'reflection'); - expect(reflection).toBeTruthy(); - expect(reflection.significance).toBe('high'); - expect(reflection.raw.confidence).toBe(0.85); - expect(reflection.raw.focalPoints).toHaveLength(2); - }); - - it('should synthesize and reflect at convergence', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 3); - - const outcomes: StepOutcome[] = [ - { name: 'step-a', agent: 'a', status: 'completed', attempts: 1 }, - { name: 'step-b', agent: 'b', status: 'completed', attempts: 2 }, - ]; - - await traj.synthesizeAndReflect('backend-ready', outcomes, ['step-c']); - - const data = readTrajectoryFile(tmpDir); - // Should have a convergence chapter - expect(data.chapters.some((c: any) => c.title.includes('Convergence'))).toBe(true); - const events = data.chapters.flatMap((c: any) => c.events); - const reflection = events.find((e: any) => e.type === 'reflection'); - expect(reflection).toBeTruthy(); - expect(reflection.content).toContain('backend-ready'); - expect(reflection.content).toContain('step-b'); // retried - }); - }); - - // ── Decisions ────────────────────────────────────────────────────────── - - describe('decisions', () => { - it('should record decisions', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 1); - await traj.decide('How to handle failure', 'retry', 'Transient error detected'); - - const data = readTrajectoryFile(tmpDir); - const events = data.chapters.flatMap((c: any) => c.events); - const decision = events.find((e: any) => e.type === 'decision'); - expect(decision).toBeTruthy(); - expect(decision.raw.chosen).toBe('retry'); - }); - - it('should skip decisions when autoDecisions is false', async () => { - const traj = new WorkflowTrajectory({ autoDecisions: false }, 'run-1', tmpDir); - await traj.start('wf', 1); - await traj.decide('How to handle failure', 'retry', 'Transient error'); - - const data = readTrajectoryFile(tmpDir); - const events = data.chapters.flatMap((c: any) => c.events); - expect(events.filter((e: any) => e.type === 'decision')).toHaveLength(0); - }); - }); - - // ── Confidence computation ───────────────────────────────────────────── - - describe('computeConfidence', () => { - it('should return 1.0 for all first-attempt verified completions', () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - const outcomes: StepOutcome[] = [ - { name: 'a', agent: 'a', status: 'completed', attempts: 1, verificationPassed: true }, - { name: 'b', agent: 'b', status: 'completed', attempts: 1, verificationPassed: true }, - ]; - expect(traj.computeConfidence(outcomes)).toBe(1.0); - }); - - it('should return lower confidence for retried steps', () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - const outcomes: StepOutcome[] = [ - { name: 'a', agent: 'a', status: 'completed', attempts: 1, verificationPassed: true }, - { name: 'b', agent: 'b', status: 'completed', attempts: 3, verificationPassed: true }, - ]; - const confidence = traj.computeConfidence(outcomes); - expect(confidence).toBeLessThan(1.0); - expect(confidence).toBeGreaterThan(0.5); - }); - - it('should return lower confidence for failed steps', () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - const outcomes: StepOutcome[] = [ - { name: 'a', agent: 'a', status: 'completed', attempts: 1 }, - { name: 'b', agent: 'b', status: 'failed', attempts: 3 }, - ]; - const confidence = traj.computeConfidence(outcomes); - expect(confidence).toBeLessThan(0.5); - }); - - it('should return 0.7 for empty outcomes', () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - expect(traj.computeConfidence([])).toBe(0.7); - }); - }); - - // ── Synthesis helpers ────────────────────────────────────────────────── - - describe('buildSynthesis', () => { - it('should produce meaningful synthesis text', () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - const outcomes: StepOutcome[] = [ - { name: 'step-a', agent: 'a', status: 'completed', attempts: 1 }, - { name: 'step-b', agent: 'b', status: 'completed', attempts: 2 }, - { name: 'step-c', agent: 'c', status: 'failed', attempts: 3, error: 'timeout' }, - ]; - - const synthesis = traj.buildSynthesis('barrier-1', outcomes, ['step-d']); - expect(synthesis).toContain('barrier-1'); - expect(synthesis).toContain('2/3 steps completed'); - expect(synthesis).toContain('step-c'); // failed - expect(synthesis).toContain('step-b'); // retried - expect(synthesis).toContain('step-d'); // unblocked - }); - - it('should note all-first-attempt when no retries', () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - const outcomes: StepOutcome[] = [ - { name: 'a', agent: 'a', status: 'completed', attempts: 1 }, - { name: 'b', agent: 'b', status: 'completed', attempts: 1 }, - ]; - - const synthesis = traj.buildSynthesis('done', outcomes); - expect(synthesis).toContain('All steps completed on first attempt'); - }); - }); - - describe('buildRunSummary', () => { - it('should produce run summary with stats', () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - const outcomes: StepOutcome[] = [ - { name: 'a', agent: 'a', status: 'completed', attempts: 1 }, - { name: 'b', agent: 'b', status: 'completed', attempts: 2 }, - { name: 'c', agent: 'c', status: 'failed', attempts: 3 }, - { name: 'd', agent: 'd', status: 'skipped', attempts: 1 }, - ]; - - const summary = traj.buildRunSummary(outcomes); - expect(summary).toContain('Failed at "c"'); - expect(summary).toContain('2/4 steps completed before failure'); - expect(summary).toContain('downstream step(s) to be skipped'); - }); - }); - - // ── Non-blocking behavior ────────────────────────────────────────────── - - describe('non-blocking', () => { - it('should not throw on flush errors', async () => { - // Use a path that will fail (read-only or invalid) - const traj = new WorkflowTrajectory({}, 'run-1', '/dev/null/impossible-path'); - // Should not throw - await expect(traj.start('wf', 1)).resolves.not.toThrow(); - }); - - it('should handle all methods gracefully when not started', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - // Don't call start — all methods should be no-ops - await expect(traj.stepStarted({ name: 'x', agent: 'a', task: 't' }, 'a')).resolves.not.toThrow(); - await expect(traj.reflect('test', 0.5)).resolves.not.toThrow(); - await expect(traj.decide('q', 'c', 'r')).resolves.not.toThrow(); - await expect(traj.complete('done', 0.9)).resolves.not.toThrow(); - }); - - it('should save once when completing', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 1); - - const save = vi.fn().mockResolvedValue(undefined); - (traj as any).storage = { initialize: vi.fn().mockResolvedValue(undefined), save }; - (traj as any).storageInit = Promise.resolve(); - - await traj.complete('done', 0.9); - expect(save).toHaveBeenCalledTimes(1); - }); - - it('should save once when abandoning', async () => { - const traj = new WorkflowTrajectory({}, 'run-1', tmpDir); - await traj.start('wf', 1); - - const save = vi.fn().mockResolvedValue(undefined); - (traj as any).storage = { initialize: vi.fn().mockResolvedValue(undefined), save }; - (traj as any).storageInit = Promise.resolve(); - - await traj.abandon('nope'); - expect(save).toHaveBeenCalledTimes(1); - }); - }); -}); diff --git a/packages/sdk/src/__tests__/yaml-validation.test.ts b/packages/sdk/src/__tests__/yaml-validation.test.ts deleted file mode 100644 index 6fecd8ccc..000000000 --- a/packages/sdk/src/__tests__/yaml-validation.test.ts +++ /dev/null @@ -1,916 +0,0 @@ -/** - * YAML Workflow Template Validation Tests - * - * Tests that all built-in workflow templates are valid, parse correctly, - * and have correct structure. Also tests error handling for invalid YAML. - * - * These tests are CI-friendly (no CLI or API keys needed). - */ -import { describe, it, expect, beforeEach } from 'vitest'; -import { promises as fs } from 'node:fs'; -import path from 'node:path'; -import { fileURLToPath } from 'node:url'; -import { parse as parseYaml } from 'yaml'; -import { TemplateRegistry, BUILT_IN_TEMPLATE_NAMES } from '../workflows/templates.js'; -import { SwarmCoordinator } from '../workflows/coordinator.js'; -import type { - RelayYamlConfig, - SwarmPattern, - WorkflowStep, - CustomStepDefinition, -} from '../workflows/types.js'; -import { isDeterministicStep, isWorktreeStep, isAgentStep, isCustomStep } from '../workflows/types.js'; -import { - resolveCustomStep, - resolveAllCustomSteps, - validateCustomStepsUsage, - CustomStepsParseError, - CustomStepResolutionError, -} from '../workflows/custom-steps.js'; - -const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const TEMPLATES_DIR = path.resolve(__dirname, '../workflows/builtin-templates'); - -// Mock DB for coordinator tests -const mockDb = { - query: async () => ({ rows: [] }), -}; - -describe('YAML Template Validation', () => { - let registry: TemplateRegistry; - let coordinator: SwarmCoordinator; - - beforeEach(() => { - registry = new TemplateRegistry({ builtInTemplatesDir: TEMPLATES_DIR }); - coordinator = new SwarmCoordinator(mockDb as any); - }); - - // ── Built-in Template Registration ───────────────────────────────────────── - - describe('Built-in Template Registration', () => { - it('should have all expected built-in templates registered', () => { - const templates = registry.listBuiltInTemplates(); - expect(templates).toContain('feature-dev'); - expect(templates).toContain('bug-fix'); - expect(templates).toContain('code-review'); - expect(templates).toContain('security-audit'); - expect(templates).toContain('refactor'); - expect(templates).toContain('documentation'); - expect(templates).toContain('review-loop'); - }); - - it('should have correct number of built-in templates', () => { - const templates = registry.listBuiltInTemplates(); - expect(templates.length).toBeGreaterThanOrEqual(7); - }); - }); - - // ── Individual Template Validation ───────────────────────────────────────── - - describe('Template Loading and Validation', () => { - for (const templateName of BUILT_IN_TEMPLATE_NAMES) { - describe(`${templateName} template`, () => { - it('should load successfully', async () => { - const config = await registry.loadTemplate(templateName); - expect(config).toBeDefined(); - expect(config.name).toBe(templateName); - }); - - it('should have required fields', async () => { - const config = await registry.loadTemplate(templateName); - expect(config.version).toBeDefined(); - expect(config.name).toBeDefined(); - expect(config.swarm).toBeDefined(); - expect(config.swarm.pattern).toBeDefined(); - expect(config.agents).toBeDefined(); - expect(config.agents.length).toBeGreaterThan(0); - }); - - it('should have valid swarm pattern', async () => { - const config = await registry.loadTemplate(templateName); - const validPatterns: SwarmPattern[] = [ - 'fan-out', - 'pipeline', - 'hub-spoke', - 'consensus', - 'mesh', - 'handoff', - 'cascade', - 'dag', - 'debate', - 'hierarchical', - 'map-reduce', - 'scatter-gather', - 'supervisor', - 'reflection', - 'red-team', - 'verifier', - 'auction', - 'escalation', - 'saga', - 'circuit-breaker', - 'blackboard', - 'swarm', - 'competitive', - 'review-loop', - ]; - expect(validPatterns).toContain(config.swarm.pattern); - }); - - it('should have valid agent definitions', async () => { - const config = await registry.loadTemplate(templateName); - for (const agent of config.agents) { - expect(agent.name).toBeDefined(); - expect(typeof agent.name).toBe('string'); - expect(agent.cli).toBeDefined(); - expect(['claude', 'codex', 'gemini', 'aider', 'goose', 'opencode', 'droid']).toContain(agent.cli); - } - }); - - it('should have unique agent names', async () => { - const config = await registry.loadTemplate(templateName); - const names = config.agents.map((a) => a.name); - const uniqueNames = new Set(names); - expect(uniqueNames.size).toBe(names.length); - }); - - it('should resolve topology without error', async () => { - const config = await registry.loadTemplate(templateName); - const topology = coordinator.resolveTopology(config); - expect(topology).toBeDefined(); - expect(topology.pattern).toBe(config.swarm.pattern); - expect(topology.agents).toEqual(config.agents); - expect(topology.edges).toBeInstanceOf(Map); - }); - }); - } - }); - - // ── Workflow Steps Validation ────────────────────────────────────────────── - - describe('Workflow Steps Validation', () => { - for (const templateName of BUILT_IN_TEMPLATE_NAMES) { - it(`${templateName}: workflow steps should be valid`, async () => { - const config = await registry.loadTemplate(templateName); - - if (config.workflows && config.workflows.length > 0) { - for (const workflow of config.workflows) { - expect(workflow.name).toBeDefined(); - expect(workflow.steps).toBeDefined(); - expect(Array.isArray(workflow.steps)).toBe(true); - - for (const step of workflow.steps) { - expect(step.name).toBeDefined(); - expect(typeof step.name).toBe('string'); - - // Agent steps require agent and task - if (step.type !== 'deterministic' && step.type !== 'worktree') { - expect(step.agent).toBeDefined(); - expect(step.task).toBeDefined(); - } - - // Deterministic steps require command - if (step.type === 'deterministic') { - expect(step.command).toBeDefined(); - } - - // Worktree steps require branch - if (step.type === 'worktree') { - expect(step.branch).toBeDefined(); - } - - // Check dependsOn is array if present - if (step.dependsOn) { - expect(Array.isArray(step.dependsOn)).toBe(true); - } - } - } - } - }); - - it(`${templateName}: step dependencies should reference existing steps`, async () => { - const config = await registry.loadTemplate(templateName); - - if (config.workflows && config.workflows.length > 0) { - for (const workflow of config.workflows) { - const stepNames = new Set(workflow.steps.map((s) => s.name)); - - for (const step of workflow.steps) { - if (step.dependsOn) { - for (const dep of step.dependsOn) { - expect(stepNames.has(dep)).toBe(true); - } - } - } - } - } - }); - - it(`${templateName}: step agents should reference existing agents`, async () => { - const config = await registry.loadTemplate(templateName); - const agentNames = new Set(config.agents.map((a) => a.name)); - - if (config.workflows && config.workflows.length > 0) { - for (const workflow of config.workflows) { - for (const step of workflow.steps) { - if (step.agent) { - expect(agentNames.has(step.agent)).toBe(true); - } - } - } - } - }); - } - }); - - // ── review-loop Template Specific Tests ──────────────────────────────────── - - describe('review-loop Template Specifics', () => { - it('should have implementer agent', async () => { - const config = await registry.loadTemplate('review-loop'); - const implementer = config.agents.find((a) => a.name.includes('implementer')); - expect(implementer).toBeDefined(); - }); - - it('should have multiple reviewer agents', async () => { - const config = await registry.loadTemplate('review-loop'); - const reviewers = config.agents.filter((a) => a.name.includes('reviewer')); - expect(reviewers.length).toBeGreaterThanOrEqual(2); - }); - - it('should have non-interactive reviewers', async () => { - const config = await registry.loadTemplate('review-loop'); - const reviewers = config.agents.filter((a) => a.name.includes('reviewer')); - for (const reviewer of reviewers) { - expect(reviewer.interactive).toBe(false); - } - }); - - it('should have deterministic git diff step', async () => { - const config = await registry.loadTemplate('review-loop'); - if (config.workflows && config.workflows.length > 0) { - const workflow = config.workflows[0]; - const diffStep = workflow.steps.find((s) => s.name === 'capture-diff'); - expect(diffStep).toBeDefined(); - expect(diffStep?.type).toBe('deterministic'); - expect(diffStep?.command).toContain('git diff'); - } - }); - - it('should have review steps depending on implement step', async () => { - const config = await registry.loadTemplate('review-loop'); - if (config.workflows && config.workflows.length > 0) { - const workflow = config.workflows[0]; - const reviewSteps = workflow.steps.filter((s) => s.name.startsWith('review-')); - expect(reviewSteps.length).toBeGreaterThan(0); - } - }); - - it('should have consolidate step depending on all reviews', async () => { - const config = await registry.loadTemplate('review-loop'); - if (config.workflows && config.workflows.length > 0) { - const workflow = config.workflows[0]; - const consolidateStep = workflow.steps.find((s) => s.name === 'consolidate'); - expect(consolidateStep).toBeDefined(); - expect(consolidateStep?.dependsOn).toBeDefined(); - expect(consolidateStep?.dependsOn?.length).toBeGreaterThanOrEqual(3); - } - }); - - it('should have address-feedback step', async () => { - const config = await registry.loadTemplate('review-loop'); - if (config.workflows && config.workflows.length > 0) { - const workflow = config.workflows[0]; - const addressStep = workflow.steps.find((s) => s.name === 'address-feedback'); - expect(addressStep).toBeDefined(); - expect(addressStep?.dependsOn).toContain('consolidate'); - } - }); - - it('should have coordination barriers', async () => { - const config = await registry.loadTemplate('review-loop'); - expect(config.coordination).toBeDefined(); - expect(config.coordination?.barriers).toBeDefined(); - expect(config.coordination?.barriers?.length).toBeGreaterThan(0); - }); - }); - - // ── Error Handling Tests ─────────────────────────────────────────────────── - - describe('Error Handling', () => { - it('should reject template with missing version', () => { - const invalidYaml = ` -name: test -swarm: - pattern: fan-out -agents: - - name: test - cli: claude -`; - const parsed = parseYaml(invalidYaml); - expect(() => (registry as any).validateRelayConfig(parsed, 'test')).toThrow(/version/); - }); - - it('should reject template with missing name', () => { - const invalidYaml = ` -version: "1.0" -swarm: - pattern: fan-out -agents: - - name: test - cli: claude -`; - const parsed = parseYaml(invalidYaml); - expect(() => (registry as any).validateRelayConfig(parsed, 'test')).toThrow(/name/); - }); - - it('should reject template with empty agents', () => { - const invalidYaml = ` -version: "1.0" -name: test -swarm: - pattern: fan-out -agents: [] -`; - const parsed = parseYaml(invalidYaml); - expect(() => (registry as any).validateRelayConfig(parsed, 'test')).toThrow(/agents/); - }); - - it('should reject template with invalid agent definition', () => { - const invalidYaml = ` -version: "1.0" -name: test -swarm: - pattern: fan-out -agents: - - name: test -`; - const parsed = parseYaml(invalidYaml); - expect(() => (registry as any).validateRelayConfig(parsed, 'test')).toThrow(/invalid agent/i); - }); - - it('should reject non-existent template', async () => { - await expect(registry.loadTemplate('non-existent-template')).rejects.toThrow(/not found/i); - }); - }); - - // ── Template Override Tests ──────────────────────────────────────────────── - - describe('Template Overrides', () => { - it('should apply simple override', async () => { - const config = await registry.loadTemplate('feature-dev', { - overrides: { description: 'Custom description' }, - }); - expect(config.description).toBe('Custom description'); - }); - - it('should apply nested override', async () => { - const config = await registry.loadTemplate('feature-dev', { - overrides: { 'swarm.maxConcurrency': 10 }, - }); - expect(config.swarm.maxConcurrency).toBe(10); - }); - - it('should apply agent override by index', async () => { - const config = await registry.loadTemplate('feature-dev', { - overrides: { 'agents[0].constraints.model': 'claude-opus' }, - }); - expect(config.agents[0].constraints?.model).toBe('claude-opus'); - }); - }); - - // ── DAG Validation Tests ─────────────────────────────────────────────────── - - describe('DAG Dependency Validation', () => { - for (const templateName of BUILT_IN_TEMPLATE_NAMES) { - it(`${templateName}: should not have circular dependencies`, async () => { - const config = await registry.loadTemplate(templateName); - - if (config.workflows && config.workflows.length > 0) { - for (const workflow of config.workflows) { - const deps = new Map(); - for (const step of workflow.steps) { - deps.set(step.name, step.dependsOn ?? []); - } - - // Check for cycles using DFS - const visited = new Set(); - const recursionStack = new Set(); - - const hasCycle = (node: string): boolean => { - if (recursionStack.has(node)) return true; - if (visited.has(node)) return false; - - visited.add(node); - recursionStack.add(node); - - for (const dep of deps.get(node) ?? []) { - if (hasCycle(dep)) return true; - } - - recursionStack.delete(node); - return false; - }; - - for (const step of workflow.steps) { - expect(hasCycle(step.name)).toBe(false); - } - } - } - }); - } - }); - - // ── Verification Check Tests ─────────────────────────────────────────────── - - describe('Verification Check Validation', () => { - for (const templateName of BUILT_IN_TEMPLATE_NAMES) { - it(`${templateName}: verification checks should be valid`, async () => { - const config = await registry.loadTemplate(templateName); - - if (config.workflows && config.workflows.length > 0) { - for (const workflow of config.workflows) { - for (const step of workflow.steps) { - if (step.verification) { - expect(['output_contains', 'exit_code', 'file_exists', 'custom']).toContain( - step.verification.type - ); - expect(step.verification.value).toBeDefined(); - } - } - } - } - }); - } - }); - - // ── Variable Substitution Tests ──────────────────────────────────────────── - - describe('Variable Substitution Patterns', () => { - for (const templateName of BUILT_IN_TEMPLATE_NAMES) { - it(`${templateName}: variable references should be valid`, async () => { - const config = await registry.loadTemplate(templateName); - - if (config.workflows && config.workflows.length > 0) { - for (const workflow of config.workflows) { - const stepNames = new Set(workflow.steps.map((s) => s.name)); - - for (const step of workflow.steps) { - if (step.task) { - // Check for {{steps.X.output}} references - const stepRefs = step.task.match(/\{\{steps\.([^.]+)\.output\}\}/g) ?? []; - for (const ref of stepRefs) { - const match = ref.match(/\{\{steps\.([^.]+)\.output\}\}/); - if (match) { - const referencedStep = match[1]; - // The referenced step should exist - expect(stepNames.has(referencedStep)).toBe(true); - } - } - } - } - } - } - }); - } - }); - - // ── Error Handling Configuration ─────────────────────────────────────────── - - describe('Error Handling Configuration', () => { - for (const templateName of BUILT_IN_TEMPLATE_NAMES) { - it(`${templateName}: error handling should be valid if present`, async () => { - const config = await registry.loadTemplate(templateName); - - if (config.errorHandling) { - expect(['fail-fast', 'continue', 'retry']).toContain(config.errorHandling.strategy); - - if (config.errorHandling.maxRetries !== undefined) { - expect(config.errorHandling.maxRetries).toBeGreaterThanOrEqual(0); - } - - if (config.errorHandling.retryDelayMs !== undefined) { - expect(config.errorHandling.retryDelayMs).toBeGreaterThanOrEqual(0); - } - } - }); - } - }); -}); - -// ── Step Type Guard Tests ─────────────────────────────────────────────────── - -describe('Step Type Guards', () => { - it('should identify deterministic steps', () => { - const step: WorkflowStep = { name: 'test', type: 'deterministic', command: 'echo hello' }; - expect(isDeterministicStep(step)).toBe(true); - expect(isWorktreeStep(step)).toBe(false); - expect(isAgentStep(step)).toBe(false); - }); - - it('should identify worktree steps', () => { - const step: WorkflowStep = { name: 'test', type: 'worktree', branch: 'feature/test' }; - expect(isDeterministicStep(step)).toBe(false); - expect(isWorktreeStep(step)).toBe(true); - expect(isAgentStep(step)).toBe(false); - }); - - it('should identify agent steps (explicit type)', () => { - const step: WorkflowStep = { name: 'test', type: 'agent', agent: 'dev', task: 'Do work' }; - expect(isDeterministicStep(step)).toBe(false); - expect(isWorktreeStep(step)).toBe(false); - expect(isAgentStep(step)).toBe(true); - }); - - it('should identify agent steps (implicit type)', () => { - const step: WorkflowStep = { name: 'test', agent: 'dev', task: 'Do work' }; - expect(isDeterministicStep(step)).toBe(false); - expect(isWorktreeStep(step)).toBe(false); - expect(isAgentStep(step)).toBe(true); - }); - - it('should identify custom steps', () => { - const step: WorkflowStep = { name: 'test', use: 'docker-build' }; - expect(isCustomStep(step)).toBe(true); - }); - - it('should not identify non-custom steps as custom', () => { - const agentStep: WorkflowStep = { name: 'test', agent: 'dev', task: 'Do work' }; - const deterministicStep: WorkflowStep = { name: 'test', type: 'deterministic', command: 'echo hello' }; - expect(isCustomStep(agentStep)).toBe(false); - expect(isCustomStep(deterministicStep)).toBe(false); - }); -}); - -// ── Custom Step Resolution Tests ──────────────────────────────────────────── - -describe('Custom Step Resolution', () => { - const customSteps = new Map([ - [ - 'docker-build', - { - params: [ - { name: 'image', required: true }, - { name: 'dockerfile', default: 'Dockerfile' }, - ], - command: 'docker build -t {{image}} -f {{dockerfile}} .', - captureOutput: true, - }, - ], - [ - 'setup-worktree', - { - type: 'worktree', - params: [{ name: 'branch', required: true }], - branch: '{{branch}}', - baseBranch: 'main', - createBranch: true, - }, - ], - ]); - - it('should resolve custom step with required param', () => { - const step = { name: 'build', use: 'docker-build', image: 'myapp:latest' } as WorkflowStep; - const resolved = resolveCustomStep(step, customSteps); - - expect(resolved.type).toBe('deterministic'); - expect(resolved.command).toBe('docker build -t myapp:latest -f Dockerfile .'); - expect(resolved.captureOutput).toBe(true); - }); - - it('should resolve custom step with all params', () => { - const step = { - name: 'build', - use: 'docker-build', - image: 'myapp:v2', - dockerfile: 'Dockerfile.prod', - } as WorkflowStep; - const resolved = resolveCustomStep(step, customSteps); - - expect(resolved.command).toBe('docker build -t myapp:v2 -f Dockerfile.prod .'); - }); - - it('should throw on missing required param', () => { - const step = { name: 'build', use: 'docker-build' } as WorkflowStep; - - expect(() => resolveCustomStep(step, customSteps)).toThrow(/Missing required parameter/); - }); - - it('should throw on unknown custom step', () => { - const step = { name: 'build', use: 'unknown-step' } as WorkflowStep; - - expect(() => resolveCustomStep(step, customSteps)).toThrow(/Custom step "unknown-step" not found/); - }); - - it('should resolve worktree custom step', () => { - const step = { name: 'setup', use: 'setup-worktree', branch: 'feature/test' } as WorkflowStep; - const resolved = resolveCustomStep(step, customSteps); - - expect(resolved.type).toBe('worktree'); - expect(resolved.branch).toBe('feature/test'); - expect(resolved.baseBranch).toBe('main'); - expect(resolved.createBranch).toBe(true); - }); - - it('should preserve step name and dependsOn', () => { - const step = { - name: 'my-build', - use: 'docker-build', - image: 'app:latest', - dependsOn: ['setup'], - } as WorkflowStep; - const resolved = resolveCustomStep(step, customSteps); - - expect(resolved.name).toBe('my-build'); - expect(resolved.dependsOn).toEqual(['setup']); - }); - - it('should pass through non-custom steps unchanged', () => { - const step: WorkflowStep = { name: 'test', type: 'deterministic', command: 'echo hello' }; - const resolved = resolveCustomStep(step, customSteps); - - expect(resolved).toBe(step); - }); - - it('should resolve all custom steps in array', () => { - const steps: WorkflowStep[] = [ - { name: 'build1', use: 'docker-build', image: 'app1:latest' } as WorkflowStep, - { name: 'regular', type: 'deterministic', command: 'echo done' }, - { name: 'build2', use: 'docker-build', image: 'app2:latest' } as WorkflowStep, - ]; - - const resolved = resolveAllCustomSteps(steps, customSteps); - - expect(resolved).toHaveLength(3); - expect(resolved[0].command).toBe('docker build -t app1:latest -f Dockerfile .'); - expect(resolved[1].command).toBe('echo done'); - expect(resolved[2].command).toBe('docker build -t app2:latest -f Dockerfile .'); - }); -}); - -// ── Custom Step Validation Tests ──────────────────────────────────────────── - -describe('Custom Step Validation', () => { - const customSteps = new Map([ - [ - 'docker-build', - { - params: [ - { name: 'image', required: true }, - { name: 'dockerfile', default: 'Dockerfile' }, - ], - command: 'docker build -t {{image}} -f {{dockerfile}} .', - }, - ], - [ - 'deploy', - { - params: [{ name: 'env', required: true }], - command: 'deploy --env={{env}}', - }, - ], - ]); - - describe('validateCustomStepsUsage', () => { - it('should pass validation for correctly configured steps', () => { - const steps: WorkflowStep[] = [ - { name: 'build', use: 'docker-build', image: 'myapp:latest' } as WorkflowStep, - ]; - - const result = validateCustomStepsUsage(steps, customSteps); - - expect(result.valid).toBe(true); - expect(result.errors).toHaveLength(0); - }); - - it('should report missing custom step definition', () => { - const steps: WorkflowStep[] = [{ name: 'build', use: 'unknown-step' } as WorkflowStep]; - - const result = validateCustomStepsUsage(steps, customSteps); - - expect(result.valid).toBe(false); - expect(result.errors).toHaveLength(1); - expect(result.errors[0]).toContain('unknown-step'); - expect(result.missingSteps).toContain('unknown-step'); - }); - - it('should report missing required parameters', () => { - const steps: WorkflowStep[] = [ - { name: 'build', use: 'docker-build' } as WorkflowStep, // missing 'image' - ]; - - const result = validateCustomStepsUsage(steps, customSteps); - - expect(result.valid).toBe(false); - expect(result.errors).toHaveLength(1); - expect(result.errors[0]).toContain('image'); - expect(result.missingParams).toHaveLength(1); - expect(result.missingParams[0]).toEqual({ - step: 'build', - use: 'docker-build', - param: 'image', - }); - }); - - it('should warn about undefined variables in command', () => { - const customStepsWithUndefinedVar = new Map([ - [ - 'bad-step', - { - params: [{ name: 'known' }], - command: 'run {{known}} {{unknown}}', - }, - ], - ]); - - const steps: WorkflowStep[] = [{ name: 'test', use: 'bad-step', known: 'value' } as WorkflowStep]; - - const result = validateCustomStepsUsage(steps, customStepsWithUndefinedVar); - - expect(result.valid).toBe(true); // warnings don't fail validation - expect(result.warnings).toHaveLength(1); - expect(result.warnings[0]).toContain('unknown'); - expect(result.unresolvedVariables).toHaveLength(1); - }); - - it('should warn about extra parameters not in definition', () => { - const steps: WorkflowStep[] = [ - { name: 'build', use: 'docker-build', image: 'app', extraParam: 'ignored' } as WorkflowStep, - ]; - - const result = validateCustomStepsUsage(steps, customSteps); - - expect(result.valid).toBe(true); - expect(result.warnings.some((w) => w.includes('extraParam'))).toBe(true); - }); - - it('should validate multiple steps with different issues', () => { - const steps: WorkflowStep[] = [ - { name: 'build1', use: 'docker-build' } as WorkflowStep, // missing image - { name: 'build2', use: 'missing-step' } as WorkflowStep, // unknown step - { name: 'deploy', use: 'deploy', env: 'prod' } as WorkflowStep, // valid - ]; - - const result = validateCustomStepsUsage(steps, customSteps); - - expect(result.valid).toBe(false); - expect(result.errors).toHaveLength(2); - expect(result.missingSteps).toContain('missing-step'); - expect(result.missingParams).toHaveLength(1); - }); - - it('should skip validation for non-custom steps', () => { - const steps: WorkflowStep[] = [ - { name: 'agent-step', agent: 'dev', task: 'Do work' }, - { name: 'det-step', type: 'deterministic', command: 'echo hello' }, - ]; - - const result = validateCustomStepsUsage(steps, customSteps); - - expect(result.valid).toBe(true); - expect(result.errors).toHaveLength(0); - expect(result.warnings).toHaveLength(0); - }); - }); -}); - -// ── Custom Step Error Classes Tests ───────────────────────────────────────── - -describe('Custom Step Error Classes', () => { - it('CustomStepsParseError should have helpful message', () => { - const error = new CustomStepsParseError( - 'Missing "steps" key', - 'Add a "steps" object', - '/path/to/steps.yaml' - ); - - expect(error.name).toBe('CustomStepsParseError'); - expect(error.issue).toBe('Missing "steps" key'); - expect(error.suggestion).toBe('Add a "steps" object'); - expect(error.filePath).toBe('/path/to/steps.yaml'); - expect(error.message).toContain('Missing "steps" key'); - expect(error.message).toContain('Add a "steps" object'); - }); - - it('CustomStepResolutionError should have helpful message', () => { - const error = new CustomStepResolutionError( - 'my-step', - 'Custom step "docker-build" not found', - 'Add it to .relay/steps.yaml' - ); - - expect(error.name).toBe('CustomStepResolutionError'); - expect(error.stepName).toBe('my-step'); - expect(error.issue).toBe('Custom step "docker-build" not found'); - expect(error.suggestion).toBe('Add it to .relay/steps.yaml'); - expect(error.message).toContain('my-step'); - }); -}); - -// ── Worktree Step Tests ───────────────────────────────────────────────────── - -describe('Worktree Step Validation', () => { - it('should accept valid worktree step', () => { - const validYaml = ` -version: "1.0" -name: test-worktree -swarm: - pattern: dag -agents: - - name: developer - cli: claude -workflows: - - name: default - steps: - - name: setup-worktree - type: worktree - branch: feature/test - - name: develop - agent: developer - task: "Work in worktree" - dependsOn: [setup-worktree] -`; - const parsed = parseYaml(validYaml); - expect(parsed.workflows[0].steps[0].type).toBe('worktree'); - expect(parsed.workflows[0].steps[0].branch).toBe('feature/test'); - }); - - it('should accept worktree step with all options', () => { - const validYaml = ` -version: "1.0" -name: test-worktree -swarm: - pattern: dag -agents: - - name: developer - cli: claude -workflows: - - name: default - steps: - - name: setup-worktree - type: worktree - branch: feature/test - baseBranch: main - path: .worktrees/dev - createBranch: true - timeoutMs: 30000 -`; - const parsed = parseYaml(validYaml); - const step = parsed.workflows[0].steps[0]; - expect(step.type).toBe('worktree'); - expect(step.branch).toBe('feature/test'); - expect(step.baseBranch).toBe('main'); - expect(step.path).toBe('.worktrees/dev'); - expect(step.createBranch).toBe(true); - expect(step.timeoutMs).toBe(30000); - }); - - it('should support variable interpolation in worktree branch', () => { - const validYaml = ` -version: "1.0" -name: test-worktree -swarm: - pattern: dag -agents: - - name: developer - cli: claude -workflows: - - name: default - steps: - - name: setup-worktree - type: worktree - branch: "feature/{{branch-name}}" -`; - const parsed = parseYaml(validYaml); - expect(parsed.workflows[0].steps[0].branch).toBe('feature/{{branch-name}}'); - }); -}); - -// ── Pattern Selection Tests ────────────────────────────────────────────────── - -describe('Pattern Selection for Templates', () => { - let registry: TemplateRegistry; - let coordinator: SwarmCoordinator; - - beforeEach(() => { - registry = new TemplateRegistry({ builtInTemplatesDir: TEMPLATES_DIR }); - coordinator = new SwarmCoordinator(mockDb as any); - }); - - it('review-loop should select review-loop pattern', async () => { - const config = await registry.loadTemplate('review-loop'); - const pattern = coordinator.selectPattern(config); - expect(pattern).toBe('review-loop'); - }); - - for (const templateName of BUILT_IN_TEMPLATE_NAMES) { - it(`${templateName}: selected pattern should match declared pattern`, async () => { - const config = await registry.loadTemplate(templateName); - // If pattern is explicit, selection should return it - if (config.swarm.pattern) { - const selected = coordinator.selectPattern(config); - expect(selected).toBe(config.swarm.pattern); - } - }); - } -}); diff --git a/packages/sdk/src/cli-registry.ts b/packages/sdk/src/cli-registry.ts index ec039e26e..4468589e1 100644 --- a/packages/sdk/src/cli-registry.ts +++ b/packages/sdk/src/cli-registry.ts @@ -11,7 +11,7 @@ * in `resolve_command_path()` at crates/broker/src/pty.rs. */ -import type { AgentCli } from './workflows/types.js'; +import type { AgentCli } from '@agent-relay/cloud'; // ── Types ────────────────────────────────────────────────────────────────── diff --git a/packages/sdk/src/cli-resolver.ts b/packages/sdk/src/cli-resolver.ts index 7e5d5dcb2..a4624bb72 100644 --- a/packages/sdk/src/cli-resolver.ts +++ b/packages/sdk/src/cli-resolver.ts @@ -11,7 +11,7 @@ import { accessSync, constants as constantsSync } from 'node:fs'; import { join } from 'node:path'; import { homedir } from 'node:os'; import { promisify } from 'node:util'; -import type { AgentCli } from './workflows/types.js'; +import type { AgentCli } from '@agent-relay/cloud'; import { getCliDefinition, COMMON_SEARCH_PATHS } from './cli-registry.js'; const execFileAsync = promisify(execFile); diff --git a/packages/sdk/src/examples/workflow-superiority.ts b/packages/sdk/src/examples/workflow-superiority.ts deleted file mode 100644 index ef1e2caf3..000000000 --- a/packages/sdk/src/examples/workflow-superiority.ts +++ /dev/null @@ -1,1485 +0,0 @@ -/** - * Workflow Superiority — Multi-Agent Implementation Campaign - * - * A fully specified DAG workflow that orchestrates Claude (lead + reviewer) - * and Codex workers across five implementation tiers to make the relay - * broker-sdk workflow system decisively superior to Agno and Swarms AI. - * - * Architecture: - * - Claude lead: orchestrates each phase, approves trajectories, makes - * final architectural decisions - * - Codex workers: implement code changes, one per specialization domain - * - Claude code-reviewer: independent review after every implementation phase - * (separate from lead to avoid confirmation bias) - * - * DAG phases: - * Phase 0 → Codebase analysis + spec approval - * Phase 1 → Type system extension (condition/loop/router/hitl/sub-workflow) - * Phase 2 → Execution engine (runner handles new primitives + session) - * Phase 3 → Meta-orchestration (parallel with Phase 4) - * Phase 4 → Storage backends (parallel with Phase 3) - * Phase 5 → Deployment & observability - * Phase 6 → Integration validation + final lead sign-off - * - * Run: - * npx tsx src/examples/workflow-superiority.ts - * - * Environment: - * RELAY_API_KEY — optional. If absent the runner auto-provisions a - * fresh Relaycast workspace for each run (fully isolated, no caching). - */ - -import { workflow } from '../workflows/builder.js'; -import type { WorkflowEvent } from '../workflows/runner.js'; - -// ── Spec constants ──────────────────────────────────────────────────────────── - -const WORKFLOW_ROOT = 'packages/sdk/src/workflows'; -const TYPES_FILE = `${WORKFLOW_ROOT}/types.ts`; -const RUNNER_FILE = `${WORKFLOW_ROOT}/runner.ts`; -const BUILDER_FILE = `${WORKFLOW_ROOT}/builder.ts`; -const SCHEMA_FILE = `${WORKFLOW_ROOT}/schema.json`; -const INDEX_FILE = `${WORKFLOW_ROOT}/index.ts`; -const MEMORY_DB = `${WORKFLOW_ROOT}/memory-db.ts`; -const COORDINATOR = `${WORKFLOW_ROOT}/coordinator.ts`; -const BARRIER_FILE = `${WORKFLOW_ROOT}/barrier.ts`; -const TEMPLATES = `${WORKFLOW_ROOT}/templates.ts`; - -// NOTE: No withExit() wrapper needed — the WorkflowRunner automatically -// appends self-termination instructions in spawnAndWait() with the agent's -// actual runtime name. Adding a second exit instruction wastes tokens. - -// ── Event handler ───────────────────────────────────────────────────────────── - -const onEvent = (event: WorkflowEvent): void => { - const ts = new Date().toISOString(); - switch (event.type) { - case 'run:started': - console.log(`[${ts}] 🚀 run started runId=${event.runId}`); - break; - case 'run:completed': - console.log(`[${ts}] ✅ run complete runId=${event.runId}`); - break; - case 'run:failed': - console.error(`[${ts}] ❌ run failed runId=${event.runId} error=${event.error}`); - break; - case 'step:started': - console.log(`[${ts}] → ${event.stepName}`); - break; - case 'step:completed': - console.log(`[${ts}] ✓ ${event.stepName}`); - break; - case 'step:failed': - console.error(`[${ts}] ✗ ${event.stepName}: ${event.error}`); - break; - case 'step:skipped': - console.log(`[${ts}] ⊘ ${event.stepName} (skipped)`); - break; - case 'step:retrying': - console.log(`[${ts}] ↺ ${event.stepName} attempt=${event.attempt}`); - break; - } -}; - -// ── Workflow definition ─────────────────────────────────────────────────────── - -const result = await workflow('broker-sdk-superiority') - .description( - 'Five-phase multi-agent campaign to make relay broker-sdk workflow system ' + - 'decisively superior to Agno and Swarms AI. Claude leads; Codex implements; ' + - 'Claude reviews after every phase.' - ) - .pattern('dag') - .channel('wf-broker-sdk-superiority') - .maxConcurrency(3) - .timeout(28_800_000) // 8 hours — this is a large implementation campaign - - // ── Agents ──────────────────────────────────────────────────────────────── - - .agent('lead', { - cli: 'claude', - role: - 'Lead architect. Sets direction, reviews each phase output, approves ' + - 'trajectories, and resolves architectural conflicts. Has final say on ' + - 'all design decisions.', - retries: 2, - }) - - .agent('code-reviewer', { - cli: 'claude', - role: - 'Independent code reviewer. Reviews implementation quality, correctness, ' + - 'TypeScript type safety, test coverage, and integration coherence after ' + - 'every phase. Catches issues the lead may have missed.', - retries: 2, - }) - - .agent('spec-analyst', { - cli: 'codex', - role: - 'Codebase analyst. Reads the existing workflow source files and produces ' + - 'a precise, file-by-file implementation plan for all five improvement tiers.', - retries: 2, - }) - - .agent('schema-implementer', { - cli: 'codex', - role: - 'Type system specialist. Extends TypeScript interfaces and JSON Schema ' + - 'definitions to support new workflow primitives.', - retries: 2, - }) - - .agent('engine-implementer', { - cli: 'codex', - role: - 'Execution engine specialist. Implements new step-type execution logic ' + - 'inside WorkflowRunner, adds session concept, and expands the event system.', - retries: 3, - }) - - .agent('meta-implementer', { - cli: 'codex', - role: - 'Meta-orchestration specialist. Implements sub-workflow composition, ' + - 'AutoWorkflowBuilder, and semantic pattern selection.', - retries: 2, - }) - - .agent('storage-implementer', { - cli: 'codex', - role: - 'Storage backend specialist. Implements PostgresWorkflowDb, ' + - 'SqliteWorkflowDb, and RedisWorkflowDb adapters.', - retries: 2, - }) - - .agent('deploy-implementer', { - cli: 'codex', - role: - 'Deployment and observability specialist. Implements relay workflow serve ' + - 'HTTP server, OTel tracing integration, and CLI improvements.', - retries: 2, - }) - - .agent('test-validator', { - cli: 'codex', - role: - 'Integration test specialist. Validates all phases compile, tests pass, ' + 'and exports are correct.', - retries: 2, - }) - - // ── Phase 0: Codebase Analysis ──────────────────────────────────────────── - - .step('codebase-analysis', { - agent: 'spec-analyst', - task: ` -You are the first step in a large improvement campaign for the relay broker-sdk -workflow system. Your job is to read the existing source files and produce a -concrete, file-by-file implementation plan. - -READ THESE FILES THOROUGHLY: - - ${TYPES_FILE} - - ${RUNNER_FILE} - - ${BUILDER_FILE} - - ${SCHEMA_FILE} - - ${INDEX_FILE} - - ${MEMORY_DB} - - ${COORDINATOR} - - ${BARRIER_FILE} - - ${TEMPLATES} - -PRODUCE A DETAILED PLAN covering the following improvements (in priority order): - -TIER 1 — Schema Primitives (new step types in types.ts + schema.json): - a) condition step: WorkflowStep gains optional "condition" string (CEL expr) - and optional "type" discriminant. When condition evaluates false → skip. - b) loop step: new LoopStepGroup interface { type:"loop", name, steps[], - until?: string (CEL expr on step outputs), maxIterations: number } - c) router step: new RouterStepGroup interface { type:"router", name, - selector: string (CEL expr), routes: RouterRoute[], default?: string[] } - RouterRoute: { match: string, steps: string[] } - d) parallel step group: new ParallelStepGroup interface { type:"parallel", - name, barrier?: "all"|"any"|"majority", timeout?: number, steps[] } - e) hitl step: new HitlStep interface { type:"hitl", name, message: string, - channel?: string, timeout?: number, onTimeout?: "skip"|"fail" } - f) flow shorthand: RelayYamlConfig gains optional "flow?: string" field, - parsed as "A -> B, C -> D" notation generating dependsOn edges - g) sub-workflow step: new SubWorkflowStep interface { type:"sub-workflow", - name, workflow: string (path or registry name), vars?: Record } - h) Promote retries + timeoutMs to top-level WorkflowStep fields (they already - exist — verify they are exported and used correctly in runner.ts) - -TIER 2 — Execution Engine (runner.ts): - a) Condition evaluation: before executing a step, if step.condition is set, - evaluate the CEL expression; if false, mark step skipped - b) Loop execution: LoopStepGroup runs its steps[].steps repeatedly until - until-condition is true or maxIterations reached - c) Router execution: evaluate selector, match route, execute branch steps - d) Parallel group: run steps concurrently with barrier semantics - e) HITL execution: pause run, write hitl-pending file, poll for response file - f) Sub-workflow: recursively create a WorkflowRunner and execute the referenced - config, return its output as the step output - g) Session concept: WorkflowRunner gains optional sessionId param; a session - groups multiple runs and shares state across them via SessionStore - h) Input schema validation: RelayYamlConfig gains optional inputSchema field - (JSON Schema object); validate vars before execution begins - i) Fallback agent: WorkflowStep gains optional fallbackAgent?: string; - if primary agent fails all retries, retry once with fallbackAgent - j) Expanded events: add 15+ new WorkflowEvent union members for loop iterations, - condition evaluations, router selections, hitl pauses, session updates - -TIER 3 — Meta-Orchestration: - a) AutoWorkflowBuilder: new exported class that takes a task string and - available CLIs, calls a meta-agent to generate RelayYamlConfig, validates - it, and returns it (or optionally executes immediately) - b) Workflow registry: simple JSON file-based registry at - ~/.agent-relay/workflow-registry.json that maps name→path; CLI commands - relay workflow list, search, install, publish - -TIER 4 — Storage Backends: - a) PostgresWorkflowDb: implements WorkflowDb interface using node-postgres (pg) - with schema migration on first connect - b) SqliteWorkflowDb: implements WorkflowDb using better-sqlite3 (sync API - wrapped in async interface) - c) RedisWorkflowDb: implements WorkflowDb using ioredis with JSON serialization - and optional TTL on run records - All three: exported from packages/sdk/src/workflows/db/ submodule - -TIER 5 — Deployment & Observability: - a) relay workflow serve: new CLI subcommand that starts an Express/Fastify HTTP - server exposing POST /run, GET /runs/:id, GET /runs/:id/events (SSE), - POST /runs/:id/hitl/:step, POST /runs/:id/abort, GET /health - b) OTel tracing: optional otel?: { exportTo: string, endpoint: string, - serviceName: string } in RelayYamlConfig; runner creates spans for - run start/end and each step start/end using @opentelemetry/sdk-node - c) CLI improvements: relay workflow dry-run, relay workflow inspect, - relay workflow replay --from - -For each tier, specify: - 1. Which existing files change and what lines/sections to modify - 2. Which new files to create and their full path - 3. New TypeScript interface/type definitions (exact syntax) - 4. New function signatures with JSDoc - -Output a structured plan with clear section headers. -End your output with: ANALYSIS_COMPLETE - `, - retries: 2, - verification: { type: 'output_contains', value: 'ANALYSIS_COMPLETE' }, - }) - - .step('spec-approval', { - agent: 'lead', - task: ` -Review the codebase analysis produced by the spec-analyst: - -{{steps.codebase-analysis.output}} - -Your job: -1. Validate that the plan covers all five tiers correctly -2. Identify any architectural risks or conflicts (e.g., breaking changes to - WorkflowStep that would break existing templates) -3. Clarify the execution order for each tier -4. Approve or amend the plan — note any changes clearly -5. Establish a non-negotiable constraint: ALL existing tests in - packages/sdk/src/__tests__/ must remain passing after each phase - -Key architectural decisions to make explicit: - - How should WorkflowStep handle the new type discriminant without breaking - existing YAML that has no "type" field? (answer: type defaults to "agent") - - Should LoopStepGroup and RouterStepGroup be separate from WorkflowStep in - the types, or unified via a discriminated union? - - Is the session concept stored in the same WorkflowDb or a separate SessionDb? - - For the hitl step, what is the polling mechanism? (file-based, HTTP endpoint, - or webhook?) Choose the simplest that works without requiring a server. - -Output a concise approved plan with your decisions. End with: SPEC_APPROVED - `, - dependsOn: ['codebase-analysis'], - retries: 2, - verification: { type: 'output_contains', value: 'SPEC_APPROVED' }, - }) - - // ── Phase 1: Type System Extension ──────────────────────────────────────── - - .step('p1-type-system', { - agent: 'schema-implementer', - task: ` -Phase 1a: Extend the TypeScript type system for new workflow primitives. - -Approved spec context: -{{steps.spec-approval.output}} - -YOUR TASK: Modify ${TYPES_FILE} to add all new types. - -SPECIFIC CHANGES: - -1. Add a StepType discriminant: - export type StepType = "agent" | "condition" | "loop" | "router" | "parallel" | "hitl" | "sub-workflow"; - -2. Extend WorkflowStep to include new optional fields: - - type?: StepType (default "agent" when absent) - - condition?: string (CEL expression; step skipped if evaluates false) - - fallbackAgent?: string (agent name to use if primary fails all retries) - -3. Add new composite step group interfaces: - - export interface LoopStepGroup { - type: "loop"; - name: string; - description?: string; - steps: WorkflowStep[]; - until?: string; // CEL expression evaluated after each iteration - maxIterations: number; // required — prevents runaway loops - timeoutMs?: number; - } - - export interface RouterRoute { - match: string; // CEL expression or substring match - steps: string[]; // names of steps to execute for this route - } - - export interface RouterStepGroup { - type: "router"; - name: string; - description?: string; - selector: string; // CEL expression producing a string value - routes: RouterRoute[]; - default?: string[]; // step names to run if no route matches - timeoutMs?: number; - } - - export interface ParallelStepGroup { - type: "parallel"; - name: string; - description?: string; - barrier?: "all" | "any" | "majority"; // default "all" - steps: WorkflowStep[]; - timeoutMs?: number; - } - - export interface HitlStep { - type: "hitl"; - name: string; - description?: string; - message: string; // human-readable prompt shown to approver - channel?: string; // notification target e.g. "slack:#approvals" - timeoutMs?: number; // how long to wait before applying onTimeout - onTimeout?: "skip" | "fail" | "use-default"; // default "fail" - defaultResponse?: string; // used when onTimeout is "use-default" - } - - export interface SubWorkflowStep { - type: "sub-workflow"; - name: string; - description?: string; - workflow: string; // path to relay.yaml or registry name - vars?: Record; // variable substitutions for the sub-workflow - timeoutMs?: number; - } - -4. Create a union type for any step or step group: - export type AnyWorkflowStep = - | WorkflowStep - | LoopStepGroup - | RouterStepGroup - | ParallelStepGroup - | HitlStep - | SubWorkflowStep; - -5. Update WorkflowDefinition.steps to use AnyWorkflowStep[]: - steps: AnyWorkflowStep[]; - -6. Add session types: - export interface SessionConfig { - persist?: boolean; // default false - historyRuns?: number; // how many prior runs to inject as context - ttlMs?: number; - } - - export interface SessionRow { - id: string; - workflowName: string; - runIds: string[]; - stateSnapshot: Record; - createdAt: string; - updatedAt: string; - } - -7. Add inputSchema to RelayYamlConfig: - inputSchema?: Record; // JSON Schema object - -8. Add session to RelayYamlConfig: - session?: SessionConfig; - -9. Expand WorkflowEvent union in runner.ts — ADD to the existing union - (note: WorkflowEvent is defined in runner.ts, not types.ts, so create a - comment in types.ts pointing to runner.ts where these will be added): - // New events will be added to WorkflowEvent in runner.ts: - // loop:iteration-started, loop:iteration-completed, loop:ended, - // condition:evaluated, condition:skipped, router:evaluated, - // parallel:branch-started, parallel:branch-completed, - // hitl:paused, hitl:responded, hitl:timeout, - // subworkflow:started, subworkflow:completed, - // session:state-updated, validation:failed, fallback:agent-switched - -10. Export all new types from ${INDEX_FILE} - -Make ALL changes. Run: npx tsc --noEmit to verify no type errors. -End your output with: TYPES_COMPLETE - `, - dependsOn: ['spec-approval'], - retries: 2, - verification: { type: 'output_contains', value: 'TYPES_COMPLETE' }, - }) - - .step('p1-json-schema', { - agent: 'schema-implementer', - task: ` -Phase 1b: Update the JSON Schema to match the new TypeScript types. - -Prior type changes: -{{steps.p1-type-system.output}} - -YOUR TASK: Update ${SCHEMA_FILE} to add definitions for all new step group types. - -Add new $defs entries: - - StepType enum definition - - LoopStepGroup object with required: [type, name, steps, maxIterations] - - RouterRoute object - - RouterStepGroup object with required: [type, name, selector, routes] - - ParallelStepGroup object with required: [type, name, steps] - - HitlStep object with required: [type, name, message] - - SubWorkflowStep object with required: [type, name, workflow] - - SessionConfig object - - AnyWorkflowStep as oneOf the above plus existing WorkflowStep - -Update workflow.steps array to use anyOf: [WorkflowStep, AnyWorkflowStep] -Update top-level RelayYamlConfig to include inputSchema and session properties. - -Also update packages/sdk/src/workflows/builder.ts: - - Add a new builder method for sessions: - session(config: SessionConfig): this - - Add a new builder method for input schema: - inputSchema(schema: Record): this - -Verify with: cat ${SCHEMA_FILE} | python3 -m json.tool (or equivalent JSON lint) -End your output with: SCHEMA_COMPLETE - `, - dependsOn: ['p1-type-system'], - retries: 2, - verification: { type: 'output_contains', value: 'SCHEMA_COMPLETE' }, - }) - - .step('p1-lead-review', { - agent: 'lead', - task: ` -Phase 1 Lead Review: Validate the type system and schema changes. - -Type system changes: -{{steps.p1-type-system.output}} - -Schema changes: -{{steps.p1-json-schema.output}} - -REVIEW CRITERIA: -1. Do the new TypeScript interfaces correctly model the intended semantics? -2. Is the AnyWorkflowStep discriminated union correctly structured? Each variant - must have a unique "type" literal so TypeScript can narrow the type. -3. Does the WorkflowStep backward compatibility hold? (existing YAML with no - "type" field should still work — type defaults to "agent") -4. Are the new fields in RelayYamlConfig (inputSchema, session) properly optional? -5. Does the builder extension make sense? Are the new methods ergonomic? -6. Any naming inconsistencies between the TypeScript types and JSON Schema? - -If you find issues, describe them specifically with file:line references. -If the phase is acceptable, state what should be fixed in the engine phase. -End with: PHASE_1_APPROVED (even if you request minor fixes — fixes go to -the code-reviewer, who will direct the implementer if needed) - `, - dependsOn: ['p1-json-schema'], - retries: 1, - verification: { type: 'output_contains', value: 'PHASE_1_APPROVED' }, - }) - - .step('p1-code-review', { - agent: 'code-reviewer', - task: ` -Phase 1 Independent Code Review: TypeScript type system extension. -(Running in parallel with lead review — review the code independently.) - -JSON Schema changes: -{{steps.p1-json-schema.output}} - -YOUR INDEPENDENT REVIEW of ${TYPES_FILE}, ${SCHEMA_FILE}, ${BUILDER_FILE}: - -Check: -1. TypeScript strict-mode compliance — no implicit any, all fields typed -2. JSDoc on every exported interface and type -3. Correct use of discriminated unions (each variant's type field is a - string literal, not just string) -4. AnyWorkflowStep is correctly exported from ${INDEX_FILE} -5. Builder methods follow existing patterns (return this for chaining) -6. JSON Schema $defs are correctly referenced in anyOf arrays -7. No circular references in the type definitions -8. Run: npx tsc --noEmit from packages/sdk and confirm zero errors - -List any issues found. For each: file, line (if known), problem, fix required. -If zero issues: explicitly state "No issues found." -End with: CODE_REVIEW_1_COMPLETE - `, - dependsOn: ['p1-json-schema'], - retries: 1, - verification: { type: 'output_contains', value: 'CODE_REVIEW_1_COMPLETE' }, - }) - - // ── Phase 2: Execution Engine ───────────────────────────────────────────── - - .step('p2-condition-loop', { - agent: 'engine-implementer', - task: ` -Phase 2a: Implement condition + loop execution in the WorkflowRunner. - -Phase 1 review context: -{{steps.p1-code-review.output}} - -YOUR TASK: Modify ${RUNNER_FILE} to handle condition and loop step types. - -CONDITION STEP EXECUTION: -In the executeStep method (or wherever individual steps are dispatched): -1. Check if step.type === "condition" OR if step.condition is set on a regular step -2. If step.condition exists, evaluate the CEL expression using a lightweight - evaluator. Use the cel-js npm package (add to package.json if not present) - OR implement a minimal evaluator that handles: - - String contains: "X in Y.output" - - String equality: "steps.X.output == 'VALUE'" - - Boolean AND/OR - The expression context object should include: - { steps: Record, - vars: VariableContext } -3. If condition evaluates to false: mark step as "skipped", emit step:skipped event -4. If condition evaluates to true (or no condition): proceed normally - -Also add to WorkflowEvent union in runner.ts: - | { type: "condition:evaluated"; runId: string; stepName: string; result: boolean; expression: string } - | { type: "condition:skipped"; runId: string; stepName: string } - -LOOP STEP GROUP EXECUTION: -Add a new method: private async executeLoopGroup(loop: LoopStepGroup, ...): Promise -1. Run loop.steps sequentially up to loop.maxIterations times -2. After each iteration, evaluate loop.until CEL expression if provided -3. If until evaluates to true, break and return last step output -4. If maxIterations reached without satisfaction, fail with descriptive error -5. Track step outputs by iteration: steps in context include "loop.STEP.output" - for the current iteration - -Add to WorkflowEvent: - | { type: "loop:iteration-started"; runId: string; loopName: string; iteration: number } - | { type: "loop:iteration-completed"; runId: string; loopName: string; iteration: number; continuing: boolean } - | { type: "loop:ended"; runId: string; loopName: string; reason: "condition-met" | "max-iterations" } - -In the main execution loop (findReadySteps / executeSteps), detect when a step -is a LoopStepGroup and route it to executeLoopGroup. - -Run: npx tsc --noEmit to verify. -End your output with: CONDITION_LOOP_COMPLETE - `, - dependsOn: ['p1-code-review'], - retries: 2, - verification: { type: 'output_contains', value: 'CONDITION_LOOP_COMPLETE' }, - }) - - .step('p2-input-validation', { - agent: 'schema-implementer', - task: ` -Phase 2b: Implement input schema validation (runs in parallel with p2-condition-loop). - -Phase 1 review context: -{{steps.p1-code-review.output}} - -YOUR TASK: Add input schema validation to WorkflowRunner before execution starts. - -In ${RUNNER_FILE}, in the execute() method, BEFORE the first findReadySteps call: - -1. If config.inputSchema is defined: - a. Import or inline a minimal JSON Schema validator. Use the ajv npm package - (add to package.json dependencies if not present). - b. Compile the schema: const validate = ajv.compile(config.inputSchema) - c. Validate the vars object against the schema - d. If invalid: emit { type: "validation:failed", runId, errors: AjvError[] } - and throw WorkflowValidationError with a human-readable message listing - each field error - -2. Add WorkflowValidationError class (extends Error) to runner.ts: - export class WorkflowValidationError extends Error { - constructor(public readonly errors: unknown[]) { - super("Workflow input validation failed: " + JSON.stringify(errors)); - this.name = "WorkflowValidationError"; - } - } - -3. Add to WorkflowEvent: - | { type: "validation:failed"; runId: string; errors: unknown[] } - | { type: "validation:passed"; runId: string } - -4. Export WorkflowValidationError from ${INDEX_FILE} - -5. Update builder.ts: add the inputSchema method to WorkflowBuilder that sets - config.inputSchema (this was noted in Phase 1 but implement it now if not done) - -Run: npx tsc --noEmit to verify. -End your output with: INPUT_VALIDATION_COMPLETE - `, - dependsOn: ['p1-code-review'], - retries: 2, - verification: { type: 'output_contains', value: 'INPUT_VALIDATION_COMPLETE' }, - }) - - .step('p2-router-hitl', { - agent: 'engine-implementer', - task: ` -Phase 2c: Implement router + HITL step execution. - -Condition/loop implementation: -{{steps.p2-condition-loop.output}} - -YOUR TASK: Add router and HITL execution to ${RUNNER_FILE}. - -ROUTER STEP GROUP EXECUTION: -Add: private async executeRouterGroup(router: RouterStepGroup, context, ...): Promise -1. Evaluate router.selector CEL expression to get a string value -2. Iterate router.routes, find first route where match expression is true - (match can be substring check: selectorValue.includes(route.match) or - full CEL evaluation depending on complexity) -3. Collect the branch step names to execute (from route.steps or router.default) -4. Execute those steps (they must exist in the parent workflow's step list) -5. Return concatenated outputs of the executed branch steps - -Add to WorkflowEvent: - | { type: "router:evaluated"; runId: string; routerName: string; selectorValue: string; matchedRoute: string | null } - | { type: "router:branch-started"; runId: string; routerName: string; stepName: string } - -HITL STEP EXECUTION: -Add: private async executeHitlStep(hitl: HitlStep, runId: string, ...): Promise -1. Write a file: {summaryDir}/{runId}/hitl-{hitl.name}-pending.json containing: - { runId, stepName: hitl.name, message: hitl.message, channel: hitl.channel, - pendingSince: ISO timestamp, timeoutMs: hitl.timeoutMs } -2. Emit { type: "hitl:paused", runId, stepName, message, channel } -3. Poll every 5 seconds for a response file: - {summaryDir}/{runId}/hitl-{hitl.name}-response.json - The response file should contain: { response: string, respondedBy?: string } -4. If timeoutMs elapses without response: - - If onTimeout === "skip": mark step skipped, emit hitl:timeout, return "" - - If onTimeout === "use-default": emit hitl:timeout, return hitl.defaultResponse ?? "" - - Otherwise (default "fail"): throw Error("HITL step timed out") -5. On response: delete pending file, emit hitl:responded, return response.response - -Add to WorkflowEvent: - | { type: "hitl:paused"; runId: string; stepName: string; message: string; channel?: string } - | { type: "hitl:responded"; runId: string; stepName: string; response: string; respondedBy?: string } - | { type: "hitl:timeout"; runId: string; stepName: string; action: string } - -Run: npx tsc --noEmit -End your output with: ROUTER_HITL_COMPLETE - `, - dependsOn: ['p2-condition-loop'], - retries: 2, - verification: { type: 'output_contains', value: 'ROUTER_HITL_COMPLETE' }, - }) - - .step('p2-session-fallback', { - agent: 'engine-implementer', - task: ` -Phase 2d: Implement session concept and fallback agent switching. - -Router/HITL implementation: -{{steps.p2-router-hitl.output}} - -YOUR TASK: Extend WorkflowRunner with session support and fallback agents. - -SESSION CONCEPT: -A session groups multiple workflow runs, shares state across them, and can -inject prior run outputs as context into new runs. - -1. Extend WorkflowRunnerOptions in runner.ts: - sessionId?: string; // if provided, this run joins a named session - -2. Add a simple SessionStore (in-memory, backed by WorkflowDb if available): - - getSession(sessionId): returns { runIds: string[], stateSnapshot: Record } - - addRunToSession(sessionId, runId): appends runId to session run list - - getSessionHistory(sessionId, n): returns last n run outputs as context string - -3. In execute(), if sessionId is provided: - a. Load session history (last config.session?.historyRuns ?? 3 runs) - b. Prepend history context to the first step's task: - "[Session history - prior runs]\n{history}\n[Current task]\n{task}" - c. After run completes, call addRunToSession(sessionId, runId) - -4. Add to WorkflowEvent: - | { type: "session:run-added"; sessionId: string; runId: string } - | { type: "session:history-injected"; sessionId: string; historyRuns: number } - -FALLBACK AGENT: -In the step execution method, after all retries on the primary agent are -exhausted: -1. Check if step.fallbackAgent is defined -2. If yes: look up the fallback agent definition by name from config.agents -3. Re-attempt execution once with the fallback agent -4. Emit { type: "fallback:agent-switched"; runId, stepName, fromAgent, toAgent } -5. If fallback also fails: mark step as failed - -Add to WorkflowEvent: - | { type: "fallback:agent-switched"; runId: string; stepName: string; fromAgent: string; toAgent: string } - -Also implement the flow shorthand parser (if RelayYamlConfig.flow is set): - parseFlowString("A -> B, C -> D"): WorkflowStep[] - - "A -> B" means B.dependsOn = [A] - - "B, C" means B and C are in parallel (no dependency between them) - - "A, B -> C" means C.dependsOn = [A, B] - - Call this in execute() before building the step graph if config.flow is set - -Run: npx tsc --noEmit -End your output with: SESSION_FALLBACK_COMPLETE - `, - dependsOn: ['p2-router-hitl'], - retries: 2, - verification: { type: 'output_contains', value: 'SESSION_FALLBACK_COMPLETE' }, - }) - - .step('p2-lead-review', { - agent: 'lead', - task: ` -Phase 2 Lead Review: Execution engine implementation. - -Input validation: -{{steps.p2-input-validation.output}} - -Session + fallback: -{{steps.p2-session-fallback.output}} - -REVIEW: -1. Is the CEL condition evaluator robust enough? Does it handle the common - patterns we need (output contains, equality, AND/OR)? -2. Is the loop execution correctly isolated — do step names inside a loop - conflict with top-level step names in the output context? -3. Is the HITL polling approach acceptable? (file-based polling every 5s) - Or should it use a simple readline/stdin approach instead? -4. Does the session history injection make sense — will agents be overwhelmed - by injected context if historyRuns is large? -5. Is the flow string parser correct for all cases: - "A -> B, C -> D" and "A, B -> C"? -6. Are all new WorkflowEvent variants added to the union consistently? - -Note any critical fixes needed before Phase 3 begins. -End with: PHASE_2_APPROVED - `, - dependsOn: ['p2-session-fallback', 'p2-input-validation'], - retries: 1, - verification: { type: 'output_contains', value: 'PHASE_2_APPROVED' }, - }) - - .step('p2-code-review', { - agent: 'code-reviewer', - task: ` -Phase 2 Independent Code Review: Execution engine. - -Lead's review: -{{steps.p2-lead-review.output}} - -INDEPENDENT REVIEW of ${RUNNER_FILE}: - -1. Correctness: does the condition evaluator correctly handle edge cases - (undefined step output, empty strings, non-boolean CEL results)? -2. Loop safety: is there a guard against infinite loops if maxIterations is 0? -3. HITL polling: does the polling correctly clean up pending files on both - success and timeout paths? No file handle leaks? -4. Fallback agent: is the agent definition lookup null-safe? What if - fallbackAgent name doesn't exist in config.agents? -5. Session history injection: is it correctly skipped when sessionId is absent? -6. Flow parser: does it handle edge cases — empty string, single agent, - spaces around arrows? -7. TypeScript: run npx tsc --noEmit and report results -8. Existing tests: run npx jest packages/sdk and report pass/fail counts - -List all issues with specific fixes. State "No issues" for clean sections. -End with: CODE_REVIEW_2_COMPLETE - `, - dependsOn: ['p2-lead-review'], - retries: 1, - verification: { type: 'output_contains', value: 'CODE_REVIEW_2_COMPLETE' }, - }) - - // ── Phase 3: Meta-Orchestration (parallel with Phase 4) ─────────────────── - - .step('p3-sub-workflow', { - agent: 'meta-implementer', - task: ` -Phase 3a: Implement sub-workflow step execution. - -Phase 2 review context: -{{steps.p2-code-review.output}} - -YOUR TASK: Add sub-workflow composition to ${RUNNER_FILE}. - -SUB-WORKFLOW EXECUTION: -Add: private async executeSubWorkflow(step: SubWorkflowStep, vars, runId, ...): Promise -1. Resolve the workflow reference: - - If step.workflow starts with "./" or "/": treat as file path - - Otherwise: look up in the workflow registry at - ~/.agent-relay/workflow-registry.json (create if absent; plain JSON map) -2. Load and parse the referenced relay.yaml file -3. Merge step.vars with the current run's vars (step.vars take precedence) -4. Create a new WorkflowRunner instance (child runner), share the same DB -5. Execute the sub-workflow config via child runner -6. Return the sub-workflow's run output (join all step outputs) - -Add to WorkflowEvent: - | { type: "subworkflow:started"; runId: string; stepName: string; workflowRef: string } - | { type: "subworkflow:completed"; runId: string; stepName: string; output: string } - | { type: "subworkflow:failed"; runId: string; stepName: string; error: string } - -WORKFLOW REGISTRY: -Create packages/sdk/src/workflows/registry.ts: - - export interface WorkflowRegistryEntry { - name: string; - path: string; - description?: string; - tags?: string[]; - installedAt: string; - } - - export class WorkflowRegistry { - private readonly registryPath: string; - // Load registry from ~/.agent-relay/workflow-registry.json - async list(): Promise - async get(name: string): Promise - async register(entry: WorkflowRegistryEntry): Promise - async unregister(name: string): Promise - async resolvePath(nameOrPath: string): Promise - // If starts with ./ or / return as-is; else look up in registry - } - -Export WorkflowRegistry from ${INDEX_FILE}. - -Run: npx tsc --noEmit -End with: SUB_WORKFLOW_COMPLETE - `, - dependsOn: ['p2-code-review'], - retries: 2, - verification: { type: 'output_contains', value: 'SUB_WORKFLOW_COMPLETE' }, - }) - - .step('p3-auto-builder', { - agent: 'meta-implementer', - task: ` -Phase 3b: Implement AutoWorkflowBuilder. - -Sub-workflow implementation: -{{steps.p3-sub-workflow.output}} - -YOUR TASK: Create packages/sdk/src/workflows/auto-builder.ts - -The AutoWorkflowBuilder analyzes a task description and uses a meta-agent -to generate a complete RelayYamlConfig automatically. - - export interface AutoBuildOptions { - availableClis?: AgentCli[]; // defaults to ["claude", "codex"] - maxAgents?: number; // defaults to 5 - maxSteps?: number; // defaults to 10 - preferredPattern?: SwarmPattern; - dryRun?: boolean; // if true, return config without executing - metaCli?: AgentCli; // CLI to use for the meta-agent (default "claude") - } - - export interface AutoBuildResult { - config: RelayYamlConfig; - yaml: string; - reasoning: string; // why the meta-agent chose this structure - run?: WorkflowRunRow; // present if dryRun is false - } - - export class AutoWorkflowBuilder { - constructor(private readonly options: AutoBuildOptions = {}) {} - - async build(task: string): Promise { - // 1. Construct a meta-prompt that instructs the meta-agent to: - // a. Analyze the task - // b. Select the best swarm pattern - // c. Define agents (using available CLIs) - // d. Define workflow steps with appropriate dependencies - // e. Output a valid relay.yaml string (fenced in \`\`\`yaml...\`\`\`) - // f. Explain its reasoning - // 2. Spawn the meta-agent via AgentRelay - // 3. Extract the YAML from the response (find \`\`\`yaml ... \`\`\` block) - // 4. Parse and validate the config using WorkflowRunner.parseYamlString - // 5. If dryRun: return { config, yaml, reasoning } - // 6. Else: execute via WorkflowRunner and return { config, yaml, reasoning, run } - } - - // Convenience: build and run immediately - async run(task: string): Promise { - const result = await this.build(task); - if (!result.run) throw new Error("Set dryRun: false to execute"); - return result.run; - } - } - - // Convenience export - export async function autoWorkflow(task: string, options?: AutoBuildOptions): Promise { - return new AutoWorkflowBuilder(options).build(task); - } - -Export AutoWorkflowBuilder and autoWorkflow from ${INDEX_FILE}. - -Also add the meta-workflow type to types.ts if not already done: - export type MetaWorkflowConfig = RelayYamlConfig & { type: "meta-workflow" }; - (A meta-workflow is a relay.yaml where steps are sub-workflow steps) - -Run: npx tsc --noEmit -End with: AUTO_BUILDER_COMPLETE - `, - dependsOn: ['p3-sub-workflow'], - retries: 2, - verification: { type: 'output_contains', value: 'AUTO_BUILDER_COMPLETE' }, - }) - - // ── Phase 4: Storage Backends (parallel with Phase 3) ──────────────────── - - .step('p4-db-adapters', { - agent: 'storage-implementer', - task: ` -Phase 4: Implement production-ready WorkflowDb adapters. -(Runs in parallel with Phase 3 — no dependency between them.) - -Phase 2 review context: -{{steps.p2-code-review.output}} - -Create a new directory: packages/sdk/src/workflows/db/ - -Create these files: - -1. packages/sdk/src/workflows/db/postgres.ts - export class PostgresWorkflowDb implements WorkflowDb { - constructor(options: { connectionString: string; tablePrefix?: string }) - // Creates tables on first connect if they don't exist: - // {prefix}workflow_runs, {prefix}workflow_steps - // Schema mirrors WorkflowRunRow and WorkflowStepRow - // Uses node-postgres (pg) — add to package.json if absent - async insertRun(run: WorkflowRunRow): Promise - async updateRun(id: string, patch: Partial): Promise - async getRun(id: string): Promise - async insertStep(step: WorkflowStepRow): Promise - async updateStep(id: string, patch: Partial): Promise - async getStepsByRunId(runId: string): Promise - async close(): Promise - } - -2. packages/sdk/src/workflows/db/sqlite.ts - export class SqliteWorkflowDb implements WorkflowDb { - constructor(options: { path: string; tablePrefix?: string }) - // Uses better-sqlite3 — synchronous API wrapped in async methods - // Same schema as PostgresWorkflowDb - // Creates tables if they don't exist on construction - } - -3. packages/sdk/src/workflows/db/redis.ts - export class RedisWorkflowDb implements WorkflowDb { - constructor(options: { - url: string; - keyPrefix?: string; // default "relay:workflow:" - runTtlMs?: number; // optional TTL on run records - }) - // Uses ioredis — add to package.json if absent - // Stores runs as JSON strings at key: {prefix}run:{id} - // Stores step lists at key: {prefix}steps:{runId} (Redis list) - // Stores individual steps at: {prefix}step:{id} - } - -4. packages/sdk/src/workflows/db/index.ts - export { PostgresWorkflowDb } from './postgres.js'; - export { SqliteWorkflowDb } from './sqlite.js'; - export { RedisWorkflowDb } from './redis.js'; - -5. Update packages/sdk/src/workflows/index.ts to export from db/: - export * from './db/index.js'; - -IMPORTANT: Add the three packages to package.json as optional peer dependencies -with peerDependenciesMeta markings optional: true, so users only need to install -the adapter they use. - -Run: npx tsc --noEmit (adapters will have type errors only if packages are absent; -mark them as type-only imports with // @ts-expect-error if needed with clear comment) -End with: DB_ADAPTERS_COMPLETE - `, - dependsOn: ['p2-code-review'], - retries: 2, - verification: { type: 'output_contains', value: 'DB_ADAPTERS_COMPLETE' }, - }) - - // ── Phase 3+4 Combined Review ───────────────────────────────────────────── - - .step('p34-lead-review', { - agent: 'lead', - task: ` -Phase 3+4 Lead Review: Meta-orchestration and storage backends. - -Phase 3 — AutoBuilder: -{{steps.p3-auto-builder.output}} - -Phase 4 — DB adapters: -{{steps.p4-db-adapters.output}} - -REVIEW: - -META-ORCHESTRATION: -1. Is the sub-workflow execution correctly isolated? (child runner should not - share the parent runner's event listeners, but should share the DB) -2. Does the WorkflowRegistry file path (~/.agent-relay/) correctly use os.homedir()? -3. Is the AutoWorkflowBuilder meta-prompt clear enough to reliably generate - valid relay.yaml configs? What guardrails are needed? -4. Does the YAML extraction from meta-agent output handle cases where the - agent outputs multiple code blocks? - -DB ADAPTERS: -5. Are the SQL schemas in PostgresWorkflowDb and SqliteWorkflowDb correct? - (JSON columns for config/stateSnapshot, TEXT for status, ISO timestamps) -6. Is the Redis adapter correctly handling concurrent updates? - (Multiple parallel steps updating the same run record — race condition risk) -7. Are optional peer dependencies correctly marked in package.json? -8. Do the adapters correctly handle NULL/missing optional fields? - -State fixes needed for the code reviewer to validate. -End with: PHASE_34_APPROVED - `, - dependsOn: ['p3-auto-builder', 'p4-db-adapters'], - retries: 1, - verification: { type: 'output_contains', value: 'PHASE_34_APPROVED' }, - }) - - .step('p34-code-review', { - agent: 'code-reviewer', - task: ` -Phase 3+4 Independent Code Review. - -Lead's notes: -{{steps.p34-lead-review.output}} - -REVIEW packages/sdk/src/workflows/registry.ts, - packages/sdk/src/workflows/auto-builder.ts, - packages/sdk/src/workflows/db/: - -1. Registry: does resolvePath correctly return file paths unchanged and look up - by name for non-path strings? Is the registry JSON correctly pretty-printed? -2. AutoBuilder: is the meta-prompt templating safe from injection if the task - string contains special characters or YAML-like content? -3. PostgresWorkflowDb: are SQL queries parameterized? (No string interpolation - in SQL — security requirement) -4. SqliteWorkflowDb: does the sync API wrap correctly without blocking the event - loop for extended periods? -5. RedisWorkflowDb: is the JSON serialization of WorkflowRunRow round-trip safe? - (dates become strings, Record stays correct) -6. All three adapters: does getStepsByRunId return steps in consistent order? -7. Run npx tsc --noEmit and report results - -List issues with specific file + line references. -End with: CODE_REVIEW_34_COMPLETE - `, - dependsOn: ['p34-lead-review'], - retries: 1, - verification: { type: 'output_contains', value: 'CODE_REVIEW_34_COMPLETE' }, - }) - - // ── Phase 5: Deployment and Observability ───────────────────────────────── - - .step('p5-serve-command', { - agent: 'deploy-implementer', - task: ` -Phase 5a: Implement "relay workflow serve" HTTP server. - -Phase 3+4 review context: -{{steps.p34-code-review.output}} - -YOUR TASK: Create packages/sdk/src/workflows/server.ts - -Implement a lightweight HTTP server (use Node.js built-in http module — no -Express/Fastify dependency) that exposes: - - POST /run - Body: { workflowPath: string, vars?: VariableContext, sessionId?: string } - Response: { runId: string, status: "started" } - Behavior: parse the relay.yaml at workflowPath, start execution (non-blocking), - return runId immediately - - GET /runs/:runId - Response: WorkflowRunRow | { error: "not found" } - - GET /runs/:runId/events - Response: text/event-stream (Server-Sent Events) - Each WorkflowEvent becomes a SSE data: {json}\n\n line - Connection stays open until run completes or client disconnects - - POST /runs/:runId/hitl/:stepName/respond - Body: { response: string, respondedBy?: string } - Behavior: write {summaryDir}/{runId}/hitl-{stepName}-response.json - Response: { ok: true } - - POST /runs/:runId/abort - Behavior: call runner.abort(runId) if supported - Response: { ok: true } - - GET /health - Response: { status: "ok", uptime: process.uptime() } - -Also create packages/sdk/src/workflows/serve.ts — the CLI entry point: - export async function serveWorkflows(options: { - port?: number; // default 3747 - host?: string; // default "0.0.0.0" - db?: WorkflowDb; - }): Promise - -Export serveWorkflows from ${INDEX_FILE}. - -The serve command will be integrated into the relay CLI in the next step. - -Run: npx tsc --noEmit -End with: SERVE_COMMAND_COMPLETE - `, - dependsOn: ['p34-code-review'], - retries: 2, - verification: { type: 'output_contains', value: 'SERVE_COMMAND_COMPLETE' }, - }) - - .step('p5-otel-tracing', { - agent: 'deploy-implementer', - task: ` -Phase 5b: Implement OpenTelemetry tracing integration. -(Running in parallel with p5-serve-command — these are independent.) - -Phase 3+4 review context: -{{steps.p34-code-review.output}} - -YOUR TASK: Add optional OTel tracing to WorkflowRunner. - -1. Add otel config to RelayYamlConfig in types.ts (if not already present): - telemetry?: { - otel?: boolean; // enable OTel tracing - endpoint?: string; // OTLP endpoint, default "http://localhost:4318" - serviceName?: string; // default "relay-workflows" - exportTo?: "otlp" | "console" | "none"; // default "otlp" - } - -2. Create packages/sdk/src/workflows/tracing.ts: - - export interface TracingOptions { - enabled: boolean; - endpoint?: string; - serviceName?: string; - exportTo?: "otlp" | "console" | "none"; - } - - export class WorkflowTracer { - constructor(options: TracingOptions) {} - - // Create root span for workflow run - startRun(runId: string, workflowName: string, pattern: string): Span - - // Create child span for a step - startStep(parentSpan: Span, stepName: string, agentName: string): Span - - // Record events on spans - recordEvent(span: Span, event: WorkflowEvent): void - - // End spans - endSpan(span: Span, status: "ok" | "error", error?: string): void - - // Shutdown exporter cleanly - async shutdown(): Promise - } - -Use @opentelemetry/sdk-node and @opentelemetry/api (add as optional peer deps -with peerDependenciesMeta optional: true in package.json). - -Guard all OTel imports with a try/catch or dynamic import so the runner works -without OTel installed: - let tracer: WorkflowTracer | null = null; - try { - const { WorkflowTracer } = await import('./tracing.js'); - tracer = new WorkflowTracer(config.telemetry?.otel ? { enabled: true, ...config.telemetry } : { enabled: false }); - } catch { - // OTel packages not installed — tracing disabled - } - -3. In WorkflowRunner.execute(): - - If tracer enabled: create root run span - - In executeStep(): create child step span, record start/complete/fail events - - On run complete: end root span - -Export WorkflowTracer from ${INDEX_FILE}. - -Run: npx tsc --noEmit -End with: OTEL_TRACING_COMPLETE - `, - dependsOn: ['p34-code-review'], - retries: 2, - verification: { type: 'output_contains', value: 'OTEL_TRACING_COMPLETE' }, - }) - - .step('p5-cli-improvements', { - agent: 'deploy-implementer', - task: ` -Phase 5c: Implement CLI improvements. - -OTel tracing implementation: -{{steps.p5-otel-tracing.output}} - -YOUR TASK: Find the CLI entry point for the relay workflow commands and add: - -First, locate the CLI code (likely in packages/sdk/src/workflows/cli.ts): - cat packages/sdk/src/workflows/cli.ts - -Add these subcommands to the workflow CLI: - -1. relay workflow dry-run [--var KEY=VALUE...] - - Parse and validate the YAML (run validateConfig) - - Resolve variable templates (show substituted values) - - Show the resolved DAG: step names, dependencies, agent assignments - - Show which steps can run in parallel - - Do NOT actually execute — print "DRY RUN: would execute N steps across M agents" - -2. relay workflow inspect - - Show full config parsed and pretty-printed as JSON - - Show detected swarm pattern with reason - - Show agent topology (edges from SwarmCoordinator) - - Show barrier definitions - - Show coordination config - -3. relay workflow replay --from [--db-path ] - - Load existing run record from DB (requires SqliteWorkflowDb) - - Skip steps that completed successfully before the target step - - Re-execute from the specified step name onwards - - Useful for resuming failed runs without restarting from scratch - -4. relay workflow serve [--port ] [--db sqlite:|postgres:|redis:] - - Starts the HTTP server from p5-serve-command - - Accepts DB connection string via --db flag, parses the scheme prefix - -Also update the README at packages/sdk/src/workflows/README.md to document -all new CLI commands with usage examples. - -Run: npx tsc --noEmit -End with: CLI_IMPROVEMENTS_COMPLETE - `, - dependsOn: ['p5-serve-command', 'p5-otel-tracing'], - retries: 2, - verification: { type: 'output_contains', value: 'CLI_IMPROVEMENTS_COMPLETE' }, - }) - - .step('p5-lead-review', { - agent: 'lead', - task: ` -Phase 5 Lead Review: Deployment and observability. - -Serve command: -{{steps.p5-serve-command.output}} - -OTel tracing: -{{steps.p5-otel-tracing.output}} - -CLI improvements: -{{steps.p5-cli-improvements.output}} - -REVIEW: -1. HTTP server: is SSE implemented correctly? (headers: Content-Type: text/event-stream, - Cache-Control: no-cache, Connection: keep-alive; proper flushing with res.write) -2. HTTP server: is the HITL respond endpoint correctly writing the response file - to the same path the runner is polling? -3. OTel: is the dynamic import guard (try/catch) robust? Will it work in CJS builds? -4. OTel: are span hierarchies correct — run span is parent, step spans are children? -5. CLI dry-run: does it show enough information to be useful for debugging? -6. CLI replay: what happens if a replay step depends on a step that was NOT - completed in the prior run? (Should fail with clear error message.) -7. Is the README updated with accurate examples? - -Note critical fixes. End with: PHASE_5_APPROVED - `, - dependsOn: ['p5-cli-improvements'], - retries: 1, - verification: { type: 'output_contains', value: 'PHASE_5_APPROVED' }, - }) - - .step('p5-code-review', { - agent: 'code-reviewer', - task: ` -Phase 5 Independent Code Review: Deployment and observability. -(Running in parallel with lead review — review the code independently.) - -CLI improvements: -{{steps.p5-cli-improvements.output}} - -REVIEW packages/sdk/src/workflows/server.ts, - packages/sdk/src/workflows/tracing.ts, - packages/sdk/src/workflows/cli.ts: - -1. HTTP server: no prototype pollution risk in request body parsing? - (Validate Content-Type, parse JSON safely with try/catch) -2. SSE endpoint: does it correctly handle client disconnect without leaving - zombie event listeners on the WorkflowRunner? -3. HITL respond: path traversal risk? (runId and stepName used in file path — - must sanitize to alphanumeric + hyphen only) -4. OTel: does shutdown() await the exporter flush before process.exit? -5. CLI replay: is the step-skip logic correct? (A skipped-previously-completed - step should return its stored output for downstream template resolution) -6. Run npx jest packages/sdk and report pass/fail counts -7. Run npx tsc --noEmit and report results - -End with: CODE_REVIEW_5_COMPLETE - `, - dependsOn: ['p5-cli-improvements'], - retries: 1, - verification: { type: 'output_contains', value: 'CODE_REVIEW_5_COMPLETE' }, - }) - - // ── Phase 6: Integration Validation + Final Sign-Off ───────────────────── - - .step('integration-validation', { - agent: 'test-validator', - task: ` -Phase 6: Integration validation across all five tiers. - -Phase 5 review: -{{steps.p5-code-review.output}} - -YOUR TASK: Run comprehensive validation of the full implementation. - -1. TYPE CHECK: - cd packages/sdk && npx tsc --noEmit - Report: zero errors or list all errors - -2. EXISTING TESTS: - npx jest packages/sdk/src/__tests__/ - Report: pass count, fail count, any failures - -3. BUILD: - cd packages/sdk && npm run build - Report: success or errors - -4. INTEGRATION SMOKE TESTS — run each of these and report output: - - a. Condition step test: - Create a temporary relay.yaml with a condition step that checks if - "SKIP" is in a prior step's output, run it via the WorkflowRunner - in TypeScript (programmatic, not CLI), verify the step is skipped. - - b. Loop step test: - Create a temporary relay.yaml with a loop that runs max 3 iterations, - verify it runs exactly 3 times when until-condition is never met. - - c. Input validation test: - Create a RelayYamlConfig with inputSchema requiring a "task" field, - call runner.execute() without "task" in vars, verify WorkflowValidationError - is thrown. - - d. Flow shorthand test: - Parse the flow string "planner -> developer, reviewer -> lead" and verify - that developer.dependsOn = ["planner"], reviewer.dependsOn = ["planner"], - lead.dependsOn = ["developer", "reviewer"]. - - e. SqliteWorkflowDb test: - Create an in-memory SQLite DB, insert a run, update it, retrieve it, - verify round-trip fidelity of all fields. - -5. EXPORTS CHECK: - Verify that the following are exported from packages/sdk/src/workflows/index.ts: - - WorkflowBuilder, workflow - - WorkflowRunner, WorkflowRunnerOptions, WorkflowEvent, WorkflowEventListener - - WorkflowValidationError - - AutoWorkflowBuilder, autoWorkflow - - WorkflowRegistry - - SqliteWorkflowDb, PostgresWorkflowDb, RedisWorkflowDb - - WorkflowTracer - - All new types: AnyWorkflowStep, LoopStepGroup, RouterStepGroup, - ParallelStepGroup, HitlStep, SubWorkflowStep, SessionConfig - -Report full results. End with: INTEGRATION_VALIDATED - `, - dependsOn: ['p5-code-review', 'p5-lead-review'], - retries: 2, - verification: { type: 'output_contains', value: 'INTEGRATION_VALIDATED' }, - }) - - .step('final-lead-review', { - agent: 'lead', - task: ` -FINAL LEAD REVIEW: Complete broker-sdk workflow superiority implementation. - -Integration validation results: -{{steps.integration-validation.output}} - -This is the culmination of a five-phase implementation campaign. Your job: - -1. CAPABILITY AUDIT — verify we now have all of these (check integration results): - □ condition step type (CEL-based conditional execution) - □ loop step type (iterative with until-condition) - □ router step type (runtime branch selection) - □ parallel step group (explicit with any/majority barriers) - □ HITL step type (human-in-the-loop with file-based pause/resume) - □ sub-workflow step (workflow composition) - □ flow shorthand (string notation "A -> B, C") - □ session concept (multi-run state sharing) - □ input schema validation (JSON Schema via Ajv) - □ fallback agent switching - □ AutoWorkflowBuilder (LLM-generated workflows) - □ WorkflowRegistry (name-to-path resolution) - □ PostgresWorkflowDb, SqliteWorkflowDb, RedisWorkflowDb - □ HTTP serve command with SSE event streaming - □ OTel tracing (optional, dynamic import) - □ CLI: dry-run, inspect, replay, serve - □ 30+ WorkflowEvent types - □ All exports correct from index.ts - -2. COMPETITIVE POSITION — confirm we now surpass: - AGNO: We have everything Agno has (condition=Condition, loop=Loop, - router=Router, session, input validation, serve, OTel) PLUS barriers, - consensus, HITL, polyglot backends, YAML portability, sub-workflow composition. - - SWARMS: We have equivalent patterns PLUS out-of-process PTY isolation, - YAML portability, true relay protocol, HITL, OTel, sub-workflow composition, - HTTP serve. Swarms has more built-in swarm types (15+ vs our 10) but we - cover all critical execution patterns with richer composition primitives. - -3. REMAINING GAPS (if any) — list anything that was not fully implemented - and should be tracked as future work. - -4. DOCUMENTATION — confirm README.md in the workflows directory is updated - with examples for all new features. - -Produce a final capability report with the checklist above filled in. -End with: IMPLEMENTATION_COMPLETE - `, - dependsOn: ['integration-validation'], - retries: 1, - verification: { type: 'output_contains', value: 'IMPLEMENTATION_COMPLETE' }, - }) - - .onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }) - - .run({ - onEvent, - vars: { - // Override these at runtime if needed: - // workflowRoot: 'packages/sdk/src/workflows', - }, - }); - -console.log('\n── Run complete ─────────────────────────────────────────────────'); -console.log(`status: ${result.status}`); -console.log(`runId: ${result.id}`); -console.log(`workflow: ${result.workflowName}`); -console.log(`pattern: ${result.pattern}`); -console.log(`started: ${result.startedAt}`); -console.log(`completed: ${result.completedAt ?? '—'}`); -if (result.error) { - console.error(`error: ${result.error}`); -} diff --git a/packages/sdk/src/examples/workflows/README.md b/packages/sdk/src/examples/workflows/README.md deleted file mode 100644 index 199ec8408..000000000 --- a/packages/sdk/src/examples/workflows/README.md +++ /dev/null @@ -1,156 +0,0 @@ -# Super-Powered Ralph Loops - -These workflows are the agent-relay take on the [Ralph Wiggum technique](https://github.com/mikeyobrien/ralph-orchestrator) — autonomous AI agents looping on a codebase until work is done. Where vanilla ralph runs one agent at a time, these workflows run **squads of specialists** with real quality gates, multi-model assignment, and observable coordination. - -## What makes these different - -| Vanilla Ralph | agent-relay Ralph | -| ------------------------------ | ------------------------------------------------------------------- | -| 1 agent per loop | 2–10 agents per loop | -| Single model | Multi-model (opus plans, sonnet leads, codex builds, gemini audits) | -| Agent checks its own work | Independent reviewer with no confirmation bias | -| Polling for completion signals | DAG-based wave scheduling with verification gates | -| No parallelism | Workers implement features simultaneously | -| One retry strategy | Per-step `maxIterations` + global `errorHandling` | -| No observability | Trajectories, Relaycast channels, step output chaining | - ---- - -## `ralph-tdd.yaml` — Test-Driven Loop - -**Best for:** Projects with an existing test framework where correctness is the priority. - -``` -architect writes failing tests - ↓ -builder implements to pass them (maxIterations: 3) - ↓ -npm test / pytest / go test ←── deterministic gate - ↓ -arch-review ──┐ - ├── both must PASS (consensus: unanimous) -sec-review ──┘ - ↓ -address feedback (maxIterations: 2) - ↓ -commit + record learnings → next story -``` - -**Key agents:** 3 (architect/sonnet, builder/codex, reviewer/opus) -**Key feature:** Tests are written BEFORE implementation. Reviewer is separate from architect — no confirmation bias. - -```bash -PRD_PATH=my-prd.json QUALITY_CMD="npm test" \ - node packages/sdk/dist/workflows/cli.js ralph-tdd.yaml -``` - ---- - -## `ralph-swarm.yaml` — Parallel Implementation Squad - -**Best for:** Large backlogs where work can be decomposed into independent tasks. - -``` -tech-lead decomposes PRD into 5 atomic tasks - ↓ -worker-1 ──┐ -worker-2 │ -worker-3 ├── all 5 implement in parallel -worker-4 │ -worker-5 ──┘ - ↓ -npm test / tsc / lint ←── gate - ↓ -fix-failures (if gate red) - ↓ -review-correctness ──┐ -review-architecture ├── all 3 must PASS -review-security ──┘ - ↓ -consensus → address rework → commit → loop -``` - -**Key agents:** 10 (opus tech-lead, 5 codex workers, 2 claude reviewers, gemini security auditor) -**Key feature:** 5x parallelism. Three independent reviewers. Gemini on security catches what Claude misses. - -```bash -PRD_PATH=my-prd.json \ - node packages/sdk/dist/workflows/cli.js ralph-swarm.yaml -``` - ---- - -## `ralph-overnight.yaml` — 24-Hour Autonomous Session - -**Best for:** Long sessions where you want to drop a repo before bed and wake up to PRs. - -``` -product-manager reads backlog, prioritizes, assigns to squads - ↓ -tech-lead plans architecture, creates feature branch - ↓ -squad-alpha (lead + 2 codex builders) ──┐ - ├── parallel on different files -squad-beta (lead + 2 codex builders) ──┘ - ↓ -full CI gate (lint + tsc + npm test) - ↓ -qa-engineer writes integration tests ──┐ - ├── parallel -security-auditor (gemini) audits ──┘ - ↓ -tech-lead reviews → fix if needed - ↓ -git push → gh pr create (draft) - ↓ -PM writes session-log.txt morning summary -→ mark stories complete → next batch -``` - -**Key agents:** 10 (PM/opus, tech-lead/sonnet, 2 squad leads/sonnet, 4 codex builders, QA/sonnet, security/gemini-pro) -**Key feature:** True hierarchical org. PM → Tech Lead → Squads. Morning report written to `session-log.txt`. Runs safely overnight with `errorHandling: continue` (one story failure doesn't kill the session). - -```bash -BACKLOG_PATH=my-backlog.json SESSION_LOG=session-log.txt \ - node packages/sdk/dist/workflows/cli.js ralph-overnight.yaml -``` - ---- - -## Running any of these - -```bash -# Preview the execution plan without spending tokens -DRY_RUN=1 node packages/sdk/dist/workflows/cli.js ralph-tdd.yaml - -# Run for real -node packages/sdk/dist/workflows/cli.js ralph-tdd.yaml - -# Watch Relaycast for live agent communication -# → open https://relaycast.dev and join the workflow's channel -``` - -## PRD / Backlog format - -These workflows expect a JSON file with your stories: - -```json -{ - "branchName": "feature/my-project", - "userStories": [ - { - "id": "US-001", - "title": "User can log in with email", - "description": "Implement email/password authentication", - "acceptanceCriteria": [ - "POST /auth/login returns 200 with JWT on valid credentials", - "Returns 401 on invalid credentials", - "Passwords are bcrypt hashed" - ], - "passes": false - } - ] -} -``` - -For `ralph-overnight.yaml`, use `backlog.json` with `priority` and `effort` fields. diff --git a/packages/sdk/src/examples/workflows/fix-dashboard-user-registration.yaml b/packages/sdk/src/examples/workflows/fix-dashboard-user-registration.yaml deleted file mode 100644 index e79be11bd..000000000 --- a/packages/sdk/src/examples/workflows/fix-dashboard-user-registration.yaml +++ /dev/null @@ -1,182 +0,0 @@ -version: '1.0' -name: fix-dashboard-user-registration -description: > - Fixes the dashboard agent registration so the user registers as the current - system user (whoami / os.userInfo().username) instead of the cwd directory name. - - Currently, both relaycast-provider.ts and proxy-server.ts in relay-dashboard - derive projectIdentity from path.basename(path.resolve(dataDir, '..')) — which - gives the working directory name. This workflow changes both to use - os.userInfo().username so the registered name reflects the actual user. - -swarm: - pattern: pipeline - channel: wf-fix-dashboard-user-reg - maxConcurrency: 3 - timeoutMs: 1200000 # 20 min - -agents: - - name: implementer - cli: claude - preset: worker - role: 'Makes targeted edits to two dashboard-server files to use os.userInfo().username instead of path.basename(cwd).' - constraints: - model: sonnet - - - name: reviewer - cli: claude - preset: reviewer - role: 'Reviews the diff to confirm correctness and no unintended side effects.' - constraints: - model: sonnet - -workflows: - - name: default - onError: fail-fast - - steps: - # ── Phase 1: Capture current code for context injection ────────────────── - - - name: read-relaycast-provider - type: deterministic - command: > - sed -n '1,15p' ../relay-dashboard/packages/dashboard-server/src/relaycast-provider.ts && - echo "---" && - sed -n '80,100p' ../relay-dashboard/packages/dashboard-server/src/relaycast-provider.ts - captureOutput: true - failOnError: true - - - name: read-proxy-server - type: deterministic - command: > - grep -n "^import" ../relay-dashboard/packages/dashboard-server/src/proxy-server.ts | head -20 && - echo "---" && - sed -n '205,220p' ../relay-dashboard/packages/dashboard-server/src/proxy-server.ts - captureOutput: true - failOnError: true - - # ── Phase 2: Implement the fix ─────────────────────────────────────────── - - - name: implement - type: agent - agent: implementer - dependsOn: [read-relaycast-provider, read-proxy-server] - task: | - Fix two files in the relay-dashboard package so the dashboard user registers - with the system username instead of the cwd directory name. - - ── File 1: ../relay-dashboard/packages/dashboard-server/src/relaycast-provider.ts ── - - Current imports and loadRelaycastConfig function: - {{steps.read-relaycast-provider.output}} - - Changes required: - 1. Add `import os from 'node:os';` after the existing `import path from 'path';` line. - 2. In the `loadRelaycastConfig` function, replace: - const projectDir = path.basename(path.resolve(dataDir, '..')); - with: - const projectIdentity = os.userInfo().username; - 3. Remove the line `const projectIdentity = projectDir.trim();` (it's now set above). - 4. The return statement `return { apiKey: envApiKey, baseUrl, projectIdentity };` stays the same. - - ── File 2: ../relay-dashboard/packages/dashboard-server/src/proxy-server.ts ── - - Current imports and resolveRelaycastConfig function: - {{steps.read-proxy-server.output}} - - Changes required: - 1. Add `import os from 'node:os';` near the top of the file with the other imports. - 2. In the `resolveRelaycastConfig` function, replace: - const projectDir = path.basename(path.resolve(dataDir, '..')); - with: - const projectIdentity = os.userInfo().username; - 3. Update the `applyCachedAgentIdentity` call: change `projectIdentity: projectDir` - to `projectIdentity: projectIdentity` (variable is already named correctly). - - IMPORTANT: - - Write both files to disk using your file-writing tools. - - Use `import os from 'node:os'` (Node.js built-in with node: prefix). - - Do not change anything else in either file. - - Preserve all existing imports, logic, and formatting. - verification: - type: exit_code - - # ── Phase 3: Verify the changes look correct ───────────────────────────── - - - name: verify-changes - type: deterministic - dependsOn: [implement] - command: | - echo "=== relaycast-provider.ts: os import ===" - grep -n "import os\|node:os" ../relay-dashboard/packages/dashboard-server/src/relaycast-provider.ts || echo "MISSING: os import" - - echo "" - echo "=== relaycast-provider.ts: loadRelaycastConfig ===" - sed -n '85,96p' ../relay-dashboard/packages/dashboard-server/src/relaycast-provider.ts - - echo "" - echo "=== proxy-server.ts: os import ===" - grep -n "import os\|node:os" ../relay-dashboard/packages/dashboard-server/src/proxy-server.ts || echo "MISSING: os import" - - echo "" - echo "=== proxy-server.ts: resolveRelaycastConfig ===" - grep -n -A8 "const resolveRelaycastConfig" ../relay-dashboard/packages/dashboard-server/src/proxy-server.ts | head -15 - - echo "" - echo "=== Confirm no remaining path.basename(.*dataDir) calls ===" - grep -n "path.basename.*dataDir\|basename.*resolve.*dataDir" \ - ../relay-dashboard/packages/dashboard-server/src/relaycast-provider.ts \ - ../relay-dashboard/packages/dashboard-server/src/proxy-server.ts && echo "WARN: stale cwd reference found" || echo "OK: no stale cwd references" - captureOutput: true - failOnError: false - - # ── Phase 4: Capture diff for review ───────────────────────────────────── - - - name: capture-diff - type: deterministic - dependsOn: [verify-changes] - command: > - cd ../relay-dashboard && - git diff packages/dashboard-server/src/relaycast-provider.ts - packages/dashboard-server/src/proxy-server.ts - captureOutput: true - failOnError: false - - # ── Phase 5: Code review ────────────────────────────────────────────────── - - - name: review - type: agent - agent: reviewer - dependsOn: [capture-diff, verify-changes] - task: | - Review this change to the relay-dashboard dashboard-server package. - - The goal: dashboard user now registers with os.userInfo().username (the system user - running the process) instead of path.basename(cwd) (the working directory name). - - Verification output: - {{steps.verify-changes.output}} - - Diff: - {{steps.capture-diff.output}} - - Review checklist: - 1. Both files now import `os` from `'node:os'` — correct prefix? - 2. `os.userInfo().username` is used in both `loadRelaycastConfig` and - `resolveRelaycastConfig` — no leftover `path.basename` / `projectDir` references? - 3. The `projectIdentity` variable name is preserved in both call sites? - 4. No unintended whitespace, import order, or formatting changes? - 5. The `path` import in relaycast-provider.ts is still present if used elsewhere in the file? - -errorHandling: - strategy: fail-fast - maxRetries: 1 - retryDelayMs: 5000 - -state: - backend: memory - ttlMs: 3600000 # 1 hour - -trajectories: - enabled: true - autoDecisions: true diff --git a/packages/sdk/src/examples/workflows/ralph-overnight.yaml b/packages/sdk/src/examples/workflows/ralph-overnight.yaml deleted file mode 100644 index 2ed6124c1..000000000 --- a/packages/sdk/src/examples/workflows/ralph-overnight.yaml +++ /dev/null @@ -1,421 +0,0 @@ -version: '1.0' -name: ralph-overnight -description: > - Autonomous 24-hour dev session. A product manager reads the backlog, a tech - lead plans the architecture, parallel squads implement features, a CI runner - gates every batch, and the cycle repeats until the backlog is empty or - morning arrives. Drop it on a repo before bed, wake up to merged features. - - What this showcases vs vanilla ralph: - - Hierarchical orchestration: PM → Tech Lead → Implementation Squads - - Mixed CLI team: claude thinks, codex builds, gemini audits - - Integrated CI: actual git branch, PR creation, CI status polling - - Progressive learning: each loop iteration reads prior session notes - - Human checkpoint: if stuck 3x on same story, posts a question to Slack - (or a file) and pauses for human input before continuing - - Session summary written at the end for morning review - -swarm: - pattern: hierarchical - maxConcurrency: 5 - timeoutMs: 86400000 # 24 hours — true overnight session - channel: ralph-overnight - idleNudge: - nudgeAfterMs: 600000 # 10 min — these agents think deeply - escalateAfterMs: 600000 - maxNudges: 3 - -agents: - # ── Strategic layer ──────────────────────────────────────────────────────── - - - name: product-manager - cli: claude - channels: [ralph-overnight, strategy] - role: > - Reads the backlog, prioritizes stories by impact/effort, plans the - session's scope, and writes the morning summary report. - constraints: - model: opus - - - name: tech-lead - cli: claude - channels: [ralph-overnight, engineering] - role: > - Translates PM priorities into technical tasks, resolves architectural - decisions, monitors squad progress, unblocks stuck agents, and gates PRs. - constraints: - model: sonnet - - # ── Implementation squads ────────────────────────────────────────────────── - - - name: squad-alpha-lead - cli: claude - channels: [squad-alpha, ralph-overnight] - role: 'Leads squad alpha. Coordinates two codex builders on #squad-alpha.' - constraints: - model: sonnet - - - name: squad-alpha-builder-1 - cli: codex - role: 'Implements BUILDER_1_SPEC from squad-alpha output via {{steps.squad-alpha.output}}.' - interactive: false - constraints: - model: gpt-5.3-codex - - - name: squad-alpha-builder-2 - cli: codex - role: 'Implements BUILDER_2_SPEC from squad-alpha output via {{steps.squad-alpha.output}}.' - interactive: false - constraints: - model: gpt-5.3-codex-spark - - - name: squad-beta-lead - cli: claude - channels: [squad-beta, ralph-overnight] - role: 'Leads squad beta. Coordinates two codex builders on #squad-beta.' - constraints: - model: sonnet - - - name: squad-beta-builder-1 - cli: codex - role: 'Implements BUILDER_1_SPEC from squad-beta output via {{steps.squad-beta.output}}.' - interactive: false - constraints: - model: gpt-5.3-codex - - - name: squad-beta-builder-2 - cli: codex - role: 'Implements BUILDER_2_SPEC from squad-beta output via {{steps.squad-beta.output}}.' - interactive: false - constraints: - model: gpt-5.3-codex-spark - - # ── Quality & audit ──────────────────────────────────────────────────────── - - - name: qa-engineer - cli: claude - channels: [ralph-overnight] - role: 'Writes integration tests for completed features. Runs the test suite.' - interactive: false - constraints: - model: sonnet - - - name: security-auditor - cli: gemini - channels: [ralph-overnight] - role: 'Audits all changes for security issues before PR creation.' - interactive: false - constraints: - model: gemini-2.5-pro - -workflows: - - name: overnight-session - description: > - PM planning → tech architecture → parallel squads (alpha+beta) → - QA + security audit → CI gate → PR → advance backlog → loop. - onError: continue # don't abort entire session on one story failure - preflight: - - command: test -f "${BACKLOG_PATH:-backlog.json}" - description: 'Backlog file exists' - - command: git status --porcelain - failIf: non-empty - description: 'Clean working directory for overnight session' - - command: git rev-parse --abbrev-ref HEAD - description: 'Confirm current branch' - captureOutput: true - - steps: - # ── Wave 1: PM reads backlog and plans the night's scope ───────────── - - - name: pm-planning - type: agent - agent: product-manager - task: | - Read ${BACKLOG_PATH:-backlog.json} and ${SESSION_LOG:-session-log.txt}. - - Select the highest-priority stories for a ~4-hour parallel session. - Assign to two squads working on DIFFERENT files (no conflicts): - Alpha squad: stories + file paths - Beta squad: stories + file paths - Include risk flags for any cross-squad dependencies. - - Post SESSION_PLAN to #ralph-overnight, then output: PLANNING_COMPLETE - verification: - type: output_contains - value: PLANNING_COMPLETE - retries: 1 - - # ── Wave 2: Tech lead creates feature branches + architecture notes ─── - - - name: tech-architecture - type: agent - agent: tech-lead - dependsOn: [pm-planning] - task: | - PM plan: {{steps.pm-planning.output}} - - 1. Create feature branch: git checkout -b overnight/$(date +%Y%m%d) - 2. For each story assigned to squads, write implementation notes: - - Which files to touch and why - - Any shared utilities to create first - - Test strategy (unit / integration / e2e) - 3. Flag any cross-squad dependencies that need sequencing - 4. Post architecture notes to #ralph-overnight - - Output: ARCHITECTURE_READY - verification: - type: output_contains - value: ARCHITECTURE_READY - - - name: create-branch - type: deterministic - dependsOn: [tech-architecture] - command: > - git checkout -b "overnight/$(date +%Y%m%d)" 2>/dev/null || - git checkout "overnight/$(date +%Y%m%d)" - captureOutput: true - - # ── Wave 3: Squad leads plan and produce builder specs ─────────────── - # Leads run first, produce self-contained task specs for each builder. - # Builders run in wave 4 after the lead completes, reading specs via - # {{steps.squad-alpha.output}} — no channel needed for non-interactive agents. - - - name: squad-alpha - type: agent - agent: squad-alpha-lead - dependsOn: [create-branch] - task: | - Lead squad alpha on #squad-alpha. - Architecture: {{steps.tech-architecture.output}} - PM plan (alpha): {{steps.pm-planning.output}} - - Split alpha stories into two self-contained tasks. For each, write - a complete implementation spec including: files to change, exact - changes needed, and the shell command to verify it works. - - Format your output as: - BUILDER_1_SPEC: - BUILDER_2_SPEC: - Output: ALPHA_SPECS_READY - verification: - type: output_contains - value: ALPHA_SPECS_READY - - - name: squad-beta - type: agent - agent: squad-beta-lead - dependsOn: [create-branch] - task: | - Lead squad beta on #squad-beta. - Architecture: {{steps.tech-architecture.output}} - PM plan (beta): {{steps.pm-planning.output}} - - Split beta stories into two self-contained tasks. Write complete - specs for each builder: files, changes, verification command. - - Format: - BUILDER_1_SPEC: - BUILDER_2_SPEC: - Output: BETA_SPECS_READY - verification: - type: output_contains - value: BETA_SPECS_READY - - # ── Wave 4: Builders implement from lead specs (non-interactive) ────── - # Each builder reads its spec from {{steps.squad-X.output}} — no channel - # needed. They run in parallel across both squads (same wave). - - - name: squad-alpha-b1 - type: agent - agent: squad-alpha-builder-1 - dependsOn: [squad-alpha] - task: | - Implement BUILDER_1_SPEC from the squad alpha lead output: - {{steps.squad-alpha.output}} - - Follow the spec exactly. Run the verification command to confirm. - Output: ALPHA_B1_DONE: - verification: - type: output_contains - value: ALPHA_B1_DONE - - - name: squad-alpha-b2 - type: agent - agent: squad-alpha-builder-2 - dependsOn: [squad-alpha] - task: | - Implement BUILDER_2_SPEC from the squad alpha lead output: - {{steps.squad-alpha.output}} - - Follow the spec exactly. Run the verification command to confirm. - Output: ALPHA_B2_DONE: - verification: - type: output_contains - value: ALPHA_B2_DONE - - - name: squad-beta-b1 - type: agent - agent: squad-beta-builder-1 - dependsOn: [squad-beta] - task: | - Implement BUILDER_1_SPEC from the squad beta lead output: - {{steps.squad-beta.output}} - - Follow the spec exactly. Run the verification command to confirm. - Output: BETA_B1_DONE: - verification: - type: output_contains - value: BETA_B1_DONE - - - name: squad-beta-b2 - type: agent - agent: squad-beta-builder-2 - dependsOn: [squad-beta] - task: | - Implement BUILDER_2_SPEC from the squad beta lead output: - {{steps.squad-beta.output}} - - Follow the spec exactly. Run the verification command to confirm. - Output: BETA_B2_DONE: - verification: - type: output_contains - value: BETA_B2_DONE - - # ── Wave 4: Full test suite gate ───────────────────────────────────── - - - name: ci-gate - type: deterministic - dependsOn: [squad-alpha-b1, squad-alpha-b2, squad-beta-b1, squad-beta-b2] - command: > - npm run lint 2>&1 | tail -5 && - npx tsc --noEmit 2>&1 | tail -5 && - npm test 2>&1 | tail -40 && - echo "CI_GREEN" || echo "CI_RED" - captureOutput: true - failOnError: false - - # ── Wave 5: QA writes integration tests + security audit (parallel) ── - - - name: qa-tests - type: agent - agent: qa-engineer - dependsOn: [ci-gate] - task: | - CI results: {{steps.ci-gate.output}} - Alpha built: {{steps.squad-alpha.output}} - Beta built: {{steps.squad-beta.output}} - - Write integration tests for the newly implemented features. - Focus on the happy path and the two most likely failure modes per feature. - Run the full test suite including your new tests. - - Output: QA_DONE: new tests, all green - verification: - type: output_contains - value: QA_DONE - - - name: security-audit - type: agent - agent: security-auditor - dependsOn: [ci-gate] - task: | - Security audit of all changes in this overnight session. - - Run: git diff main 2>/dev/null | head -1000 - - Check for: - - SQL/command injection vectors - - Authentication bypass risks - - Secrets or tokens in code - - Unvalidated user input reaching sensitive operations - - Insecure direct object references - - Output: SECURITY:CLEAN or SECURITY:ISSUES: - verification: - type: output_contains - value: 'SECURITY:' - - # ── Wave 6: Tech lead reviews and gates the PR ──────────────────────── - - - name: tech-review - type: agent - agent: tech-lead - dependsOn: [qa-tests, security-audit] - task: | - QA: {{steps.qa-tests.output}} - Security: {{steps.security-audit.output}} - CI: {{steps.ci-gate.output}} - Alpha: {{steps.squad-alpha.output}} - Beta: {{steps.squad-beta.output}} - - If security issues found: fix them first, then output TECH_REVIEW:PASS - If CI is red: identify root cause, apply fix, re-run test, output TECH_REVIEW:PASS - If all green: output TECH_REVIEW:PASS - - Do NOT output TECH_REVIEW:PASS with outstanding security issues. - verification: - type: output_contains - value: TECH_REVIEW:PASS - maxIterations: 2 - - # ── Wave 7: Commit, push, create PR ─────────────────────────────────── - - - name: push-and-pr - type: deterministic - dependsOn: [tech-review] - command: > - git add -A && - git commit -m "feat: overnight session $(date +%Y-%m-%d) — autonomous batch" && - git push -u origin HEAD 2>&1 && - gh pr create --title "Overnight session $(date +%Y-%m-%d)" \ - --body "Autonomous dev session. See session-log.txt for details." \ - --draft 2>&1 || - echo "PR_CREATED_OR_UPDATED" - captureOutput: true - failOnError: false - - # ── Wave 8: PM writes morning summary ──────────────────────────────── - - - name: morning-summary - type: agent - agent: product-manager - dependsOn: [push-and-pr] - task: | - Write the morning summary to ${SESSION_LOG:-session-log.txt}. - - Results: alpha={{steps.squad-alpha.output}} | beta={{steps.squad-beta.output}} - CI={{steps.ci-gate.output}} | QA={{steps.qa-tests.output}} - Security={{steps.security-audit.output}} | PR={{steps.push-and-pr.output}} - - Include: completed stories, CI/quality status, learnings, next priorities. - Mark completed stories done in ${BACKLOG_PATH:-backlog.json}. - - Output: SESSION_COMPLETE - verification: - type: output_contains - value: SESSION_COMPLETE - -coordination: - barriers: - - name: squads-complete - waitFor: [squad-alpha-b1, squad-alpha-b2, squad-beta-b1, squad-beta-b2] - timeoutMs: 7200000 - - name: quality-gates-complete - waitFor: [qa-tests, security-audit] - timeoutMs: 3600000 - -state: - backend: memory - ttlMs: 86400000 - -errorHandling: - strategy: continue # one story failing shouldn't abort the whole night - maxRetries: 1 - retryDelayMs: 10000 - notifyChannel: ralph-overnight - -trajectories: - enabled: true - reflectOnBarriers: true - reflectOnConverge: true - autoDecisions: true diff --git a/packages/sdk/src/examples/workflows/ralph-swarm.yaml b/packages/sdk/src/examples/workflows/ralph-swarm.yaml deleted file mode 100644 index 652744d67..000000000 --- a/packages/sdk/src/examples/workflows/ralph-swarm.yaml +++ /dev/null @@ -1,411 +0,0 @@ -version: '1.0' -name: ralph-swarm -description: > - Parallel Implementation Squad. A tech lead decomposes the PRD into atomic - tasks, five codex workers implement them simultaneously, three specialized - reviewers run in parallel, a consensus gate synthesizes their verdicts, and - the cycle repeats until the backlog is empty. - - What this showcases vs vanilla ralph: - - Fan-out to N parallel workers (not one at a time) - - Multi-model squad: codex workers + claude reviewers + gemini security - - Real consensus: all 3 reviewers must pass before merge - - Tech lead stays active throughout — course-corrects workers in real-time - - Barriers coordinate the fan-in before the next wave starts - - The whole backlog processed in one autonomous overnight session - -swarm: - pattern: dag - maxConcurrency: 5 - timeoutMs: 14400000 # 4 hours — big PRDs take time - channel: ralph-swarm - idleNudge: - nudgeAfterMs: 300000 - escalateAfterMs: 300000 - maxNudges: 2 - -agents: - # ── Leadership ───────────────────────────────────────────────────────────── - - - name: tech-lead - cli: claude - channels: [ralph-swarm, planning] - role: 'Decomposes PRD into atomic tasks, assigns to workers, reviews integration.' - constraints: - model: opus - - - name: integration-lead - cli: claude - channels: [ralph-swarm] - role: 'Integrates parallel worker output, resolves conflicts, gates the PR.' - constraints: - model: sonnet - - # ── Implementation workers ───────────────────────────────────────────────── - # Each gets one atomic task. interactive: false means one-shot, exits cleanly. - - - name: worker-1 - cli: codex - role: 'Implements assigned atomic task.' - interactive: false - constraints: - model: gpt-5.3-codex - - - name: worker-2 - cli: codex - role: 'Implements assigned atomic task.' - interactive: false - constraints: - model: gpt-5.3-codex - - - name: worker-3 - cli: codex - role: 'Implements assigned atomic task.' - interactive: false - constraints: - model: gpt-5.3-codex - - - name: worker-4 - cli: codex - role: 'Implements assigned atomic task.' - interactive: false - constraints: - model: gpt-5.3-codex-spark - - - name: worker-5 - cli: codex - role: 'Implements assigned atomic task.' - interactive: false - constraints: - model: gpt-5.3-codex-spark - - # ── Review squad ─────────────────────────────────────────────────────────── - - - name: reviewer-correctness - cli: claude - role: 'Reviews logic, tests, edge cases.' - interactive: false - constraints: - model: sonnet - - - name: reviewer-architecture - cli: claude - role: 'Reviews coupling, abstractions, patterns.' - interactive: false - constraints: - model: opus - - - name: reviewer-security - cli: gemini - role: 'Reviews attack surface, injection, secrets.' - interactive: false - constraints: - model: gemini-2.5-flash - -workflows: - - name: swarm-loop - description: > - Decompose → fan-out workers → deterministic test gate → parallel review - → consensus → integration → commit. Repeat until backlog empty. - onError: retry - preflight: - - command: test -f "${PRD_PATH:-prd.json}" - description: 'PRD file exists' - - command: git status --porcelain - failIf: non-empty - description: 'Clean working directory' - - steps: - # ── Wave 1: Decompose PRD into atomic tasks ─────────────────────────── - - - name: decompose - type: agent - agent: tech-lead - task: | - Read ${PRD_PATH:-prd.json} (incomplete stories) and - ${PROGRESS_PATH:-progress.txt} (learnings from previous batches). - - Decompose the next 5 stories into atomic, independent tasks. - Each task: one coding session, no shared files with other tasks, - a shell verify command to confirm it works. - - Output a JSON block: {"batch":[{"id":"task-N","title":"...", - "files":["src/..."],"prompt":"full impl instruction","verify":"cmd"}]} - - Post the decomposition to #ralph-swarm so all workers can read it. - Output: DECOMPOSED - verification: - type: output_contains - value: DECOMPOSED - retries: 1 - - # ── Wave 2: Five workers implement in parallel ──────────────────────── - # Each worker gets the full decomposition and self-assigns task N. - - - name: work-1 - type: agent - agent: worker-1 - dependsOn: [decompose] - task: | - Tech lead decomposition: {{steps.decompose.output}} - - Implement task-1 from the decomposition above. - Follow all existing code conventions in this repo. - Run the verification command from the task spec to confirm it works. - Output: TASK_1_DONE: - verification: - type: output_contains - value: TASK_1_DONE - - - name: work-2 - type: agent - agent: worker-2 - dependsOn: [decompose] - task: | - Tech lead decomposition: {{steps.decompose.output}} - - Implement task-2 from the decomposition above. - Follow all existing code conventions in this repo. - Run the verification command from the task spec to confirm it works. - Output: TASK_2_DONE: - verification: - type: output_contains - value: TASK_2_DONE - - - name: work-3 - type: agent - agent: worker-3 - dependsOn: [decompose] - task: | - Tech lead decomposition: {{steps.decompose.output}} - - Implement task-3 from the decomposition above. If fewer than 3 tasks - exist in the batch, output: TASK_3_DONE:skipped - Follow all existing code conventions in this repo. - Output: TASK_3_DONE: - verification: - type: output_contains - value: TASK_3_DONE - - - name: work-4 - type: agent - agent: worker-4 - dependsOn: [decompose] - task: | - Tech lead decomposition: {{steps.decompose.output}} - - Implement task-4 from the decomposition above. If fewer than 4 tasks - exist in the batch, output: TASK_4_DONE:skipped - Output: TASK_4_DONE: - verification: - type: output_contains - value: TASK_4_DONE - - - name: work-5 - type: agent - agent: worker-5 - dependsOn: [decompose] - task: | - Tech lead decomposition: {{steps.decompose.output}} - - Implement task-5 from the decomposition above. If fewer than 5 tasks - exist in the batch, output: TASK_5_DONE:skipped - Output: TASK_5_DONE: - verification: - type: output_contains - value: TASK_5_DONE - - # ── Wave 3: Quality gate — run the full test suite ─────────────────── - - - name: test-gate - type: deterministic - dependsOn: [work-1, work-2, work-3, work-4, work-5] - command: > - npm run lint 2>&1 | tail -10 && - npx tsc --noEmit 2>&1 | tail -10 && - npm test 2>&1 | tail -30 && - echo "GATE_PASSED" || echo "GATE_FAILED" - captureOutput: true - failOnError: false - - # ── Wave 4: Fix failures if gate failed ───────────────────────────── - - - name: fix-failures - type: agent - agent: tech-lead - dependsOn: [test-gate] - task: | - Test gate results: {{steps.test-gate.output}} - - Worker summaries: - - Task 1: {{steps.work-1.output}} - - Task 2: {{steps.work-2.output}} - - Task 3: {{steps.work-3.output}} - - Task 4: {{steps.work-4.output}} - - Task 5: {{steps.work-5.output}} - - If GATE_PASSED: output FIX_DONE:none - If GATE_FAILED: identify which worker's change broke the gate, - apply the minimal fix, re-run the failing tests, output FIX_DONE: - verification: - type: output_contains - value: FIX_DONE - maxIterations: 3 - retries: 1 - - # ── Wave 5: Capture diff for all three reviewers ───────────────────── - - - name: capture-diff - type: deterministic - dependsOn: [fix-failures] - command: git diff HEAD 2>/dev/null | head -500 - captureOutput: true - - # ── Wave 6: Three parallel reviewers ───────────────────────────────── - - - name: review-correctness - type: agent - agent: reviewer-correctness - dependsOn: [capture-diff] - task: | - Review this diff for logic correctness, test coverage, and edge cases. - - Worker summaries: {{steps.fix-failures.output}} - Diff: {{steps.capture-diff.output}} - - Be specific. Reference file:line for any issues. - Output: CORRECTNESS:PASS or CORRECTNESS:FAIL: - verification: - type: output_contains - value: 'CORRECTNESS:' - - - name: review-architecture - type: agent - agent: reviewer-architecture - dependsOn: [capture-diff] - task: | - Review this diff for architecture quality, coupling, and long-term maintainability. - - Worker summaries: {{steps.fix-failures.output}} - Diff: {{steps.capture-diff.output}} - - Output: ARCHITECTURE:PASS or ARCHITECTURE:FAIL: - verification: - type: output_contains - value: 'ARCHITECTURE:' - - - name: review-security - type: agent - agent: reviewer-security - dependsOn: [capture-diff] - task: | - Security review: check for injection, secrets, auth bypass, input validation. - - Diff: {{steps.capture-diff.output}} - - Output: SECURITY:PASS or SECURITY:FAIL: - verification: - type: output_contains - value: 'SECURITY:' - - # ── Wave 7: Consensus — all three must pass ────────────────────────── - - - name: consensus - type: agent - agent: integration-lead - dependsOn: [review-correctness, review-architecture, review-security] - task: | - Synthesize the three reviews and decide: merge or rework? - - Correctness: {{steps.review-correctness.output}} - Architecture: {{steps.review-architecture.output}} - Security: {{steps.review-security.output}} - - If ALL THREE are PASS: output CONSENSUS:MERGE - If ANY failed: output CONSENSUS:REWORK: - verification: - type: output_contains - value: 'CONSENSUS:' - - # ── Wave 8: Address rework if needed, then commit ──────────────────── - - - name: address-rework - type: agent - agent: tech-lead - dependsOn: [consensus] - task: | - Consensus verdict: {{steps.consensus.output}} - - If CONSENSUS:MERGE — output REWORK_DONE:none - If CONSENSUS:REWORK — fix all issues listed in the verdict. - Run tests again to confirm green, then output REWORK_DONE: - verification: - type: output_contains - value: REWORK_DONE - maxIterations: 2 - - - name: final-test - type: deterministic - dependsOn: [address-rework] - command: npm test 2>&1 | tail -20 && echo "FINAL_GREEN" || echo "FINAL_RED" - captureOutput: true - failOnError: false - - - name: commit-batch - type: deterministic - dependsOn: [final-test] - command: > - git add -A && - git commit -m "feat: parallel swarm batch — $(date +%Y%m%d-%H%M)" 2>/dev/null || - echo "nothing to commit" - - - name: record-and-advance - type: agent - agent: tech-lead - dependsOn: [commit-batch] - task: | - Final results: - - Test gate: {{steps.test-gate.output}} - - Consensus: {{steps.consensus.output}} - - Final: {{steps.final-test.output}} - - 1. Append to ${PROGRESS_PATH:-progress.txt}: - - Which tasks succeeded / failed this batch - - Patterns in the codebase worth remembering - - What caused rework (if any) - - 2. Mark completed stories as passed in ${PRD_PATH:-prd.json} - - 3. Check if any incomplete stories remain. If yes, output BATCH_DONE:more - If all done, output BATCH_DONE:complete - - Output: BATCH_DONE:... - verification: - type: output_contains - value: 'BATCH_DONE:' - -coordination: - barriers: - - name: workers-complete - waitFor: [work-1, work-2, work-3, work-4, work-5] - timeoutMs: 3600000 - - name: reviews-complete - waitFor: [review-correctness, review-architecture, review-security] - timeoutMs: 1800000 - consensusStrategy: unanimous - -state: - backend: memory - ttlMs: 86400000 - -errorHandling: - strategy: retry - maxRetries: 1 - retryDelayMs: 10000 - notifyChannel: ralph-swarm - -trajectories: - enabled: true - reflectOnBarriers: true - reflectOnConverge: true - autoDecisions: true diff --git a/packages/sdk/src/examples/workflows/ralph-tdd.yaml b/packages/sdk/src/examples/workflows/ralph-tdd.yaml deleted file mode 100644 index 5bb6ade64..000000000 --- a/packages/sdk/src/examples/workflows/ralph-tdd.yaml +++ /dev/null @@ -1,259 +0,0 @@ -version: '1.0' -name: ralph-tdd -description: > - Test-Driven Ralph Loop. An architect writes failing tests first, a builder - implements until they pass, a quality gate runs the real test suite, and - the loop repeats with review feedback until every story is green. - - What this showcases vs vanilla ralph: - - Tests are written BEFORE implementation (true TDD) - - Deterministic quality gate: actual `npm test` / `tsc` output chains into next step - - Parallel arch + security reviewer — fresh eyes, no confirmation bias - - maxIterations loop prevents infinite spinning - - Learnings from each round feed back into the next as {{steps.X.output}} - -swarm: - pattern: review-loop - maxConcurrency: 4 - timeoutMs: 7200000 - channel: ralph-tdd - idleNudge: - nudgeAfterMs: 300000 - escalateAfterMs: 300000 - maxNudges: 2 - -agents: - - name: architect - cli: claude - channels: [ralph-tdd] - role: "Writes failing tests, plans implementation, reviews the builder's output." - constraints: - model: sonnet - - - name: builder - cli: codex - role: "Implements code to make the architect's failing tests pass." - interactive: false - constraints: - model: gpt-5.3-codex - - - name: reviewer - cli: claude - channels: [ralph-tdd] - role: 'Independent code quality + security review. No confirmation bias.' - interactive: false - constraints: - model: opus - -workflows: - - name: tdd-loop - description: > - For each story in the PRD: write tests → implement → test gate → - parallel review → address feedback → loop until green. - onError: retry - preflight: - - command: test -f "${PRD_PATH:-prd.json}" - description: 'PRD file exists' - - command: git status --porcelain - failIf: non-empty - description: 'Clean working directory before starting' - - steps: - # ── Phase 1: Architect writes failing tests ────────────────────────── - - - name: write-tests - type: agent - agent: architect - task: | - Read the next unpassed story from ${PRD_PATH:-prd.json}. - Read previous learnings from ${PROGRESS_PATH:-progress.txt} if it exists. - - Write FAILING tests first (TDD). The tests must: - - Cover all acceptance criteria exactly - - Use the project's existing test framework (detect from package.json / pytest / go test) - - Fail when run right now (implementation doesn't exist yet) - - Have clear assertion messages so failures are informative - - After writing tests, run them to confirm they fail: - npm test 2>&1 || pytest 2>&1 || go test ./... 2>&1 - - Post the test file paths and what each test verifies to #ralph-tdd. - Output: TESTS_WRITTEN - verification: - type: output_contains - value: TESTS_WRITTEN - retries: 1 - - # ── Phase 2: Capture failing test output for builder context ───────── - - - name: capture-failures - type: deterministic - dependsOn: [write-tests] - command: npm test 2>&1 || pytest 2>&1 || go test ./... 2>&1 || echo "tests ran" - captureOutput: true - failOnError: false - - # ── Phase 3: Builder implements to make tests pass ─────────────────── - - - name: implement - type: agent - agent: builder - dependsOn: [capture-failures] - task: | - The architect has written failing tests. Your job: make them pass. - - Failing test output: - {{steps.capture-failures.output}} - - Implementation plan from #ralph-tdd: - {{steps.write-tests.output}} - - Rules: - - Do NOT modify the test files - - Implement only what's needed to pass the tests - - Follow existing code patterns and conventions in this repo - - Run the tests yourself to verify they pass before finishing - - Output: IMPLEMENTATION_COMPLETE - verification: - type: output_contains - value: IMPLEMENTATION_COMPLETE - retries: 2 - maxIterations: 3 - - # ── Phase 4: Quality gate — real test suite ────────────────────────── - - - name: test-gate - type: deterministic - dependsOn: [implement] - command: > - npm run lint 2>&1 | tail -5; - npm run type-check 2>&1 | tail -5; - npm test 2>&1 - captureOutput: true - failOnError: false - - # ── Phase 5: Parallel review (arch + independent reviewer) ─────────── - - - name: arch-review - type: agent - agent: architect - dependsOn: [test-gate] - task: | - Review the implementation for correctness, completeness, and test coverage. - - Test results: - {{steps.test-gate.output}} - - Check: - 1. All acceptance criteria met (read the story from prd.json) - 2. No test was skipped or marked pending - 3. Edge cases handled - 4. No obvious performance issues - - Output: ARCH_REVIEW:PASS if all good, ARCH_REVIEW:FAIL: if not. - verification: - type: output_contains - value: 'ARCH_REVIEW:' - - - name: sec-review - type: agent - agent: reviewer - dependsOn: [test-gate] - task: | - Security and code quality review of the implementation. - - Test results: {{steps.test-gate.output}} - - Check git diff for: - - Input validation and sanitization - - No hardcoded secrets or credentials - - Error handling doesn't leak internals - - No obvious injection vectors - - Output: SEC_REVIEW:PASS or SEC_REVIEW:FAIL: - verification: - type: output_contains - value: 'SEC_REVIEW:' - - # ── Phase 6: Address feedback and finalize ─────────────────────────── - - - name: address-feedback - type: agent - agent: builder - dependsOn: [arch-review, sec-review] - task: | - Address all review feedback, then run the full test suite. - - Architecture review: {{steps.arch-review.output}} - Security review: {{steps.sec-review.output}} - - If both are PASS, output: ALL_REVIEWS_PASSED - Otherwise fix the issues, run tests again to confirm still green, - then output: FEEDBACK_ADDRESSED - verification: - type: output_contains - value: 'ALL_REVIEWS_PASSED|FEEDBACK_ADDRESSED' - maxIterations: 2 - - # ── Phase 7: Final test run + commit ───────────────────────────────── - - - name: final-test - type: deterministic - dependsOn: [address-feedback] - command: npm test 2>&1 && echo "ALL_GREEN" || echo "STILL_FAILING" - captureOutput: true - failOnError: false - - - name: commit - type: deterministic - dependsOn: [final-test] - command: > - git add -A && - git commit -m "feat: implement story from prd (TDD)" 2>/dev/null || - echo "nothing to commit" - - - name: record-learnings - type: agent - agent: architect - dependsOn: [commit] - task: | - Record learnings for the next story iteration. - - Final test result: {{steps.final-test.output}} - Reviews: {{steps.arch-review.output}} | {{steps.sec-review.output}} - - Append to ${PROGRESS_PATH:-progress.txt}: - - What approach worked for this story - - Any patterns discovered in this codebase - - Gotchas or non-obvious things to remember - - Then mark this story as passed in ${PRD_PATH:-prd.json} - by setting its "passes" field to true. - - Output: DONE - verification: - type: output_contains - value: DONE - -coordination: - barriers: - - name: reviews-complete - waitFor: [arch-review, sec-review] - timeoutMs: 1800000 - consensusStrategy: unanimous - -state: - backend: memory - ttlMs: 86400000 - -errorHandling: - strategy: retry - maxRetries: 2 - retryDelayMs: 5000 - notifyChannel: ralph-tdd - -trajectories: - enabled: true - reflectOnBarriers: true - autoDecisions: true diff --git a/packages/sdk/src/examples/workflows/runner-idle-refactor.yaml b/packages/sdk/src/examples/workflows/runner-idle-refactor.yaml deleted file mode 100644 index ee3a65eaa..000000000 --- a/packages/sdk/src/examples/workflows/runner-idle-refactor.yaml +++ /dev/null @@ -1,306 +0,0 @@ -version: '1.0' -name: runner-idle-refactor -description: > - Refactors WorkflowRunner with two changes: - 1. Remove agent pre-registration (preflightAgents) — agents spawn only when their step runs. - 2. Idle = done — when no idleNudge config is set, race waitForExit vs waitForIdle; - if idle fires first the step completes immediately. - - Workflow: read context → implement → update tests → type-check → run tests - → fix if broken → final test run → review. - -swarm: - pattern: pipeline - maxConcurrency: 3 - timeoutMs: 1800000 # 30 min - channel: wf-runner-refactor - -# No idleNudge — each agent is spawned per step and exits when done. -# Non-interactive preset for all agents (pure code changes, no relay tools needed). - -agents: - - name: implementer - cli: codex - preset: worker - role: 'Makes the two targeted edits to packages/sdk/src/workflows/runner.ts.' - constraints: - model: gpt-5.3-codex - - - name: test-writer - cli: codex - preset: worker - role: 'Adds new test cases to idle-nudge.test.ts covering the idle=done behavior.' - constraints: - model: gpt-5.3-codex - - - name: fixer - cli: codex - preset: worker - role: 'Fixes TypeScript errors or failing tests found in the test run.' - constraints: - model: gpt-5.3-codex - - - name: reviewer - cli: claude - preset: reviewer - role: 'Reviews the diff for correctness, edge cases, and backwards compatibility.' - constraints: - model: sonnet - -workflows: - - name: default - onError: continue - - steps: - # ── Phase 1: Capture context for agents ─────────────────────────────── - - - name: read-prespawn-block - type: deterministic - command: > - grep -n "Pre-register all interactive\|preflightAgents\|Agent pre-registration" - packages/sdk/src/workflows/runner.ts | head -10 && - echo "---" && - awk '/Pre-register all interactive agent steps/,/Agent pre-registration complete/{print NR": "$0}' - packages/sdk/src/workflows/runner.ts - captureOutput: true - failOnError: false - - - name: read-spawn-comment - type: deterministic - command: > - grep -n "cache.*hit\|preflightAgents\|token cache" - packages/sdk/src/workflows/runner.ts | head -10 - captureOutput: true - failOnError: false - - - name: read-idle-method - type: deterministic - command: > - awk '/private async waitForExitWithIdleNudging/,/^ \}$/{print NR": "$0}' - packages/sdk/src/workflows/runner.ts | head -80 - captureOutput: true - failOnError: false - - - name: read-test-file - type: deterministic - dependsOn: [read-prespawn-block] - command: cat packages/sdk/src/__tests__/idle-nudge.test.ts - captureOutput: true - failOnError: false - - # ── Phase 2: Implement runner.ts changes ────────────────────────────── - - - name: implement - type: agent - agent: implementer - dependsOn: [read-prespawn-block, read-spawn-comment, read-idle-method] - task: | - Make exactly two changes to `packages/sdk/src/workflows/runner.ts`: - - ── Change 1: Remove agent pre-registration block ── - - Find the block that starts with this comment and delete it entirely: - // Pre-register all interactive agent steps with Relaycast before execution. - - The block ends with: - this.log('Agent pre-registration complete'); - followed by the closing `}` of the if statement. - - Here is what the block looks like (with line numbers for reference): - {{steps.read-prespawn-block.output}} - - Also find and update the stale comment inside `spawnAndWait` that says something - about "token cache" / "cache hits" / "preflightAgents" — those references are now - stale. Keep just: "Deterministic name: step name + first 8 chars of run ID." - - Stale comment location: - {{steps.read-spawn-comment.output}} - - ── Change 2: Idle = done in waitForExitWithIdleNudging ── - - Find the `waitForExitWithIdleNudging` method. In the branch where `nudgeConfig` is - absent, replace the simple `return agent.waitForExit(timeoutMs)` with a race between - `waitForExit` and `waitForIdle`. If idle wins, release the agent and return 'released'. - - Current method (with line numbers): - {{steps.read-idle-method.output}} - - Replace the no-nudge-config branch with: - ```typescript - if (!nudgeConfig) { - // Idle = done: race exit against idle. Whichever fires first completes the step. - const result = await Promise.race([ - agent.waitForExit(timeoutMs).then((r) => ({ kind: 'exit' as const, result: r })), - agent.waitForIdle(timeoutMs).then((r) => ({ kind: 'idle' as const, result: r })), - ]); - if (result.kind === 'idle' && result.result === 'idle') { - this.log(`[${step.name}] Agent "${agent.name}" went idle — treating as complete`); - this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — treating as complete`); - await agent.release(); - return 'released'; - } - // Exit won the race, or idle returned 'exited'/'timeout' — pass through. - return result.result as 'exited' | 'timeout' | 'released'; - } - ``` - - Only modify these two things. Do not change anything else. - verification: - type: exit_code - - # ── Phase 3: Update tests ────────────────────────────────────────────── - - - name: update-tests - type: agent - agent: test-writer - dependsOn: [implement, read-test-file] - task: | - Add new test cases to `packages/sdk/src/__tests__/idle-nudge.test.ts` for the - new idle=done behavior. Do NOT modify any existing tests — only append new ones. - - Current test file: - {{steps.read-test-file.output}} - - Background on the change: - - When `idleNudge` config is absent in the swarm config, `waitForExitWithIdleNudging` - now races `waitForExit` vs `waitForIdle`. - - If `waitForIdle` resolves with `'idle'` first → `agent.release()` is called and - the method returns `'released'`. - - If `waitForExit` resolves first → that result is returned as-is. - - If `waitForIdle` resolves with `'exited'` or `'timeout'` → exit result wins. - - The mock infrastructure already has `waitForExitFn` and `waitForIdleFn` that you - can control. Add a new describe block: `'Idle = done (no idleNudge config)'` with: - - Test 1 — idle fires first: - waitForIdleFn resolves 'idle' immediately - waitForExitFn never resolves (returns a never-settling promise or very long timeout) - → workflow run should complete (step succeeds, agent.release() is called) - - Test 2 — exit fires first: - waitForExitFn resolves 'exited' immediately - waitForIdleFn resolves 'timeout' (or never fires before exit) - → workflow run should complete, agent.release() NOT called by idle logic - - Test 3 — both timeout: - waitForExitFn resolves 'timeout' - waitForIdleFn resolves 'timeout' - → step should fail with a timeout error - - Use the existing `makeConfig()` and `makeDb()` helpers. Use the existing - `WorkflowRunner` import pattern already in the file. - verification: - type: exit_code - - # ── Phase 4: Type-check ──────────────────────────────────────────────── - - - name: type-check - type: deterministic - dependsOn: [implement, update-tests] - command: > - cd packages/sdk && - npx tsc --noEmit 2>&1 | tail -30 && - echo "TYPE_CHECK_PASSED" || echo "TYPE_CHECK_FAILED" - captureOutput: true - failOnError: false - - # ── Phase 5: Run vitest ──────────────────────────────────────────────── - - - name: run-tests - type: deterministic - dependsOn: [type-check] - command: >- - cd packages/sdk && { npx vitest run 2>&1; echo "EXIT:$?"; } | tail -80 - captureOutput: true - failOnError: false - - # ── Phase 6: Fix failures ────────────────────────────────────────────── - - - name: fix-if-broken - type: agent - agent: fixer - dependsOn: [run-tests, type-check] - task: | - Review the type-check and test results. Fix any failures. - - Type-check: - {{steps.type-check.output}} - - Test run: - {{steps.run-tests.output}} - - If both show PASSED / ALL_TESTS_PASSED, output: FIX_DONE:none - - Otherwise: - - For TypeScript errors: fix packages/sdk/src/workflows/runner.ts - - For failing tests: fix packages/sdk/src/__tests__/idle-nudge.test.ts - - Do NOT change the intended behavior — only fix syntax/type/mock issues - - verification: - type: exit_code - maxIterations: 2 - - # ── Phase 7: Final test run ──────────────────────────────────────────── - - - name: final-tests - type: deterministic - dependsOn: [fix-if-broken] - command: >- - cd packages/sdk && { npx vitest run 2>&1; echo "EXIT:$?"; } | tail -60 - captureOutput: true - failOnError: false - - # ── Phase 8: Capture diff for review ────────────────────────────────── - - - name: capture-diff - type: deterministic - dependsOn: [final-tests] - command: > - git diff packages/sdk/src/workflows/runner.ts - packages/sdk/src/__tests__/idle-nudge.test.ts - captureOutput: true - failOnError: false - - # ── Phase 9: Code review ─────────────────────────────────────────────── - - - name: review - type: agent - agent: reviewer - dependsOn: [capture-diff, final-tests] - task: | - Review these changes to WorkflowRunner. Be precise and thorough. - - Final test result: - {{steps.final-tests.output}} - - Diff: - {{steps.capture-diff.output}} - - Review checklist: - 1. Pre-registration block is fully gone — no leftover `preflightAgents` calls or - stale comments referencing "token cache" / "cache hits" - 2. Race logic in `waitForExitWithIdleNudging`: both promises created before awaiting? - No floating promise leak if exit wins before idle settles? - 3. Edge case: `waitForIdle` returns 'exited' (agent already gone) — does the cast - `result.result as 'exited' | 'timeout' | 'released'` hold? Check the union types. - 4. Edge case: `waitForIdle` returns 'timeout' and exit won — correct fallthrough? - 5. New tests: do they actually cover all three cases with proper mock wiring? - 6. No unintended changes to the nudge path (idleNudge config present) — that code - should be unchanged. - - verification: - type: exit_code - -errorHandling: - strategy: continue # best-effort — don't abort if an optional step fails - maxRetries: 1 - retryDelayMs: 3000 - notifyChannel: wf-runner-refactor - -state: - backend: memory - ttlMs: 7200000 # 2 hours - -trajectories: - enabled: true - autoDecisions: true diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts index 3ac0bcaa0..829fb31eb 100644 --- a/packages/sdk/src/index.ts +++ b/packages/sdk/src/index.ts @@ -44,12 +44,11 @@ export * from './consensus.js'; export * from './shadow.js'; export * from './relay-adapter.js'; export * from './harness.js'; -export * from './workflows/index.js'; export * from './spawn-from-env.js'; export * from './cli-registry.js'; export * from './cli-resolver.js'; export * from './personas.js'; export * as github from './github.js'; -export { createGitHubStep, GitHubClient } from './github.js'; +export { GitHubClient } from '@agent-relay/github-primitive'; export * as slack from './slack.js'; -export { createSlackStep, SlackClient } from './slack.js'; +export { SlackClient } from '@agent-relay/slack-primitive'; diff --git a/packages/sdk/src/provisioner/__tests__/audit.test.ts b/packages/sdk/src/provisioner/__tests__/audit.test.ts deleted file mode 100644 index 7ca616158..000000000 --- a/packages/sdk/src/provisioner/__tests__/audit.test.ts +++ /dev/null @@ -1,62 +0,0 @@ -import assert from 'node:assert/strict'; -import { mkdtemp, mkdir, readFile, rm, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; -import test from 'node:test'; - -import { createLocalJwksKeyPair, provisionWorkflowAgents } from '../index.js'; - -async function createWorkspace(): Promise<{ dir: string; cleanup: () => Promise }> { - const dir = await mkdtemp(path.join(tmpdir(), 'relay-provisioner-audit-')); - await mkdir(path.join(dir, 'src'), { recursive: true }); - await writeFile(path.join(dir, 'src', 'index.ts'), 'export const value = 1;\n'); - - return { - dir, - cleanup: () => rm(dir, { recursive: true, force: true }), - }; -} - -test('provisionWorkflowAgents writes a permission audit without token values', async () => { - const workspace = await createWorkspace(); - - try { - const result = await provisionWorkflowAgents({ - tokenSigningKey: createLocalJwksKeyPair(), - workspace: 'audit-workspace', - projectDir: workspace.dir, - relayfileBaseUrl: 'http://127.0.0.1:8080', - agents: { - worker: { - access: 'readonly', - }, - }, - skipSeeding: true, - skipMount: true, - }); - - const auditPath = path.join(workspace.dir, '.agent-relay', 'permission-audit.json'); - const auditRaw = await readFile(auditPath, 'utf8'); - const auditJson = JSON.parse(auditRaw) as { - entries: Array<{ - agentName: string; - action: string; - details: Record; - }>; - }; - - assert.ok(auditJson.entries.length >= 3); - assert.deepEqual( - auditJson.entries.map((entry) => `${entry.agentName}:${entry.action}`), - ['worker:resolve', 'worker:mint', 'relay-admin:mint'] - ); - assert.equal( - auditJson.entries[1]?.details.jwtPath, - path.join(workspace.dir, '.relay', 'tokens', 'worker.jwt') - ); - assert.ok(!auditRaw.includes(result.agents.worker.token)); - assert.ok(!auditRaw.includes(result.adminToken)); - } finally { - await workspace.cleanup(); - } -}); diff --git a/packages/sdk/src/provisioner/__tests__/seeder.test.ts b/packages/sdk/src/provisioner/__tests__/seeder.test.ts deleted file mode 100644 index a651074ee..000000000 --- a/packages/sdk/src/provisioner/__tests__/seeder.test.ts +++ /dev/null @@ -1,284 +0,0 @@ -import assert from 'node:assert/strict'; -import { mkdtemp, mkdir, rm, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; -import test, { afterEach, mock } from 'node:test'; - -import { RelayFileClient } from '@relayfile/sdk'; - -import { seedAclRules, seedWorkflowAcls, seedWorkspace } from '../seeder.js'; - -interface FetchResponseShape { - ok: boolean; - status: number; - text(): Promise; -} - -const originalFetch = globalThis.fetch; - -async function createWorkspace( - files: Record -): Promise<{ dir: string; cleanup: () => Promise }> { - const dir = await mkdtemp(path.join(tmpdir(), 'relay-seeder-')); - - for (const [relativePath, content] of Object.entries(files)) { - const filePath = path.join(dir, relativePath); - await mkdir(path.dirname(filePath), { recursive: true }); - await writeFile(filePath, content); - } - - return { - dir, - cleanup: () => rm(dir, { recursive: true, force: true }), - }; -} - -function createFetchResponse(body: string, status = 200): FetchResponseShape { - return { - ok: status >= 200 && status < 300, - status, - async text(): Promise { - return body; - }, - }; -} - -function parseFetchBody(fetchMock: ReturnType): { files: unknown[] } { - assert.equal(fetchMock.mock.calls.length, 1); - const [, options] = fetchMock.mock.calls[0]!.arguments as [string, RequestInit]; - assert.equal(typeof options.body, 'string'); - return JSON.parse(options.body as string) as { files: unknown[] }; -} - -afterEach(() => { - mock.restoreAll(); - globalThis.fetch = originalFetch; -}); - -test('seedWorkspace posts the expected HTTP payload after SDK fallback', async () => { - const workspace = await createWorkspace({ - 'alpha.txt': 'alpha payload', - 'binary.bin': Buffer.from([0xff, 0x00, 0x61]), - '.relay/ignored.txt': 'skip', - '.git/config': 'skip', - 'node_modules/pkg/index.js': 'skip', - 'custom-skip/ignored.txt': 'skip', - '.relayfile-mount-state.json': '{"skip":true}', - }); - - try { - mock.method(RelayFileClient.prototype, 'bulkWrite', async () => { - throw new Error('fall back to HTTP'); - }); - - const fetchMock = mock.method(globalThis, 'fetch', async () => - createFetchResponse(JSON.stringify({ written: 2, errorCount: 0, errors: [] })) - ); - - const seededCount = await seedWorkspace( - 'https://relay.example///', - 'admin-token', - ' workspace-123 ', - workspace.dir, - ['custom-skip'] - ); - - assert.equal(seededCount, 2); - assert.equal(fetchMock.mock.calls.length, 1); - - const [url, options] = fetchMock.mock.calls[0]!.arguments as [string, RequestInit]; - assert.equal(url, 'https://relay.example/v1/workspaces/workspace-123/fs/bulk'); - assert.equal(options.method, 'POST'); - assert.deepEqual(options.headers, { - Authorization: 'Bearer admin-token', - 'Content-Type': 'application/json', - 'X-Correlation-Id': options.headers && (options.headers as Record)['X-Correlation-Id'], - }); - assert.match( - (options.headers as Record)['X-Correlation-Id'], - /^seed-workspace-workspace-123-\d+-0$/u - ); - - const body = parseFetchBody(fetchMock); - assert.deepEqual(body.files, [ - { - path: '/alpha.txt', - content: 'alpha payload', - encoding: 'utf-8', - }, - { - path: '/binary.bin', - content: Buffer.from([0xff, 0x00, 0x61]).toString('base64'), - encoding: 'base64', - }, - ]); - } finally { - await workspace.cleanup(); - } -}); - -test('seedAclRules formats ACL files for root and nested directories', async () => { - mock.method(RelayFileClient.prototype, 'bulkWrite', async () => { - throw new Error('fall back to HTTP'); - }); - - const fetchMock = mock.method(globalThis, 'fetch', async () => - createFetchResponse(JSON.stringify({ written: 2, errorCount: 0, errors: [] })) - ); - - await seedAclRules('https://relay.example/', 'acl-token', 'workspace-acl', { - '/': ['allow:agent:lead:read'], - '/docs/': ['allow:agent:writer:write', 'deny:agent:reader'], - }); - - const body = parseFetchBody(fetchMock); - assert.deepEqual(body.files, [ - { - path: '/.relayfile.acl', - content: JSON.stringify({ - semantics: { permissions: ['allow:agent:lead:read'] }, - }), - encoding: 'utf-8', - }, - { - path: '/docs/.relayfile.acl', - content: JSON.stringify({ - semantics: { - permissions: ['allow:agent:writer:write', 'deny:agent:reader'], - }, - }), - encoding: 'utf-8', - }, - ]); -}); - -test('seedWorkflowAcls merges multiple agents onto shared directories', async () => { - mock.method(RelayFileClient.prototype, 'bulkWrite', async () => { - throw new Error('fall back to HTTP'); - }); - - const fetchMock = mock.method(globalThis, 'fetch', async () => - createFetchResponse(JSON.stringify({ written: 2, errorCount: 0, errors: [] })) - ); - - await seedWorkflowAcls({ - relayfileUrl: 'https://relay.example', - adminToken: 'workflow-token', - workspace: 'workflow-merge', - agents: [ - { name: 'qa-reviewer', acl: { src: ['read'] } }, - { name: 'builder', acl: { 'src\\': ['write'], '/docs/': ['read'] } }, - { name: 'analyst', acl: { docs: ['read'] } }, - ], - }); - - const body = parseFetchBody(fetchMock); - assert.deepEqual(body.files, [ - { - path: '/docs/.relayfile.acl', - content: JSON.stringify({ - semantics: { - permissions: [ - 'allow:agent:analyst:read', - 'allow:agent:builder:read', - 'allow:agent:qa-reviewer:read', - ], - }, - }), - encoding: 'utf-8', - }, - { - path: '/src/.relayfile.acl', - content: JSON.stringify({ - semantics: { - permissions: [ - 'allow:agent:builder:read', - 'allow:agent:builder:write', - 'allow:agent:qa-reviewer:read', - 'deny:agent:analyst', - ], - }, - }), - encoding: 'utf-8', - }, - ]); -}); - -test('seedWorkflowAcls unions deny rules for agents missing directory access', async () => { - mock.method(RelayFileClient.prototype, 'bulkWrite', async () => { - throw new Error('fall back to HTTP'); - }); - - const fetchMock = mock.method(globalThis, 'fetch', async () => - createFetchResponse(JSON.stringify({ written: 2, errorCount: 0, errors: [] })) - ); - - await seedWorkflowAcls({ - relayfileUrl: 'https://relay.example', - adminToken: 'workflow-token', - workspace: 'workflow-deny', - agents: [ - { name: 'alpha', acl: { src: ['read'] } }, - { name: 'beta', acl: { docs: ['write'] } }, - ], - }); - - const body = parseFetchBody(fetchMock); - assert.deepEqual(body.files, [ - { - path: '/docs/.relayfile.acl', - content: JSON.stringify({ - semantics: { - permissions: ['allow:agent:beta:read', 'allow:agent:beta:write', 'deny:agent:alpha'], - }, - }), - encoding: 'utf-8', - }, - { - path: '/src/.relayfile.acl', - content: JSON.stringify({ - semantics: { - permissions: ['allow:agent:alpha:read', 'deny:agent:beta'], - }, - }), - encoding: 'utf-8', - }, - ]); -}); - -test('seedWorkflowAcls is a no-op when there are no ACL directories to seed', async () => { - const bulkWriteMock = mock.method(RelayFileClient.prototype, 'bulkWrite', async () => { - throw new Error('bulkWrite should not be called'); - }); - const fetchMock = mock.method(globalThis, 'fetch', async () => { - throw new Error('fetch should not be called'); - }); - - await seedWorkflowAcls({ - relayfileUrl: 'https://relay.example', - adminToken: 'workflow-token', - workspace: 'workflow-empty', - agents: [ - { name: 'builder', acl: {} }, - { name: 'qa-reviewer', acl: {} }, - ], - }); - - assert.equal(bulkWriteMock.mock.calls.length, 0); - assert.equal(fetchMock.mock.calls.length, 0); -}); - -test('seedAclRules surfaces HTTP failures from the fallback API', async () => { - mock.method(RelayFileClient.prototype, 'bulkWrite', async () => { - throw new Error('fall back to HTTP'); - }); - - mock.method(globalThis, 'fetch', async () => createFetchResponse('relay unavailable', 503)); - - await assert.rejects( - seedAclRules('https://relay.example', 'acl-token', 'workspace-http', { - '/': ['allow:agent:builder:read'], - }), - new Error('failed to seed workspace workspace-http: HTTP 503 relay unavailable') - ); -}); diff --git a/packages/sdk/src/provisioner/__tests__/tar-seeder.test.ts b/packages/sdk/src/provisioner/__tests__/tar-seeder.test.ts deleted file mode 100644 index bd44174d3..000000000 --- a/packages/sdk/src/provisioner/__tests__/tar-seeder.test.ts +++ /dev/null @@ -1,249 +0,0 @@ -import fs from 'node:fs'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; -import * as tar from 'tar'; -import { afterEach, describe, expect, it, vi } from 'vitest'; - -const bulkWriteMock = vi.hoisted(() => vi.fn()); -const relayFileClientMock = vi.hoisted(() => vi.fn()); -const execSyncMock = vi.hoisted(() => vi.fn()); - -vi.mock('@relayfile/sdk', () => ({ - RelayFileClient: relayFileClientMock.mockImplementation(() => ({ - bulkWrite: bulkWriteMock, - })), -})); - -vi.mock('node:child_process', () => ({ - execSync: execSyncMock, -})); - -import { seedWorkspaceTar } from '../seeder.js'; - -const tempDirs: string[] = []; - -function makeTempDir(prefix: string): string { - const dir = fs.mkdtempSync(path.join(tmpdir(), prefix)); - tempDirs.push(dir); - return dir; -} - -function jsonResponse(payload: unknown, status = 200): Response { - return new Response(JSON.stringify(payload), { - status, - headers: { 'Content-Type': 'application/json' }, - }); -} - -function listRelativeFiles(rootDir: string, currentDir = rootDir): string[] { - const files: string[] = []; - const entries = fs.readdirSync(currentDir, { withFileTypes: true }); - - for (const entry of entries) { - const absolutePath = path.join(currentDir, entry.name); - if (entry.isDirectory()) { - files.push(...listRelativeFiles(rootDir, absolutePath)); - continue; - } - if (entry.isFile()) { - files.push(path.relative(rootDir, absolutePath).split(path.sep).join('/')); - } - } - - return files.sort((left, right) => left.localeCompare(right)); -} - -async function extractTarballEntries(body: unknown): Promise { - const archiveDir = makeTempDir('relay-tar-archive-'); - const extractDir = makeTempDir('relay-tar-extract-'); - const archivePath = path.join(archiveDir, 'seed.tar.gz'); - - fs.writeFileSync(archivePath, Buffer.from(body as Uint8Array)); - await tar.extract({ file: archivePath, cwd: extractDir, gzip: true }); - - return listRelativeFiles(extractDir); -} - -afterEach(() => { - bulkWriteMock.mockReset(); - relayFileClientMock.mockClear(); - execSyncMock.mockReset(); - vi.restoreAllMocks(); - vi.unstubAllGlobals(); - for (const dir of tempDirs.splice(0)) { - fs.rmSync(dir, { recursive: true, force: true }); - } -}); - -describe('seedWorkspaceTar', () => { - it('creates and uploads a tar.gz to the import endpoint and respects excludeDirs', async () => { - const projectDir = makeTempDir('relay-seed-project-'); - fs.mkdirSync(path.join(projectDir, 'src'), { recursive: true }); - fs.mkdirSync(path.join(projectDir, 'ignored'), { recursive: true }); - fs.mkdirSync(path.join(projectDir, 'node_modules', 'left-pad'), { recursive: true }); - - fs.writeFileSync(path.join(projectDir, 'src', 'hello.txt'), 'hello world\n'); - fs.writeFileSync(path.join(projectDir, 'src', 'data.bin'), Buffer.from([0xff, 0x00, 0xaa])); - fs.writeFileSync(path.join(projectDir, 'ignored', 'skip.txt'), 'skip me\n'); - fs.writeFileSync(path.join(projectDir, 'node_modules', 'left-pad', 'index.js'), 'module.exports = 1;\n'); - fs.writeFileSync(path.join(projectDir, '.relayfile-mount-state.json'), '{}\n'); - - execSyncMock.mockReturnValue( - [ - 'src/hello.txt', - 'src/data.bin', - 'ignored/skip.txt', - 'node_modules/left-pad/index.js', - '.relayfile-mount-state.json', - ].join('\0') - ); - - const fetchMock = vi.fn().mockResolvedValue(jsonResponse({ imported: 2 })); - vi.stubGlobal('fetch', fetchMock); - - const imported = await seedWorkspaceTar('https://relayfile.example/', 'token', 'rw_demo', projectDir, [ - 'ignored', - ]); - - expect(imported).toBe(2); - expect(fetchMock).toHaveBeenCalledTimes(1); - - const [url, init] = fetchMock.mock.calls[0]; - expect(String(url)).toContain('/v1/workspaces/rw_demo/fs/import'); - expect(init.method).toBe('POST'); - expect(init.headers).toMatchObject({ - Authorization: 'Bearer token', - 'Content-Type': 'application/gzip', - 'X-Correlation-Id': expect.stringMatching(/^seed-tar-rw_demo-/), - }); - expect(init.body).toBeInstanceOf(Uint8Array); - - const entries = await extractTarballEntries(init.body); - expect(entries).toEqual(expect.arrayContaining(['src/data.bin', 'src/hello.txt'])); - expect(entries).not.toContain('ignored/skip.txt'); - expect(entries).not.toContain('node_modules/left-pad/index.js'); - expect(entries).not.toContain('.relayfile-mount-state.json'); - }); - - it('falls back to seedWorkspace when the import endpoint returns 404', async () => { - const projectDir = makeTempDir('relay-seed-project-'); - fs.mkdirSync(path.join(projectDir, 'src'), { recursive: true }); - fs.writeFileSync(path.join(projectDir, 'src', 'hello.txt'), 'hello fallback\n'); - - execSyncMock.mockReturnValue('src/hello.txt\0'); - bulkWriteMock.mockRejectedValue({ status: undefined }); - - const fetchMock = vi - .fn() - .mockResolvedValueOnce(new Response('missing', { status: 404 })) - .mockResolvedValueOnce(jsonResponse({ written: 1, errorCount: 0, errors: [] })); - vi.stubGlobal('fetch', fetchMock); - - const imported = await seedWorkspaceTar('https://relayfile.example/', 'token', 'rw_demo', projectDir, []); - - expect(imported).toBe(1); - expect(fetchMock).toHaveBeenCalledTimes(2); - expect(String(fetchMock.mock.calls[0]?.[0])).toContain('/v1/workspaces/rw_demo/fs/import'); - expect(String(fetchMock.mock.calls[1]?.[0])).toContain('/v1/workspaces/rw_demo/fs/bulk'); - - const payload = JSON.parse(String(fetchMock.mock.calls[1]?.[1].body)); - expect(payload.files).toEqual([ - { path: '/src/hello.txt', content: 'hello fallback\n', encoding: 'utf-8' }, - ]); - }); - - it('throws on non-404 HTTP errors', async () => { - const projectDir = makeTempDir('relay-seed-project-'); - fs.mkdirSync(path.join(projectDir, 'src'), { recursive: true }); - fs.writeFileSync(path.join(projectDir, 'src', 'hello.txt'), 'hello\n'); - - execSyncMock.mockReturnValue('src/hello.txt\0'); - - const fetchMock = vi.fn().mockResolvedValue(new Response('boom', { status: 500 })); - vi.stubGlobal('fetch', fetchMock); - - await expect( - seedWorkspaceTar('https://relayfile.example/', 'token', 'rw_demo', projectDir, []) - ).rejects.toThrow('tar import failed for workspace rw_demo: HTTP 500 boom'); - }); - - it('works for non-git directories via the directory-walk fallback path', async () => { - const projectDir = makeTempDir('relay-seed-project-'); - fs.mkdirSync(path.join(projectDir, 'src'), { recursive: true }); - fs.mkdirSync(path.join(projectDir, 'nested', 'docs'), { recursive: true }); - fs.mkdirSync(path.join(projectDir, 'custom-ignore'), { recursive: true }); - fs.mkdirSync(path.join(projectDir, 'node_modules', 'left-pad'), { recursive: true }); - - fs.writeFileSync(path.join(projectDir, 'src', 'app.ts'), 'export const app = true;\n'); - fs.writeFileSync(path.join(projectDir, 'nested', 'docs', 'readme.md'), '# hello\n'); - fs.writeFileSync(path.join(projectDir, 'custom-ignore', 'skip.txt'), 'skip\n'); - fs.writeFileSync(path.join(projectDir, 'node_modules', 'left-pad', 'index.js'), 'module.exports = 1;\n'); - fs.writeFileSync(path.join(projectDir, '.relayfile-mount-state.json'), '{}\n'); - - execSyncMock.mockImplementation(() => { - throw new Error('not a git repo'); - }); - - const fetchMock = vi.fn().mockResolvedValue(jsonResponse({ imported: 2 })); - vi.stubGlobal('fetch', fetchMock); - - const imported = await seedWorkspaceTar('https://relayfile.example/', 'token', 'rw_demo', projectDir, [ - 'custom-ignore', - ]); - - expect(imported).toBe(2); - expect(fetchMock).toHaveBeenCalledTimes(1); - - const [, init] = fetchMock.mock.calls[0]; - const entries = await extractTarballEntries(init.body); - expect(entries).toEqual(expect.arrayContaining(['nested/docs/readme.md', 'src/app.ts'])); - expect(entries).not.toContain('custom-ignore/skip.txt'); - expect(entries).not.toContain('node_modules/left-pad/index.js'); - expect(entries).not.toContain('.relayfile-mount-state.json'); - }); - - it('includes untracked files returned by git ls-files and preserves gitignore filtering', async () => { - const projectDir = makeTempDir('relay-seed-project-'); - fs.mkdirSync(path.join(projectDir, 'src'), { recursive: true }); - fs.mkdirSync(path.join(projectDir, 'ignored-by-git'), { recursive: true }); - - fs.writeFileSync(path.join(projectDir, 'src', 'tracked.ts'), 'export const tracked = true;\n'); - fs.writeFileSync(path.join(projectDir, 'src', 'draft.ts'), 'export const draft = true;\n'); - fs.writeFileSync(path.join(projectDir, 'ignored-by-git', 'skip.txt'), 'skip\n'); - - execSyncMock.mockReturnValue(['src/tracked.ts', 'src/draft.ts'].join('\0')); - - const fetchMock = vi.fn().mockResolvedValue(jsonResponse({ imported: 2 })); - vi.stubGlobal('fetch', fetchMock); - - const imported = await seedWorkspaceTar('https://relayfile.example/', 'token', 'rw_demo', projectDir, []); - - expect(imported).toBe(2); - expect(execSyncMock).toHaveBeenCalledWith( - 'git ls-files -z --cached --others --exclude-standard', - expect.objectContaining({ cwd: path.resolve(projectDir), encoding: 'utf-8' }) - ); - - const [, init] = fetchMock.mock.calls[0]; - const entries = await extractTarballEntries(init.body); - expect(entries).toEqual(['src/draft.ts', 'src/tracked.ts']); - expect(entries).not.toContain('ignored-by-git/skip.txt'); - }); - - it('does not fall back to a directory walk when git ls-files succeeds with no files', async () => { - const projectDir = makeTempDir('relay-seed-project-'); - fs.mkdirSync(path.join(projectDir, 'ignored-by-git'), { recursive: true }); - fs.writeFileSync(path.join(projectDir, 'ignored-by-git', 'skip.txt'), 'skip\n'); - - execSyncMock.mockReturnValue(''); - - const fetchMock = vi.fn().mockResolvedValue(jsonResponse({ imported: 0 })); - vi.stubGlobal('fetch', fetchMock); - - const imported = await seedWorkspaceTar('https://relayfile.example/', 'token', 'rw_demo', projectDir, []); - - expect(imported).toBe(0); - - expect(fetchMock).not.toHaveBeenCalled(); - }); -}); diff --git a/packages/sdk/src/provisioner/index.ts b/packages/sdk/src/provisioner/index.ts deleted file mode 100644 index c6d8c88b2..000000000 --- a/packages/sdk/src/provisioner/index.ts +++ /dev/null @@ -1,335 +0,0 @@ -import { existsSync, readdirSync } from 'node:fs'; -import path from 'node:path'; - -import { getDefaultPermissionAuditPath, PermissionAuditLog } from './audit.js'; -import { compileAgentScopes } from './compiler.js'; -import { ensureRelayfileMount } from './mount.js'; -import { createWorkspaceIfNeeded, seedWorkspace, seedWorkflowAcls } from './seeder.js'; -import { DEFAULT_ADMIN_AGENT_NAME, DEFAULT_ADMIN_SCOPES, mintAgentToken } from './token.js'; -import type { - AgentPermissions, - AgentProvisionMap, - AgentProvisionResult, - CompiledAgentPermissions, - ProvisionResult, - ProvisionSummary, - WorkflowProvisionConfig, -} from './types.js'; - -export * from './compiler.js'; -export * from './local-jwks.js'; -export * from './mount.js'; -export * from './seeder.js'; -export * from './token.js'; -export * from './types.js'; -export * from './audit.js'; - -interface ProvisionableAgent { - name: string; - permissions: AgentPermissions; - resolutionSource: 'configured' | 'auto-discovered'; -} - -const DEFAULT_AGENT_NAME = 'default-agent'; - -function discoverAgentNames(projectDir: string): string[] { - if (!existsSync(projectDir)) { - return [DEFAULT_AGENT_NAME]; - } - - const agentNames = new Set(); - - for (const entry of readdirSync(projectDir)) { - const match = entry.match(/^\.(.+)\.(agentignore|agentreadonly)$/u); - if (match?.[1]) { - agentNames.add(match[1]); - } - } - - const discovered = [...agentNames].sort((left, right) => left.localeCompare(right)); - return discovered.length > 0 ? discovered : [DEFAULT_AGENT_NAME]; -} - -function resolveAgents(config: WorkflowProvisionConfig): ProvisionableAgent[] { - const configuredAgents = Object.entries(config.agents ?? {}); - if (configuredAgents.length > 0) { - return configuredAgents.map(([name, permissions]) => ({ - name, - permissions: permissions ?? {}, - resolutionSource: 'configured', - })); - } - - return discoverAgentNames(config.projectDir).map((name) => ({ - name, - permissions: {}, - resolutionSource: 'auto-discovered', - })); -} - -function buildSummary(compilations: readonly CompiledAgentPermissions[]): ProvisionSummary { - return compilations.reduce( - (summary, compiled) => ({ - readonly: summary.readonly + compiled.summary.readonly, - readwrite: summary.readwrite + compiled.summary.readwrite, - denied: summary.denied + compiled.summary.denied, - customScopes: summary.customScopes + compiled.summary.customScopes, - }), - { - readonly: 0, - readwrite: 0, - denied: 0, - customScopes: 0, - } - ); -} - -function buildAgentResult( - projectDir: string, - name: string, - token: string, - compiled: CompiledAgentPermissions, - mountPoint?: string -): AgentProvisionResult { - return { - name, - tokenPath: path.resolve(projectDir, '.relay', 'tokens', `${name}.jwt`), - token, - scopes: [...compiled.scopes], - compiled, - mountPoint, - }; -} - -function sanitizePathComponent(value: string): string { - return value.replace(/[^a-zA-Z0-9._-]+/g, '-'); -} - -function countAclDirectories(compilations: readonly CompiledAgentPermissions[]): number { - const directories = new Set(); - - for (const compilation of compilations) { - for (const directory of Object.keys(compilation.acl)) { - directories.add(directory); - } - } - - return directories.size; -} - -export async function provisionWorkflowAgents(config: WorkflowProvisionConfig): Promise { - const audit = new PermissionAuditLog(); - const auditPath = getDefaultPermissionAuditPath(config.projectDir); - - try { - const agents = resolveAgents(config); - const tokens = new Map(); - const scopes = new Map(); - const mounts = new Map>>(); - const agentResults: AgentProvisionMap = {}; - const compilations: CompiledAgentPermissions[] = []; - const compiledByAgent = new Map(); - - for (const agent of agents) { - audit.log({ - agentName: agent.name, - action: 'resolve', - details: { - source: agent.resolutionSource, - workspace: config.workspace, - permissionKeys: Object.keys(agent.permissions).sort(), - }, - }); - - const compiled = compileAgentScopes({ - agentName: agent.name, - workspace: config.workspace, - projectDir: config.projectDir, - permissions: agent.permissions, - }); - const token = mintAgentToken({ - privateKey: config.tokenSigningKey.privateKey, - kid: config.tokenSigningKey.kid, - agentName: agent.name, - workspace: config.workspace, - scopes: compiled.scopes, - ttlSeconds: config.tokenTtlSeconds, - }); - - audit.log({ - agentName: agent.name, - action: 'mint', - details: { - workspace: config.workspace, - jwtPath: path.resolve(config.projectDir, '.relay', 'tokens', `${agent.name}.jwt`), - scopeCount: compiled.scopes.length, - scopes: [...compiled.scopes], - ttlSeconds: config.tokenTtlSeconds ?? null, - }, - }); - - tokens.set(agent.name, token); - scopes.set(agent.name, [...compiled.scopes]); - compilations.push(compiled); - compiledByAgent.set(agent.name, compiled); - } - - const adminScopes = [...(config.adminScopes ?? DEFAULT_ADMIN_SCOPES)]; - const adminToken = mintAgentToken({ - privateKey: config.tokenSigningKey.privateKey, - kid: config.tokenSigningKey.kid, - agentName: DEFAULT_ADMIN_AGENT_NAME, - workspace: config.workspace, - scopes: adminScopes, - ttlSeconds: config.tokenTtlSeconds, - }); - - audit.log({ - agentName: DEFAULT_ADMIN_AGENT_NAME, - action: 'mint', - details: { - workspace: config.workspace, - role: 'admin', - scopeCount: adminScopes.length, - scopes: adminScopes, - ttlSeconds: config.tokenTtlSeconds ?? null, - }, - }); - - let seededAclCount = 0; - let seededFileCount = 0; - - if (!config.skipSeeding) { - await createWorkspaceIfNeeded(config.relayfileBaseUrl, adminToken, config.workspace); - audit.log({ - agentName: DEFAULT_ADMIN_AGENT_NAME, - action: 'seed', - details: { - workspace: config.workspace, - step: 'workspace', - relayfileBaseUrl: config.relayfileBaseUrl, - }, - }); - - seededFileCount = await seedWorkspace( - config.relayfileBaseUrl, - adminToken, - config.workspace, - config.projectDir, - config.excludeDirs ?? [] - ); - audit.log({ - agentName: DEFAULT_ADMIN_AGENT_NAME, - action: 'seed', - details: { - workspace: config.workspace, - step: 'files', - projectDir: config.projectDir, - excludeDirs: config.excludeDirs ?? [], - fileCount: seededFileCount, - }, - }); - - await seedWorkflowAcls({ - relayfileUrl: config.relayfileBaseUrl, - adminToken, - workspace: config.workspace, - agents: compilations.map((compilation) => ({ - name: compilation.agentName, - acl: compilation.acl, - })), - }); - seededAclCount = countAclDirectories(compilations); - audit.log({ - agentName: DEFAULT_ADMIN_AGENT_NAME, - action: 'seed', - details: { - workspace: config.workspace, - step: 'acl', - directoryCount: seededAclCount, - agentCount: compilations.length, - }, - }); - } - - if (!config.skipMount) { - const mountRoot = path.resolve(config.mountBaseDir ?? path.join(config.projectDir, '.relay')); - try { - for (const agent of agents) { - const token = tokens.get(agent.name); - const compiled = compiledByAgent.get(agent.name); - if (!token || !compiled) { - continue; - } - - const mountHandle = await ensureRelayfileMount({ - binaryPath: config.mountBinaryPath, - relayfileUrl: config.relayfileBaseUrl, - workspace: config.workspace, - token, - mountPoint: path.join( - mountRoot, - `workspace-${sanitizePathComponent(config.workspace)}-${sanitizePathComponent(agent.name)}` - ), - }); - - mounts.set(agent.name, mountHandle); - agentResults[agent.name] = buildAgentResult( - config.projectDir, - agent.name, - token, - compiled, - mountHandle.mountPoint - ); - } - } catch (mountError) { - for (const [, mount] of mounts) { - try { - if (typeof mount.stop === 'function') { - await mount.stop(); - } - } catch { - // Best-effort cleanup — ignore individual stop failures. - } - } - mounts.clear(); - throw mountError; - } - } else { - for (const agent of agents) { - const token = tokens.get(agent.name); - const compiled = compiledByAgent.get(agent.name); - if (!token || !compiled) { - continue; - } - - agentResults[agent.name] = buildAgentResult(config.projectDir, agent.name, token, compiled); - } - } - - return { - agents: agentResults, - agentNames: agents.map((agent) => agent.name), - adminToken, - seededFileCount, - seededAclCount, - summary: buildSummary(compilations), - mounts, - tokens, - scopes, - }; - } finally { - try { - await audit.writeTo(auditPath); - } catch (error) { - if (config.verbose) { - const message = error instanceof Error ? error.message : String(error); - console.warn(`Failed to write permission audit to ${auditPath}: ${message}`); - } - } - - if (config.verbose) { - console.info(audit.summary()); - } - } -} diff --git a/packages/sdk/src/provisioner/mount.ts b/packages/sdk/src/provisioner/mount.ts deleted file mode 100644 index 049197299..000000000 --- a/packages/sdk/src/provisioner/mount.ts +++ /dev/null @@ -1,419 +0,0 @@ -import { execSync, spawn, type ChildProcess } from 'node:child_process'; -import { createHash } from 'node:crypto'; -import { - accessSync, - chmodSync, - constants, - createWriteStream, - existsSync, - mkdirSync, - readFileSync, - renameSync, - rmSync, - writeFileSync, -} from 'node:fs'; -import { mkdtemp, rm } from 'node:fs/promises'; -import https from 'node:https'; -import os from 'node:os'; -import path from 'node:path'; - -const RELAYFILE_VERSION = '0.1.6'; -const RELEASE_BASE_URL = 'https://github.com/AgentWorkforce/relayfile/releases/download'; -const CHECKSUMS_FILE = 'checksums.txt'; -const CACHE_DIR = path.join(os.homedir(), '.agent-relay', 'bin'); -const CACHE_PATH = path.join(CACHE_DIR, 'relayfile-mount'); -const VERSION_PATH = path.join(CACHE_DIR, 'relayfile-mount.version'); -const SUPPORTED_TARGETS = ['darwin-arm64', 'darwin-amd64', 'linux-arm64', 'linux-amd64'].join(', '); - -const PLATFORM_ARCH_MAP: Record = { - 'darwin:arm64': 'darwin-arm64', - 'darwin:x64': 'darwin-amd64', - 'linux:arm64': 'linux-arm64', - 'linux:x64': 'linux-amd64', -}; - -export interface MountConfig { - binaryPath?: string; - relayfileUrl: string; - workspace: string; - token: string; - mountPoint?: string; -} - -export interface MountHandle { - pid: number; - mountPoint: string; - stop(): Promise; -} - -function ensureCacheDir(): void { - mkdirSync(CACHE_DIR, { recursive: true }); -} - -function getRelayfileTarget(): string { - const target = PLATFORM_ARCH_MAP[`${os.platform()}:${os.arch()}`]; - if (!target) { - throw new Error( - `Unsupported platform for relayfile-mount: ${os.platform()}-${os.arch()}. Supported targets: ${SUPPORTED_TARGETS}.` - ); - } - - return target; -} - -function getReleaseAssetUrl(assetName: string): string { - return `${RELEASE_BASE_URL}/v${RELAYFILE_VERSION}/${assetName}`; -} - -function readCachedVersion(): string | null { - try { - return readFileSync(VERSION_PATH, 'utf8').trim() || null; - } catch { - return null; - } -} - -function isExecutable(filePath: string): boolean { - try { - accessSync(filePath, constants.X_OK); - return true; - } catch { - return false; - } -} - -function downloadErrorMessage(url: string, status: number): string { - return `Download failed with status ${status} for ${url}`; -} - -function downloadBinary(url: string, destPath: string, maxRedirects = 5): Promise { - ensureCacheDir(); - - const attemptDownload = ( - currentUrl: string, - redirectsRemaining: number, - resolve: () => void, - reject: (error: Error) => void - ) => { - const request = https.get(currentUrl, (res) => { - const status = res.statusCode ?? 0; - const location = res.headers.location; - const isRedirect = status >= 300 && status < 400 && location; - - if (isRedirect) { - if (redirectsRemaining <= 0) { - res.resume(); - reject(new Error('Too many redirects while downloading relayfile-mount')); - return; - } - - const nextUrl = new URL(location, currentUrl).toString(); - res.resume(); - attemptDownload(nextUrl, redirectsRemaining - 1, resolve, reject); - return; - } - - if (status !== 200) { - res.resume(); - reject(new Error(downloadErrorMessage(currentUrl, status))); - return; - } - - const fileStream = createWriteStream(destPath, { mode: 0o755 }); - res.pipe(fileStream); - fileStream.on('finish', () => { - fileStream.close(() => resolve()); - }); - fileStream.on('error', (error) => reject(error instanceof Error ? error : new Error(String(error)))); - res.on('error', (error) => reject(error instanceof Error ? error : new Error(String(error)))); - }); - - request.on('error', (error) => reject(error instanceof Error ? error : new Error(String(error)))); - }; - - return new Promise((resolve, reject) => { - attemptDownload(url, maxRedirects, resolve, reject); - }).catch((error: unknown) => { - try { - rmSync(destPath, { force: true }); - } catch { - // Ignore cleanup failures. - } - - throw error; - }); -} - -function downloadText(url: string, maxRedirects = 5): Promise { - const fetchWithRedirects = ( - currentUrl: string, - redirectsRemaining: number, - resolve: (text: string) => void, - reject: (error: Error) => void - ) => { - const request = https.get(currentUrl, (res) => { - const status = res.statusCode ?? 0; - const location = res.headers.location; - const isRedirect = status >= 300 && status < 400 && location; - - if (isRedirect) { - if (redirectsRemaining <= 0) { - res.resume(); - reject(new Error('Too many redirects while downloading relayfile checksums')); - return; - } - - const nextUrl = new URL(location, currentUrl).toString(); - res.resume(); - fetchWithRedirects(nextUrl, redirectsRemaining - 1, resolve, reject); - return; - } - - if (status !== 200) { - res.resume(); - reject(new Error(downloadErrorMessage(currentUrl, status))); - return; - } - - const chunks: Buffer[] = []; - res.on('data', (chunk) => { - chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); - }); - res.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))); - res.on('error', (error) => reject(error instanceof Error ? error : new Error(String(error)))); - }); - - request.on('error', (error) => reject(error instanceof Error ? error : new Error(String(error)))); - }; - - return new Promise((resolve, reject) => { - fetchWithRedirects(url, maxRedirects, resolve, reject); - }); -} - -function getExpectedChecksum(checksumContent: string, binaryName: string): string { - for (const line of checksumContent.split('\n')) { - const trimmed = line.trim(); - if (!trimmed) { - continue; - } - - const match = trimmed.match(/^([a-fA-F0-9]{64})\s+\*?(.+)$/); - if (!match) { - continue; - } - - const entryName = path.basename(match[2].trim()); - if (entryName === binaryName) { - return match[1].toLowerCase(); - } - } - - throw new Error(`No checksum entry found for ${binaryName}`); -} - -async function verifyChecksum(filePath: string, binaryName: string): Promise { - const checksumUrl = getReleaseAssetUrl(CHECKSUMS_FILE); - const checksumContent = await downloadText(checksumUrl); - const expectedHash = getExpectedChecksum(checksumContent, binaryName); - const actualHash = createHash('sha256').update(readFileSync(filePath)).digest('hex'); - - if (actualHash !== expectedHash) { - throw new Error(`Checksum mismatch for ${binaryName}: expected ${expectedHash}, got ${actualHash}`); - } -} - -function resignBinaryForMacOS(binaryPath: string): void { - if (os.platform() !== 'darwin') { - return; - } - - try { - execSync(`codesign --force --sign - "${binaryPath}"`, { stdio: 'pipe' }); - } catch { - // Ignore best-effort re-sign failures. - } -} - -async function ensureRelayfileMountBinary(binaryPath?: string): Promise { - if (binaryPath) { - return binaryPath; - } - - if (process.env.RELAYFILE_ROOT) { - return path.join(process.env.RELAYFILE_ROOT, 'bin', 'relayfile-mount'); - } - - const target = getRelayfileTarget(); - const binaryName = `relayfile-mount-${target}`; - const downloadUrl = getReleaseAssetUrl(binaryName); - - ensureCacheDir(); - - if (existsSync(CACHE_PATH) && readCachedVersion() === RELAYFILE_VERSION) { - if (!isExecutable(CACHE_PATH)) { - chmodSync(CACHE_PATH, 0o755); - } - return CACHE_PATH; - } - - const tempPath = path.join(CACHE_DIR, `relayfile-mount.${process.pid}.${Date.now()}.download`); - - try { - await downloadBinary(downloadUrl, tempPath); - await verifyChecksum(tempPath, binaryName); - chmodSync(tempPath, 0o755); - renameSync(tempPath, CACHE_PATH); - chmodSync(CACHE_PATH, 0o755); - resignBinaryForMacOS(CACHE_PATH); - writeFileSync(VERSION_PATH, `${RELAYFILE_VERSION}\n`, 'utf8'); - return CACHE_PATH; - } catch (error) { - try { - rmSync(tempPath, { force: true }); - } catch { - // Ignore cleanup failures. - } - - const message = error instanceof Error ? error.message : String(error); - throw new Error(`Failed to install relayfile-mount from ${downloadUrl}: ${message}`); - } -} - -async function runCommandCapture(command: string, args: string[], env: NodeJS.ProcessEnv): Promise { - return await new Promise((resolve, reject) => { - const proc = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'], env }); - let output = ''; - - proc.stdout.setEncoding('utf8'); - proc.stderr.setEncoding('utf8'); - - proc.stdout.on('data', (chunk: string) => { - output += chunk; - }); - proc.stderr.on('data', (chunk: string) => { - output += chunk; - }); - - proc.on('error', (error) => { - reject(error); - }); - - proc.on('close', (code, signal) => { - if (code === 0) { - resolve(output); - return; - } - - const reason = signal ? `signal ${signal}` : `exit code ${typeof code === 'number' ? code : 'unknown'}`; - const detail = output.trim(); - reject(new Error(detail || `command failed with ${reason}`)); - }); - }); -} - -function ensureProcessRunning(processRef: ChildProcess): boolean { - return processRef.exitCode === null && !processRef.killed; -} - -async function stopMountProcess(processRef: ChildProcess): Promise { - if (processRef.exitCode !== null || !processRef.pid) { - return; - } - - processRef.kill('SIGTERM'); - await new Promise((resolve) => { - const timeout = setTimeout(() => { - if (processRef.exitCode === null && processRef.pid) { - processRef.kill('SIGKILL'); - } - resolve(); - }, 1200); - processRef.once('exit', () => { - clearTimeout(timeout); - resolve(); - }); - }); -} - -export async function ensureRelayfileMount(config: MountConfig): Promise { - const binaryPath = await ensureRelayfileMountBinary(config.binaryPath); - if (!existsSync(binaryPath)) { - throw new Error(`missing relayfile mount binary: ${binaryPath}`); - } - - const mountPoint = - config.mountPoint ?? (await mkdtemp(path.join(os.tmpdir(), `relayfile-mount-${config.workspace}-`))); - mkdirSync(mountPoint, { recursive: true }); - - const mountBaseArgs = [ - '--base-url', - config.relayfileUrl, - '--workspace', - config.workspace, - '--local-dir', - mountPoint, - ]; - const onceArgs = [...mountBaseArgs, '--once']; - const mountEnv = { - ...process.env, - RELAYFILE_TOKEN: config.token, - }; - - let mountProc: ChildProcess | undefined; - let startupPhase = 'initial workspace sync'; - try { - await runCommandCapture(binaryPath, onceArgs, mountEnv); - - startupPhase = 'mount process startup'; - const startedMountProc = spawn(binaryPath, mountBaseArgs, { - stdio: ['ignore', 'ignore', 'ignore'], - env: mountEnv, - }); - mountProc = startedMountProc; - - await new Promise((resolve, reject) => { - const timer = setTimeout(() => resolve(), 600); - startedMountProc.on('error', (spawnError) => { - clearTimeout(timer); - reject(spawnError); - }); - startedMountProc.on('spawn', () => { - clearTimeout(timer); - resolve(); - }); - }); - - if (!ensureProcessRunning(startedMountProc) || typeof startedMountProc.pid !== 'number') { - await stopMountProcess(startedMountProc).catch(() => undefined); - throw new Error(`mount process for workspace ${config.workspace} exited before continuing`); - } - } catch (error) { - if (mountProc) { - await stopMountProcess(mountProc).catch(() => undefined); - } - await rm(mountPoint, { recursive: true, force: true }).catch(() => undefined); - const message = error instanceof Error ? error.message : String(error); - throw new Error(`${startupPhase} failed for ${config.workspace}: ${message}`); - } - - if (!mountProc || typeof mountProc.pid !== 'number') { - await rm(mountPoint, { recursive: true, force: true }).catch(() => undefined); - throw new Error(`mount process startup failed for ${config.workspace}: missing process id`); - } - - let stopped = false; - - return { - pid: mountProc.pid, - mountPoint, - async stop(): Promise { - if (stopped) { - return; - } - stopped = true; - await stopMountProcess(mountProc).catch(() => undefined); - await rm(mountPoint, { recursive: true, force: true }).catch(() => undefined); - }, - }; -} diff --git a/packages/sdk/src/provisioner/seeder.ts b/packages/sdk/src/provisioner/seeder.ts deleted file mode 100644 index 4b3c4e192..000000000 --- a/packages/sdk/src/provisioner/seeder.ts +++ /dev/null @@ -1,571 +0,0 @@ -import { RelayFileClient } from '@relayfile/sdk'; -import { execSync } from 'node:child_process'; -import fs from 'node:fs'; -import path from 'node:path'; -import * as tar from 'tar'; - -interface BulkWriteResponseShape { - written?: number; - errorCount?: number; - errors?: unknown; -} - -interface SeedFile { - path: string; - content: string; - encoding?: 'utf-8' | 'base64'; -} - -interface SeedFileResult { - written: number; - errorCount: number; - errors: unknown; -} - -const DEFAULT_EXCLUDED_DIRS = ['.relay', '.git', 'node_modules']; -const DEFAULT_EXCLUDED_FILES = new Set(['.relayfile-mount-state.json']); -const BATCH_SIZE = 50; -const utf8Decoder = new TextDecoder('utf-8', { fatal: true }); - -interface WorkflowAclAgent { - name: string; - acl: Record; -} - -interface SeedWorkflowAclsOptions { - relayfileUrl: string; - adminToken: string; - workspace: string; - agents: WorkflowAclAgent[]; -} - -function normalizeBaseUrl(baseUrl: string): string { - const url = String(baseUrl ?? '').trim(); - let end = url.length; - while (end > 0 && url.charCodeAt(end - 1) === 0x2f) { - end--; - } - return end === url.length ? url : url.slice(0, end); -} - -function normalizeWorkspaceId(workspaceId: string): string { - const value = String(workspaceId ?? '').trim(); - if (!value) { - throw new Error('workspaceId is required'); - } - return value; -} - -function normalizeExcludeDirs(excludeDirs: string[]): Set { - const result = new Set(); - for (const dir of excludeDirs) { - const normalized = String(dir ?? '') - .trim() - .replace(/^[/\\]+|[/\\]+$/g, ''); - if (!normalized) { - continue; - } - result.add(normalized); - } - return result; -} - -function normalizeAclDirectory(dirPath: string): string { - const normalized = String(dirPath ?? '') - .trim() - .replace(/\\/gu, '/') - .replace(/\/+$/u, ''); - - if (!normalized || normalized === '/') { - return '/'; - } - - return normalized.startsWith('/') ? normalized : `/${normalized}`; -} - -function isReviewerAgent(agentName: string): boolean { - return /reviewer/iu.test(String(agentName ?? '').trim()); -} - -function createClient(baseUrl: string, token: string): RelayFileClient { - return new RelayFileClient({ - baseUrl: normalizeBaseUrl(baseUrl), - token, - retry: { maxRetries: 0 }, - }); -} - -function isUtf8(raw: Buffer): boolean { - try { - utf8Decoder.decode(raw); - return true; - } catch { - return false; - } -} - -function buildSeedFilePayload(filePath: string, rootDir: string): SeedFile { - const relative = path.relative(rootDir, filePath).split(path.sep).join('/'); - const raw = fs.readFileSync(filePath); - if (isUtf8(raw)) { - return { path: `/${relative}`, content: raw.toString('utf8'), encoding: 'utf-8' }; - } - return { path: `/${relative}`, content: raw.toString('base64'), encoding: 'base64' }; -} - -function collectSeedPaths( - rootDir: string, - currentRelative: string, - excludeDirs: Set, - output: string[] -): void { - const absoluteDir = path.join(rootDir, currentRelative); - const entries = fs.readdirSync(absoluteDir, { withFileTypes: true }); - - for (const entry of entries) { - if (excludeDirs.has(entry.name)) { - continue; - } - if (DEFAULT_EXCLUDED_FILES.has(entry.name)) { - continue; - } - - const nextRelative = currentRelative ? `${currentRelative}/${entry.name}` : entry.name; - const absolutePath = path.join(rootDir, nextRelative); - - if (excludeDirs.has(nextRelative)) { - continue; - } - - if (entry.isDirectory()) { - collectSeedPaths(rootDir, nextRelative, excludeDirs, output); - continue; - } - - if (entry.isFile()) { - output.push(absolutePath); - continue; - } - - if (entry.isSymbolicLink()) { - try { - const resolved = fs.realpathSync(absolutePath); - if (!resolved.startsWith(rootDir + path.sep) && resolved !== rootDir) { - continue; - } - const stat = fs.statSync(resolved); - if (stat.isDirectory()) { - collectSeedPaths(rootDir, nextRelative, excludeDirs, output); - continue; - } - if (stat.isFile()) { - output.push(absolutePath); - } - } catch { - // Ignore symlinks that cannot be resolved. - } - } - } -} - -function parseBulkWriteResponse(payload: unknown): SeedFileResult { - if (!payload || typeof payload !== 'object') { - return { written: 0, errorCount: 0, errors: [] }; - } - const parsed = payload as BulkWriteResponseShape; - return { - written: typeof parsed.written === 'number' ? parsed.written : 0, - errorCount: typeof parsed.errorCount === 'number' ? parsed.errorCount : 0, - errors: parsed.errors ?? [], - }; -} - -async function postBulkWrite( - baseUrl: string, - token: string, - workspaceId: string, - files: SeedFile[], - correlationId: string -): Promise { - const response = await fetch( - `${normalizeBaseUrl(baseUrl)}/v1/workspaces/${encodeURIComponent(workspaceId)}/fs/bulk`, - { - method: 'POST', - headers: { - Authorization: `Bearer ${token}`, - 'Content-Type': 'application/json', - 'X-Correlation-Id': correlationId, - }, - body: JSON.stringify({ files }), - } - ); - - const body = await response.text(); - if (!response.ok) { - throw new Error(`failed to seed workspace ${workspaceId}: HTTP ${response.status} ${body}`.trim()); - } - - if (!body) { - return { written: files.length, errorCount: 0, errors: [] }; - } - try { - return parseBulkWriteResponse(JSON.parse(body)); - } catch { - return { written: files.length, errorCount: 0, errors: [] }; - } -} - -async function writeBulkWrite( - baseUrl: string, - token: string, - workspaceId: string, - files: SeedFile[], - correlationId: string -): Promise { - const client = createClient(baseUrl, token); - try { - const response = await client.bulkWrite({ - workspaceId, - files, - correlationId, - }); - return parseBulkWriteResponse(response); - } catch (error) { - if (typeof (error as { status?: number }).status === 'number') { - throw error; - } - } - - return postBulkWrite(baseUrl, token, workspaceId, files, correlationId); -} - -export async function createWorkspaceIfNeeded( - baseUrl: string, - token: string, - workspaceId: string -): Promise { - const workspace = normalizeWorkspaceId(workspaceId); - const client = createClient(baseUrl, token); - - const maybeCreateWorkspace = client as unknown as { - createWorkspace?: (...input: unknown[]) => Promise; - }; - if (typeof maybeCreateWorkspace.createWorkspace === 'function') { - for (const arg of [workspace, { id: workspace }, { workspaceId: workspace }, { name: workspace }]) { - try { - await maybeCreateWorkspace.createWorkspace(arg); - return; - } catch { - // Continue to the next overload candidate, then fallback to HTTP. - } - } - } - - const endpoint = `${normalizeBaseUrl(baseUrl)}/v1/workspaces`; - const bodyCandidates: Array> = [ - { name: workspace }, - { workspace: workspace }, - { workspaceId: workspace }, - { id: workspace }, - ]; - let lastFailure: string | null = null; - - for (const body of bodyCandidates) { - try { - const response = await fetch(endpoint, { - method: 'POST', - headers: { - Authorization: `Bearer ${token}`, - 'Content-Type': 'application/json', - 'X-Correlation-Id': `create-workspace-${Date.now()}`, - }, - body: JSON.stringify(body), - }); - - if ( - response.status === 200 || - response.status === 201 || - response.status === 204 || - response.status === 409 - ) { - return; - } - - const responseBody = await response.text().catch(() => ''); - lastFailure = `HTTP ${response.status} ${responseBody}`.trim(); - if (response.status < 500 && response.status !== 409) { - continue; - } - } catch (error) { - lastFailure = String(error); - } - } - - if (lastFailure) { - throw new Error(`Failed to create workspace ${workspace}: ${lastFailure}`); - } -} - -export async function seedAclRules( - baseUrl: string, - token: string, - workspaceId: string, - aclRules: Record -): Promise { - const workspace = normalizeWorkspaceId(workspaceId); - const files = Object.entries(aclRules).map(([dirPath, rules]) => { - const normalizedDir = String(dirPath ?? '') - .trim() - .replace(/\/+$/, ''); - const aclPath = - normalizedDir === '' || normalizedDir === '/' ? '/.relayfile.acl' : `${normalizedDir}/.relayfile.acl`; - return { - path: aclPath, - content: JSON.stringify({ semantics: { permissions: rules } }), - encoding: 'utf-8' as const, - }; - }); - - if (files.length === 0) { - return; - } - - const result = await writeBulkWrite( - baseUrl, - token, - workspace, - files, - `seed-acl-${workspace}-${Date.now()}` - ); - if (result.errorCount > 0) { - const details = result.errors ? JSON.stringify(result.errors) : '[]'; - throw new Error(`ACL seeding had ${result.errorCount} error(s) for workspace ${workspace}: ${details}`); - } -} - -export async function seedWorkspace( - baseUrl: string, - token: string, - workspaceId: string, - projectDir: string, - excludeDirs: string[] -): Promise { - const workspace = normalizeWorkspaceId(workspaceId); - const rootDir = path.resolve(projectDir); - const excludes = normalizeExcludeDirs([...DEFAULT_EXCLUDED_DIRS, ...excludeDirs]); - const seedPaths: string[] = []; - collectSeedPaths(rootDir, '', excludes, seedPaths); - const allFiles = seedPaths - .sort((left, right) => left.localeCompare(right)) - .map((filePath) => buildSeedFilePayload(filePath, rootDir)); - - let seededCount = 0; - for (let index = 0; index < allFiles.length; index += BATCH_SIZE) { - const batch = allFiles.slice(index, index + BATCH_SIZE); - const batchIndex = Math.floor(index / BATCH_SIZE); - const result = await writeBulkWrite( - baseUrl, - token, - workspace, - batch, - `seed-workspace-${workspace}-${Date.now()}-${batchIndex}` - ); - seededCount += result.written; - } - - return seededCount; -} - -function buildWorkflowAclRules(agents: WorkflowAclAgent[]): Record { - const directories = new Set(); - const normalizedAgents = agents.map((agent) => ({ - name: String(agent.name ?? '').trim(), - acl: Object.fromEntries( - Object.entries(agent.acl ?? {}).map(([dirPath, rules]) => [ - normalizeAclDirectory(dirPath), - Array.isArray(rules) ? rules : [], - ]) - ), - })); - const reviewerNames = normalizedAgents - .map((agent) => agent.name) - .filter((name) => name !== '' && isReviewerAgent(name)); - - for (const agent of normalizedAgents) { - for (const dirPath of Object.keys(agent.acl)) { - directories.add(dirPath); - } - } - - const merged = new Map>(); - - for (const dirPath of [...directories].sort((left, right) => left.localeCompare(right))) { - const rules = new Set(); - - for (const reviewerName of reviewerNames) { - rules.add(`allow:agent:${reviewerName}:read`); - } - - for (const agent of normalizedAgents) { - if (!agent.name) { - continue; - } - - const agentRules = agent.acl[dirPath] ?? []; - const hasRead = agentRules.includes('read') || agentRules.includes('write'); - const hasWrite = agentRules.includes('write'); - - if (hasRead) { - rules.add(`allow:agent:${agent.name}:read`); - } else if (!isReviewerAgent(agent.name)) { - rules.add(`deny:agent:${agent.name}`); - } - - if (hasWrite) { - rules.add(`allow:agent:${agent.name}:write`); - } - } - - if (rules.size > 0) { - merged.set(dirPath, rules); - } - } - - return Object.fromEntries([...merged.entries()].map(([dirPath, rules]) => [dirPath, [...rules].sort()])); -} - -export async function seedWorkflowAcls({ - relayfileUrl, - adminToken, - workspace, - agents, -}: SeedWorkflowAclsOptions): Promise { - const aclRules = buildWorkflowAclRules(agents); - - if (Object.keys(aclRules).length === 0) { - return; - } - - await seedAclRules(relayfileUrl, adminToken, workspace, aclRules); -} - -// ── Tar-based bulk upload ─────────────────────────────────────────────────── - -interface ImportResponseShape { - imported?: number; -} - -function getGitTrackedFiles(rootDir: string): string[] | null { - try { - const output = execSync('git ls-files -z --cached --others --exclude-standard', { - cwd: rootDir, - encoding: 'utf-8', - maxBuffer: 50 * 1024 * 1024, - }); - const files = output.split('\0').filter(Boolean); - return files; - } catch { - return null; - } -} - -function collectAllFiles(rootDir: string, excludeDirs: Set): string[] { - const files: string[] = []; - const stack = ['']; - - while (stack.length > 0) { - const currentRelative = stack.pop()!; - const absoluteDir = path.join(rootDir, currentRelative); - let entries: fs.Dirent[]; - try { - entries = fs.readdirSync(absoluteDir, { withFileTypes: true }); - } catch { - continue; - } - - for (const entry of entries) { - if (excludeDirs.has(entry.name)) continue; - if (DEFAULT_EXCLUDED_FILES.has(entry.name)) continue; - const nextRelative = currentRelative ? `${currentRelative}/${entry.name}` : entry.name; - if (excludeDirs.has(nextRelative)) continue; - - if (entry.isDirectory()) { - stack.push(nextRelative); - } else if (entry.isFile()) { - files.push(nextRelative); - } - } - } - - return files; -} - -async function createTarBuffer(rootDir: string, files: string[]): Promise { - const tarStream = tar.create({ gzip: true, cwd: rootDir, portable: true, follow: true }, files); - const chunks: Buffer[] = []; - for await (const chunk of tarStream) { - chunks.push(Buffer.from(chunk as Uint8Array)); - } - return Buffer.concat(chunks); -} - -export async function seedWorkspaceTar( - baseUrl: string, - token: string, - workspaceId: string, - projectDir: string, - excludeDirs: string[] -): Promise { - const workspace = normalizeWorkspaceId(workspaceId); - const rootDir = path.resolve(projectDir); - const excludes = normalizeExcludeDirs([...DEFAULT_EXCLUDED_DIRS, ...excludeDirs]); - - const gitFiles = getGitTrackedFiles(rootDir); - const rawFiles = gitFiles ?? collectAllFiles(rootDir, excludes); - const files = gitFiles - ? rawFiles.filter((f) => { - const segments = f.split('/'); - if (DEFAULT_EXCLUDED_FILES.has(segments[segments.length - 1])) return false; - return !segments.some((seg) => excludes.has(seg)); - }) - : rawFiles; - - if (files.length === 0) { - return 0; - } - - const tarball = await createTarBuffer(rootDir, files); - - const url = `${normalizeBaseUrl(baseUrl)}/v1/workspaces/${encodeURIComponent(workspace)}/fs/import`; - const response = await fetch(url, { - method: 'POST', - headers: { - Authorization: `Bearer ${token}`, - 'Content-Type': 'application/gzip', - 'X-Correlation-Id': `seed-tar-${workspace}-${Date.now()}`, - }, - body: tarball.buffer.slice(tarball.byteOffset, tarball.byteOffset + tarball.byteLength) as ArrayBuffer, - }); - - if (response.status === 404) { - // Tar import not supported — fall back to batch upload - return seedWorkspace(baseUrl, token, workspaceId, projectDir, excludeDirs); - } - - if (!response.ok) { - const body = await response.text().catch(() => ''); - throw new Error(`tar import failed for workspace ${workspace}: HTTP ${response.status} ${body}`.trim()); - } - - const raw = await response.text(); - if (!raw.trim()) { - return files.length; - } - - try { - const parsed = JSON.parse(raw) as ImportResponseShape; - return typeof parsed.imported === 'number' ? parsed.imported : files.length; - } catch { - return files.length; - } -} diff --git a/packages/sdk/src/provisioner/types.ts b/packages/sdk/src/provisioner/types.ts deleted file mode 100644 index 19fa282cb..000000000 --- a/packages/sdk/src/provisioner/types.ts +++ /dev/null @@ -1,189 +0,0 @@ -import type { - AccessPreset, - AgentPermissions, - CompiledAgentPermissions, - FilePermissions, - PermissionSource, -} from '../workflows/types.js'; -import type { LocalJwksSigningKey } from './local-jwks.js'; -import type { MountHandle } from './mount.js'; - -// ── Input Configuration ──────────────────────────────────────────────────── - -/** Configuration for provisioning workflow agents. */ -export interface WorkflowProvisionConfig { - /** RS256 signing key used to mint JWT tokens. */ - tokenSigningKey: LocalJwksSigningKey; - - /** Workspace identifier (e.g. 'my-project'). */ - workspace: string; - - /** Absolute path to the project directory. */ - projectDir: string; - - /** Base URL of the relayfile server (e.g. 'http://127.0.0.1:4080'). */ - relayfileBaseUrl: string; - - /** - * Agents to provision, keyed by agent name. - * Each entry carries the AgentPermissions from relay.yaml. - * When empty/undefined, agents are auto-discovered from dotfiles. - */ - agents?: Record; - - /** JWT token TTL in seconds. Default: 7200 (2 hours). */ - tokenTtlSeconds?: number; - - /** - * Directories to exclude from workspace seeding. - * Defaults: ['.relay', '.git', 'node_modules']. - */ - excludeDirs?: string[]; - - /** - * When true, skip workspace creation and file seeding. - * Useful when only tokens/ACL are needed. - */ - skipSeeding?: boolean; - - /** - * Admin scopes for the workspace management token. - * Uses DEFAULT_ADMIN_SCOPES when omitted. - */ - adminScopes?: string[]; - - /** Optional explicit relayfile-mount binary path. */ - mountBinaryPath?: string; - - /** Base directory for per-agent mount points. Defaults to /.relay. */ - mountBaseDir?: string; - - /** When true, skip starting relayfile mount processes. */ - skipMount?: boolean; - - /** When true, print a short audit summary to stdout after provisioning. */ - verbose?: boolean; -} - -// ── Output ───────────────────────────────────────────────────────────────── - -/** Aggregate counts for compiled permissions across provisioned agents. */ -export interface ProvisionSummary { - readonly: number; - readwrite: number; - denied: number; - customScopes: number; -} - -/** Convenience shape for a single agent's compiled scopes. */ -export interface CompiledAgentScopes { - /** Agent name. */ - agentName: string; - - /** Workspace identifier. */ - workspace: string; - - /** Final token scopes after compilation. */ - scopes: string[]; - - /** Directory ACL rules derived from the compiled permissions. */ - acl: Record; - - /** Counts for the compiled access model. */ - summary: ProvisionSummary; -} - -/** Result of a single agent's provisioning. */ -export interface AgentProvisionResult { - /** Agent name. */ - name: string; - - /** Absolute path to the written JWT file (.relay/tokens/.jwt). */ - tokenPath: string; - - /** The raw JWT string. */ - token: string; - - /** Scopes baked into the token. */ - scopes: string[]; - - /** Full compiled permissions (for audit / dry-run output). */ - compiled: CompiledAgentPermissions; - - /** Absolute path to the mounted relayfile workspace for this agent, when active. */ - mountPoint?: string; -} - -/** Map of agent names to minted JWT strings. */ -export type AgentTokenMap = Record; - -/** Map of agent names to their provisioning result. */ -export type AgentProvisionMap = Record; - -/** Aggregate result of provisionWorkflowAgents(). */ -export interface ProvisionResult { - /** Per-agent results, keyed by agent name. */ - agents: AgentProvisionMap; - - /** Ordered list of agent names (matches iteration order). */ - agentNames: string[]; - - /** Workspace-level admin token (used for seeding). */ - adminToken: string; - - /** Number of files seeded to the relayfile workspace. */ - seededFileCount: number; - - /** Number of ACL directory rules seeded. */ - seededAclCount: number; - - /** Aggregate summary across all agents. */ - summary: ProvisionSummary; - - /** Per-agent mounted workspace handles. */ - mounts: Map; - - /** Per-agent minted JWT strings. */ - tokens: Map; - - /** Per-agent compiled token scopes. */ - scopes: Map; -} - -// ── Compiler Types ───────────────────────────────────────────────────────── - -/** Input to the permission compiler for a single agent. */ -export interface CompileInput { - agentName: string; - workspace: string; - projectDir: string; - permissions: AgentPermissions; -} - -// ── Seeder Types ─────────────────────────────────────────────────────────── - -/** Options for the ACL seeder. */ -export interface SeedAclOptions { - relayfileBaseUrl: string; - token: string; - workspace: string; - aclRules: Record; -} - -/** Options for workspace file seeding. */ -export interface SeedWorkspaceOptions { - relayfileBaseUrl: string; - token: string; - workspace: string; - projectDir: string; - excludeDirs: string[]; -} - -/** Minimal debug summary written alongside compiled ACL output. */ -export interface AgentAclSummary { - name: string; - summary: Pick; -} - -// Re-export upstream types for convenience. -export type { AccessPreset, AgentPermissions, CompiledAgentPermissions, FilePermissions, PermissionSource }; diff --git a/packages/sdk/src/workflows/README.md b/packages/sdk/src/workflows/README.md deleted file mode 100644 index 9d881c306..000000000 --- a/packages/sdk/src/workflows/README.md +++ /dev/null @@ -1,764 +0,0 @@ -# Agent Relay Workflows - -Orchestrate multi-agent workflows using YAML, TypeScript, or Python. Define agents, wire up dependencies, and let the runner handle execution, retries, and verification. - -## Quick Start - -### CLI - -```bash -# Run a YAML workflow -agent-relay run workflow.yaml - -# Run a TypeScript workflow -agent-relay run workflow.ts - -# Run a Python workflow -agent-relay run workflow.py - -# Run a specific named workflow from a file -agent-relay run workflow.yaml --workflow deploy -``` - -### TypeScript - -```typescript -import { workflow } from '@agent-relay/sdk/workflows'; - -const result = await workflow('ship-feature') - .pattern('dag') - .agent('planner', { cli: 'claude', role: 'Plans implementation' }) - .agent('developer', { cli: 'codex', role: 'Writes code' }) - .agent('reviewer', { cli: 'claude', role: 'Reviews code' }) - .step('plan', { - agent: 'planner', - task: 'Create implementation plan for user authentication', - }) - .step('implement', { - agent: 'developer', - task: 'Implement the plan', - dependsOn: ['plan'], - }) - .step('review', { - agent: 'reviewer', - task: 'Review the implementation', - dependsOn: ['implement'], - }) - .run(); - -console.log(result.status); // "completed" | "failed" -``` - -### Python - -```python -from agent_relay import workflow - -result = ( - workflow("ship-feature") - .pattern("dag") - .agent("planner", cli="claude", role="Plans implementation") - .agent("developer", cli="codex", role="Writes code") - .agent("reviewer", cli="claude", role="Reviews code") - .step("plan", agent="planner", task="Create implementation plan for user auth") - .step("implement", agent="developer", task="Implement the plan", depends_on=["plan"]) - .step("review", agent="reviewer", task="Review the implementation", depends_on=["implement"]) - .run() -) -``` - -## Consumer-Facing Apps + AI SDK Communicate Flows - -A good production split is: - -1. **AI SDK app** handles the user conversation and streaming UI -2. **Communicate / `onRelay()`** lets that point-person coordinate with specialists over Relay -3. **Workflows / `runWorkflow()`** take over when a request needs multi-step execution, verification, or handoffs - -```typescript -import { streamText, wrapLanguageModel } from 'ai'; -import { openai } from '@ai-sdk/openai'; -import { Relay } from '@agent-relay/sdk/communicate'; -import { onRelay } from '@agent-relay/sdk/communicate/adapters/ai-sdk'; -import { runWorkflow } from '@agent-relay/sdk/workflows'; - -export async function POST(req: Request) { - const { prompt, escalate, repo } = await req.json(); - - const relay = new Relay('AppLead'); - const relaySession = onRelay( - { - name: 'AppLead', - instructions: - 'You are the customer-facing lead. Keep the user updated and delegate implementation via Relay when needed.', - }, - relay - ); - - const model = wrapLanguageModel({ - model: openai('gpt-4o-mini'), - middleware: relaySession.middleware, - }); - - if (escalate) { - const workflow = await runWorkflow('workflows/feature-dev.yaml', { - vars: { task: prompt, repo }, - }); - - return Response.json({ status: workflow.status, runId: workflow.runId }); - } - - return streamText({ - model, - tools: relaySession.tools, - system: 'Answer directly when possible; coordinate internally when the task needs specialists.', - prompt, - }).toUIMessageStreamResponse({ - onFinish() { - relaySession.cleanup(); - void relay.close(); - }, - }); -} -``` - -That pattern keeps the user experience snappy while still letting longer Relay workflows run with proper ownership, retries, and verification. - -A compact end-to-end example app for this pattern lives in `examples/ai-sdk-relay-helpdesk/`. - -## YAML Format - -Workflows are defined as `relay.yaml` files: - -```yaml -version: '1.0' -name: my-workflow -description: 'Optional description' - -swarm: - pattern: dag # Execution pattern (see Patterns below) - maxConcurrency: 3 # Max agents running in parallel - timeoutMs: 3600000 # Global timeout (1 hour) - channel: my-channel # Relay channel for agent communication - -agents: - - name: backend - cli: claude # claude | codex | gemini | aider | goose | opencode | droid - role: 'Backend engineer' - constraints: - model: opus - timeoutMs: 600000 - retries: 2 - - - name: tester - cli: codex - role: 'Test engineer' - interactive: false # Non-interactive: runs as subprocess, no PTY/messaging - -workflows: - - name: build-and-test - onError: retry # fail | skip | retry - steps: - - name: build-api - agent: backend - task: 'Build the REST API endpoints for user management' - verification: - type: file_exists - value: 'src/api/users.ts' - retries: 1 - - - name: write-tests - agent: tester - task: 'Write integration tests for: {{steps.build-api.output}}' - dependsOn: [build-api] - - - name: run-tests - agent: tester - task: 'Run the test suite and report results' - dependsOn: [write-tests] - verification: - type: exit_code - value: '0' - -errorHandling: - strategy: retry - maxRetries: 2 - retryDelayMs: 5000 - repairAgent: tester - repairRetries: 2 - notifyChannel: my-channel -``` - -### Template Variables - -Use `{{variable}}` for user-provided values and `{{steps.STEP_NAME.output}}` for previous step outputs: - -```yaml -steps: - - name: plan - agent: planner - task: 'Plan implementation for: {{task}}' # User variable - - - name: implement - agent: developer - dependsOn: [plan] - task: 'Implement: {{steps.plan.output}}' # Previous step output -``` - -User variables are passed via the CLI or programmatically: - -```typescript -await runWorkflow('workflow.yaml', { - vars: { task: 'Add OAuth2 support' }, -}); -``` - -### Verification Checks - -Each step can include a verification check. Verification is one input to the runner's **completion decision pipeline** — when verification passes, the step completes even without a sentinel marker. - -| Type | Description | -| ----------------- | -------------------------------------------------------------------------- | -| `exit_code` | Agent must exit with the specified code (preferred for code-editing steps) | -| `file_exists` | A file must exist at the specified path after the step | -| `output_contains` | Step output must contain the specified string (optional accelerator) | -| `custom` | No-op in the runner; handled by external callers | - -```yaml -# Preferred — deterministic verification -verification: - type: exit_code - value: "0" - description: "Process exited successfully" - -# Also valid — output_contains as an optional accelerator -verification: - type: output_contains - value: "IMPLEMENTATION_COMPLETE" - description: "Agent confirms completion (optional fast-path)" -``` - -### Completion Decision Pipeline - -The runner uses a multi-signal pipeline to decide step completion: - -1. **Deterministic verification** — if a verification check passes, the step completes immediately (`completed_verified`) -2. **Owner decision** — the step owner can issue `OWNER_DECISION: COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL` (`completed_by_owner_decision`) -3. **Evidence-based completion** — channel messages, file artifacts, and exit codes are collected as evidence (`completed_by_evidence`) -4. **Marker fast-path** — `STEP_COMPLETE:` still works as an accelerator but is never required - -| Completion State | Meaning | -| ----------------------------- | ----------------------------------------------- | -| `completed_verified` | Deterministic verification passed | -| `completed_by_owner_decision` | Owner approved the step | -| `completed_by_evidence` | Evidence-based completion | -| `retry_requested_by_owner` | Owner requested retry | -| `failed_verification` | Verification explicitly failed | -| `failed_owner_decision` | Owner rejected the step | -| `failed_no_evidence` | No verification, no owner decision, no evidence | - -**Review parsing is tolerant:** The runner accepts semantically equivalent outputs like "Approved", "Complete", "LGTM" — not just exact `REVIEW_DECISION: APPROVE` strings. - -## Swarm Patterns - -The `swarm.pattern` field controls how agents are coordinated: - -### Core Patterns - -| Pattern | Description | -| -------------- | ---------------------------------------------------------------------- | -| `dag` | Directed acyclic graph — steps run based on dependency edges (default) | -| `fan-out` | All agents run in parallel | -| `pipeline` | Sequential chaining of steps | -| `hub-spoke` | Central hub coordinates spoke agents | -| `consensus` | Agents vote on decisions | -| `mesh` | Full communication graph between agents | -| `handoff` | Sequential handoff between agents | -| `cascade` | Waterfall with phase gates | -| `debate` | Agents propose and counter-argue | -| `hierarchical` | Multi-level reporting structure | - -### Data Processing Patterns - -| Pattern | Description | -| ---------------- | ----------------------------------------------------------------------------------- | -| `map-reduce` | Split work into chunks (mappers), process in parallel, aggregate results (reducers) | -| `scatter-gather` | Fan out requests to workers, collect and synthesize responses | - -### Supervision & Quality Patterns - -| Pattern | Description | -| ------------ | ------------------------------------------------------------------------- | -| `supervisor` | Monitor agent monitors workers, restarts on failure, manages health | -| `reflection` | Agent produces output, critic reviews and provides feedback for iteration | -| `verifier` | Producer agents submit work to verifier agents for validation | - -### Adversarial & Validation Patterns - -| Pattern | Description | -| ---------- | ---------------------------------------------------------------- | -| `red-team` | Attacker agents probe for weaknesses, defender agents respond | -| `auction` | Auctioneer broadcasts tasks, agents bid based on capability/cost | - -### Resilience Patterns - -| Pattern | Description | -| ----------------- | ----------------------------------------------------------------- | -| `escalation` | Start with fast/cheap agents, escalate to more capable on failure | -| `saga` | Distributed transactions with compensating actions on failure | -| `circuit-breaker` | Primary agent with fallback chain, fail fast and recover | - -### Collaborative Patterns - -| Pattern | Description | -| ------------ | -------------------------------------------------------------------- | -| `blackboard` | Shared workspace where agents contribute incrementally to a solution | -| `swarm` | Emergent behavior from simple agent rules (neighbor communication) | - -### Auto-Selection by Role - -When `swarm.pattern` is omitted, the coordinator auto-selects based on agent roles. -Patterns are checked in priority order below (first match wins): - -| Priority | Pattern | Required Roles/Config | -| -------- | ----------------- | -------------------------------------------------------- | -| 1 | `dag` | Steps with `dependsOn` | -| 2 | `consensus` | Uses `coordination.consensusStrategy` config | -| 3 | `map-reduce` | `mapper` + `reducer` | -| 4 | `red-team` | (`attacker` OR `red-team`) + (`defender` OR `blue-team`) | -| 5 | `reflection` | `critic` | -| 6 | `escalation` | `tier-1`, `tier-2`, etc. | -| 7 | `auction` | `auctioneer` | -| 8 | `saga` | `saga-orchestrator` OR `compensate-handler` | -| 9 | `circuit-breaker` | `fallback`, `backup`, OR `primary` | -| 10 | `blackboard` | `blackboard` OR `shared-workspace` | -| 11 | `swarm` | `hive-mind` OR `swarm-agent` | -| 12 | `verifier` | `verifier` | -| 13 | `supervisor` | `supervisor` | -| 14 | `hierarchical` | `lead` (with 4+ agents) | -| 15 | `hub-spoke` | `hub` OR `coordinator` | -| 16 | `pipeline` | Unique agents per step, 3+ steps | -| 17 | `fan-out` | Default fallback | - -## Error Handling - -### Step-Level - -```yaml -steps: - - name: risky-step - agent: worker - task: 'Do something that might fail' - retries: 3 # Retry up to 3 times on failure - timeoutMs: 300000 # 5 minute timeout -``` - -### Workflow-Level - -The `onError` field on a workflow controls what happens when a step fails: - -| Value | Behavior | -| -------------------- | --------------------------------------------------------------------------------------------------------------- | -| `fail` / `fail-fast` | Stop immediately, skip downstream steps | -| `skip` / `continue` | Skip downstream dependents, continue independent steps | -| `retry` | Retry the step; deterministic gates ask a workflow agent to repair before each retry when an agent is available | - -### Global - -```yaml -errorHandling: - strategy: retry - maxRetries: 2 - retryDelayMs: 5000 - repairAgent: tester - repairRetries: 2 - notifyChannel: alerts -``` - -Retry-mode workflows are repair-aware by default. Deterministic step failures, verification gate failures, and malformed agent artifacts are treated as repairable work before terminal failure. The runner chooses `errorHandling.repairAgent` when set, otherwise it uses the step's owning/upstream agent when possible, then falls back to the best available workflow agent. The selected agent gets the failed command or agent output, working directory, exit information, and captured evidence, then the failed gate or step is retried. Use `repairRetries: 0`, `strategy: fail-fast`, or `strategy: continue` when a workflow intentionally should not invoke repair agents. - -## Built-in Templates - -Six pre-built workflow templates are included: - -| Template | Pattern | Description | -| ---------------- | ------------ | ------------------------------------------------------------- | -| `feature-dev` | hub-spoke | Plan, implement, review, and finalize a feature | -| `bug-fix` | hub-spoke | Investigate, patch, validate, and document a bug fix | -| `code-review` | fan-out | Parallel multi-reviewer assessment with consolidated findings | -| `security-audit` | pipeline | Scan, triage, remediate, and verify security issues | -| `refactor` | hierarchical | Analyze, plan, execute, and validate a refactor | -| `documentation` | handoff | Research, draft, review, and publish documentation | - -### Using Templates - -```typescript -import { TemplateRegistry } from '@agent-relay/sdk/workflows'; - -const registry = new TemplateRegistry(); - -// List available templates -const templates = await registry.listTemplates(); - -// Load and run a template -const config = await registry.loadTemplate('feature-dev'); -const runner = new WorkflowRunner(); -const result = await runner.execute(config, undefined, { - task: 'Add WebSocket support to the API', -}); - -// Install a custom template from a URL -await registry.installExternalTemplate('https://example.com/my-template.yaml', 'my-template'); -``` - -## TypeScript Builder API - -The builder constructs a `RelayYamlConfig` object and can run it, export it as YAML, or return the raw config. - -```typescript -import { workflow } from '@agent-relay/sdk/workflows'; - -// Build and run -const result = await workflow('my-workflow') - .pattern('dag') - .maxConcurrency(3) - .timeout(60 * 60 * 1000) - .channel('my-channel') - .agent('backend', { - cli: 'claude', - role: 'Backend engineer', - model: 'opus', - retries: 2, - }) - .agent('frontend', { - cli: 'codex', - role: 'Frontend engineer', - interactive: false, // Non-interactive subprocess mode - }) - .step('api', { - agent: 'backend', - task: 'Build REST API', - verification: { type: 'output_contains', value: 'API_READY' }, - }) - .step('ui', { - agent: 'frontend', - task: 'Build the UI', - dependsOn: ['api'], - }) - .onError('retry', { maxRetries: 2, retryDelayMs: 5000 }) - .run(); - -// Or export to YAML -const yaml = workflow('my-workflow') - .pattern('dag') - .agent('worker', { cli: 'claude' }) - .step('task1', { agent: 'worker', task: 'Do something' }) - .toYaml(); - -// Or get the raw config object -const config = workflow('my-workflow') - .pattern('dag') - .agent('worker', { cli: 'claude' }) - .step('task1', { agent: 'worker', task: 'Do something' }) - .toConfig(); -``` - -## Python Builder API - -Install the Python SDK: - -```bash -pip install agent-relay -``` - -```python -from agent_relay import workflow, run_yaml - -# Build and run -result = ( - workflow("my-workflow") - .pattern("dag") - .max_concurrency(3) - .timeout(3600000) - .agent("backend", cli="claude", role="Backend engineer") - .agent("frontend", cli="codex", role="Frontend engineer") - .step("api", agent="backend", task="Build REST API") - .step("ui", agent="frontend", task="Build the UI", depends_on=["api"]) - .on_error("retry", max_retries=2, retry_delay_ms=5000) - .run() -) - -# Run an existing YAML file -result = run_yaml("workflows/my-workflow.yaml") - -# Export to YAML string -yaml_str = ( - workflow("my-workflow") - .pattern("dag") - .agent("worker", cli="claude") - .step("task1", agent="worker", task="Do something") - .to_yaml() -) - -# Get the raw config dict -config = ( - workflow("my-workflow") - .pattern("dag") - .agent("worker", cli="claude") - .step("task1", agent="worker", task="Do something") - .to_config() -) -``` - -## Programmatic API - -For full control, use the `WorkflowRunner` directly: - -```typescript -import { WorkflowRunner } from '@agent-relay/sdk/workflows'; - -const runner = new WorkflowRunner({ - cwd: '/path/to/project', // Working directory (default: process.cwd()) - relay: { port: 3000 }, // AgentRelay options (optional) -}); - -// Listen to events (broker:event fires frequently — filter it out for cleaner output) -runner.on((event) => { - if (event.type === 'broker:event') return; - console.log(event.type, event); -}); - -// Parse and execute -const config = await runner.parseYamlFile('workflow.yaml'); -const run = await runner.execute(config, 'workflow-name', { - task: 'Build the feature', -}); - -// Pause / resume / abort -runner.pause(); -runner.unpause(); -runner.abort(); - -// Resume a failed run -const resumed = await runner.resume(run.id); -``` - -### Zero-Config Convenience Function - -```typescript -import { runWorkflow } from '@agent-relay/sdk/workflows'; - -const result = await runWorkflow('workflow.yaml', { - workflow: 'deploy', - vars: { environment: 'staging' }, - onEvent: (event) => { - if (event.type !== 'broker:event') console.log(event.type); - }, -}); -``` - -## Coordination - -### Barriers - -Synchronization points that wait for specific steps to complete: - -```yaml -coordination: - barriers: - - name: all-reviews-done - waitFor: [review-arch, review-security, review-correctness] - timeoutMs: 900000 - consensusStrategy: majority # majority | unanimous | quorum -``` - -### Shared State - -Agents can share state during execution: - -```yaml -state: - backend: memory # memory | redis | database - ttlMs: 86400000 - namespace: my-workflow -``` - -## Supported Agent CLIs - -| CLI | Description | -| ---------- | ----------------------- | -| `claude` | Claude Code (Anthropic) | -| `codex` | Codex CLI (OpenAI) | -| `gemini` | Gemini CLI (Google) | -| `aider` | Aider coding assistant | -| `goose` | Goose AI assistant | -| `opencode` | OpenCode CLI | -| `droid` | Droid CLI | - -## Non-Interactive Agents - -By default, agents run in interactive PTY mode with full relay messaging. For workers that just need to execute a task and return output — common in fan-out, map-reduce, and pipeline patterns — set `interactive: false` to run them as lightweight subprocesses. - -### YAML - -```yaml -agents: - - name: lead - cli: claude - role: 'Coordinates work' - # interactive: true (default) — full PTY, relay messaging, /exit detection - - - name: worker - cli: codex - role: 'Executes tasks' - interactive: false # Runs "codex exec ", captures stdout -``` - -### TypeScript - -```typescript -workflow('fan-out-analysis') - .pattern('fan-out') - .agent('lead', { cli: 'claude', role: 'Coordinator' }) - .agent('worker-1', { cli: 'codex', interactive: false, role: 'Analyst' }) - .agent('worker-2', { cli: 'codex', interactive: false, role: 'Analyst' }) - .step('analyze-1', { agent: 'worker-1', task: 'Analyze module A' }) - .step('analyze-2', { agent: 'worker-2', task: 'Analyze module B' }) - .step('synthesize', { - agent: 'lead', - task: 'Combine: {{steps.analyze-1.output}} + {{steps.analyze-2.output}}', - dependsOn: ['analyze-1', 'analyze-2'], - }) - .run(); -``` - -### How It Works - -| Aspect | Interactive (default) | Non-Interactive | -| ---------------- | ----------------------------------------- | ----------------------------------------------- | -| Execution | Full PTY with stdin/stdout | `child_process.spawn()` with piped stdio | -| CLI invocation | Standard interactive session | One-shot mode (`claude -p`, `codex exec`, etc.) | -| Relay messaging | Can send/receive messages | No messaging — excluded from topology edges | -| Self-termination | Must output `/exit` | Process exits naturally when done | -| Output capture | PTY output buffer | stdout capture | -| Overhead | Higher (PTY, echo verification, SIGWINCH) | Lower (simple subprocess) | - -### Non-Interactive CLI Commands - -| CLI | Command | Notes | -| ---------- | ------------------------------------------------ | -------------------------------- | -| `claude` | `claude -p ""` | Print mode, exits after response | -| `codex` | `codex exec ""` | One-shot execution | -| `gemini` | `gemini -p ""` | Prompt mode | -| `opencode` | `opencode --prompt ""` | One-shot prompt | -| `droid` | `droid exec ""` | One-shot execution | -| `aider` | `aider --message "" --yes-always --no-git` | Auto-approve, skip git | -| `goose` | `goose run --text "" --no-session` | Text mode, no session file | - -### When to Use - -- Fan-out workers that process a task and return results -- Map-reduce mappers that don't need mid-task communication -- Pipeline stages that transform input to output -- Any agent that doesn't need turn-by-turn relay messaging - -### When NOT to Use - -- Lead/coordinator agents that communicate with others -- Agents in debate, consensus, or reflection patterns -- Agents that need to receive messages during execution - -## Agent Slash Commands - -Agents running inside a workflow can output slash commands to signal the broker. These are detected in the agent's PTY output at the broker level — the agent simply prints the command on its own line. - -### `/exit` - -Signals that the agent has completed its current step and is ready to be released. - -``` -/exit -``` - -The workflow runner waits for each agent to `/exit` after delivering a step task. When the broker detects `/exit` in the agent's output (exact line match after ANSI stripping), it: - -1. Emits an `agent_exit` frame with `reason: "agent_requested"` -2. Triggers graceful PTY shutdown - -If an agent does not `/exit` within the step's `timeoutMs`, the runner treats the step as timed out. As a safety net, steps with `file_exists` verification will still pass if the expected file is present despite the timeout. - -**Best practice:** Instruct agents to output `/exit` when done in your step task descriptions: - -```yaml -steps: - - name: build-api - agent: backend - task: | - Build the REST API endpoints for user management. - When finished, output /exit. -``` - -## Idle Agent Detection and Nudging - -Interactive agents sometimes finish their task but forget to `/exit`, sitting idle and blocking downstream steps. The runner can detect idle agents and take action automatically. - -### Configuration - -Add `idleNudge` to your swarm config: - -```yaml -swarm: - pattern: hub-spoke - idleNudge: - nudgeAfterMs: 120000 # 2 min before first nudge (default) - escalateAfterMs: 120000 # 2 min after nudge before force-release (default) - maxNudges: 1 # Nudges before escalation (default) -``` - -All built-in templates include idle nudging with these defaults. - -### How It Works - -1. **Detection**: The broker tracks agent output timestamps and emits `agent_idle` events when an agent goes silent for the configured threshold -2. **Nudge**: For hub patterns (hub-spoke, fan-out, hierarchical, etc.), the runner tells the hub agent to check on the idle agent. For non-hub patterns, a system message is injected directly into the agent's PTY -3. **Escalation**: If the agent remains idle after `maxNudges` attempts, the runner force-releases it and captures whatever output was produced -4. **No config**: When `idleNudge` is omitted, the runner uses simple `waitForExit` (backward compatible) - -### Events - -The runner emits two new events for idle nudging: - -| Event | Description | -| --------------------- | ------------------------------------------------------------- | -| `step:nudged` | Fired when a nudge message is sent to an idle agent | -| `step:force-released` | Fired when an agent is force-released after exhausting nudges | - -## Automatic Step Owner and Review - -For interactive agent steps, the runner uses a point-person-led completion model: - -1. **Elects a step owner** (prefers lead/coordinator-style agents, falls back to the step agent) -2. **Runs a completion decision pipeline** — checks deterministic verification first, then owner judgment, then evidence -3. **Owner can issue structured decisions** via `OWNER_DECISION: COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION` with optional `REASON: ` -4. **Review parsing is tolerant** — accepts "Approved", "Complete", "LGTM", not just exact `REVIEW_DECISION: APPROVE` -5. **Markers are optional accelerators** — `STEP_COMPLETE:` still works as a fast-path but is never required -6. Stores primary output plus review output in the step artifact - -**Evidence-based completion:** The runner collects channel messages, file artifacts, process exit codes, and coordination signals (e.g., WORKER_DONE posted in channel) as completion evidence. When sufficient evidence exists, the step completes without requiring any sentinel marker. - -Deterministic and worktree steps are unchanged and do not require owner/review delegation. - -## Schema Validation - -A JSON Schema is available at `packages/sdk/src/workflows/schema.json` for editor autocompletion and validation of `relay.yaml` files. - -## Requirements - -- Node.js 22+ -- `agent-relay` CLI installed (`npm install -g agent-relay`) -- For Python: Python 3.10+ with `pip install agent-relay` -- For TypeScript workflow files: `tsx` or `ts-node` installed - -## License - -Apache-2.0 -- Copyright 2025 Agent Workforce Incorporated diff --git a/packages/sdk/src/workflows/__tests__/budget-enforcement.test.ts b/packages/sdk/src/workflows/__tests__/budget-enforcement.test.ts deleted file mode 100644 index 7b7fd9a7b..000000000 --- a/packages/sdk/src/workflows/__tests__/budget-enforcement.test.ts +++ /dev/null @@ -1,533 +0,0 @@ -import { EventEmitter } from 'node:events'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { mkdtempSync, mkdirSync, rmSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import type { BudgetTracker } from '../budget-tracker.js'; -import type { CliSessionQuery, CliSessionReport } from '../cli-session-collector.js'; -import type { WorkflowDb } from '../runner.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../types.js'; - -const tempDirs: string[] = []; - -type QueuedSubprocessResult = { - stdout?: string; - stderr?: string; - code?: number | null; - signal?: string | null; - delayMs?: number; - error?: Error; - onSpawn?: () => void; -}; - -type CollectorResult = - | CliSessionReport - | null - | ((query: CliSessionQuery) => CliSessionReport | null | Promise); - -let queuedSubprocessResults: QueuedSubprocessResult[] = []; -let queuedCollectorResults: CollectorResult[] = []; -let collectorResultsByCwd = new Map(); -let activeRunner: InstanceType | undefined; - -const mockCollectCliSession = vi.fn(async (query: CliSessionQuery): Promise => { - const next = - queuedCollectorResults.length > 0 ? queuedCollectorResults.shift() : collectorResultsByCwd.get(query.cwd); - - if (typeof next === 'function') { - return next(query); - } - - return next ?? null; -}); - -vi.mock('../cli-session-collector.js', () => ({ - collectCliSession: mockCollectCliSession, -})); - -const mockSubprocessSpawn = vi.fn().mockImplementation((_cmd, _args, _options) => { - const result = queuedSubprocessResults.shift() ?? { - stdout: 'completed\n', - code: 0, - }; - - const child = new EventEmitter() as EventEmitter & { - stdout: EventEmitter; - stderr: EventEmitter; - pid: number; - kill: ReturnType; - }; - - child.stdout = new EventEmitter(); - child.stderr = new EventEmitter(); - child.pid = 4321; - child.kill = vi.fn(); - - result.onSpawn?.(); - - const emitResult = () => { - if (result.error) { - child.emit('error', result.error); - return; - } - if (result.stdout) { - child.stdout.emit('data', Buffer.from(result.stdout)); - } - if (result.stderr) { - child.stderr.emit('data', Buffer.from(result.stderr)); - } - child.emit('close', result.code ?? 0, result.signal ?? null); - }; - - if (result.delayMs && result.delayMs > 0) { - setTimeout(emitResult, result.delayMs); - } else { - queueMicrotask(emitResult); - } - - return child; -}); - -vi.mock('node:child_process', async () => { - const actual = await vi.importActual('node:child_process'); - return { - ...actual, - spawn: mockSubprocessSpawn, - }; -}); - -const mockRelayInstance = { - spawnPty: vi.fn(), - human: vi.fn().mockReturnValue({ sendMessage: vi.fn().mockResolvedValue(undefined) }), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - listAgentsRaw: vi.fn().mockResolvedValue([]), - listAgents: vi.fn().mockResolvedValue([]), - addListener: vi.fn(() => () => {}), -}; - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn(), - RelayError: class RelayError extends Error {}, -})); - -vi.mock('../../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -const { WorkflowRunner } = await import('../runner.js'); - -interface DbHarness { - db: WorkflowDb; - getRun(id: string): WorkflowRunRow | null; - getSteps(runId: string): WorkflowStepRow[]; -} - -function makeDbHarness(): DbHarness { - const runs = new Map(); - const steps = new Map(); - - return { - db: { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) { - runs.set(id, { ...existing, ...patch }); - } - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) { - steps.set(id, { ...existing, ...patch }); - } - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((step) => step.runId === runId).map((step) => ({ ...step })); - }), - }, - getRun(id: string) { - const run = runs.get(id); - return run ? { ...run } : null; - }, - getSteps(runId: string) { - return [...steps.values()].filter((step) => step.runId === runId).map((step) => ({ ...step })); - }, - }; -} - -function createWorkspace(subdirs: string[] = []): string { - const dir = mkdtempSync(path.join(os.tmpdir(), 'relay-budget-enforcement-')); - tempDirs.push(dir); - - for (const subdir of subdirs) { - mkdirSync(path.join(dir, subdir), { recursive: true }); - } - - return dir; -} - -function makeRunner(cwd: string, db: WorkflowDb): InstanceType { - return new WorkflowRunner({ - cwd, - db, - workspaceId: 'ws-test', - relay: { - env: { - AGENT_RELAY_WORKFLOW_DISABLE_RELAYCAST: '1', - }, - }, - }); -} - -function makeAgent( - name: string, - overrides: Partial = {} -): RelayYamlConfig['agents'][number] { - return { - name, - cli: 'claude', - interactive: false, - ...overrides, - }; -} - -function makeStep( - name: string, - agent: string, - overrides: Partial[number]['steps'][number]> = {} -): NonNullable[number]['steps'][number] { - return { - name, - agent, - task: `Complete ${name}`, - ...overrides, - }; -} - -function makeConfig(input: { - agents: RelayYamlConfig['agents']; - steps: NonNullable[number]['steps']; - swarm?: Partial; -}): RelayYamlConfig { - return { - version: '1', - name: 'budget-enforcement', - swarm: { - pattern: 'dag', - ...input.swarm, - }, - agents: input.agents, - workflows: [ - { - name: 'default', - steps: input.steps, - }, - ], - trajectories: false, - }; -} - -function makeReport( - tokens: Partial>, - overrides: Partial = {} -): CliSessionReport { - return { - cli: 'claude', - sessionId: 'session-1', - model: 'claude-sonnet-4', - provider: 'anthropic', - durationMs: 1_000, - cost: null, - tokens: { - input: tokens.input ?? 0, - output: tokens.output ?? 0, - cacheRead: tokens.cacheRead ?? 0, - }, - turns: 1, - toolCalls: [], - errors: [], - finalStatus: 'completed', - summary: 'done', - ...overrides, - }; -} - -function getBudgetTracker(runner: InstanceType): BudgetTracker | undefined { - return (runner as any).budgetTracker as BudgetTracker | undefined; -} - -beforeEach(() => { - vi.clearAllMocks(); - queuedSubprocessResults = []; - queuedCollectorResults = []; - collectorResultsByCwd = new Map(); - activeRunner = undefined; - mockRelayInstance.shutdown.mockResolvedValue(undefined); - mockRelayInstance.onBrokerStderr.mockReturnValue(() => {}); - mockRelayInstance.listAgents.mockResolvedValue([]); -}); - -afterEach(() => { - activeRunner = undefined; - while (tempDirs.length > 0) { - rmSync(tempDirs.pop()!, { recursive: true, force: true }); - } -}); - -describe('WorkflowRunner budget enforcement integration', () => { - it('workflow with no budget config runs normally', async () => { - const workspace = createWorkspace(['step-1', 'step-2']); - const dbHarness = makeDbHarness(); - const runner = makeRunner(workspace, dbHarness.db); - activeRunner = runner; - - queuedSubprocessResults = [ - { stdout: 'step 1 complete\n', code: 0 }, - { stdout: 'step 2 complete\n', code: 0 }, - ]; - queuedCollectorResults = [null, null]; - - const run = await runner.execute( - makeConfig({ - agents: [makeAgent('worker-1', { cwd: 'step-1' }), makeAgent('worker-2', { cwd: 'step-2' })], - steps: [makeStep('step-1', 'worker-1'), makeStep('step-2', 'worker-2', { dependsOn: ['step-1'] })], - }), - 'default' - ); - - expect(run.status).toBe('completed'); - expect(getBudgetTracker(runner)).toBeUndefined(); - expect(dbHarness.getSteps(run.id).map((step) => step.status)).toEqual(['completed', 'completed']); - expect(mockSubprocessSpawn).toHaveBeenCalledTimes(2); - }); - - it('per-agent maxTokens recorded in budget tracker', async () => { - const workspace = createWorkspace(['writer']); - const dbHarness = makeDbHarness(); - const runner = makeRunner(workspace, dbHarness.db); - activeRunner = runner; - - queuedSubprocessResults = [{ stdout: 'draft complete\n', code: 0 }]; - collectorResultsByCwd.set(path.join(workspace, 'writer'), makeReport({ input: 800, output: 150 })); - - const run = await runner.execute( - makeConfig({ - agents: [ - makeAgent('writer', { - cwd: 'writer', - constraints: { maxTokens: 1_000 }, - }), - ], - steps: [makeStep('draft', 'writer')], - }), - 'default' - ); - - const tracker = getBudgetTracker(runner); - - expect(run.status).toBe('completed'); - expect(tracker).toBeDefined(); - expect(tracker?.getStepUsage('draft')).toEqual({ - input: 800, - output: 150, - cacheRead: 0, - total: 950, - }); - expect(tracker?.getStepBudgetStatus('draft')).toEqual({ - used: 950, - limit: 1_000, - over: false, - }); - }); - - it('per-workflow tokenBudget prevents spawning when exhausted', async () => { - const workspace = createWorkspace(['planner', 'writer']); - const dbHarness = makeDbHarness(); - const runner = makeRunner(workspace, dbHarness.db); - activeRunner = runner; - - queuedSubprocessResults = [{ stdout: 'plan complete\n', code: 0 }]; - collectorResultsByCwd.set(path.join(workspace, 'planner'), makeReport({ input: 1_500, output: 300 })); - - const run = await runner.execute( - makeConfig({ - agents: [makeAgent('planner', { cwd: 'planner' }), makeAgent('writer', { cwd: 'writer' })], - steps: [makeStep('step-1', 'planner'), makeStep('step-2', 'writer', { dependsOn: ['step-1'] })], - swarm: { - tokenBudget: 2_000, - }, - }), - 'default' - ); - - const tracker = getBudgetTracker(runner); - const failedStep = dbHarness.getSteps(run.id).find((step) => step.stepName === 'step-2'); - - expect(run.status).toBe('failed'); - expect(mockSubprocessSpawn).toHaveBeenCalledTimes(1); - expect(tracker?.getTotalUsage().total).toBe(1_800); - expect(failedStep?.status).toBe('failed'); - expect(failedStep?.error).toContain('workflow budget exhausted'); - expect(failedStep?.error).toContain('1800/2000'); - }); - - it('pre-spawn check allows step when budget has headroom', async () => { - const workspace = createWorkspace(['first', 'second']); - const dbHarness = makeDbHarness(); - const runner = makeRunner(workspace, dbHarness.db); - activeRunner = runner; - - let checkCanSpawnAllowed: boolean | undefined; - - queuedSubprocessResults = [ - { stdout: 'first complete\n', code: 0 }, - { - stdout: 'second complete\n', - code: 0, - onSpawn: () => { - checkCanSpawnAllowed = getBudgetTracker(runner)?.checkCanSpawn('step-2').allowed; - }, - }, - ]; - collectorResultsByCwd.set(path.join(workspace, 'first'), makeReport({ input: 900, output: 100 })); - collectorResultsByCwd.set(path.join(workspace, 'second'), makeReport({ input: 150, output: 50 })); - - const run = await runner.execute( - makeConfig({ - agents: [makeAgent('first-agent', { cwd: 'first' }), makeAgent('second-agent', { cwd: 'second' })], - steps: [ - makeStep('step-1', 'first-agent'), - makeStep('step-2', 'second-agent', { dependsOn: ['step-1'] }), - ], - swarm: { - tokenBudget: 5_000, - }, - }), - 'default' - ); - - expect(run.status).toBe('completed'); - expect(checkCanSpawnAllowed).toBe(true); - expect(mockSubprocessSpawn).toHaveBeenCalledTimes(2); - }); - - it('retry attempts consume from same budget', async () => { - const workspace = createWorkspace(['retry-agent']); - const dbHarness = makeDbHarness(); - const runner = makeRunner(workspace, dbHarness.db); - activeRunner = runner; - - let usageBeforeRetry: number | undefined; - - queuedSubprocessResults = [ - { stdout: 'first attempt failed\n', code: 1 }, - { - stdout: 'retry succeeded\n', - code: 0, - onSpawn: () => { - usageBeforeRetry = getBudgetTracker(runner)?.getStepUsage('retry-step').total; - }, - }, - ]; - queuedCollectorResults = [ - makeReport({ input: 500, output: 100 }, { finalStatus: 'failed' }), - makeReport({ input: 250, output: 100 }), - ]; - - const run = await runner.execute( - makeConfig({ - agents: [ - makeAgent('retry-agent', { - cwd: 'retry-agent', - constraints: { maxTokens: 1_000 }, - }), - ], - steps: [makeStep('retry-step', 'retry-agent', { retries: 1 })], - }), - 'default' - ); - - const tracker = getBudgetTracker(runner); - - expect(run.status).toBe('completed'); - expect(usageBeforeRetry).toBe(600); - expect(tracker?.getStepUsage('retry-step')).toEqual({ - input: 750, - output: 200, - cacheRead: 0, - total: 950, - }); - expect(tracker?.getStepBudgetStatus('retry-step')).toEqual({ - used: 950, - limit: 1_000, - over: false, - }); - expect(mockCollectCliSession).toHaveBeenCalledTimes(2); - }); - - it('parallel steps track budget correctly', async () => { - const workspace = createWorkspace(['parallel-a', 'parallel-b']); - const dbHarness = makeDbHarness(); - const runner = makeRunner(workspace, dbHarness.db); - activeRunner = runner; - - queuedSubprocessResults = [ - { stdout: 'parallel a\n', code: 0, delayMs: 10 }, - { stdout: 'parallel b\n', code: 0, delayMs: 1 }, - ]; - collectorResultsByCwd.set(path.join(workspace, 'parallel-a'), makeReport({ input: 500, output: 200 })); - collectorResultsByCwd.set(path.join(workspace, 'parallel-b'), makeReport({ input: 700, output: 200 })); - - const run = await runner.execute( - makeConfig({ - agents: [ - makeAgent('parallel-a', { cwd: 'parallel-a' }), - makeAgent('parallel-b', { cwd: 'parallel-b' }), - ], - steps: [makeStep('parallel-a', 'parallel-a'), makeStep('parallel-b', 'parallel-b')], - swarm: { - tokenBudget: 5_000, - }, - }), - 'default' - ); - - const tracker = getBudgetTracker(runner); - - expect(run.status).toBe('completed'); - expect(tracker?.getStepUsage('parallel-a')).toEqual({ - input: 500, - output: 200, - cacheRead: 0, - total: 700, - }); - expect(tracker?.getStepUsage('parallel-b')).toEqual({ - input: 700, - output: 200, - cacheRead: 0, - total: 900, - }); - expect(tracker?.getTotalUsage()).toEqual({ - input: 1_200, - output: 400, - cacheRead: 0, - total: 1_600, - }); - expect(tracker?.getRunSummaryBudgetData()?.workflow).toEqual({ - used: 1_600, - limit: 5_000, - exhausted: false, - }); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/budget-tracker.test.ts b/packages/sdk/src/workflows/__tests__/budget-tracker.test.ts deleted file mode 100644 index 42e07cb53..000000000 --- a/packages/sdk/src/workflows/__tests__/budget-tracker.test.ts +++ /dev/null @@ -1,120 +0,0 @@ -import { describe, expect, it } from 'vitest'; - -import { BudgetExceededError, BudgetTracker, type TokenUsage } from '../budget-tracker.js'; - -function expectUsage(actual: TokenUsage, expected: TokenUsage): void { - expect(actual).toEqual(expected); -} - -describe('BudgetTracker', () => { - it('tracks usage across multiple steps', () => { - const tracker = new BudgetTracker({ perAgent: 100, perWorkflow: 500 }); - - tracker.recordUsage('planner', { input: 10, output: 5, cacheRead: 3 }); - tracker.recordUsage('writer', { input: 20, output: 4 }); - tracker.recordUsage('planner', { input: 1, output: 1, cacheRead: 2 }); - - expectUsage(tracker.getStepUsage('planner'), { - input: 11, - output: 6, - cacheRead: 5, - total: 17, - }); - expectUsage(tracker.getStepUsage('writer'), { - input: 20, - output: 4, - cacheRead: 0, - total: 24, - }); - expectUsage(tracker.getTotalUsage(), { - input: 31, - output: 10, - cacheRead: 5, - total: 41, - }); - // total = input + output (cacheRead excluded from budget) - expect(tracker.getRemainingBudget()).toEqual({ - agent: 59, - workflow: 459, - }); - }); - - it('detects when a step exceeds the per-agent budget', () => { - const tracker = new BudgetTracker({ perAgent: 25, perWorkflow: 100 }); - - tracker.recordUsage('specialist', { input: 18, output: 9 }); - - expect(tracker.isOverBudget('specialist')).toEqual({ - over: true, - reason: 'Step "specialist" exceeded per-agent budget (27/25)', - }); - }); - - it('detects when total usage exceeds the per-workflow budget', () => { - const tracker = new BudgetTracker({ perAgent: 100, perWorkflow: 40 }); - - tracker.recordUsage('step-a', { input: 10, output: 10 }); - tracker.recordUsage('step-b', { input: 15, output: 10 }); - - expect(tracker.isOverBudget('step-b')).toEqual({ - over: true, - reason: 'Workflow exceeded total budget (45/40)', - }); - expect(tracker.checkCanSpawn('step-c')).toEqual({ - allowed: false, - reason: 'Cannot spawn step-c: workflow budget exceeded (45/40)', - }); - }); - - it('refuses to spawn when the remaining workflow budget is nearly exhausted', () => { - const tracker = new BudgetTracker({ perAgent: 100, perWorkflow: 250 }); - - tracker.recordUsage('lead', { input: 120, output: 121 }); - - expect(tracker.checkCanSpawn('reviewer')).toEqual({ - allowed: false, - reason: 'Cannot spawn reviewer: remaining workflow budget 9 is below step budget 100', - }); - }); - - it('maintains correct totals when parallel steps record usage concurrently', async () => { - const tracker = new BudgetTracker({ perAgent: 1_000, perWorkflow: 10_000 }); - - await Promise.all( - Array.from({ length: 40 }, async (_, index) => { - await new Promise((resolve) => setTimeout(resolve, index % 5)); - tracker.recordUsage(`step-${index % 4}`, { - input: 2, - output: 3, - cacheRead: 1, - }); - }) - ); - - expectUsage(tracker.getTotalUsage(), { - input: 80, - output: 120, - cacheRead: 40, - total: 200, - }); - - for (const stepName of ['step-0', 'step-1', 'step-2', 'step-3']) { - expectUsage(tracker.getStepUsage(stepName), { - input: 20, - output: 30, - cacheRead: 10, - total: 50, - }); - } - }); - - it('exposes budget metadata on BudgetExceededError', () => { - const error = new BudgetExceededError('planner', 'workflow', 100, 125); - - expect(error.name).toBe('BudgetExceededError'); - expect(error.stepName).toBe('planner'); - expect(error.budgetType).toBe('workflow'); - expect(error.limit).toBe(100); - expect(error.actual).toBe(125); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/builder-paths.test.ts b/packages/sdk/src/workflows/__tests__/builder-paths.test.ts deleted file mode 100644 index 3d0f0ff17..000000000 --- a/packages/sdk/src/workflows/__tests__/builder-paths.test.ts +++ /dev/null @@ -1,76 +0,0 @@ -import { describe, it, expect } from 'vitest'; - -import { WorkflowBuilder } from '../builder.js'; - -describe('WorkflowBuilder.paths()', () => { - it('records the declared paths on toConfig() output', () => { - const config = new WorkflowBuilder('multi-repo') - .paths([ - { name: 'alpha', path: 'alpha', description: 'Demo repo A' }, - { name: 'beta', path: 'beta', description: 'Demo repo B' }, - ]) - .agent('worker', { cli: 'codex' }) - .step('noop', { type: 'deterministic', command: 'true' }) - .toConfig(); - - expect(config.paths).toEqual([ - { name: 'alpha', path: 'alpha', description: 'Demo repo A' }, - { name: 'beta', path: 'beta', description: 'Demo repo B' }, - ]); - }); - - it('omits the paths field entirely when none are declared', () => { - const config = new WorkflowBuilder('single-repo') - .agent('worker', { cli: 'codex' }) - .step('noop', { type: 'deterministic', command: 'true' }) - .toConfig(); - - expect(config.paths).toBeUndefined(); - }); - - it('does not allow downstream callers to mutate the recorded paths via the input array', () => { - const original = [{ name: 'alpha', path: 'alpha' }]; - const builder = new WorkflowBuilder('mutation-guard') - .paths(original) - .agent('w', { cli: 'codex' }) - .step('s', { type: 'deterministic', command: 'true' }); - - // Mutating the original array AFTER passing it in should not change - // the config the builder emits. - original.push({ name: 'beta', path: 'beta' }); - original[0].name = 'mutated'; - - const config = builder.toConfig(); - expect(config.paths).toEqual([{ name: 'alpha', path: 'alpha' }]); - }); - - it('rejects non-array inputs', () => { - const builder = new WorkflowBuilder('bad-input'); - // @ts-expect-error — runtime guard, not a type-level test - expect(() => builder.paths('not-an-array')).toThrow(/expects an array/); - }); - - it('rejects entries missing name or path', () => { - const builder = new WorkflowBuilder('bad-entry'); - // @ts-expect-error — runtime guard - expect(() => builder.paths([{ name: 'alpha' }])).toThrow(/string `name` and `path`/); - // @ts-expect-error — runtime guard - expect(() => builder.paths([{ path: 'beta' }])).toThrow(/string `name` and `path`/); - }); - - it('rejects duplicate path names', () => { - const builder = new WorkflowBuilder('dup'); - expect(() => - builder.paths([ - { name: 'alpha', path: 'alpha' }, - { name: 'alpha', path: 'alpha-also' }, - ]) - ).toThrow(/duplicate entry name "alpha"/); - }); - - it('returns the builder so the call chains', () => { - const builder = new WorkflowBuilder('chain'); - const returned = builder.paths([{ name: 'alpha', path: 'alpha' }]); - expect(returned).toBe(builder); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/channel-messenger.test.ts b/packages/sdk/src/workflows/__tests__/channel-messenger.test.ts deleted file mode 100644 index 1e9867e4d..000000000 --- a/packages/sdk/src/workflows/__tests__/channel-messenger.test.ts +++ /dev/null @@ -1,145 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; - -// Import from the module that will be extracted from runner.ts -import { - ChannelMessenger, - formatError, - formatStepOutput, - sendToChannel, - truncateMessage, -} from '../channel-messenger.js'; - -describe('channel messenger helpers', () => { - it('sendToChannel forwards messages to the relay client', async () => { - const relay = { send: vi.fn().mockResolvedValue(undefined) }; - await sendToChannel(relay, 'workflow-room', 'hello'); - expect(relay.send).toHaveBeenCalledWith('workflow-room', 'hello'); - }); - - it('truncateMessage keeps the most recent tail within the limit', () => { - expect(truncateMessage('abcdefghij', 4)).toBe('ghij'); - expect(truncateMessage('abc', 10)).toBe('abc'); - }); - - it('formatStepOutput returns a completion note when scrubbed output is empty', () => { - expect(formatStepOutput('plan', '▗▖\n')).toBe('**[plan]** Step completed — output written to disk'); - }); - - it('formatStepOutput scrubs noise and formats a fenced block', () => { - const output = 'Thinking…\nuseful line\n'; - expect(formatStepOutput('plan', output)).toBe('**[plan] Output:**\n```\nuseful line\n```'); - }); - - it('formatStepOutput strips malformed PTY frames through the shared scrubber', () => { - const output = ['real result', 'qW0 | q0 / ql0 _ qqm ~ lqq = qW0 | q0 / ql0 _ qqm', 'done'].join('\n'); - expect(formatStepOutput('plan', output)).toBe('**[plan] Output:**\n```\nreal result\ndone\n```'); - }); - - it('formatStepOutput redacts secrets through the shared scrubber', () => { - const output = 'deploy succeeded\naccess_token=ghp_abcdefghijklmnopqrstuvwxyzABCDEFGHIJ\n'; - const formatted = formatStepOutput('deploy', output); - expect(formatted).toContain('[REDACTED]'); - expect(formatted).not.toContain('ghp_abcdefghijklmnopqrstuvwxyzABCDEFGHIJ'); - }); - - it('formatError normalizes unknown errors', () => { - expect(formatError('build', new Error('Boom'))).toBe('**[build]** Failed: Boom'); - expect(formatError('build', 'bad input')).toBe('**[build]** Failed: bad input'); - }); -}); - -describe('ChannelMessenger', () => { - describe('buildNonInteractiveAwareness', () => { - it('returns undefined when no non-interactive agents exist', () => { - const messenger = new ChannelMessenger(); - const agents = new Map([['worker', { name: 'worker', cli: 'claude', interactive: true }]]); - const result = messenger.buildNonInteractiveAwareness(agents as any, new Map()); - expect(result).toBeUndefined(); - }); - - it('lists non-interactive agents with step references', () => { - const messenger = new ChannelMessenger(); - const agents = new Map([['bg-worker', { name: 'bg-worker', cli: 'claude', interactive: false }]]); - const stepStates = new Map([['analyze', { row: { agentName: 'bg-worker', status: 'running' } }]]); - const result = messenger.buildNonInteractiveAwareness(agents as any, stepStates as any); - expect(result).toContain('bg-worker'); - expect(result).toContain('{{steps.analyze.output}}'); - expect(result).toContain('cannot receive messages'); - }); - }); - - describe('buildDelegationGuidance', () => { - it('includes timeout note when timeout is provided', () => { - const messenger = new ChannelMessenger(); - const result = messenger.buildDelegationGuidance('claude', 300_000); - expect(result).toContain('5 minutes'); - expect(result).toContain('AUTONOMOUS DELEGATION'); - }); - - it('includes sub-agent option only for claude CLI', () => { - const messenger = new ChannelMessenger(); - const claudeResult = messenger.buildDelegationGuidance('claude'); - const codexResult = messenger.buildDelegationGuidance('codex'); - expect(claudeResult).toContain('Task tool'); - expect(codexResult).not.toContain('Task tool'); - }); - - it('omits timeout note when no timeout given', () => { - const messenger = new ChannelMessenger(); - const result = messenger.buildDelegationGuidance('claude'); - expect(result).not.toContain('minutes before this step'); - }); - }); - - describe('buildRelayRegistrationNote', () => { - it('returns empty string for claude CLI', () => { - const messenger = new ChannelMessenger(); - expect(messenger.buildRelayRegistrationNote('claude', 'worker-1')).toBe(''); - }); - - it('returns registration instructions for non-claude CLIs', () => { - const messenger = new ChannelMessenger(); - const result = messenger.buildRelayRegistrationNote('codex', 'helper-1'); - expect(result).toContain('register_agent(name="helper-1")'); - expect(result).toContain('RELAY SETUP'); - }); - }); - - describe('postCompletionReport', () => { - it('formats a completion report with step results', () => { - const postSpy = vi.fn(); - const messenger = new ChannelMessenger({ postFn: postSpy }); - const outcomes = [ - { name: 'plan', agent: 'lead', status: 'completed', attempts: 1, verificationPassed: true }, - { name: 'code', agent: 'worker', status: 'completed', attempts: 2 }, - { name: 'optional', agent: 'worker', status: 'skipped', attempts: 0 }, - ]; - messenger.postCompletionReport('my-workflow', outcomes as any, 'All done', 0.95); - expect(postSpy).toHaveBeenCalledTimes(1); - const text = postSpy.mock.calls[0][0]; - expect(text).toContain('my-workflow'); - expect(text).toContain('Complete'); - expect(text).toContain('95%'); - expect(text).toContain('verified'); - expect(text).toContain('2 attempts'); - expect(text).toContain('skipped'); - }); - }); - - describe('postFailureReport', () => { - it('formats a failure report with error details', () => { - const postSpy = vi.fn(); - const messenger = new ChannelMessenger({ postFn: postSpy }); - const outcomes = [ - { name: 'plan', agent: 'lead', status: 'completed', attempts: 1 }, - { name: 'code', agent: 'worker', status: 'failed', attempts: 3, error: 'Timeout exceeded' }, - ]; - messenger.postFailureReport('my-workflow', outcomes as any, 'Step failed'); - expect(postSpy).toHaveBeenCalledTimes(1); - const text = postSpy.mock.calls[0][0]; - expect(text).toContain('Failed'); - expect(text).toContain('1/2 steps passed'); - expect(text).toContain('Timeout exceeded'); - }); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/cli-session-collector.test.ts b/packages/sdk/src/workflows/__tests__/cli-session-collector.test.ts deleted file mode 100644 index 808276419..000000000 --- a/packages/sdk/src/workflows/__tests__/cli-session-collector.test.ts +++ /dev/null @@ -1,64 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import { mkdtempSync, rmSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import { collectCliSession } from '../cli-session-collector.js'; - -const tempDirs: string[] = []; -const originalHome = process.env.HOME; - -function makeTempDir(prefix: string): string { - const dir = mkdtempSync(path.join(os.tmpdir(), prefix)); - tempDirs.push(dir); - return dir; -} - -async function importCollectorsWithHome(homeDir: string) { - process.env.HOME = homeDir; - vi.resetModules(); - const [claudeModule, opencodeModule] = await Promise.all([ - import('../collectors/claude.js'), - import('../collectors/opencode.js'), - ]); - return { - ClaudeCodeCollector: claudeModule.ClaudeCodeCollector, - OpenCodeCollector: opencodeModule.OpenCodeCollector, - }; -} - -afterEach(() => { - vi.resetModules(); - process.env.HOME = originalHome; - while (tempDirs.length > 0) { - rmSync(tempDirs.pop()!, { recursive: true, force: true }); - } -}); - -describe('cli-session-collector', () => { - it('returns null for an unknown CLI', async () => { - const report = await collectCliSession({ - cli: 'gemini', - cwd: '/tmp/project', - startedAt: 1000, - completedAt: 2000, - }); - - expect(report).toBeNull(); - }); - - it('reports canCollect=false when configured data stores do not exist', async () => { - const homeDir = makeTempDir('cli-session-collector-empty-home-'); - const { ClaudeCodeCollector, OpenCodeCollector } = await importCollectorsWithHome(homeDir); - const { CodexCollector } = await import('../collectors/codex.js'); - - expect(new ClaudeCodeCollector().canCollect()).toBe(false); - expect(new OpenCodeCollector().canCollect()).toBe(false); - expect( - new CodexCollector({ - historyPath: path.join(homeDir, 'missing-history.jsonl'), - statePath: path.join(homeDir, 'missing-state.sqlite'), - }).canCollect() - ).toBe(false); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/collectors/claude.test.ts b/packages/sdk/src/workflows/__tests__/collectors/claude.test.ts deleted file mode 100644 index 2da888ab2..000000000 --- a/packages/sdk/src/workflows/__tests__/collectors/claude.test.ts +++ /dev/null @@ -1,108 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -const tempDirs: string[] = []; -const originalHome = process.env.HOME; - -function makeTempDir(prefix: string): string { - const dir = mkdtempSync(path.join(os.tmpdir(), prefix)); - tempDirs.push(dir); - return dir; -} - -function encodeProjectPath(projectPath: string): string { - return projectPath.replace(/\//g, '--').replace(/^-+/, ''); -} - -function createClaudeFixture(homeDir: string, cwd: string, timestamp: number): string { - const claudeHome = path.join(homeDir, '.claude'); - const projectsRoot = path.join(claudeHome, 'projects', encodeProjectPath(cwd)); - mkdirSync(projectsRoot, { recursive: true }); - - const sessionId = 'session-claude-1'; - writeFileSync( - path.join(claudeHome, 'history.jsonl'), - [ - JSON.stringify({ - timestamp: timestamp - 1000, - project: '/other/project', - sessionId: 'ignored-session', - }), - JSON.stringify({ timestamp, project: cwd, sessionId }), - ].join('\n') - ); - - writeFileSync( - path.join(projectsRoot, `${sessionId}.jsonl`), - [ - JSON.stringify({ type: 'user', text: 'Investigate the failing command' }), - JSON.stringify({ type: 'tool_use', name: 'bash' }), - JSON.stringify({ - type: 'assistant', - message: { - model: 'claude-sonnet-4', - provider: 'anthropic', - usage: { - input_tokens: 42, - output_tokens: 24, - cache_read_input_tokens: 7, - }, - content: [{ text: 'Final concise summary' }], - }, - }), - ].join('\n') - ); - - return sessionId; -} - -async function importCollectorWithHome(homeDir: string) { - process.env.HOME = homeDir; - vi.resetModules(); - const module = await import('../../collectors/claude.js'); - return module.ClaudeCodeCollector; -} - -afterEach(() => { - vi.resetModules(); - process.env.HOME = originalHome; - while (tempDirs.length > 0) { - rmSync(tempDirs.pop()!, { recursive: true, force: true }); - } -}); - -describe('ClaudeCodeCollector', () => { - it('matches by project path and timestamp and reads the session jsonl', async () => { - const homeDir = makeTempDir('claude-home-'); - const cwd = '/repo/project'; - const timestamp = 50_000; - const sessionId = createClaudeFixture(homeDir, cwd, timestamp); - const ClaudeCodeCollector = await importCollectorWithHome(homeDir); - - const report = await new ClaudeCodeCollector().collect({ - cli: 'claude', - cwd, - startedAt: timestamp - 100, - completedAt: timestamp + 2_000, - }); - - expect(report).not.toBeNull(); - expect(report?.sessionId).toBe(sessionId); - expect(report?.model).toBe('claude-sonnet-4'); - expect(report?.provider).toBe('anthropic'); - expect(report?.tokens).toEqual({ input: 42, output: 24, cacheRead: 7 }); - expect(report?.turns).toBe(1); - expect(report?.toolCalls).toEqual([{ name: 'bash', count: 1 }]); - expect(report?.summary).toBe('Final concise summary'); - expect(report?.finalStatus).toBe('completed'); - }); - - it('returns false from canCollect when history and project files are missing', async () => { - const homeDir = makeTempDir('claude-empty-home-'); - const ClaudeCodeCollector = await importCollectorWithHome(homeDir); - - expect(new ClaudeCodeCollector().canCollect()).toBe(false); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/collectors/codex.test.ts b/packages/sdk/src/workflows/__tests__/collectors/codex.test.ts deleted file mode 100644 index 4ecbbfe48..000000000 --- a/packages/sdk/src/workflows/__tests__/collectors/codex.test.ts +++ /dev/null @@ -1,89 +0,0 @@ -import { afterEach, describe, expect, it } from 'vitest'; -import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import { DatabaseSync } from 'node:sqlite'; - -import { CodexCollector } from '../../collectors/codex.js'; - -const tempDirs: string[] = []; - -function makeTempDir(prefix: string): string { - const dir = mkdtempSync(path.join(os.tmpdir(), prefix)); - tempDirs.push(dir); - return dir; -} - -function createCodexFixture(tempDir: string, cwd: string, createdAtSeconds: number) { - const statePath = path.join(tempDir, 'state_5.sqlite'); - const historyPath = path.join(tempDir, 'history.jsonl'); - const db = new DatabaseSync(statePath); - - db.exec(` - CREATE TABLE threads ( - id TEXT PRIMARY KEY, - cwd TEXT, - model_provider TEXT, - tokens_used INTEGER, - created_at INTEGER, - updated_at INTEGER - ); - CREATE TABLE logs ( - thread_id TEXT, - ts INTEGER, - level TEXT, - message TEXT, - line INTEGER - ); - `); - - db.prepare( - 'INSERT INTO threads (id, cwd, model_provider, tokens_used, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)' - ).run('thread-1', cwd, 'openai/gpt-5', 321, createdAtSeconds, createdAtSeconds + 3); - db.prepare('INSERT INTO logs (thread_id, ts, level, message, line) VALUES (?, ?, ?, ?, ?)').run( - 'thread-1', - createdAtSeconds + 1, - 'error', - 'Command failed: bad exit code', - 12 - ); - db.close(); - - writeFileSync( - historyPath, - `${JSON.stringify({ session_id: 'thread-1', ts: createdAtSeconds, text: 'history' })}\n` - ); - - return { statePath, historyPath }; -} - -afterEach(() => { - while (tempDirs.length > 0) { - rmSync(tempDirs.pop()!, { recursive: true, force: true }); - } -}); - -describe('CodexCollector', () => { - it('matches by cwd and time window and extracts errors from logs', async () => { - const tempDir = makeTempDir('codex-fixture-'); - const cwd = '/repo/codex-project'; - const createdAtSeconds = 100; - const { statePath, historyPath } = createCodexFixture(tempDir, cwd, createdAtSeconds); - const collector = new CodexCollector({ statePath, historyPath }); - - const report = await collector.collect({ - cli: 'codex', - cwd, - startedAt: 100_000, - completedAt: 105_000, - }); - - expect(report).not.toBeNull(); - expect(report?.sessionId).toBe('thread-1'); - expect(report?.provider).toBe('openai'); - expect(report?.model).toBe('gpt-5'); - expect(report?.tokens).toEqual({ input: 321, output: 0, cacheRead: 0 }); - expect(report?.errors).toEqual([{ turn: 1, text: 'Command failed: bad exit code' }]); - expect(report?.finalStatus).toBe('failed'); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/collectors/opencode.test.ts b/packages/sdk/src/workflows/__tests__/collectors/opencode.test.ts deleted file mode 100644 index 89e3ebd88..000000000 --- a/packages/sdk/src/workflows/__tests__/collectors/opencode.test.ts +++ /dev/null @@ -1,182 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import { mkdirSync, mkdtempSync, rmSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import { DatabaseSync } from 'node:sqlite'; - -const tempDirs: string[] = []; -const originalHome = process.env.HOME; - -function makeTempDir(prefix: string): string { - const dir = mkdtempSync(path.join(os.tmpdir(), prefix)); - tempDirs.push(dir); - return dir; -} - -function createOpenCodeFixture(homeDir: string, cwd: string, sessionCreatedAt: number): string { - const dbDir = path.join(homeDir, '.local', 'share', 'opencode'); - mkdirSync(dbDir, { recursive: true }); - const dbPath = path.join(dbDir, 'opencode.db'); - const db = new DatabaseSync(dbPath); - - db.exec(` - CREATE TABLE session (id TEXT PRIMARY KEY, directory TEXT, time_created INTEGER); - CREATE TABLE message (id TEXT PRIMARY KEY, session_id TEXT, time_created INTEGER, data TEXT); - CREATE TABLE part (id TEXT PRIMARY KEY, message_id TEXT, session_id TEXT, time_created INTEGER, data TEXT); - `); - - const insertSession = db.prepare('INSERT INTO session (id, directory, time_created) VALUES (?, ?, ?)'); - const insertMessage = db.prepare( - 'INSERT INTO message (id, session_id, time_created, data) VALUES (?, ?, ?, ?)' - ); - const insertPart = db.prepare( - 'INSERT INTO part (id, message_id, session_id, time_created, data) VALUES (?, ?, ?, ?, ?)' - ); - - insertSession.run('session-1', cwd, sessionCreatedAt); - insertSession.run('session-2', '/other/project', sessionCreatedAt + 1000); - - insertMessage.run( - 'msg-1', - 'session-1', - sessionCreatedAt + 10, - JSON.stringify({ role: 'user', tokens: { input: 10, output: 0, cache: { read: 1 } } }) - ); - insertMessage.run( - 'msg-2', - 'session-1', - sessionCreatedAt + 20, - JSON.stringify({ - role: 'assistant', - modelID: 'gpt-5', - providerID: 'openai', - finish: 'error', - cost: 1.25, - tokens: { input: 15, output: 20, cache: { read: 4 } }, - }) - ); - insertMessage.run( - 'msg-other', - 'session-2', - sessionCreatedAt + 30, - JSON.stringify({ role: 'assistant', modelID: 'ignore-me', providerID: 'other', finish: 'completed' }) - ); - - insertPart.run( - 'part-1', - 'msg-1', - 'session-1', - sessionCreatedAt + 11, - JSON.stringify({ type: 'text', text: 'Planning work' }) - ); - insertPart.run( - 'part-2', - 'msg-2', - 'session-1', - sessionCreatedAt + 21, - JSON.stringify({ type: 'tool_call', name: 'write_file' }) - ); - insertPart.run( - 'part-3', - 'msg-2', - 'session-1', - sessionCreatedAt + 22, - JSON.stringify({ type: 'text', text: 'Error: database locked\nCleanup afterwards' }) - ); - insertPart.run( - 'part-4', - 'msg-2', - 'session-1', - sessionCreatedAt + 23, - JSON.stringify({ type: 'text', text: 'Completed summary output' }) - ); - - db.close(); - return dbPath; -} - -async function importCollectorWithHome(homeDir: string) { - process.env.HOME = homeDir; - vi.resetModules(); - vi.doMock('node:module', () => ({ - createRequire: () => (id: string) => { - if (id !== 'better-sqlite3') { - throw new Error(`Unexpected module request: ${id}`); - } - - return class BetterSqliteCompat { - private readonly db: DatabaseSync; - - constructor(filename: string) { - this.db = new DatabaseSync(filename, { open: true, readOnly: true }); - } - - prepare(sql: string) { - const statement = this.db.prepare(sql); - return { - get(params?: unknown): T | undefined { - return statement.get(params as never) as T | undefined; - }, - all(params?: unknown): T[] { - return statement.all(params as never) as T[]; - }, - }; - } - - pragma(_source: string) { - return undefined; - } - - close() { - this.db.close(); - } - }; - }, - })); - const module = await import('../../collectors/opencode.js'); - return module.OpenCodeCollector; -} - -afterEach(() => { - vi.resetModules(); - process.env.HOME = originalHome; - while (tempDirs.length > 0) { - rmSync(tempDirs.pop()!, { recursive: true, force: true }); - } -}); - -describe('OpenCodeCollector', () => { - it('matches by directory and time window, aggregates tokens, and extracts errors', async () => { - const homeDir = makeTempDir('opencode-home-'); - const cwd = path.join(homeDir, 'workspace'); - const sessionCreatedAt = 10_000; - createOpenCodeFixture(homeDir, cwd, sessionCreatedAt); - const OpenCodeCollector = await importCollectorWithHome(homeDir); - - const collector = new OpenCodeCollector(); - const report = await collector.collect({ - cli: 'opencode', - cwd, - startedAt: sessionCreatedAt + 100, - completedAt: sessionCreatedAt + 500, - }); - - expect(report).not.toBeNull(); - expect(report?.sessionId).toBe('session-1'); - expect(report?.model).toBe('gpt-5'); - expect(report?.provider).toBe('openai'); - expect(report?.tokens).toEqual({ input: 25, output: 20, cacheRead: 5 }); - expect(report?.cost).toBe(1.25); - expect(report?.toolCalls).toEqual([{ name: 'write_file', count: 1 }]); - expect(report?.errors).toEqual([{ turn: 3, text: 'Error: database locked' }]); - expect(report?.finalStatus).toBe('failed'); - expect(report?.summary).toBe('Completed summary output'); - }); - - it('returns false from canCollect when the database is missing', async () => { - const homeDir = makeTempDir('opencode-missing-home-'); - const OpenCodeCollector = await importCollectorWithHome(homeDir); - - expect(new OpenCodeCollector().canCollect()).toBe(false); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/e2big-and-verify.test.ts b/packages/sdk/src/workflows/__tests__/e2big-and-verify.test.ts deleted file mode 100644 index 0747e2b54..000000000 --- a/packages/sdk/src/workflows/__tests__/e2big-and-verify.test.ts +++ /dev/null @@ -1,112 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn(), - RelayError: class RelayError extends Error {}, -})); - -vi.mock('../../relay.js', () => ({ - AgentRelay: vi.fn(), -})); - -const { WorkflowRunner } = await import('../runner.js'); - -describe('runVerification output_contains (token double-count fix)', () => { - function createRunner(): InstanceType { - return new WorkflowRunner({ cwd: '/tmp/test' }); - } - - function runVerification( - runner: InstanceType, - check: { type: 'output_contains'; value: string }, - output: string, - stepName: string, - injectedTaskText?: string - ) { - return (runner as any).runVerification(check, output, stepName, injectedTaskText, { - allowFailure: true, - }); - } - - it('passes when token is in output and not in task injection', () => { - const runner = createRunner(); - const result = runVerification( - runner, - { type: 'output_contains', value: 'DONE' }, - 'Task completed. DONE', - 'step1' - ); - expect(result.passed).toBe(true); - }); - - it('fails when token is missing from output entirely', () => { - const runner = createRunner(); - const result = runVerification( - runner, - { type: 'output_contains', value: 'DONE' }, - 'Task completed without the marker', - 'step1' - ); - expect(result.passed).toBe(false); - expect(result.error).toContain('does not contain "DONE"'); - }); - - it('passes when token is in both task injection and agent output', () => { - const runner = createRunner(); - const result = runVerification( - runner, - { type: 'output_contains', value: 'REFLECTION_COMPLETE' }, - 'Your task: output REFLECTION_COMPLETE when done\n\nI have finished. REFLECTION_COMPLETE', - 'step1', - 'Your task: output REFLECTION_COMPLETE when done' - ); - expect(result.passed).toBe(true); - }); - - it('fails when token appears only in task injection (not produced by agent)', () => { - const runner = createRunner(); - const result = runVerification( - runner, - { type: 'output_contains', value: 'REFLECTION_COMPLETE' }, - 'Your task: output REFLECTION_COMPLETE when done\n\nI worked on it but forgot the marker.', - 'step1', - 'Your task: output REFLECTION_COMPLETE when done' - ); - expect(result.passed).toBe(false); - expect(result.error).toContain('does not contain "REFLECTION_COMPLETE"'); - }); - - it('handles token appearing multiple times in task injection', () => { - const runner = createRunner(); - const taskText = 'Output DONE when done. Remember: DONE is required.'; - const output = taskText + '\n\nAll work complete. DONE'; - const result = runVerification( - runner, - { type: 'output_contains', value: 'DONE' }, - output, - 'step1', - taskText - ); - expect(result.passed).toBe(true); - }); - - it('fails when token appears same number of times as in task injection', () => { - const runner = createRunner(); - const taskText = 'Output DONE when done. Remember: DONE is required.'; - const output = taskText + '\n\nAll work complete but no marker here.'; - const result = runVerification( - runner, - { type: 'output_contains', value: 'DONE' }, - output, - 'step1', - taskText - ); - expect(result.passed).toBe(false); - }); - - it('handles empty token gracefully', () => { - const runner = createRunner(); - const result = runVerification(runner, { type: 'output_contains', value: '' }, 'some output', 'step1'); - expect(result.passed).toBe(false); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/e2e-permissions.test.ts b/packages/sdk/src/workflows/__tests__/e2e-permissions.test.ts deleted file mode 100644 index 257a802fe..000000000 --- a/packages/sdk/src/workflows/__tests__/e2e-permissions.test.ts +++ /dev/null @@ -1,413 +0,0 @@ -import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest'; -import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import { fileURLToPath } from 'node:url'; - -import type { WorkflowDb } from '../runner.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../types.js'; -import type { ProvisionResult, WorkflowProvisionConfig } from '../../provisioner/types.js'; - -const fixturePath = fileURLToPath(new URL('./fixtures/permission-test.yaml', import.meta.url)); - -const permissionProfiles = { - reader: { - access: 'readonly', - scopes: ['relayfile:fs:read:/**'], - summary: { readonly: 4, readwrite: 0, denied: 0, customScopes: 0 }, - }, - writer: { - access: 'readwrite', - scopes: ['relayfile:fs:read:/src/tests/**', 'relayfile:fs:write:/src/tests/**'], - summary: { readonly: 0, readwrite: 1, denied: 2, customScopes: 0 }, - }, - 'admin-lead': { - access: 'full', - scopes: ['relayfile:fs:read:/**', 'relayfile:fs:write:/**'], - summary: { readonly: 0, readwrite: 6, denied: 0, customScopes: 0 }, - }, -} as const; - -type PermissionProfile = (typeof permissionProfiles)[keyof typeof permissionProfiles]; - -function buildCompiledPermissions(agentName: string, workspace: string, profile: PermissionProfile) { - return { - agentName, - workspace, - effectiveAccess: profile.access, - inherited: profile.access !== 'full', - sources: [{ type: 'yaml' as const, label: 'permissions', ruleCount: profile.scopes.length }], - readonlyPatterns: profile.access === 'readonly' ? ['**'] : [], - readwritePatterns: - profile.access === 'full' - ? ['**'] - : profile.scopes - .filter((scope) => scope.startsWith('relayfile:fs:write:')) - .map((scope) => scope.split(':').slice(3).join(':')), - deniedPatterns: agentName === 'writer' ? ['.env', 'secrets/**'] : [], - readonlyPaths: Array.from({ length: profile.summary.readonly }, (_, index) => `readonly-${index}.txt`), - readwritePaths: Array.from({ length: profile.summary.readwrite }, (_, index) => `write-${index}.txt`), - deniedPaths: Array.from({ length: profile.summary.denied }, (_, index) => `denied-${index}.txt`), - scopes: [...profile.scopes], - network: undefined, - exec: undefined, - acl: {}, - summary: { ...profile.summary }, - }; -} - -let lastProvisionCall: WorkflowProvisionConfig | null = null; -let lastProvisionResult: ProvisionResult | null = null; - -const mockProvisionWorkflowAgents = vi.fn( - async (input: WorkflowProvisionConfig): Promise => { - lastProvisionCall = input; - - const agentNames = Object.keys(input.agents ?? {}); - const tokens = new Map(); - const scopes = new Map(); - const agents = Object.fromEntries( - agentNames.map((agentName) => { - const profile = permissionProfiles[agentName as keyof typeof permissionProfiles]; - const token = `jwt-${agentName}`; - const compiled = buildCompiledPermissions(agentName, input.workspace, profile); - - tokens.set(agentName, token); - scopes.set(agentName, [...profile.scopes]); - - return [ - agentName, - { - name: agentName, - tokenPath: path.join(input.projectDir, '.relay', 'tokens', `${agentName}.jwt`), - token, - scopes: [...profile.scopes], - compiled, - }, - ]; - }) - ); - - const result: ProvisionResult = { - agents, - agentNames, - adminToken: 'jwt-admin', - seededFileCount: 0, - seededAclCount: 0, - summary: agentNames.reduce( - (acc, agentName) => { - const profile = permissionProfiles[agentName as keyof typeof permissionProfiles]; - acc.readonly += profile.summary.readonly; - acc.readwrite += profile.summary.readwrite; - acc.denied += profile.summary.denied; - acc.customScopes += profile.summary.customScopes; - return acc; - }, - { readonly: 0, readwrite: 0, denied: 0, customScopes: 0 } - ), - mounts: new Map(), - tokens, - scopes, - }; - - lastProvisionResult = result; - return result; - } -); - -const mockResolveAgentPermissions = vi.fn( - (agentName: string, _permissions: unknown, _projectDir: string, workspace: string) => - buildCompiledPermissions( - agentName, - workspace, - permissionProfiles[agentName as keyof typeof permissionProfiles] - ) -); - -vi.mock('../../provisioner/index.js', async (importOriginal) => { - const actual = await importOriginal(); - return { - ...actual, - provisionWorkflowAgents: mockProvisionWorkflowAgents, - resolveAgentPermissions: mockResolveAgentPermissions, - }; -}); - -const mockFetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ data: { api_key: 'rk_live_test', workspace_id: 'ws-test' } }), - text: () => Promise.resolve(''), -}); -vi.stubGlobal('fetch', mockFetch); - -const mockRelaycastAgent = { - send: vi.fn().mockResolvedValue(undefined), - heartbeat: vi.fn().mockResolvedValue(undefined), - channels: { - create: vi.fn().mockResolvedValue(undefined), - join: vi.fn().mockResolvedValue(undefined), - invite: vi.fn().mockResolvedValue(undefined), - }, -}; - -const mockRelaycast = { - agents: { - register: vi.fn().mockResolvedValue({ token: 'token-1' }), - }, - as: vi.fn().mockReturnValue(mockRelaycastAgent), -}; - -class MockRelayError extends Error { - code: string; - - constructor(code: string, message: string, status = 400) { - super(message); - this.code = code; - this.name = 'RelayError'; - (this as any).status = status; - } -} - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn().mockImplementation(() => mockRelaycast), - RelayError: MockRelayError, -})); - -let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>; -let waitForIdleFn: (ms?: number) => Promise<'idle' | 'timeout' | 'exited'>; -let mockSpawnOutputs: string[] = []; - -const mockAgent = { - name: 'test-agent-abc', - get waitForExit() { - return waitForExitFn; - }, - get waitForIdle() { - return waitForIdleFn; - }, - release: vi.fn().mockResolvedValue(undefined), -}; - -const mockHuman = { - name: 'WorkflowRunner', - sendMessage: vi.fn().mockResolvedValue(undefined), -}; - -const mockListeners = new Map void>>(); -function emitMockEvent(event: string, ...args: any[]): void { - const set = mockListeners.get(event); - if (set) for (const cb of set) cb(...args); -} - -const defaultSpawnPtyImplementation = async ({ name, task }: { name: string; task?: string }) => { - const queued = mockSpawnOutputs.shift(); - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim(); - const output = queued ?? (stepComplete ? `STEP_COMPLETE:${stepComplete}\n` : 'STEP_COMPLETE:unknown\n'); - - queueMicrotask(() => { - emitMockEvent('workerOutput', { name, chunk: output }); - }); - - return { ...mockAgent, name }; -}; - -const mockRelayInstance = { - spawnPty: vi.fn().mockImplementation(defaultSpawnPtyImplementation), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - addListener: vi.fn((event: string, cb: (...args: any[]) => void) => { - let set = mockListeners.get(event); - if (!set) { - set = new Set(); - mockListeners.set(event, set); - } - set.add(cb); - return () => set!.delete(cb); - }), - listAgentsRaw: vi.fn().mockResolvedValue([]), -}; - -vi.mock('../../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -const { WorkflowRunner } = await import('../runner.js'); -const { formatDryRunReport } = await import('../dry-run-format.js'); - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((step) => step.runId === runId); - }), - }; -} - -function never(): Promise { - return new Promise(() => {}); -} - -function createWorkspace(): string { - const dir = mkdtempSync(path.join(os.tmpdir(), 'relay-workflow-permissions-')); - mkdirSync(path.join(dir, 'src', 'tests'), { recursive: true }); - mkdirSync(path.join(dir, 'src'), { recursive: true }); - mkdirSync(path.join(dir, 'secrets'), { recursive: true }); - writeFileSync(path.join(dir, 'README.md'), '# workspace\n'); - writeFileSync(path.join(dir, 'src', 'index.ts'), 'export const value = 1;\n'); - writeFileSync(path.join(dir, 'src', 'tests', 'fixture.txt'), 'fixture\n'); - writeFileSync(path.join(dir, '.env'), 'TOKEN=secret\n'); - writeFileSync(path.join(dir, 'secrets', 'prod.txt'), 'top-secret\n'); - return dir; -} - -async function loadPermissionFixture( - runner: InstanceType, - options: { includeLeadStep?: boolean } = {} -): Promise { - const config = await runner.parseYamlFile(fixturePath); - config.trajectories = false; - - if (options.includeLeadStep) { - const workflow = config.workflows?.find((entry) => entry.name === 'test'); - workflow?.steps.push({ - name: 'lead-step', - agent: 'admin-lead', - dependsOn: ['read-step', 'write-step'], - task: 'Verify admin lead permissions are available and conclude the workflow.', - }); - } - - return config; -} - -describe('WorkflowRunner permissions integration', () => { - let db: WorkflowDb; - let runner: InstanceType; - let workspaceDir: string; - - beforeEach(() => { - vi.clearAllMocks(); - waitForExitFn = vi.fn().mockResolvedValue('exited'); - waitForIdleFn = vi.fn().mockImplementation(() => never()); - mockSpawnOutputs = []; - mockAgent.release.mockResolvedValue(undefined); - mockRelayInstance.spawnPty.mockImplementation(defaultSpawnPtyImplementation); - mockListeners.clear(); - lastProvisionCall = null; - lastProvisionResult = null; - workspaceDir = createWorkspace(); - db = makeDb(); - runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: workspaceDir }); - }); - - afterEach(() => { - vi.restoreAllMocks(); - rmSync(workspaceDir, { recursive: true, force: true }); - }); - - it('provisions permissions, propagates agent tokens, and clears workflow tokens after completion', async () => { - const config = await loadPermissionFixture(runner, { includeLeadStep: true }); - const provisionSpy = vi.spyOn(runner as any, 'provisionAgents'); - const nonInteractiveCommandSpy = vi - .spyOn(WorkflowRunner, 'buildNonInteractiveCommand') - .mockImplementation(() => ({ - cmd: 'sh', - args: ['-c', 'printf "RELAY_AGENT_TOKEN=%s" "$RELAY_AGENT_TOKEN"'], - })); - - const run = await runner.execute(config, 'test'); - const steps = await db.getStepsByRunId(run.id); - const stepByName = new Map(steps.map((step) => [step.stepName, step])); - const provisionedScopes = lastProvisionResult?.scopes; - const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls.map( - ([input]: [{ agentToken?: string; name: string }]) => input - ); - - expect(run.status).toBe('completed'); - expect(provisionSpy).toHaveBeenCalledTimes(1); - expect(mockProvisionWorkflowAgents).toHaveBeenCalledTimes(1); - expect(lastProvisionCall?.workspace).toBe('ws-test'); - expect(lastProvisionCall?.projectDir).toBe(workspaceDir); - expect(Object.keys(lastProvisionCall?.agents ?? {})).toEqual(['reader', 'writer', 'admin-lead']); - - expect(provisionedScopes?.get('reader')).toEqual(['relayfile:fs:read:/**']); - expect(provisionedScopes?.get('reader')?.some((scope) => scope.includes(':write:'))).toBe(false); - expect(provisionedScopes?.get('writer')).toEqual([ - 'relayfile:fs:read:/src/tests/**', - 'relayfile:fs:write:/src/tests/**', - ]); - expect(provisionedScopes?.get('writer')?.filter((scope) => scope.includes(':write:'))).toEqual([ - 'relayfile:fs:write:/src/tests/**', - ]); - expect(provisionedScopes?.get('admin-lead')).toEqual(['relayfile:fs:read:/**', 'relayfile:fs:write:/**']); - - expect(nonInteractiveCommandSpy).toHaveBeenCalledTimes(2); - expect(stepByName.get('read-step')?.output).toBe('RELAY_AGENT_TOKEN=jwt-reader'); - expect(stepByName.get('write-step')?.output).toBe('RELAY_AGENT_TOKEN=jwt-writer'); - - expect(spawnCalls.length).toBeGreaterThan(0); - expect( - spawnCalls.every( - (call: { agentToken: string }) => typeof call.agentToken === 'string' && call.agentToken.length > 0 - ) - ).toBe(true); - expect(spawnCalls[0]?.agentToken).toBe('jwt-admin-lead'); - - expect((runner as any).agentTokens.size).toBe(0); - expect((runner as any).agentMounts.size).toBe(0); - }, 20_000); - - it('shows a permissions summary in dry-run mode', async () => { - const config = await loadPermissionFixture(runner); - const report = runner.dryRun(config, 'test'); - const formatted = formatDryRunReport(report); - - expect(report.permissions).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - agent: 'reader', - access: 'readonly', - writePaths: 0, - }), - expect.objectContaining({ - agent: 'writer', - access: 'readwrite', - writePaths: 1, - }), - expect.objectContaining({ - agent: 'admin-lead', - access: 'full', - }), - ]) - ); - - expect(formatted).toContain('Permissions'); - expect(formatted).toContain('reader'); - expect(formatted).toContain('writer'); - expect(formatted).toContain('admin-lead'); - expect(formatted).toContain('readonly'); - expect(formatted).toContain('readwrite'); - expect(formatted).toContain('full'); - }, 20_000); -}); diff --git a/packages/sdk/src/workflows/__tests__/fixtures/.agentignore b/packages/sdk/src/workflows/__tests__/fixtures/.agentignore deleted file mode 100644 index 8c8d008f1..000000000 --- a/packages/sdk/src/workflows/__tests__/fixtures/.agentignore +++ /dev/null @@ -1,2 +0,0 @@ -.env -secrets/** diff --git a/packages/sdk/src/workflows/__tests__/fixtures/.reader.agentreadonly b/packages/sdk/src/workflows/__tests__/fixtures/.reader.agentreadonly deleted file mode 100644 index 2d348c17c..000000000 --- a/packages/sdk/src/workflows/__tests__/fixtures/.reader.agentreadonly +++ /dev/null @@ -1,2 +0,0 @@ -docs/** -README.md diff --git a/packages/sdk/src/workflows/__tests__/fixtures/permission-test.yaml b/packages/sdk/src/workflows/__tests__/fixtures/permission-test.yaml deleted file mode 100644 index a82ef6214..000000000 --- a/packages/sdk/src/workflows/__tests__/fixtures/permission-test.yaml +++ /dev/null @@ -1,42 +0,0 @@ -version: '1.0' -name: permission-e2e-test -swarm: - pattern: dag - channel: wf-perm-e2e -agents: - - name: reader - cli: claude - preset: reviewer - permissions: - access: readonly - - name: writer - cli: codex - preset: worker - permissions: - access: readwrite - files: - write: ['src/tests/**'] - deny: ['.env', 'secrets/**'] - - name: admin-lead - cli: claude - preset: lead - permissions: - access: full -workflows: - - name: test - steps: - - name: check-env - type: deterministic - command: 'echo "checking env"' - - name: read-step - agent: reader - dependsOn: [check-env] - task: 'Verify you have read access. Check RELAY_AGENT_TOKEN is set.' - verification: - type: exit_code - - name: write-step - agent: writer - dependsOn: [check-env] - task: 'Verify you can write to src/tests/. Check RELAY_AGENT_TOKEN is set.' - verification: - type: exit_code diff --git a/packages/sdk/src/workflows/__tests__/permission-types.test.ts b/packages/sdk/src/workflows/__tests__/permission-types.test.ts deleted file mode 100644 index 779ea2f5a..000000000 --- a/packages/sdk/src/workflows/__tests__/permission-types.test.ts +++ /dev/null @@ -1,154 +0,0 @@ -import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; - -import { afterEach, describe, expect, it } from 'vitest'; - -import { compileAgentScopes, resolveAgentPermissions } from '../../provisioner/compiler.js'; -import type { - AccessPreset, - AgentDefinition, - AgentWorkflowStep, - DeterministicWorkflowStep, -} from '../types.js'; -import { isAgentStep, isDeterministicStep, isRestrictedAgent } from '../types.js'; - -const tempDirs: string[] = []; - -async function createWorkspace(files: Record) { - const dir = await mkdtemp(path.join(tmpdir(), 'relay-workflow-permission-types-')); - tempDirs.push(dir); - - for (const [relativePath, content] of Object.entries(files)) { - const filePath = path.join(dir, relativePath); - await mkdir(path.dirname(filePath), { recursive: true }); - await writeFile(filePath, content); - } - - return dir; -} - -afterEach(async () => { - while (tempDirs.length > 0) { - await rm(tempDirs.pop()!, { recursive: true, force: true }); - } -}); - -describe('workflow permission types', () => { - it('allows agents to omit permissions without becoming restricted', () => { - const agent: AgentDefinition = { - name: 'worker', - cli: 'codex', - task: 'Write tests', - }; - - expect(agent.permissions).toBeUndefined(); - expect(isRestrictedAgent(agent)).toBe(false); - }); - - it.each(['readonly', 'readwrite', 'restricted', 'full'] as const satisfies readonly AccessPreset[])( - 'accepts the %s access preset', - async (access) => { - const workspace = await createWorkspace({ - 'src/index.ts': 'export const value = 1;\n', - }); - - const compiled = compileAgentScopes({ - agentName: `${access}-agent`, - workspace: 'relay-test', - projectDir: workspace, - permissions: { - access, - inherit: false, - }, - }); - - expect(compiled.effectiveAccess).toBe(access); - expect(isRestrictedAgent({ name: 'agent', cli: 'codex', permissions: { access } })).toBe( - access === 'readonly' || access === 'restricted' - ); - } - ); - - it('compiles full permissions with read/write access for all files', async () => { - const workspace = await createWorkspace({ - '.agentignore': 'secret.txt\n', - '.agentreadonly': 'locked.txt\n', - 'locked.txt': 'lock me\n', - 'secret.txt': 'classified\n', - 'src/index.ts': 'export const value = 1;\n', - }); - - const compiled = compileAgentScopes({ - agentName: 'lead', - workspace: 'relay-test', - projectDir: workspace, - permissions: { - access: 'full', - network: false, - exec: ['npm test'], - scopes: ['custom:relay:debug'], - }, - }); - - expect(compiled.effectiveAccess).toBe('full'); - expect(compiled.inherited).toBe(false); - expect(compiled.readonlyPaths).toEqual([]); - expect(compiled.deniedPaths).toEqual([]); - expect(compiled.readwritePaths).toEqual([ - '.agentignore', - '.agentreadonly', - 'locked.txt', - 'secret.txt', - 'src/index.ts', - ]); - expect(compiled.scopes).toContain('relayfile:fs:read:/secret.txt'); - expect(compiled.scopes).toContain('relayfile:fs:write:/secret.txt'); - expect(compiled.scopes).toContain('relayfile:fs:read:/src/index.ts'); - expect(compiled.scopes).toContain('relayfile:fs:write:/src/index.ts'); - expect(compiled.scopes).toContain('custom:relay:debug'); - expect(compiled.network).toBe(false); - expect(compiled.exec).toEqual(['npm test']); - expect(compiled.summary).toEqual({ - readonly: 0, - readwrite: 5, - denied: 0, - customScopes: 1, - }); - }); - - it('preserves backwards-compatible default resolution when permissions are undefined', async () => { - const workspace = await createWorkspace({ - '.agentignore': 'blocked.txt\n', - '.agentreadonly': 'locked.txt\n', - 'blocked.txt': 'do not read\n', - 'locked.txt': 'read only\n', - 'writable.txt': 'can edit\n', - }); - - const compiled = resolveAgentPermissions('legacy-worker', undefined, workspace, 'relay-test'); - - expect(compiled.effectiveAccess).toBe('readwrite'); - expect(compiled.inherited).toBe(true); - expect(compiled.readonlyPaths).toEqual(['locked.txt']); - expect(compiled.readwritePaths).toEqual(['.agentignore', '.agentreadonly', 'writable.txt']); - expect(compiled.deniedPaths).toEqual(['blocked.txt']); - }); - - it('keeps legacy workflow step aliases compatible with WorkflowStep guards', () => { - const agentStep: AgentWorkflowStep = { - name: 'draft', - agent: 'worker', - task: 'Draft the summary', - }; - const deterministicStep: DeterministicWorkflowStep = { - name: 'check', - type: 'deterministic', - command: 'npm test', - }; - - expect(isAgentStep(agentStep)).toBe(true); - expect(isDeterministicStep(agentStep)).toBe(false); - expect(isDeterministicStep(deterministicStep)).toBe(true); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/permissions-integration.test.ts b/packages/sdk/src/workflows/__tests__/permissions-integration.test.ts deleted file mode 100644 index 3fe7690a2..000000000 --- a/packages/sdk/src/workflows/__tests__/permissions-integration.test.ts +++ /dev/null @@ -1,701 +0,0 @@ -import { EventEmitter } from 'node:events'; -import { beforeEach, afterEach, describe, expect, it, vi } from 'vitest'; -import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import type { WorkflowDb } from '../runner.js'; -import type { AgentPermissions, RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../types.js'; - -const tempDirs: string[] = []; - -const mockFetch = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ data: { api_key: 'rk_test', workspace_id: 'ws-test' } }), - text: () => Promise.resolve(''), -}); -vi.stubGlobal('fetch', mockFetch); - -let lastProvisionResult: - | { - scopes: Map; - tokens: Map; - } - | undefined; - -const mockProvisionWorkflowAgents = vi.fn(); - -vi.mock('../../provisioner/index.js', async () => { - const actual = await vi.importActual( - '../../provisioner/index.js' - ); - - mockProvisionWorkflowAgents.mockImplementation(async (config) => { - const scopes = new Map(); - const tokens = new Map(); - const agents: Record = {}; - let readonly = 0; - let readwrite = 0; - let denied = 0; - let customScopes = 0; - - for (const [agentName, permissions] of Object.entries(config.agents ?? {}) as [ - string, - AgentPermissions, - ][]) { - const compiled = actual.resolveAgentPermissions( - agentName, - permissions, - config.projectDir, - config.workspace - ); - const token = `token:${agentName}`; - - scopes.set(agentName, [...compiled.scopes]); - tokens.set(agentName, token); - readonly += compiled.summary.readonly; - readwrite += compiled.summary.readwrite; - denied += compiled.summary.denied; - customScopes += compiled.summary.customScopes; - - agents[agentName] = { - name: agentName, - tokenPath: path.resolve(config.projectDir, '.relay', 'tokens', `${agentName}.jwt`), - token, - scopes: [...compiled.scopes], - compiled, - }; - } - - const result = { - agents, - agentNames: Object.keys(config.agents ?? {}), - adminToken: 'admin-token', - seededFileCount: 0, - seededAclCount: 0, - summary: { readonly, readwrite, denied, customScopes }, - mounts: new Map(), - tokens, - scopes, - }; - - lastProvisionResult = { scopes, tokens }; - return result; - }); - - return { - ...actual, - provisionWorkflowAgents: mockProvisionWorkflowAgents, - }; -}); - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn(), - RelayError: class RelayError extends Error {}, -})); - -function never(): Promise { - return new Promise(() => {}); -} - -let queuedPtyOutputs: string[] = []; -let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>; -let waitForIdleFn: (ms?: number) => Promise<'idle' | 'timeout' | 'exited'>; - -const mockAgent = { - name: 'workflow-agent', - exitCode: 0, - exitSignal: undefined as string | undefined, - get waitForExit() { - return waitForExitFn; - }, - get waitForIdle() { - return waitForIdleFn; - }, - release: vi.fn().mockResolvedValue(undefined), -}; - -const mockHuman = { - name: 'WorkflowRunner', - sendMessage: vi.fn().mockResolvedValue(undefined), -}; - -const mockListeners = new Map void>>(); -function emitMockEvent(event: string, ...args: any[]): void { - const set = mockListeners.get(event); - if (set) for (const cb of set) cb(...args); -} - -const mockRelayInstance = { - spawnPty: vi.fn(), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - listAgentsRaw: vi.fn().mockResolvedValue([]), - addListener: vi.fn((event: string, cb: (...args: any[]) => void) => { - let set = mockListeners.get(event); - if (!set) { - set = new Set(); - mockListeners.set(event, set); - } - set.add(cb); - return () => set!.delete(cb); - }), -}; - -const defaultSpawnPtyImplementation = async ({ name, task }: { name: string; task?: string }) => { - const queued = queuedPtyOutputs.shift(); - const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/u)?.[1]?.trim(); - const output = queued ?? (stepComplete ? `STEP_COMPLETE:${stepComplete}\n` : 'STEP_COMPLETE:done\n'); - - queueMicrotask(() => { - emitMockEvent('workerOutput', { name, chunk: output }); - }); - - return { ...mockAgent, name }; -}; - -vi.mock('../../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -type QueuedSubprocessResult = { - stdout?: string; - stderr?: string; - code?: number | null; - signal?: string | null; - error?: Error; -}; - -let queuedSubprocessResults: QueuedSubprocessResult[] = []; - -const mockSubprocessSpawn = vi.fn().mockImplementation((_cmd, _args, _options) => { - const result = queuedSubprocessResults.shift() ?? { stdout: 'non-interactive complete\n', code: 0 }; - const child = new EventEmitter() as EventEmitter & { - stdout: EventEmitter; - stderr: EventEmitter; - pid: number; - kill: ReturnType; - }; - - child.stdout = new EventEmitter(); - child.stderr = new EventEmitter(); - child.pid = 4321; - child.kill = vi.fn(); - - queueMicrotask(() => { - if (result.error) { - child.emit('error', result.error); - return; - } - if (result.stdout) { - child.stdout.emit('data', Buffer.from(result.stdout)); - } - if (result.stderr) { - child.stderr.emit('data', Buffer.from(result.stderr)); - } - child.emit('close', result.code ?? 0, result.signal ?? null); - }); - - return child; -}); - -vi.mock('node:child_process', async () => { - const actual = await vi.importActual('node:child_process'); - return { - ...actual, - spawn: mockSubprocessSpawn, - }; -}); - -const { WorkflowRunner } = await import('../runner.js'); - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((step) => step.runId === runId); - }), - }; -} - -function createProject(files: Record): string { - const dir = mkdtempSync(path.join(os.tmpdir(), 'relay-permissions-integration-')); - tempDirs.push(dir); - - for (const [relativePath, contents] of Object.entries(files)) { - const absolutePath = path.join(dir, relativePath); - mkdirSync(path.dirname(absolutePath), { recursive: true }); - writeFileSync(absolutePath, contents); - } - - return dir; -} - -function createBaseProject(): string { - return createProject({ - 'src/app.ts': 'export const app = true;\n', - 'docs/review.md': '# review\n', - '.env': 'SECRET=1\n', - }); -} - -function makeRunner(cwd: string): InstanceType { - return new WorkflowRunner({ - cwd, - db: makeDb(), - workspaceId: 'ws-test', - relay: { - env: { - AGENT_RELAY_WORKFLOW_DISABLE_RELAYCAST: '1', - }, - }, - }); -} - -function makeConfig( - agents: RelayYamlConfig['agents'], - steps?: NonNullable[number]['steps'], - permissionProfiles?: RelayYamlConfig['permission_profiles'] -): RelayYamlConfig { - return { - version: '1', - name: 'permissions-integration', - permission_profiles: permissionProfiles, - swarm: { pattern: 'dag' }, - agents, - workflows: [ - { - name: 'default', - steps: - steps ?? - agents.map((agent, index) => ({ - name: `step-${index + 1}`, - agent: agent.name, - task: `Complete work for ${agent.name}`, - })), - }, - ], - trajectories: false, - }; -} - -function getProvisionedScopes(agentName: string): string[] { - expect(lastProvisionResult).toBeDefined(); - const scopes = lastProvisionResult?.scopes.get(agentName); - expect(scopes).toBeDefined(); - return scopes ?? []; -} - -beforeEach(() => { - vi.clearAllMocks(); - lastProvisionResult = undefined; - queuedPtyOutputs = []; - queuedSubprocessResults = []; - waitForExitFn = vi.fn().mockResolvedValue('exited'); - waitForIdleFn = vi.fn().mockImplementation(() => never()); - mockAgent.release.mockResolvedValue(undefined); - mockRelayInstance.spawnPty.mockImplementation(defaultSpawnPtyImplementation); - mockListeners.clear(); -}); - -afterEach(() => { - while (tempDirs.length > 0) { - rmSync(tempDirs.pop()!, { recursive: true, force: true }); - } -}); - -describe('WorkflowRunner permission lifecycle integration', () => { - it('mints workflow tokens before spawning interactive agents', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'writer', - cli: 'claude', - permissions: { access: 'readwrite' }, - }, - ]); - - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - expect(mockProvisionWorkflowAgents).toHaveBeenCalledTimes(1); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(1); - expect(mockProvisionWorkflowAgents.mock.invocationCallOrder[0]).toBeLessThan( - mockRelayInstance.spawnPty.mock.invocationCallOrder[0] - ); - }); - - it('skips provisioning entirely when no agent permissions are configured', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([{ name: 'legacy-agent', cli: 'claude' }]); - - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - expect(mockProvisionWorkflowAgents).not.toHaveBeenCalled(); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledTimes(1); - }); - - it('provisions reviewer agents with readonly scopes only', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'reviewer-agent', - cli: 'claude', - permissions: { access: 'readonly' }, - }, - ]); - - const run = await runner.execute(config, 'default'); - const scopes = getProvisionedScopes('reviewer-agent'); - - expect(run.status).toBe('completed'); - expect(lastProvisionResult?.tokens.get('reviewer-agent')).toBe('token:reviewer-agent'); - expect(scopes.length).toBeGreaterThan(0); - expect(scopes.every((scope) => !scope.includes(':write:'))).toBe(true); - expect(scopes.every((scope) => scope.includes(':read:'))).toBe(true); - }); - - it('provisions worker agents with readwrite scopes', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'worker-agent', - cli: 'claude', - permissions: { access: 'readwrite' }, - }, - ]); - - const run = await runner.execute(config, 'default'); - const scopes = getProvisionedScopes('worker-agent'); - - expect(run.status).toBe('completed'); - expect(scopes).toContain('relayfile:fs:write:/src/app.ts'); - expect(scopes).toContain('relayfile:fs:write:/docs/review.md'); - }); - - it('provisions lead agents with full-access scopes', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'lead-agent', - cli: 'claude', - permissions: { access: 'full' }, - }, - ]); - - const run = await runner.execute(config, 'default'); - const scopes = getProvisionedScopes('lead-agent'); - - expect(run.status).toBe('completed'); - expect(scopes).toContain('relayfile:fs:write:/.env'); - expect(scopes).toContain('relayfile:fs:write:/src/app.ts'); - expect(scopes).toContain('relayfile:fs:write:/docs/review.md'); - }); - - it('passes the workflow agent token through to spawnPty', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'interactive-agent', - cli: 'claude', - permissions: { access: 'readwrite' }, - }, - ]); - - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - expect(mockRelayInstance.spawnPty).toHaveBeenCalledWith( - expect.objectContaining({ - agentToken: 'token:interactive-agent', - }) - ); - }); - - it('merges permission profiles into agent permissions before provisioning', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig( - [ - { - name: 'profiled-agent', - cli: 'claude', - permissions: { - profile: 'reviewer', - why: 'Needs shared reviewer constraints with one extra scope', - files: { - read: ['docs/**'], - }, - scopes: ['relay:custom:use:/review'], - }, - }, - ], - undefined, - { - reviewer: { - description: 'Reusable reviewer profile', - access: 'readonly', - files: { - read: ['src/**'], - deny: ['.env'], - }, - exec: ['git diff'], - }, - } - ); - - const run = await runner.execute(config, 'default'); - const provisionedPermissions = mockProvisionWorkflowAgents.mock.calls[0]?.[0]?.agents?.['profiled-agent']; - - expect(run.status).toBe('completed'); - expect(provisionedPermissions).toEqual({ - description: 'Reusable reviewer profile', - profile: 'reviewer', - why: 'Needs shared reviewer constraints with one extra scope', - access: 'readonly', - files: { - read: ['src/**', 'docs/**'], - deny: ['.env'], - }, - scopes: ['relay:custom:use:/review'], - exec: ['git diff'], - }); - }); - - it('injects RELAY_AGENT_TOKEN into non-interactive agent environments', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'headless-agent', - cli: 'claude', - interactive: false, - permissions: { access: 'readwrite' }, - }, - ]); - - const run = await runner.execute(config, 'default'); - const spawnOptions = mockSubprocessSpawn.mock.calls[0]?.[2] as - | { env?: Record } - | undefined; - - expect(run.status).toBe('completed'); - expect(mockRelayInstance.spawnPty).not.toHaveBeenCalled(); - expect(mockSubprocessSpawn).toHaveBeenCalledTimes(1); - expect(spawnOptions?.env?.RELAY_AGENT_TOKEN).toBe('token:headless-agent'); - expect(spawnOptions?.env?.RELAYFILE_TOKEN).toBe('token:headless-agent'); - }); - - it('merges relay.env with inherited process env for non-interactive agents', async () => { - const projectDir = createBaseProject(); - const inheritedEnvKey = 'WORKFLOW_RUNNER_RELAY_ENV_MERGE_TEST'; - const originalInheritedValue = process.env[inheritedEnvKey]; - process.env[inheritedEnvKey] = 'inherited-value'; - - try { - const runner = new WorkflowRunner({ - cwd: projectDir, - db: makeDb(), - workspaceId: 'ws-test', - relay: { - env: { - AGENT_RELAY_WORKFLOW_DISABLE_RELAYCAST: '1', - RELAYFILE_BASE_URL: 'https://relay.example.test', - }, - }, - }); - const config = makeConfig([ - { - name: 'env-merge-agent', - cli: 'claude', - interactive: false, - permissions: { access: 'readwrite' }, - }, - ]); - - const run = await runner.execute(config, 'default'); - const spawnOptions = mockSubprocessSpawn.mock.calls[0]?.[2] as { env?: NodeJS.ProcessEnv } | undefined; - - expect(run.status).toBe('completed'); - expect(mockSubprocessSpawn).toHaveBeenCalledTimes(1); - expect(spawnOptions?.env?.[inheritedEnvKey]).toBe('inherited-value'); - expect(spawnOptions?.env?.RELAYFILE_BASE_URL).toBe('https://relay.example.test'); - expect(spawnOptions?.env?.AGENT_RELAY_WORKFLOW_DISABLE_RELAYCAST).toBe('1'); - } finally { - if (originalInheritedValue === undefined) { - delete process.env[inheritedEnvKey]; - } else { - process.env[inheritedEnvKey] = originalInheritedValue; - } - } - }); - - it('clears workflow-scoped tokens after successful completion', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'cleanup-agent', - cli: 'claude', - permissions: { access: 'readwrite' }, - }, - ]); - - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('completed'); - expect((runner as any).agentTokens.size).toBe(0); - }); - - it('clears workflow-scoped tokens after failed workflows', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'failing-agent', - cli: 'claude', - permissions: { access: 'readwrite' }, - }, - ]); - - mockRelayInstance.spawnPty.mockRejectedValueOnce(new Error('spawn failed')); - - const run = await runner.execute(config, 'default'); - - expect(run.status).toBe('failed'); - expect((runner as any).agentTokens.size).toBe(0); - }); - - it('reports resolved permissions during dry-run without minting tokens', () => { - const projectDir = createProject({ - 'src/app.ts': 'export const app = true;\n', - '.agentreadonly': 'src/app.ts\n', - }); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'dry-run-agent', - cli: 'claude', - permissions: { - access: 'readonly', - files: { - read: ['src/**'], - }, - scopes: ['relay:custom:use:/feature'], - }, - }, - ]); - - const report = runner.dryRun(config, 'default'); - const permissionEntry = report.permissions?.find((entry) => entry.agent === 'dry-run-agent'); - - expect(report.valid).toBe(true); - expect(permissionEntry).toMatchObject({ - agent: 'dry-run-agent', - access: 'readonly', - source: 'yaml', - }); - expect(permissionEntry?.scopes ?? 0).toBeGreaterThan(0); - expect(mockProvisionWorkflowAgents).not.toHaveBeenCalled(); - expect(mockRelayInstance.spawnPty).not.toHaveBeenCalled(); - expect((runner as any).agentTokens.size).toBe(0); - }); - - it('rejects invalid permission config during validation before provisioning', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'invalid-agent', - cli: 'claude', - permissions: { - access: 'bogus' as any, - }, - }, - ]); - - await expect(runner.execute(config, 'default')).rejects.toThrow('Permission validation failed'); - expect(mockProvisionWorkflowAgents).not.toHaveBeenCalled(); - expect(mockRelayInstance.spawnPty).not.toHaveBeenCalled(); - }); - - it('rejects unknown permission profiles during validation before provisioning', async () => { - const projectDir = createBaseProject(); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'invalid-profile-agent', - cli: 'claude', - permissions: { - profile: 'missing-reviewer', - }, - }, - ]); - - await expect(runner.execute(config, 'default')).rejects.toThrow('Permission validation failed'); - expect(mockProvisionWorkflowAgents).not.toHaveBeenCalled(); - expect(mockRelayInstance.spawnPty).not.toHaveBeenCalled(); - }); - - it('merges dotfile rules with YAML overrides into the expected scopes', async () => { - const projectDir = createProject({ - '.agentignore': 'blocked.txt\n', - '.agentreadonly': 'locked.txt\n', - 'blocked.txt': 'blocked\n', - 'locked.txt': 'locked\n', - 'plain.txt': 'plain\n', - }); - const runner = makeRunner(projectDir); - const config = makeConfig([ - { - name: 'override-agent', - cli: 'claude', - permissions: { - access: 'restricted', - files: { - read: ['blocked.txt'], - write: ['locked.txt'], - }, - }, - }, - ]); - - const run = await runner.execute(config, 'default'); - const scopes = getProvisionedScopes('override-agent'); - - expect(run.status).toBe('completed'); - expect(scopes).toEqual([ - 'relayfile:fs:read:/blocked.txt', - 'relayfile:fs:read:/locked.txt', - 'relayfile:fs:write:/locked.txt', - ]); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/process-backend-executor.test.ts b/packages/sdk/src/workflows/__tests__/process-backend-executor.test.ts deleted file mode 100644 index ddc6d284e..000000000 --- a/packages/sdk/src/workflows/__tests__/process-backend-executor.test.ts +++ /dev/null @@ -1,125 +0,0 @@ -import { describe, it, expect, vi } from 'vitest'; - -import { createProcessBackendExecutor } from '../process-backend-executor.js'; -import type { ProcessBackend, ProcessEnvironment, WorkflowStep, AgentDefinition } from '../types.js'; - -function makeEnv( - exec: ProcessEnvironment['exec'], - destroy: ProcessEnvironment['destroy'] = vi.fn(async () => undefined) -): ProcessEnvironment { - return { - id: 'env-1', - homeDir: '/home/runner', - exec, - uploadFile: vi.fn(async () => undefined), - destroy, - }; -} - -function makeBackend(env: ProcessEnvironment): ProcessBackend { - return { createEnvironment: vi.fn(async () => env) }; -} - -function makeStep(overrides: Partial = {}): WorkflowStep { - return { name: 'step-1', ...overrides } as WorkflowStep; -} - -function makeAgent(overrides: Partial = {}): AgentDefinition { - return { name: 'worker-1', cli: 'claude', ...overrides } as AgentDefinition; -} - -describe('createProcessBackendExecutor', () => { - it('creates an environment, runs the built command, and destroys the env', async () => { - const destroy = vi.fn(async () => undefined); - const exec = vi.fn(async () => ({ output: 'hello\n', exitCode: 0 })); - const env = makeEnv(exec, destroy); - const backend = makeBackend(env); - - const executor = createProcessBackendExecutor(backend); - const output = await executor.executeAgentStep( - makeStep({ name: 'planner' }), - makeAgent({ cli: 'claude' }), - 'do the thing', - 30_000 - ); - - expect(backend.createEnvironment).toHaveBeenCalledWith('planner'); - expect(exec).toHaveBeenCalledTimes(1); - const [command, opts] = exec.mock.calls[0]!; - expect(typeof command).toBe('string'); - expect(command).toContain('claude'); - expect(opts?.timeoutSeconds).toBe(30); - expect(destroy).toHaveBeenCalledTimes(1); - expect(output).toBe('hello\n'); - }); - - it('passes injected env and agent cwd through execOpts (not baked into the command)', async () => { - const exec = vi.fn(async () => ({ output: 'ok', exitCode: 0 })); - const env = makeEnv(exec); - const backend = makeBackend(env); - - const executor = createProcessBackendExecutor(backend, { - env: { ANTHROPIC_API_KEY: 'sk-test', RELAY_WORKSPACE: 'ws_123' }, - }); - - await executor.executeAgentStep( - makeStep({ name: 'planner' }), - makeAgent({ cli: 'claude', cwd: '/work/repo' }), - 'do the thing', - 5_000 - ); - - const [command, opts] = exec.mock.calls[0]!; - expect(command.startsWith('claude ') || command.startsWith("'claude'")).toBe(true); - expect(command).not.toMatch(/ANTHROPIC_API_KEY=/); - expect(opts?.env).toEqual({ ANTHROPIC_API_KEY: 'sk-test', RELAY_WORKSPACE: 'ws_123' }); - expect(opts?.cwd).toBe('/work/repo'); - expect(opts?.timeoutSeconds).toBe(5); - }); - - it('throws when the remote command exits non-zero and still destroys', async () => { - const destroy = vi.fn(async () => undefined); - const exec = vi.fn(async () => ({ output: 'boom', exitCode: 2 })); - const env = makeEnv(exec, destroy); - const backend = makeBackend(env); - - const executor = createProcessBackendExecutor(backend); - - await expect(executor.executeAgentStep(makeStep(), makeAgent(), 'task')).rejects.toThrow( - /exited with code 2/ - ); - expect(destroy).toHaveBeenCalledTimes(1); - }); - - it('rejects cli:"api" because it does not run as a subprocess', async () => { - const env = makeEnv(vi.fn()); - const backend = makeBackend(env); - const executor = createProcessBackendExecutor(backend); - - await expect(executor.executeAgentStep(makeStep(), makeAgent({ cli: 'api' }), 'task')).rejects.toThrow( - /cli "api"/ - ); - }); - - it('passes injected env through to exec for deterministic steps', async () => { - const exec = vi.fn(async () => ({ output: 'ok', exitCode: 0 })); - const env = makeEnv(exec); - const backend = makeBackend(env); - - const executor = createProcessBackendExecutor(backend, { - env: { RELAY_WORKSPACE: 'ws_123' }, - }); - - const result = await executor.executeDeterministicStep!( - makeStep({ type: 'deterministic', command: 'echo hi', timeoutMs: 5_000 }), - 'echo hi', - '/work' - ); - - expect(result).toEqual({ output: 'ok', exitCode: 0 }); - const [, opts] = exec.mock.calls[0]!; - expect(opts?.cwd).toBe('/work'); - expect(opts?.env).toEqual({ RELAY_WORKSPACE: 'ws_123' }); - expect(opts?.timeoutSeconds).toBe(5); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/proxy-env.test.ts b/packages/sdk/src/workflows/__tests__/proxy-env.test.ts deleted file mode 100644 index a0147fd29..000000000 --- a/packages/sdk/src/workflows/__tests__/proxy-env.test.ts +++ /dev/null @@ -1,178 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; - -import type { AgentDefinition, SwarmConfig } from '../types.js'; -import { - buildNormalizedProxyEnv, - createProxyEnvResolver, - getStrippedApiKeyVars, - isProxyEnabled, - RELAY_PROXY_TOKEN_ENV, - RELAY_PROXY_TOKEN_ENV_ALIAS, - RELAY_PROXY_URL_ENV, - RELAY_PROXY_URL_ENV_ALIAS, - resolveProxyTokenFromEnv, - resolveProxyUrlFromEnv, - resolveProxyEnv, - type ProxyEnvRegistry, -} from '../proxy-env.js'; -import { WorkflowRunner } from '../runner.js'; - -describe('proxy-env', () => { - afterEach(() => { - vi.restoreAllMocks(); - }); - - it.each([ - ['claude', { ANTHROPIC_BASE_URL: 'https://proxy.local', ANTHROPIC_API_KEY: 'proxy-token' }], - ['codex', { OPENAI_BASE_URL: 'https://proxy.local', OPENAI_API_KEY: 'proxy-token' }], - ['opencode', { OPENAI_BASE_URL: 'https://proxy.local', OPENAI_API_KEY: 'proxy-token' }], - ['aider', { OPENAI_API_BASE: 'https://proxy.local', OPENAI_API_KEY: 'proxy-token' }], - ['gemini', { GOOGLE_API_BASE: 'https://proxy.local', GOOGLE_API_KEY: 'proxy-token' }], - ['goose', { OPENAI_BASE_URL: 'https://proxy.local', OPENAI_API_KEY: 'proxy-token' }], - ['droid', { OPENAI_BASE_URL: 'https://proxy.local', OPENAI_API_KEY: 'proxy-token' }], - ['cursor', { OPENAI_BASE_URL: 'https://proxy.local', OPENAI_API_KEY: 'proxy-token' }], - ] as const)('returns the correct env overrides for %s', (cli, expected) => { - expect(resolveProxyEnv(cli, 'https://proxy.local', 'proxy-token')).toEqual(expected); - }); - - it('normalizes cli variants before resolving proxy env', () => { - expect(resolveProxyEnv('codex:gpt-5.4', 'https://proxy.local', 'proxy-token')).toEqual({ - OPENAI_BASE_URL: 'https://proxy.local', - OPENAI_API_KEY: 'proxy-token', - }); - expect(resolveProxyEnv('cursor-agent', 'https://proxy.local', 'proxy-token')).toEqual({ - OPENAI_BASE_URL: 'https://proxy.local', - OPENAI_API_KEY: 'proxy-token', - }); - }); - - it('falls back to dual-provider overrides for unknown CLIs and logs a warning', () => { - const warn = vi.spyOn(console, 'warn').mockImplementation(() => {}); - - expect(resolveProxyEnv('mystery-cli', 'https://proxy.local', 'proxy-token')).toEqual({ - OPENAI_BASE_URL: 'https://proxy.local', - OPENAI_API_KEY: 'proxy-token', - ANTHROPIC_BASE_URL: 'https://proxy.local', - ANTHROPIC_API_KEY: 'proxy-token', - }); - expect(warn).toHaveBeenCalledTimes(1); - expect(warn).toHaveBeenCalledWith( - expect.stringContaining('Falling back to generic OpenAI/Anthropic proxy env overrides.') - ); - }); - - it('returns the full provider/base-url strip list', () => { - expect(getStrippedApiKeyVars()).toEqual([ - 'OPENAI_API_KEY', - 'ANTHROPIC_API_KEY', - 'OPENROUTER_API_KEY', - 'GOOGLE_API_KEY', - 'OPENAI_BASE_URL', - 'ANTHROPIC_BASE_URL', - 'OPENAI_API_BASE', - 'GOOGLE_API_BASE', - ]); - }); - - it('does not strip canonical or legacy relay proxy env vars', () => { - expect(getStrippedApiKeyVars()).not.toContain(RELAY_PROXY_URL_ENV); - expect(getStrippedApiKeyVars()).not.toContain(RELAY_PROXY_URL_ENV_ALIAS); - expect(getStrippedApiKeyVars()).not.toContain(RELAY_PROXY_TOKEN_ENV); - expect(getStrippedApiKeyVars()).not.toContain(RELAY_PROXY_TOKEN_ENV_ALIAS); - }); - - it('prefers the canonical relay proxy URL env name', () => { - expect( - resolveProxyUrlFromEnv({ - [RELAY_PROXY_URL_ENV]: 'https://cloud.proxy', - [RELAY_PROXY_URL_ENV_ALIAS]: 'https://legacy.proxy', - }) - ).toBe('https://cloud.proxy'); - }); - - it('falls back to the legacy relay proxy URL env name', () => { - expect( - resolveProxyUrlFromEnv({ - [RELAY_PROXY_URL_ENV_ALIAS]: 'https://legacy.proxy', - }) - ).toBe('https://legacy.proxy'); - }); - - it('prefers the canonical relay proxy token env name', () => { - expect( - resolveProxyTokenFromEnv({ - [RELAY_PROXY_TOKEN_ENV]: 'cloud-token', - [RELAY_PROXY_TOKEN_ENV_ALIAS]: 'legacy-token', - }) - ).toBe('cloud-token'); - }); - - it('falls back to the legacy relay proxy token env name', () => { - expect( - resolveProxyTokenFromEnv({ - [RELAY_PROXY_TOKEN_ENV_ALIAS]: 'legacy-token', - }) - ).toBe('legacy-token'); - }); - - it('emits canonical and legacy relay proxy env vars together', () => { - expect(buildNormalizedProxyEnv('https://proxy.local', 'proxy-token')).toEqual({ - RELAY_LLM_PROXY: 'https://proxy.local', - RELAY_LLM_PROXY_URL: 'https://proxy.local', - CREDENTIAL_PROXY_TOKEN: 'proxy-token', - RELAY_LLM_PROXY_TOKEN: 'proxy-token', - }); - }); - - it('enables proxy mode only when both agent and swarm opt in', () => { - const agentWithProxy = { credentials: { proxy: true } } as AgentDefinition; - const agentWithoutProxy = { credentials: { proxy: false } } as AgentDefinition; - const swarmWithProxy = { - credentialProxy: { - proxyUrl: 'https://proxy.local', - providers: {}, - }, - } as SwarmConfig; - const swarmWithoutProxy = {} as SwarmConfig; - - expect(isProxyEnabled(agentWithProxy, swarmWithProxy)).toBe(true); - expect(isProxyEnabled(agentWithoutProxy, swarmWithProxy)).toBe(false); - expect(isProxyEnabled(agentWithProxy, swarmWithoutProxy)).toBe(false); - expect(isProxyEnabled(undefined, swarmWithProxy)).toBe(false); - expect(isProxyEnabled(agentWithProxy, undefined)).toBe(false); - }); - - it('supports adding a new CLI by supplying one registry entry', () => { - const customRegistry = { - 'custom-cli': [{ baseUrlVar: 'CUSTOM_API_BASE', apiKeyVar: 'CUSTOM_API_KEY' }], - } satisfies ProxyEnvRegistry; - const resolveCustomProxyEnv = createProxyEnvResolver(customRegistry); - - expect(resolveCustomProxyEnv('custom-cli', 'https://proxy.local', 'proxy-token')).toEqual({ - CUSTOM_API_BASE: 'https://proxy.local', - CUSTOM_API_KEY: 'proxy-token', - }); - }); - - it('normalizes inherited proxy env before child-process propagation', () => { - const runner = new WorkflowRunner({ - relay: { - env: { - RELAY_LLM_PROXY_URL: 'https://legacy.proxy', - RELAY_LLM_PROXY_TOKEN: 'legacy-token', - OPENAI_API_KEY: 'should-strip', - }, - }, - }); - - const env = (runner as any).getRelayEnv(); - - expect(env).toMatchObject({ - RELAY_LLM_PROXY: 'https://legacy.proxy', - RELAY_LLM_PROXY_URL: 'https://legacy.proxy', - CREDENTIAL_PROXY_TOKEN: 'legacy-token', - RELAY_LLM_PROXY_TOKEN: 'legacy-token', - }); - expect(env.OPENAI_API_KEY).toBeUndefined(); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/run-script.test.ts b/packages/sdk/src/workflows/__tests__/run-script.test.ts deleted file mode 100644 index aba29fc43..000000000 --- a/packages/sdk/src/workflows/__tests__/run-script.test.ts +++ /dev/null @@ -1,551 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import fs from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import { - parseTsxStderr, - formatWorkflowParseError, - runScriptWorkflow, - findLocalSdkWorkspace, - ensureLocalSdkWorkflowRuntime, - shouldSkipNodeStripTypesPreflight, -} from '../run-script.js'; - -describe('parseTsxStderr', () => { - it('extracts file/line/col/message from inline `file:line:col: ERROR:` format', () => { - const stderr = '/repo/workflow.ts:42:7: ERROR: Expected "}" but found end of file\n'; - const parsed = parseTsxStderr(stderr); - - expect(parsed).toEqual({ - file: '/repo/workflow.ts', - line: 42, - column: 7, - message: 'Expected "}" but found end of file', - }); - }); - - it('extracts pretty-printed `✘ [ERROR]` format', () => { - const stderr = `✘ [ERROR] Unexpected "$" - - /repo/workflow.ts:10:4: - 10 │ command: \`echo \${VAR}\` - ╵ ^ -`; - const parsed = parseTsxStderr(stderr); - - expect(parsed).toMatchObject({ - file: '/repo/workflow.ts', - line: 10, - column: 4, - message: 'Unexpected "$"', - }); - }); - - it('strips ANSI color codes before matching', () => { - const stderr = '\x1b[31m/repo/workflow.ts:1:1: ERROR: bad token\x1b[0m\n'; - const parsed = parseTsxStderr(stderr); - - expect(parsed?.file).toBe('/repo/workflow.ts'); - expect(parsed?.message).toBe('bad token'); - }); - - it('returns null when stderr does not look like a parse error', () => { - expect(parseTsxStderr('Error: Cannot find module foo')).toBeNull(); - expect(parseTsxStderr('')).toBeNull(); - }); -}); - -describe('formatWorkflowParseError', () => { - it('produces a WORKFLOW_PARSE_ERROR with template-literal hints when applicable', () => { - const err = formatWorkflowParseError({ - file: '/repo/workflow.ts', - line: 12, - column: 4, - message: 'Unterminated template literal', - }); - - expect((err as Error & { code?: string }).code).toBe('WORKFLOW_PARSE_ERROR'); - expect(err.message).toContain('/repo/workflow.ts:12:4'); - expect(err.message).toMatch(/template literal/i); - }); - - it('falls back to the bare error when no hint is applicable', () => { - const err = formatWorkflowParseError({ - file: '/repo/workflow.ts', - message: 'TypeScript parse error (see tsx output above)', - }); - - expect(err.message).toContain('TypeScript parse error'); - expect(err.message).not.toMatch(/Hint:/); - }); -}); - -describe('runScriptWorkflow', () => { - const nodeSupportsStripTypes = (() => { - const [major = 0, minor = 0] = process.versions.node.split('.').map((part) => Number(part)); - return major > 22 || (major === 22 && minor >= 6); - })(); - - it('throws when the file does not exist', async () => { - await expect(runScriptWorkflow('/definitely/does/not/exist.ts')).rejects.toThrow(/File not found/); - }); - - it('rejects unsupported extensions', async () => { - // Use a file that exists (this test file itself) but with an unsupported ext — - // there is no way to make the extension unsupported on a real path other than - // pointing at one. Use the README as a stand-in. - const fakePath = path.resolve(__dirname, '../../../README.md'); - await expect(runScriptWorkflow(fakePath)).rejects.toThrow(/Unsupported file type/); - }); - - it('falls back past Node strip-only mode for valid TypeScript enums', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'enum-workflow.ts'); - fs.writeFileSync( - workflowPath, - ` -enum Step { - Done = 'done', -} -if (Step.Done !== 'done') { - throw new Error('enum did not execute'); -} -`, - 'utf8' - ); - - try { - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - // Skips Node strip-only, then cold-starts tsx to actually compile and run - // the enum — well over Vitest's default 5s budget on a cold runner. - }, 30000); - - it('falls back past Node strip-only mode for enums in static local imports', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'main.ts'); - const enumModulePath = path.join(tmpDir, 'enum-module.ts'); - fs.writeFileSync( - enumModulePath, - ` -export enum ImportedStep { - Done = 'done', -} -`, - 'utf8' - ); - fs.writeFileSync( - workflowPath, - ` -import { ImportedStep } from './enum-module.ts'; -if (ImportedStep.Done !== 'done') { - throw new Error('imported step did not execute'); -} -`, - 'utf8' - ); - - try { - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('falls back past Node strip-only mode for parameter properties and namespaces', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'parameter-property-workflow.ts'); - fs.writeFileSync( - workflowPath, - ` -class Box { - constructor(public value: string) {} -} -namespace WorkflowValues { - export const done = 'done'; -} -if (new Box(WorkflowValues.done).value !== 'done') { - throw new Error('unsupported syntax did not execute'); -} -`, - 'utf8' - ); - - try { - expect(shouldSkipNodeStripTypesPreflight(workflowPath)).toBe(true); - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('falls back past Node strip-only mode for unsupported syntax in static local imports', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'main.ts'); - const helperPath = path.join(tmpDir, 'helper.ts'); - fs.writeFileSync( - helperPath, - ` -export class ImportedBox { - constructor(public value: string) {} -} -export namespace ImportedValues { - export const done = 'done'; -} -`, - 'utf8' - ); - fs.writeFileSync( - workflowPath, - ` -import { ImportedBox, ImportedValues } from './helper.ts'; -if (new ImportedBox(ImportedValues.done).value !== 'done') { - throw new Error('imported unsupported syntax did not execute'); -} -`, - 'utf8' - ); - - try { - expect(shouldSkipNodeStripTypesPreflight(workflowPath)).toBe(true); - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('falls back past Node strip-only mode for import-equals syntax', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'import-equals-workflow.ts'); - fs.writeFileSync( - workflowPath, - ` -import fs = require('node:fs'); -if (!fs.existsSync(${JSON.stringify(tmpDir)})) { - throw new Error('import-equals workflow did not execute'); -} -`, - 'utf8' - ); - - try { - expect(shouldSkipNodeStripTypesPreflight(workflowPath)).toBe(true); - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('falls back past Node strip-only mode for import-equals syntax in static local imports', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'main.ts'); - const helperPath = path.join(tmpDir, 'helper.ts'); - fs.writeFileSync( - helperPath, - ` -import path = require('node:path'); -export const basename = path.basename('done.txt', '.txt'); -`, - 'utf8' - ); - fs.writeFileSync( - workflowPath, - ` -import { basename } from './helper.ts'; -if (basename !== 'done') { - throw new Error('imported import-equals syntax did not execute'); -} -`, - 'utf8' - ); - - try { - expect(shouldSkipNodeStripTypesPreflight(workflowPath)).toBe(true); - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('does not treat enum text in comments or strings as unsupported strip-types syntax', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'enum-text.ts'); - fs.writeFileSync( - workflowPath, - ` -// enum CommentOnly { Value = 'value' } -const message = "enum StringOnly { Value = 'value' }"; -const template = \`enum TemplateOnly { Value = 'value' }\`; -if (!message || !template) { - throw new Error('missing values'); -} -`, - 'utf8' - ); - - try { - expect(shouldSkipNodeStripTypesPreflight(workflowPath)).toBe(false); - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('falls back for NodeNext .js specifiers backed by TypeScript source', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'main.ts'); - const enumModulePath = path.join(tmpDir, 'enum-module.ts'); - fs.writeFileSync( - enumModulePath, - ` -export enum ImportedStep { - Done = 'done', -} -`, - 'utf8' - ); - fs.writeFileSync( - workflowPath, - ` -import { ImportedStep } from './enum-module.js'; -if (ImportedStep.Done !== 'done') { - throw new Error('imported step did not execute'); -} -`, - 'utf8' - ); - - try { - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('handles circular static imports during strip-types preflight', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'main.ts'); - const helperPath = path.join(tmpDir, 'helper.ts'); - fs.writeFileSync( - workflowPath, - ` -import { helperValue } from './helper.ts'; -if (helperValue !== 'ok') { - throw new Error('helper did not execute'); -} -`, - 'utf8' - ); - fs.writeFileSync( - helperPath, - ` -import type {} from './main.ts'; -export enum HelperStep { - Done = 'done', -} -export const helperValue = HelperStep.Done === 'done' ? 'ok' : 'bad'; -`, - 'utf8' - ); - - try { - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('terminates strip-types preflight for circular imports without unsupported syntax', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'main.ts'); - const helperPath = path.join(tmpDir, 'helper.ts'); - fs.writeFileSync( - workflowPath, - ` -import { helperReady } from './helper.ts'; -void helperReady; -export function mainReady() { - return true; -} -`, - 'utf8' - ); - fs.writeFileSync( - helperPath, - ` -import { mainReady } from './main.ts'; -void mainReady; -export const helperReady = true; -`, - 'utf8' - ); - - try { - expect(shouldSkipNodeStripTypesPreflight(workflowPath)).toBe(false); - await expect(runScriptWorkflow(workflowPath)).resolves.toBeUndefined(); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('skips node strip-types when a transitive TypeScript import cannot be read', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'main.ts'); - const helperPath = path.join(tmpDir, 'helper.ts'); - fs.writeFileSync(workflowPath, "import './helper.ts';\n", 'utf8'); - fs.mkdirSync(helperPath); - - try { - expect(shouldSkipNodeStripTypesPreflight(workflowPath)).toBe(true); - await expect(runScriptWorkflow(workflowPath)).rejects.toThrow( - /tsx exited with code 1|EISDIR|directory/i - ); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }, 30000); - - it('does not mask ordinary runtime failures by falling back to another TypeScript runner', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'runtime-failure.ts'); - fs.writeFileSync(workflowPath, "throw new Error('intentional runtime failure');\n", 'utf8'); - - try { - await expect(runScriptWorkflow(workflowPath)).rejects.toThrow( - nodeSupportsStripTypes - ? /node --experimental-strip-types exited with code 1/ - : /(?:tsx|ts-node|npx tsx) exited with code 1/ - ); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('does not retry side-effecting user code that only prints strip-types text', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'spoofed-strip-types.ts'); - const markerPath = path.join(tmpDir, 'marker.txt'); - fs.writeFileSync( - workflowPath, - ` -import fs from 'node:fs'; -fs.appendFileSync(${JSON.stringify(markerPath)}, 'ran\\n'); -console.error('ERR_UNSUPPORTED_TYPESCRIPT_SYNTAX'); -process.exit(7); -`, - 'utf8' - ); - - try { - await expect(runScriptWorkflow(workflowPath)).rejects.toThrow( - nodeSupportsStripTypes - ? /node --experimental-strip-types exited with code 7/ - : /(?:tsx|ts-node|npx tsx) exited with code 7/ - ); - expect(fs.readFileSync(markerPath, 'utf8')).toBe('ran\n'); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('does not retry after user code dynamically imports unsupported strip-types syntax', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-runner-')); - const workflowPath = path.join(tmpDir, 'dynamic-enum-import.ts'); - const enumModulePath = path.join(tmpDir, 'enum-module.ts'); - const markerPath = path.join(tmpDir, 'marker.txt'); - fs.writeFileSync( - enumModulePath, - ` -export enum ImportedStep { - Done = 'done', -} -`, - 'utf8' - ); - fs.writeFileSync( - workflowPath, - ` -import fs from 'node:fs'; -fs.appendFileSync(${JSON.stringify(markerPath)}, 'ran\\n'); -await import(${JSON.stringify(enumModulePath)}); -`, - 'utf8' - ); - - try { - await expect(runScriptWorkflow(workflowPath)).rejects.toThrow(); - if (nodeSupportsStripTypes) { - expect(fs.readFileSync(markerPath, 'utf8')).toBe('ran\n'); - } - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }); -}); - -describe('findLocalSdkWorkspace', () => { - it('returns null when no agent-relay workspace is in the ancestor chain', () => { - expect(findLocalSdkWorkspace('/tmp')).toBeNull(); - }); -}); - -describe('ensureLocalSdkWorkflowRuntime', () => { - function createTempWorkspace(): { rootDir: string; startDir: string; workflowsEntry: string } { - const rootDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-relay-workspace-')); - const sdkDir = path.join(rootDir, 'packages', 'sdk'); - const startDir = path.join(sdkDir, 'src', 'workflows'); - fs.mkdirSync(startDir, { recursive: true }); - fs.writeFileSync(path.join(rootDir, 'package.json'), JSON.stringify({ name: 'agent-relay' }), 'utf8'); - fs.writeFileSync(path.join(sdkDir, 'package.json'), JSON.stringify({ name: '@agent-relay/sdk' }), 'utf8'); - return { - rootDir, - startDir, - workflowsEntry: path.join(sdkDir, 'dist', 'workflows', 'index.js'), - }; - } - - it('runs local workflow runtime build commands in dependency order', () => { - const workspace = createTempWorkspace(); - const calls: string[][] = []; - const expectedCommands = [ - ['run', 'build:config'], - ['--prefix', 'packages/workflow-types', 'run', 'build'], - ['--prefix', 'packages/github-primitive', 'run', 'build'], - ['--prefix', 'packages/slack-primitive', 'run', 'build'], - ['--prefix', 'packages/cloud', 'run', 'build'], - ['run', 'build:sdk'], - ]; - - try { - const execRunner = ((_file: string, args?: readonly string[]) => { - calls.push([...(args ?? [])]); - if (calls.length === expectedCommands.length) { - fs.mkdirSync(path.dirname(workspace.workflowsEntry), { recursive: true }); - fs.writeFileSync(workspace.workflowsEntry, '', 'utf8'); - } - return Buffer.from(''); - }) as any; - - ensureLocalSdkWorkflowRuntime(workspace.startDir, execRunner); - - expect(calls).toEqual(expectedCommands); - } finally { - fs.rmSync(workspace.rootDir, { recursive: true, force: true }); - } - }); - - it('throws when local workflow runtime is still missing after build commands', () => { - const workspace = createTempWorkspace(); - - try { - expect(() => ensureLocalSdkWorkflowRuntime(workspace.startDir, (() => Buffer.from('')) as any)).toThrow( - /Local SDK workflows runtime is still missing after build/ - ); - } finally { - fs.rmSync(workspace.rootDir, { recursive: true, force: true }); - } - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/run-summary-table.test.ts b/packages/sdk/src/workflows/__tests__/run-summary-table.test.ts deleted file mode 100644 index 24f7e95c8..000000000 --- a/packages/sdk/src/workflows/__tests__/run-summary-table.test.ts +++ /dev/null @@ -1,168 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; - -import { formatRunSummaryTable } from '../run-summary-table.js'; - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn(), - RelayError: class RelayError extends Error {}, -})); - -vi.mock('../../relay.js', () => ({ - AgentRelay: vi.fn(), -})); - -const { WorkflowRunner } = await import('../runner.js'); - -describe('formatRunSummaryTable', () => { - it('renders all-passing steps', () => { - const output = formatRunSummaryTable( - [ - { name: 'plan', agent: 'lead', status: 'completed', attempts: 1, durationMs: 1_000 }, - { name: 'implement', agent: 'worker', status: 'completed', attempts: 1, durationMs: 2_000 }, - ], - new Map([ - [ - 'plan', - { - cli: 'claude', - sessionId: 's1', - model: 'claude-sonnet-4', - provider: 'anthropic', - durationMs: 1_200, - cost: 0.75, - tokens: { input: 100, output: 50, cacheRead: 10 }, - turns: 2, - toolCalls: [], - errors: [], - finalStatus: 'completed', - summary: 'planned', - }, - ], - [ - 'implement', - { - cli: 'codex', - sessionId: 's2', - model: 'gpt-5', - provider: 'openai', - durationMs: 3_400, - cost: 1.25, - tokens: { input: 300, output: 90, cacheRead: 20 }, - turns: 4, - toolCalls: [], - errors: [{ turn: 2, text: 'Error: recovered after retry' }], - finalStatus: 'completed', - summary: 'implemented', - }, - ], - ]) - ); - - expect(output).toMatchInlineSnapshot(` - " Step Status Model Cost Tokens Duration Errors - plan pass claude-sonnet-4 $0.75 160 1s -- - implement pass gpt-5 $1.25 410 3s 1 (fixed) - ──────────────────────────────────────────────────────────────────────────────────────────── - Total $2.00 570 5s " - `); - }); - - it('renders a failed step with the first error line', () => { - const output = formatRunSummaryTable( - [ - { - name: 'broken-step', - agent: 'worker', - status: 'failed', - attempts: 1, - durationMs: 1_500, - error: 'boom', - }, - ], - new Map([ - [ - 'broken-step', - { - cli: 'opencode', - sessionId: 's3', - model: 'gpt-5', - provider: 'openai', - durationMs: 1_500, - cost: 0.01, - tokens: { input: 10, output: 5, cacheRead: 0 }, - turns: 1, - toolCalls: [], - errors: [{ turn: 1, text: 'Error: database locked' }], - finalStatus: 'failed', - summary: null, - }, - ], - ]) - ); - - expect(output).toContain('broken-step FAIL'); - expect(output).toContain(' └─ Error [turn 1] Error: database locked'); - }); - - it('renders deterministic steps without reports using placeholder columns', () => { - const output = formatRunSummaryTable( - [{ name: 'lint', agent: 'shell', status: 'completed', attempts: 1, durationMs: 900 }], - new Map() - ); - - expect(output).toContain('lint pass --'); - expect(output).toContain('--'); - // No reports means no cost column - expect(output).not.toContain('Cost'); - }); - - it('hides Cost column when no report has reliable cost data', () => { - const output = formatRunSummaryTable( - [{ name: 'gen-code', agent: 'worker', status: 'completed', attempts: 1, durationMs: 5_000 }], - new Map([ - [ - 'gen-code', - { - cli: 'claude', - sessionId: 's1', - model: 'claude-sonnet-4', - provider: 'anthropic', - durationMs: 5_000, - cost: null, - tokens: { input: 200, output: 80, cacheRead: 0 }, - turns: 3, - toolCalls: [], - errors: [], - finalStatus: 'completed', - summary: 'done', - }, - ], - ]) - ); - - expect(output).not.toContain('Cost'); - expect(output).toContain('Tokens'); - expect(output).toContain('280'); - }); -}); - -describe('WorkflowRunner logRunSummary', () => { - it('uses the table summary format even when no reports exist', () => { - const runner = new WorkflowRunner({ cwd: '/tmp/workflow-runner' }); - const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); - - (runner as any).logRunSummary( - 'sample-workflow', - [{ name: 'lint', agent: 'shell', status: 'completed', attempts: 1, output: 'ok' }], - 'run-1' - ); - - const combined = logSpy.mock.calls.flat().join('\n'); - expect(combined).toContain('Workflow "sample-workflow"'); - expect(combined).toContain('Step Status'); - expect(combined).toContain('lint'); - expect(combined).toContain('pass'); - - logSpy.mockRestore(); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/scrub-pty-chrome.test.ts b/packages/sdk/src/workflows/__tests__/scrub-pty-chrome.test.ts deleted file mode 100644 index e9aeefec5..000000000 --- a/packages/sdk/src/workflows/__tests__/scrub-pty-chrome.test.ts +++ /dev/null @@ -1,125 +0,0 @@ -/** - * Regression tests for WorkflowRunner.scrubForChannel — the function that - * strips PTY/TUI chrome from interactive-agent step output before it gets - * surfaced in workflow logs and channel messages. - * - * The patterns covered here are taken from a real captured run of a - * multi-turn workflow against Claude Code's PTY: when its TUI footer - * overwrites itself faster than the PTY flushes whitespace, lines like - * `bypasspermissionson`, `--INSERT--⏵⏵`, and `Opus 4.7 (1M context) ctx:5% - * $1.45` end up in the captured stream. Before these regex additions, the - * step "Output:" block was unreadable on interactive-agent steps. - */ -import { describe, it, expect } from 'vitest'; - -import { WorkflowRunner } from '../runner.js'; - -// scrubForChannel is `private static` — the cast is the minimal-invasive way -// to exercise it from a test without exporting an internal-only helper. -const scrub = (text: string): string => - (WorkflowRunner as unknown as { scrubForChannel(t: string): string }).scrubForChannel(text); - -describe('WorkflowRunner.scrubForChannel — PTY chrome stripping', () => { - it('strips the Claude Code bottom status bar (model + ctx% + cost)', () => { - const input = [ - 'real content line', - 'workflows git:(main) Opus 4.7 (1M context) ctx:5% $1.45', - 'Opus4.7(1Mcontext) ctx:6% $1.54', - 'another real line', - ].join('\n'); - const out = scrub(input); - expect(out).toContain('real content line'); - expect(out).toContain('another real line'); - expect(out).not.toMatch(/ctx\s*:\s*\d+%/); - expect(out).not.toMatch(/\$\d+\.\d+/); - }); - - it('strips vim-style mode indicators emitted by the input bar', () => { - const input = [ - 'pre-mode line', - '--INSERT--', - '--INSERT--⏵⏵bypasspermissionson (shift+tabtocycle)', - 'post-mode line', - ].join('\n'); - const out = scrub(input); - expect(out).toContain('pre-mode line'); - expect(out).toContain('post-mode line'); - expect(out).not.toMatch(/--INSERT--/); - }); - - it('strips no-whitespace TUI hint variants (bypasspermissionson, pasteagaintoexpand)', () => { - const input = ['before', 'bypasspermissionson', 'pasteagaintoexpand', 'shifttabto cycle', 'after'].join( - '\n' - ); - const out = scrub(input); - expect(out).toContain('before'); - expect(out).toContain('after'); - expect(out).not.toMatch(/bypasspermissionson/); - expect(out).not.toMatch(/pasteagaintoexpand/); - }); - - it('strips thinking-status fragments without ellipsis anchors', () => { - const input = [ - 'meaningful: round 3 codex-player guess=19 feedback=correct', - 'thinking with high effort', - '↓ 13 tokens · thinking with high effort', - 'Crunched for 32s', - 'Sautéed for 4s', - 'Gitifying…55', - ].join('\n'); - const out = scrub(input); - expect(out).toContain('feedback=correct'); - expect(out).not.toMatch(/thinking with high effort/); - expect(out).not.toMatch(/Crunched for/); - expect(out).not.toMatch(/Gitifying/); - }); - - it('strips malformed overwritten q0/qW0 PTY frame runs', () => { - const input = [ - 'first useful line', - 'qW0 | q0 / ql0 _ qqm ~ lqq = qW0 | q0 / ql0 _ qqm', - 'summary: kept qW0 | q0 / ql0 _ qqm ~ lqq = qW0 | q0 done', - 'last useful line', - ].join('\n'); - const out = scrub(input); - expect(out).toContain('first useful line'); - expect(out).toContain('last useful line'); - expect(out).toMatch(/summary: kept\s+done/); - expect(out).not.toMatch(/qW0|ql0|qqm|lqq/); - }); - - it('redacts secrets in the runner public preview path', () => { - const out = scrub('deploy succeeded\napi_key=sk-abcdefghijklmnopqrstuvwxyz123456\n'); - expect(out).toContain('deploy succeeded'); - expect(out).toContain('[REDACTED]'); - expect(out).not.toContain('sk-abcdefghijklmnopqrstuvwxyz123456'); - }); - - it('preserves real content and OWNER_DECISION signals', () => { - const input = [ - 'Read 1 file, calling relaycast 2 times', - 'Transcript verification reports TRANSCRIPT_OK with all 6 lines well-formed.', - 'OWNER_DECISION: COMPLETE', - 'REASON: All 6 turns executed, history.log has 6 lines.', - 'STEP_COMPLETE: repair-transcript', - ].join('\n'); - const out = scrub(input); - expect(out).toContain('TRANSCRIPT_OK'); - expect(out).toContain('OWNER_DECISION: COMPLETE'); - expect(out).toContain('STEP_COMPLETE: repair-transcript'); - expect(out).toContain('All 6 turns executed'); - }); - - it('does not strip lines that merely mention model names in prose', () => { - // Guard against the new claudeFooterRe (which looks for `Opus|Sonnet|Haiku - // (...context...) ctx:N%`) being too eager and removing prose that - // mentions a model name. - const input = [ - 'Compared output from Opus 4.7 against Sonnet 4.6 — both passed.', - 'We chose Haiku 4.5 for its latency profile.', - ].join('\n'); - const out = scrub(input); - expect(out).toContain('Opus 4.7 against Sonnet 4.6'); - expect(out).toContain('Haiku 4.5 for its latency profile'); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/sibling-links.test.ts b/packages/sdk/src/workflows/__tests__/sibling-links.test.ts deleted file mode 100644 index 2ec049be5..000000000 --- a/packages/sdk/src/workflows/__tests__/sibling-links.test.ts +++ /dev/null @@ -1,185 +0,0 @@ -import { describe, expect, it, vi } from 'vitest'; - -import { applySiblingLinks, buildSiblingLinkScript } from '../sibling-links.js'; - -describe('buildSiblingLinkScript', () => { - it('detects npm manifest and emits an npm link block', () => { - const script = buildSiblingLinkScript([{ name: '@scope/pkg', path: '../sibling/packages/pkg' }]); - expect(script).toContain('-f "$SIBLING_PATH/package.json"'); - expect(script).toContain('npm link --silent'); - expect(script).toContain('@scope/pkg'); - expect(script).toContain('../sibling/packages/pkg'); - }); - - it('detects python manifest and emits a pip install -e block', () => { - const script = buildSiblingLinkScript([{ name: 'my_pkg', path: '../py/pkg' }]); - expect(script).toContain('-f "$SIBLING_PATH/pyproject.toml"'); - expect(script).toContain('pip install -e'); - expect(script).toContain('uv pip install --system -e'); - }); - - it('fails-fast shell: script uses set -euo pipefail', () => { - const script = buildSiblingLinkScript([{ name: 'x', path: './x' }]); - expect(script.startsWith('set -euo pipefail')).toBe(true); - }); - - it('guards missing sibling path with explicit error', () => { - const script = buildSiblingLinkScript([{ name: 'x', path: '../missing' }]); - expect(script).toContain('SIBLING_PATH_MISSING'); - expect(script).toContain('exit 1'); - }); - - it('guards unknown manifest with explicit error', () => { - const script = buildSiblingLinkScript([{ name: 'x', path: './x' }]); - expect(script).toContain('UNKNOWN_MANIFEST'); - }); - - it('emits one verify block per link with expected exports', () => { - const script = buildSiblingLinkScript([ - { name: 'pkg-a', path: '../a', expect: ['foo', 'bar'] }, - { name: 'pkg-b', path: '../b' }, - { name: 'pkg-c', path: '../c', expect: ['baz'] }, - ]); - const verifyCount = (script.match(/APPLY_SIBLING_LINKS_EXPECT/g) ?? []).length; - // Two verify blocks (for pkg-a + pkg-c), each referenced at least twice - // (env var declaration + two command variants for node/python fallback). - expect(verifyCount).toBeGreaterThanOrEqual(4); - expect(script).toContain('APPLY_SIBLING_LINKS_OK'); - }); - - it('expects-list survives bash env var round-trip via single-quoted JSON payload', () => { - const script = buildSiblingLinkScript([{ name: 'p', path: './p', expect: ["it's-ok", 'with"quote'] }]); - // Assignment is `EXPECT=''` where the JSON is single-quoted so - // bash leaves it literal (no `$` / backtick substitution), then - // Node/Python JSON.parse it back to the original array. Embedded `'` - // gets the '\'' POSIX-escape treatment. - const jsonPayload = JSON.stringify(["it's-ok", 'with"quote']); - const shellArg = `'${jsonPayload.replace(/'/g, `'\\''`)}'`; - expect(script).toContain(`EXPECT=${shellArg}`); - }); - - it('emits both node and python verifiers wrapped in manifest-conditional', () => { - const script = buildSiblingLinkScript([{ name: 'p', path: './p', expect: ['x'] }]); - expect(script).toContain('node --input-type=module'); - expect(script).toContain('python3 -c'); - // The wrapping if/elif/else pattern keeps python as a fallback inside - // the non-package.json branch. - expect(script).toMatch(/if \[ -f "\$SIBLING_PATH\/package\.json" \]; then[\s\S]+?else[\s\S]+?python/); - }); - - it('assignments use single-quoted literals so $() / backticks do not substitute (review: shell injection)', () => { - // Two-stage review fix: - // (1) echo happens AFTER assignments and references the shell vars, - // not raw link.name / link.path template interpolation. - // (2) assignments themselves use SINGLE-quoted bash literals so that - // `$(cmd)` and backticks inside the value are NOT interpreted as - // command substitution (which JSON.stringify / double-quoted form - // did NOT protect against). - const script = buildSiblingLinkScript([{ name: 'pkg$(evil)', path: '../path`also-evil`' }]); - const echoLines = script.split('\n').filter((l) => l.startsWith('echo "--- link:')); - expect(echoLines).toHaveLength(1); - expect(echoLines[0]).toBe('echo "--- link: $SIBLING_NAME <- $SIBLING_PATH ---"'); - const assignmentLines = script - .split('\n') - .filter((l) => l.startsWith('SIBLING_NAME=') || l.startsWith('SIBLING_PATH=')); - // Assignments should wrap the value in single quotes — the exact literal - // passes through bash. `$(evil)` sits inside single quotes → no - // substitution; same for backticks. - expect(assignmentLines.some((l) => l === "SIBLING_NAME='pkg$(evil)'")).toBe(true); - expect(assignmentLines.some((l) => l === "SIBLING_PATH='../path`also-evil`'")).toBe(true); - // Sanity: no double-quoted assignment form present for these lines. - expect(assignmentLines.some((l) => l.startsWith('SIBLING_NAME="'))).toBe(false); - expect(assignmentLines.some((l) => l.startsWith('SIBLING_PATH="'))).toBe(false); - }); - - it("escapes embedded single quotes in link values via POSIX '\\'' idiom", () => { - const script = buildSiblingLinkScript([{ name: "pkg'q", path: "../p'q" }]); - expect(script).toContain("SIBLING_NAME='pkg'\\''q'"); - expect(script).toContain("SIBLING_PATH='../p'\\''q'"); - }); - - it('uv is invoked with --system and falls through to pip on failure (review: non-venv)', () => { - // Fix for review: uv refuses to install outside a venv without --system. - // The dispatch now uses --system AND wraps the uv attempt in an `if` so - // failure falls through to pip/pip3 instead of exiting under `set -e`. - const script = buildSiblingLinkScript([{ name: 'p', path: '../p' }]); - expect(script).toContain('uv pip install --system -e'); - expect(script).toMatch( - /if command -v uv[^\n]+uv pip install --system[^\n]+; then\s*\n\s*:\s*\n\s*elif command -v pip/ - ); - }); - - it('python verifier avoids backslashes inside f-string expressions (review: Python < 3.12 SyntaxError)', () => { - // Fix for review: backslashes (e.g. `\",\"`) inside f-string expression - // braces are a SyntaxError on Python < 3.12. We bind `sep = ","` outside - // the f-string and reference it from inside. The old escaped form must - // not appear anywhere in the emitted script. - const script = buildSiblingLinkScript([{ name: 'p', path: './p', expect: ['foo'] }]); - expect(script).toContain('sep = ","'); - expect(script).toContain('sep.join(missing)'); - expect(script).toContain('sep.join(want)'); - expect(script).not.toContain('\\",\\".join('); - }); -}); - -describe('applySiblingLinks', () => { - it('is a no-op when links is empty', () => { - const builder = { step: vi.fn() }; - const result = applySiblingLinks(builder, { links: [] }); - expect(builder.step).not.toHaveBeenCalled(); - expect(result).toBe(builder); - }); - - it('adds a single deterministic step named setup-sibling-links by default', () => { - const builder = { step: vi.fn(() => builder) }; - applySiblingLinks(builder, { - links: [{ name: 'pkg', path: '../pkg' }], - }); - expect(builder.step).toHaveBeenCalledTimes(1); - const call = builder.step.mock.calls[0] as unknown as - | [string, { command: string; [k: string]: unknown }] - | undefined; - if (!call) throw new Error('expected step call'); - const [stepName, cfg] = call; - expect(stepName).toBe('setup-sibling-links'); - expect(cfg).toMatchObject({ - type: 'deterministic', - dependsOn: ['install-deps'], - captureOutput: true, - failOnError: true, - }); - expect(cfg.command).toContain("bash -c '"); - }); - - it('honors custom stepName and dependsOn', () => { - const builder = { step: vi.fn(() => builder) }; - applySiblingLinks(builder, { - links: [{ name: 'pkg', path: '../pkg' }], - stepName: 'custom-name', - dependsOn: ['setup-branch'], - }); - const call = builder.step.mock.calls[0] as unknown as - | [string, { command: string; [k: string]: unknown }] - | undefined; - if (!call) throw new Error('expected step call'); - const [stepName, cfg] = call; - expect(stepName).toBe('custom-name'); - expect(cfg).toMatchObject({ dependsOn: ['setup-branch'] }); - }); - - it('escapes single quotes in the embedded script safely for bash -c', () => { - const builder = { step: vi.fn(() => builder) }; - applySiblingLinks(builder, { - links: [{ name: "has'quote", path: "./path'with-quote" }], - }); - const call = builder.step.mock.calls[0] as unknown as [string, { command: string }] | undefined; - if (!call) throw new Error('expected step call'); - const command = call[1].command; - // Verify the bash -c wrapper is well-formed: starts with bash -c ' and - // ends with matching close quote. The POSIX escape pattern is '\'' - // (close-quote, escaped-quote, re-open-quote) — the end result should - // not have an odd number of unescaped single quotes. - expect(command.startsWith(`bash -c '`)).toBe(true); - expect(command.endsWith(`'`)).toBe(true); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/step-cwd.test.ts b/packages/sdk/src/workflows/__tests__/step-cwd.test.ts deleted file mode 100644 index 4f9700b13..000000000 --- a/packages/sdk/src/workflows/__tests__/step-cwd.test.ts +++ /dev/null @@ -1,72 +0,0 @@ -import { describe, it, expect, vi } from 'vitest'; -import path from 'node:path'; - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn(), - RelayError: class RelayError extends Error {}, -})); - -vi.mock('../../relay.js', () => ({ - AgentRelay: vi.fn(), -})); - -const { WorkflowRunner } = await import('../runner.js'); - -describe('WorkflowRunner step cwd resolution', () => { - it('prefers step.cwd over agent.cwd and runner cwd', () => { - const runnerRoot = '/runner-root'; - const runner = new WorkflowRunner({ cwd: runnerRoot }); - - const resolved = (runner as any).resolveEffectiveCwd( - { name: 'generate', agent: 'worker', task: 'Generate', cwd: 'steps/generate' }, - { name: 'worker', cli: 'claude', cwd: 'agents/worker' } - ); - - expect(resolved).toBe(path.resolve(runnerRoot, 'steps/generate')); - }); - - it('respects step.cwd for deterministic steps', () => { - const runnerRoot = '/runner-root'; - const runner = new WorkflowRunner({ cwd: runnerRoot }); - - const resolved = (runner as any).resolveEffectiveCwd({ - name: 'scaffold', - type: 'deterministic', - command: 'mkdir -p out', - cwd: 'deterministic/setup', - }); - - expect(resolved).toBe(path.resolve(runnerRoot, 'deterministic/setup')); - }); - - it('falls back through step.cwd to step.workdir to agent.cwd to runner.cwd', () => { - const runnerRoot = '/runner-root'; - const namedPath = '/named/workdir'; - const runner = new WorkflowRunner({ cwd: runnerRoot }); - (runner as any).resolvedPaths.set('generated', namedPath); - - const agentDef = { name: 'worker', cli: 'claude', cwd: 'agents/worker' } as const; - - expect( - (runner as any).resolveEffectiveCwd( - { name: 's1', agent: 'worker', task: 'Do work', cwd: 'steps/explicit', workdir: 'generated' }, - agentDef - ) - ).toBe(path.resolve(runnerRoot, 'steps/explicit')); - - expect( - (runner as any).resolveEffectiveCwd( - { name: 's2', agent: 'worker', task: 'Do work', workdir: 'generated' }, - agentDef - ) - ).toBe(namedPath); - - expect( - (runner as any).resolveEffectiveCwd({ name: 's3', agent: 'worker', task: 'Do work' }, agentDef) - ).toBe(path.resolve(runnerRoot, 'agents/worker')); - - expect((runner as any).resolveEffectiveCwd({ name: 's4', type: 'deterministic', command: 'pwd' })).toBe( - runnerRoot - ); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/step-executor.test.ts b/packages/sdk/src/workflows/__tests__/step-executor.test.ts deleted file mode 100644 index 7b289b95b..000000000 --- a/packages/sdk/src/workflows/__tests__/step-executor.test.ts +++ /dev/null @@ -1,456 +0,0 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; - -import { StepExecutor, type StepExecutorDeps, type StepResult } from '../step-executor.js'; -import type { ProcessSpawner } from '../process-spawner.js'; -import { createProcessSpawner } from '../process-spawner.js'; -import type { WorkflowStep, AgentDefinition, WorkflowStepStatus } from '../types.js'; - -// ── Helpers ────────────────────────────────────────────────────────────────── - -function makeStep(overrides: Partial = {}): WorkflowStep { - return { - name: 'step-1', - type: 'deterministic', - command: 'echo hello', - ...overrides, - } as WorkflowStep; -} - -function makeAgent(overrides: Partial = {}): AgentDefinition { - return { - name: 'worker-1', - cli: 'claude', - role: 'specialist', - ...overrides, - } as AgentDefinition; -} - -function mockSpawner(overrides: Partial = {}): ProcessSpawner { - return { - spawnShell: vi.fn(async () => ({ output: 'hello\n', exitCode: 0 })), - spawnAgent: vi.fn(async () => ({ output: 'done', exitCode: 0 })), - spawnInteractive: vi.fn(async () => ({ output: 'completed', exitCode: 0 })), - buildCommand: vi.fn(() => ({ bin: 'claude', args: ['--task', 'x'] })), - ...overrides, - }; -} - -function makeDeps(overrides: Partial = {}): StepExecutorDeps { - return { - cwd: '/tmp/test-project', - runId: 'run-001', - postToChannel: vi.fn(), - persistStepRow: vi.fn(), - persistStepOutput: vi.fn(), - resolveTemplate: vi.fn((s: string) => s), - getStepOutput: vi.fn(() => ''), - checkAborted: vi.fn(), - waitIfPaused: vi.fn(async () => {}), - log: vi.fn(), - processSpawner: mockSpawner(), - ...overrides, - }; -} - -function createExecutor(overrides: Partial = {}): StepExecutor { - return new StepExecutor(makeDeps(overrides)); -} - -// ── 1. Deterministic step execution ────────────────────────────────────────── - -describe('StepExecutor — deterministic steps', () => { - it('runs a shell command and captures stdout', async () => { - const executor = createExecutor(); - const step = makeStep({ command: 'echo hello' }); - const result = await executor.executeOne(step, new Map()); - expect(result.status).toBe('completed'); - expect(result.output).toContain('hello'); - expect(result.exitCode).toBe(0); - }); - - it('marks step failed on non-zero exit code', async () => { - const executor = createExecutor({ - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => ({ output: 'err', exitCode: 1 })), - }), - }); - const step = makeStep({ command: 'false' }); - const result = await executor.executeOne(step, new Map()); - expect(result.status).toBe('failed'); - expect(result.exitCode).toBe(1); - }); - - it('succeeds with non-zero exit when failOnError is false', async () => { - const executor = createExecutor({ - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => ({ output: 'warn', exitCode: 1 })), - }), - }); - const step = makeStep({ command: 'maybe-fail', failOnError: false }); - const result = await executor.executeOne(step, new Map()); - expect(result.status).toBe('completed'); - }); -}); - -// ── 2. Non-interactive agent step ──────────────────────────────────────────── - -describe('StepExecutor — non-interactive agent steps', () => { - it('spawns a codex worker and captures output', async () => { - const spawner = mockSpawner(); - const executor = createExecutor({ processSpawner: spawner }); - const agent = makeAgent({ cli: 'codex', name: 'codex-worker', interactive: false }); - const step = makeStep({ - name: 'codex-step', - type: 'agent', - agent: 'codex-worker', - task: 'Fix the bug', - command: undefined, - }); - const agentMap = new Map([['codex-worker', agent]]); - - const result = await executor.executeOne(step, agentMap); - expect(spawner.spawnAgent).toHaveBeenCalledWith( - agent, - 'Fix the bug', - expect.objectContaining({ cwd: '/tmp/test-project' }) - ); - expect(result.status).toBe('completed'); - }); - - it('fails when agent is not found in agentMap', async () => { - const executor = createExecutor(); - const step = makeStep({ - name: 'orphan', - type: 'agent', - agent: 'missing-agent', - task: 'Do stuff', - command: undefined, - }); - const result = await executor.executeOne(step, new Map()); - expect(result.status).toBe('failed'); - expect(result.error).toContain('not found'); - }); -}); - -// ── 3. Interactive agent step ──────────────────────────────────────────────── - -describe('StepExecutor — interactive agent steps', () => { - it('spawns a claude lead via spawnInteractive', async () => { - const spawner = mockSpawner(); - const executor = createExecutor({ processSpawner: spawner }); - const agent = makeAgent({ cli: 'claude', name: 'lead-agent' }); - const step = makeStep({ - name: 'lead-step', - type: 'agent', - agent: 'lead-agent', - task: 'Coordinate work', - command: undefined, - }); - const agentMap = new Map([['lead-agent', agent]]); - - const result = await executor.executeOne(step, agentMap); - expect(spawner.spawnInteractive).toHaveBeenCalled(); - expect(result.status).toBe('completed'); - }); -}); - -// ── 4. Step timeout handling ───────────────────────────────────────────────── - -describe('StepExecutor — timeout handling', () => { - it('passes timeoutMs through to process spawner', async () => { - const spawner = mockSpawner(); - const executor = createExecutor({ processSpawner: spawner }); - const step = makeStep({ command: 'sleep 60', timeoutMs: 5000 }); - - await executor.executeOne(step, new Map()); - expect(spawner.spawnShell).toHaveBeenCalledWith('sleep 60', expect.objectContaining({ timeoutMs: 5000 })); - }); - - it('fails step when spawn rejects due to timeout', async () => { - const executor = createExecutor({ - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => { - throw new Error('Process timed out'); - }), - }), - }); - const step = makeStep({ command: 'sleep 60', timeoutMs: 100 }); - const result = await executor.executeOne(step, new Map()); - expect(result.status).toBe('failed'); - expect(result.error).toContain('timed out'); - }); -}); - -// ── 5. Step dependency resolution (dependsOn) ──────────────────────────────── - -describe('StepExecutor — dependency resolution', () => { - it('returns only steps whose deps are all completed', () => { - const executor = createExecutor(); - const steps = [ - makeStep({ name: 'a' }), - makeStep({ name: 'b', dependsOn: ['a'] }), - makeStep({ name: 'c', dependsOn: ['a', 'b'] }), - ]; - const statuses = new Map([ - ['a', 'completed'], - ['b', 'pending'], - ['c', 'pending'], - ]); - const ready = executor.findReady(steps, statuses); - expect(ready.map((s) => s.name)).toEqual(['b']); - }); - - it('treats skipped deps as satisfied', () => { - const executor = createExecutor(); - const steps = [makeStep({ name: 'a' }), makeStep({ name: 'b', dependsOn: ['a'] })]; - const statuses = new Map([ - ['a', 'skipped'], - ['b', 'pending'], - ]); - const ready = executor.findReady(steps, statuses); - expect(ready.map((s) => s.name)).toEqual(['b']); - }); - - it('returns steps with no deps when all are pending', () => { - const executor = createExecutor(); - const steps = [makeStep({ name: 'a' }), makeStep({ name: 'b', dependsOn: ['a'] })]; - const statuses = new Map([ - ['a', 'pending'], - ['b', 'pending'], - ]); - const ready = executor.findReady(steps, statuses); - expect(ready.map((s) => s.name)).toEqual(['a']); - }); - - it('returns nothing when all deps are failed', () => { - const executor = createExecutor(); - const steps = [makeStep({ name: 'a' }), makeStep({ name: 'b', dependsOn: ['a'] })]; - const statuses = new Map([ - ['a', 'failed'], - ['b', 'pending'], - ]); - const ready = executor.findReady(steps, statuses); - expect(ready.map((s) => s.name)).toEqual([]); - }); -}); - -// ── 6. Step output capture and storage ─────────────────────────────────────── - -describe('StepExecutor — output capture', () => { - it('persists step output after successful completion', async () => { - const deps = makeDeps(); - const executor = new StepExecutor(deps); - const step = makeStep({ command: 'echo result-data' }); - - await executor.executeOne(step, new Map()); - expect(deps.persistStepOutput).toHaveBeenCalledWith( - 'run-001', - 'step-1', - expect.stringContaining('hello') - ); - }); - - it('persists step row status on completion', async () => { - const deps = makeDeps(); - const executor = new StepExecutor(deps); - const step = makeStep({ command: 'echo ok' }); - - await executor.executeOne(step, new Map()); - expect(deps.persistStepRow).toHaveBeenCalledWith( - expect.any(String), - expect.objectContaining({ status: 'completed' }) - ); - }); - - it('captures output on failure', async () => { - const deps = makeDeps({ - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => ({ output: 'error: not found', exitCode: 1 })), - }), - }); - const executor = new StepExecutor(deps); - const step = makeStep({ command: 'bad-command' }); - const result = await executor.executeOne(step, new Map()); - expect(result.output).toContain('error: not found'); - }); - - it('suppresses output when captureOutput is false', async () => { - const executor = createExecutor(); - const step = makeStep({ command: 'echo secret', captureOutput: false }); - const result = await executor.executeOne(step, new Map()); - expect(result.output).toContain('Command completed'); - expect(result.output).not.toContain('hello'); - }); -}); - -// ── 7. Step retry on failure ───────────────────────────────────────────────── - -describe('StepExecutor — retry logic', () => { - // Note: monitorStep retries on thrown errors (spawn failures), not on non-zero exit codes. - // Non-zero exit codes are handled by toCompletionResult and produce immediate failure. - - it('retries when spawn throws an error', async () => { - let attempt = 0; - const executor = createExecutor({ - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => { - attempt++; - if (attempt < 3) throw new Error('connection refused'); - return { output: 'ok', exitCode: 0 }; - }), - }), - }); - const step = makeStep({ command: 'flaky', retries: 3 }); - const result = await executor.executeOne(step, new Map()); - expect(result.status).toBe('completed'); - expect(result.retries).toBe(2); - }); - - it('fails after exhausting retries on thrown errors', async () => { - const executor = createExecutor({ - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => { - throw new Error('always fails'); - }), - }), - }); - const step = makeStep({ command: 'always-fail', retries: 2 }); - const result = await executor.executeOne(step, new Map()); - expect(result.status).toBe('failed'); - expect(result.retries).toBe(2); - expect(result.error).toContain('always fails'); - }); - - it('does not retry on non-zero exit code (immediate failure)', async () => { - const spawnShell = vi.fn(async () => ({ output: 'fail', exitCode: 1 })); - const executor = createExecutor({ - processSpawner: mockSpawner({ spawnShell }), - }); - const step = makeStep({ command: 'bad', retries: 3 }); - const result = await executor.executeOne(step, new Map()); - expect(result.status).toBe('failed'); - // Called only once — no retries for clean non-zero exits - expect(spawnShell).toHaveBeenCalledTimes(1); - }); - - it('calls onStepRetried callback on each retry', async () => { - const onStepRetried = vi.fn(); - let attempt = 0; - const executor = createExecutor({ - onStepRetried, - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => { - attempt++; - if (attempt < 2) throw new Error('transient'); - return { output: 'ok', exitCode: 0 }; - }), - }), - }); - const step = makeStep({ command: 'flaky', retries: 2 }); - await executor.executeOne(step, new Map()); - expect(onStepRetried).toHaveBeenCalledTimes(1); - }); -}); - -// ── 8. Process spawner — command building ──────────────────────────────────── - -describe('ProcessSpawner — buildCommand', () => { - it('builds claude CLI command', () => { - const spawner = createProcessSpawner({ cwd: '/tmp' }); - const agent = makeAgent({ cli: 'claude', name: 'claude-worker' }); - const cmd = spawner.buildCommand(agent, 'Do the task'); - expect(cmd.bin).toBe('claude'); - expect(cmd.args).toContain('Do the task'); - }); - - it('builds codex CLI command', () => { - const spawner = createProcessSpawner({ cwd: '/tmp' }); - const agent = makeAgent({ cli: 'codex', name: 'codex-worker' }); - const cmd = spawner.buildCommand(agent, 'Fix bug'); - expect(cmd.bin).toBe('codex'); - expect(cmd.args).toContain('Fix bug'); - }); - - it('builds aider CLI command', () => { - const spawner = createProcessSpawner({ cwd: '/tmp' }); - const agent = makeAgent({ cli: 'aider', name: 'aider-worker' }); - const cmd = spawner.buildCommand(agent, 'Refactor'); - expect(cmd.bin).toBe('aider'); - expect(cmd.args).toContain('Refactor'); - }); - - it('builds gemini CLI command', () => { - const spawner = createProcessSpawner({ cwd: '/tmp' }); - const agent = makeAgent({ cli: 'gemini', name: 'gemini-worker' }); - const cmd = spawner.buildCommand(agent, 'Analyze'); - expect(cmd.bin).toBe('gemini'); - expect(cmd.args).toContain('Analyze'); - }); -}); - -// ── 9. executeAll — DAG orchestration ──────────────────────────────────────── - -describe('StepExecutor — executeAll', () => { - it('executes steps in dependency order', async () => { - const order: string[] = []; - const executor = createExecutor({ - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => { - return { output: 'ok', exitCode: 0 }; - }), - }), - onStepStarted: vi.fn((step) => { - order.push(step.name); - }), - }); - const steps = [ - makeStep({ name: 'a', command: 'echo a' }), - makeStep({ name: 'b', command: 'echo b', dependsOn: ['a'] }), - ]; - - const results = await executor.executeAll(steps, new Map()); - expect(results.size).toBe(2); - expect(order).toEqual(['a', 'b']); - expect(results.get('a')?.status).toBe('completed'); - expect(results.get('b')?.status).toBe('completed'); - }); - - it('skips downstream steps on fail-fast', async () => { - const executor = createExecutor({ - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => ({ output: 'err', exitCode: 1 })), - }), - markDownstreamSkipped: vi.fn(), - }); - const steps = [ - makeStep({ name: 'a', command: 'fail' }), - makeStep({ name: 'b', command: 'echo b', dependsOn: ['a'] }), - ]; - - await expect(executor.executeAll(steps, new Map(), { strategy: 'fail-fast' })).rejects.toThrow( - 'Step "a" failed' - ); - }); - - it('continues past failures with continue strategy', async () => { - let callCount = 0; - const executor = createExecutor({ - processSpawner: mockSpawner({ - spawnShell: vi.fn(async () => { - callCount++; - if (callCount === 1) return { output: 'err', exitCode: 1 }; - return { output: 'ok', exitCode: 0 }; - }), - }), - markDownstreamSkipped: vi.fn(), - }); - const steps = [ - makeStep({ name: 'a', command: 'fail' }), - makeStep({ name: 'c', command: 'echo c' }), // no dependency on a - ]; - - const results = await executor.executeAll(steps, new Map(), { strategy: 'continue' }); - expect(results.get('a')?.status).toBe('failed'); - expect(results.get('c')?.status).toBe('completed'); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/template-resolver.test.ts b/packages/sdk/src/workflows/__tests__/template-resolver.test.ts deleted file mode 100644 index 0d0076c8b..000000000 --- a/packages/sdk/src/workflows/__tests__/template-resolver.test.ts +++ /dev/null @@ -1,164 +0,0 @@ -import { describe, expect, it } from 'vitest'; - -// Import from the module that will be extracted from runner.ts -import { resolveStepOutputRef, resolveTemplate, TemplateResolver } from '../template-resolver.js'; - -describe('TemplateResolver', () => { - const resolver = new TemplateResolver(); - - describe('resolveTemplate', () => { - it('replaces non-step placeholders and preserves deferred step outputs', () => { - const result = resolveTemplate('Deploy {{env}} after {{steps.plan.output}}', { env: 'prod' }); - expect(result).toBe('Deploy prod after {{steps.plan.output}}'); - }); - - it('throws on unresolved placeholders', () => { - expect(() => resolveTemplate('Deploy {{missing}}', {})).toThrow('Unresolved variable: {{missing}}'); - }); - }); - - describe('resolveStepOutputRef', () => { - it('resolves a completed step output by reference', () => { - const stepOutputs = new Map([['plan', 'Build a REST API']]); - expect(resolveStepOutputRef('steps.plan.output', stepOutputs)).toBe('Build a REST API'); - }); - - it('accepts references wrapped in template braces', () => { - const stepOutputs = new Map([['code', 'Created 3 files']]); - expect(resolveStepOutputRef('{{steps.code.output}}', stepOutputs)).toBe('Created 3 files'); - }); - }); - - describe('resolveVariables', () => { - it('replaces simple {{var}} placeholders in agent tasks', () => { - const config = { - version: '1', - name: 'test', - swarm: { mode: 'coordinate' as const }, - agents: [{ name: 'a1', cli: 'claude', task: 'Deploy {{env}} to {{region}}' }], - }; - const result = resolver.resolveVariables(config as any, { env: 'staging', region: 'us-east-1' }); - expect(result.agents[0].task).toBe('Deploy staging to us-east-1'); - }); - - it('replaces variables in workflow step tasks and commands', () => { - const config = { - version: '1', - name: 'test', - swarm: { mode: 'coordinate' as const }, - agents: [], - workflows: [ - { - name: 'wf1', - steps: [ - { name: 's1', task: 'Build {{project}}', agent: 'a1' }, - { name: 's2', command: 'deploy --env={{env}}' }, - ], - }, - ], - }; - const result = resolver.resolveVariables(config as any, { project: 'relay', env: 'prod' }); - expect(result.workflows![0].steps[0].task).toBe('Build relay'); - expect(result.workflows![0].steps[1].command).toBe("deploy --env='prod'"); - }); - - it('replaces variables in step params', () => { - const config = { - version: '1', - name: 'test', - swarm: { mode: 'coordinate' as const }, - agents: [], - workflows: [ - { - name: 'wf1', - steps: [{ name: 's1', agent: 'a1', params: { url: '{{base_url}}/api', count: 42 } }], - }, - ], - }; - const result = resolver.resolveVariables(config as any, { base_url: 'https://example.com' }); - expect((result.workflows![0].steps[0].params as any).url).toBe('https://example.com/api'); - // Non-string params are left untouched - expect((result.workflows![0].steps[0].params as any).count).toBe(42); - }); - - it('preserves {{steps.X.output}} placeholders for later resolution', () => { - const config = { - version: '1', - name: 'test', - swarm: { mode: 'coordinate' as const }, - agents: [{ name: 'a1', cli: 'claude', task: 'Use {{steps.plan.output}} for {{env}}' }], - }; - const result = resolver.resolveVariables(config as any, { env: 'prod' }); - expect(result.agents[0].task).toBe('Use {{steps.plan.output}} for prod'); - }); - - it('throws on unresolved non-step variables', () => { - const config = { - version: '1', - name: 'test', - swarm: { mode: 'coordinate' as const }, - agents: [{ name: 'a1', cli: 'claude', task: 'Deploy to {{missing_var}}' }], - }; - expect(() => resolver.resolveVariables(config as any, {})).toThrow( - 'Unresolved variable: {{missing_var}}' - ); - }); - - it('does not mutate the original config', () => { - const config = { - version: '1', - name: 'test', - swarm: { mode: 'coordinate' as const }, - agents: [{ name: 'a1', cli: 'claude', task: 'Deploy {{env}}' }], - }; - resolver.resolveVariables(config as any, { env: 'staging' }); - expect(config.agents[0].task).toBe('Deploy {{env}}'); - }); - }); - - describe('resolveDotPath', () => { - it('resolves nested dot-path variables', () => { - const config = { - version: '1', - name: 'test', - swarm: { mode: 'coordinate' as const }, - agents: [{ name: 'a1', cli: 'claude', task: 'Region: {{aws.region}}' }], - }; - const vars = { aws: { region: 'us-west-2' } } as any; - const result = resolver.resolveVariables(config as any, vars); - expect(result.agents[0].task).toBe('Region: us-west-2'); - }); - - it('throws for undefined nested paths', () => { - const config = { - version: '1', - name: 'test', - swarm: { mode: 'coordinate' as const }, - agents: [{ name: 'a1', cli: 'claude', task: '{{a.b.c}}' }], - }; - expect(() => resolver.resolveVariables(config as any, { a: { b: {} } } as any)).toThrow( - 'Unresolved variable: {{a.b.c}}' - ); - }); - }); - - describe('interpolateStepTask', () => { - it('resolves step output references from completed steps', () => { - const template = 'Review: {{steps.plan.output}} and {{steps.code.output}}'; - const context = { - steps: { - plan: { output: 'Build a REST API' }, - code: { output: 'Created 3 files' }, - }, - } as any; - const result = resolver.interpolateStepTask(template, context); - expect(result).toBe('Review: Build a REST API and Created 3 files'); - }); - - it('leaves unresolved step references intact', () => { - const template = 'Use {{steps.future.output}} later'; - const result = resolver.interpolateStepTask(template, { steps: {} } as any); - expect(result).toBe('Use {{steps.future.output}} later'); - }); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/verification-custom.test.ts b/packages/sdk/src/workflows/__tests__/verification-custom.test.ts deleted file mode 100644 index 0c737cc90..000000000 --- a/packages/sdk/src/workflows/__tests__/verification-custom.test.ts +++ /dev/null @@ -1,292 +0,0 @@ -import { EventEmitter } from 'node:events'; -import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; - -import type { WorkflowDb } from '../runner.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../types.js'; - -const mockRelaycastAgent = { - send: vi.fn().mockResolvedValue(undefined), - heartbeat: vi.fn().mockResolvedValue(undefined), - channels: { - create: vi.fn().mockResolvedValue(undefined), - join: vi.fn().mockResolvedValue(undefined), - invite: vi.fn().mockResolvedValue(undefined), - }, -}; - -const mockRelaycast = { - agents: { - register: vi.fn().mockResolvedValue({ token: 'token-1' }), - }, - as: vi.fn().mockReturnValue(mockRelaycastAgent), -}; - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn().mockImplementation(() => mockRelaycast), - RelayError: class RelayError extends Error {}, -})); - -const mockHuman = { - name: 'WorkflowRunner', - sendMessage: vi.fn().mockResolvedValue(undefined), -}; - -const mockRelayInstance = { - spawnPty: vi.fn(), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - listAgentsRaw: vi.fn().mockResolvedValue([]), - addListener: vi.fn(() => () => {}), -}; - -vi.mock('../../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -type QueuedSubprocessResult = { - stdout?: string; - stderr?: string; - code?: number | null; - signal?: string | null; - error?: Error; - beforeClose?: () => void; -}; - -let queuedSubprocessResults: QueuedSubprocessResult[] = []; - -const mockSubprocessSpawn = vi.fn().mockImplementation((_cmd, _args, _options) => { - const result = queuedSubprocessResults.shift() ?? { stdout: 'done\n', code: 0 }; - const child = new EventEmitter() as EventEmitter & { - stdout: EventEmitter; - stderr: EventEmitter; - pid: number; - kill: ReturnType; - }; - - child.stdout = new EventEmitter(); - child.stderr = new EventEmitter(); - child.pid = 4321; - child.kill = vi.fn(); - - queueMicrotask(() => { - if (result.error) { - child.emit('error', result.error); - return; - } - if (result.stdout) { - child.stdout.emit('data', Buffer.from(result.stdout)); - } - if (result.stderr) { - child.stderr.emit('data', Buffer.from(result.stderr)); - } - result.beforeClose?.(); - child.emit('close', result.code ?? 0, result.signal ?? null); - }); - - return child; -}); - -vi.mock('node:child_process', async () => { - const actual = await vi.importActual('node:child_process'); - return { - ...actual, - spawn: mockSubprocessSpawn, - }; -}); - -const { WorkflowRunner } = await import('../runner.js'); -const { runVerification } = await import('../verification.js'); - -const noopSideEffects = { - recordStepToolSideEffect: vi.fn(), - getOrCreateStepEvidenceRecord: vi.fn(() => ({ - evidence: { coordinationSignals: [] }, - })), - log: vi.fn(), -}; - -const tempDirs: string[] = []; - -function run(check: Parameters[0], output = 'worker output', cwd?: string) { - return runVerification( - check, - output, - 'custom-step', - undefined, - { allowFailure: true, cwd }, - noopSideEffects - ); -} - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (runRow: WorkflowRunRow) => { - runs.set(runRow.id, { ...runRow }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const runRow = runs.get(id); - return runRow ? { ...runRow } : null; - }), - insertStep: vi.fn(async (stepRow: WorkflowStepRow) => { - steps.set(stepRow.id, { ...stepRow }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((stepRow) => stepRow.runId === runId); - }), - }; -} - -function makeConfig(projectDir: string, verificationValue: string): RelayYamlConfig { - return { - version: '1', - name: 'verification-custom', - swarm: { pattern: 'dag' }, - errorHandling: { - strategy: 'retry', - retryDelayMs: 0, - }, - agents: [{ name: 'worker', cli: 'claude', interactive: false }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'custom-step', - agent: 'worker', - task: 'Implement the requested change', - retries: 1, - cwd: projectDir, - verification: { - type: 'custom', - value: verificationValue, - }, - }, - ], - }, - ], - trajectories: false, - }; -} - -describe('custom verification', () => { - beforeEach(() => { - vi.clearAllMocks(); - queuedSubprocessResults = []; - }); - - afterEach(() => { - while (tempDirs.length > 0) { - const dir = tempDirs.pop(); - if (dir) rmSync(dir, { recursive: true, force: true }); - } - }); - - it('custom verification with command that exits 0 passes', () => { - const result = run({ type: 'custom', value: 'echo ok' }); - - expect(result.passed).toBe(true); - expect(result.completionReason).toBe('completed_verified'); - }); - - it('custom verification with command that exits non-zero fails', () => { - const result = run({ type: 'custom', value: 'exit 1' }); - - expect(result.passed).toBe(false); - expect(result.completionReason).toBe('failed_verification'); - expect(result.error).toContain('custom check "exit 1" failed'); - expect(result.error).toContain('Command failed: exit 1'); - }); - - it('custom verification captures stderr in failure message', () => { - const result = run({ - type: 'custom', - value: "echo 'compile error: missing semicolon' >&2; exit 1", - }); - - expect(result.passed).toBe(false); - expect(result.error).toContain('compile error: missing semicolon'); - }); - - it('custom verification with no value preserves legacy no-op', () => { - const result = run({ type: 'custom', value: '' }); - - expect(result).toEqual({ passed: false }); - }); - - it('custom verification respects cwd', () => { - const tempDir = mkdtempSync(path.join(os.tmpdir(), 'verification-custom-cwd-')); - tempDirs.push(tempDir); - writeFileSync(path.join(tempDir, 'myfile.txt'), 'present'); - - const result = run({ type: 'custom', value: 'test -f myfile.txt' }, 'worker output', tempDir); - - expect(result.passed).toBe(true); - expect(result.completionReason).toBe('completed_verified'); - }); - - it('custom verification timeout kills long-running command', () => { - const result = run({ type: 'custom', value: 'sleep 60', timeoutMs: 1000 }); - - expect(result.passed).toBe(false); - expect(result.completionReason).toBe('failed_verification'); - expect(result.error).toContain('sleep 60'); - expect(result.error).toMatch(/ETIMEDOUT|timed out/i); - }); - - it('verification failure output appears in retry prompt', async () => { - const projectDir = mkdtempSync(path.join(os.tmpdir(), 'verification-custom-runner-')); - tempDirs.push(projectDir); - - const verificationValue = - `sh -c 'if [ -f ready.txt ]; then exit 0; ` + - `else echo "compile error: missing semicolon" >&2; exit 1; fi'`; - - queuedSubprocessResults = [ - { - stdout: 'first attempt\n', - code: 0, - }, - { - stdout: 'second attempt\n', - code: 0, - beforeClose: () => { - writeFileSync(path.join(projectDir, 'ready.txt'), 'ok'); - }, - }, - ]; - - const runner = new WorkflowRunner({ - cwd: projectDir, - db: makeDb(), - workspaceId: 'ws-test', - }); - - const result = await runner.execute(makeConfig(projectDir, verificationValue), 'default'); - - expect(result.status, result.error).toBe('completed'); - expect(mockSubprocessSpawn).toHaveBeenCalledTimes(2); - - const retryArgs = mockSubprocessSpawn.mock.calls[1]?.[1] as string[] | undefined; - const retryPrompt = retryArgs?.find((arg) => arg.includes('[RETRY')) ?? retryArgs?.join('\n') ?? ''; - - expect(retryPrompt).toContain('[VERIFICATION FAILED]'); - expect(retryPrompt).toContain(`Command: ${verificationValue}`); - expect(retryPrompt).toContain('compile error: missing semicolon'); - expect(retryPrompt).toContain('Fix the issues above before proceeding.'); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/verification-traceback.test.ts b/packages/sdk/src/workflows/__tests__/verification-traceback.test.ts deleted file mode 100644 index 4661ad932..000000000 --- a/packages/sdk/src/workflows/__tests__/verification-traceback.test.ts +++ /dev/null @@ -1,558 +0,0 @@ -import { EventEmitter } from 'node:events'; -import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import { afterAll, beforeEach, describe, expect, it, vi } from 'vitest'; - -import type { BudgetTracker } from '../budget-tracker.js'; -import type { CliSessionQuery, CliSessionReport } from '../cli-session-collector.js'; -import type { WorkflowDb } from '../runner.js'; -import type { AgentDefinition, RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../types.js'; - -type WorkflowConfigStep = NonNullable[number]['steps'][number]; - -const tempDirs: string[] = []; - -type QueuedSubprocessResult = { - stdout?: string; - stderr?: string; - code?: number | null; - signal?: string | null; - delayMs?: number; - error?: Error; - beforeClose?: () => void; -}; - -type CollectorResult = - | CliSessionReport - | null - | ((query: CliSessionQuery) => CliSessionReport | null | Promise); - -let queuedSubprocessResults: QueuedSubprocessResult[] = []; -let queuedCollectorResults: CollectorResult[] = []; - -const mockCollectCliSession = vi.fn(async (query: CliSessionQuery): Promise => { - const next = queuedCollectorResults.shift(); - if (typeof next === 'function') { - return next(query); - } - return next ?? null; -}); - -vi.mock('../cli-session-collector.js', () => ({ - collectCliSession: mockCollectCliSession, -})); - -const mockSubprocessSpawn = vi.fn().mockImplementation((_cmd, _args, _options) => { - const result = queuedSubprocessResults.shift() ?? { - stdout: 'completed\n', - code: 0, - }; - - const child = new EventEmitter() as EventEmitter & { - stdout: EventEmitter; - stderr: EventEmitter; - pid: number; - kill: ReturnType; - }; - - child.stdout = new EventEmitter(); - child.stderr = new EventEmitter(); - child.pid = 4321; - - let closed = false; - let delayTimer: ReturnType | undefined; - const clearPending = () => { - if (delayTimer) { - clearTimeout(delayTimer); - delayTimer = undefined; - } - }; - const closeChild = ( - code: number | null = result.code ?? 0, - signal: string | null = result.signal ?? null - ) => { - if (closed) return; - closed = true; - clearPending(); - child.emit('close', code, signal); - }; - - child.kill = vi.fn((signal?: string | number) => { - clearPending(); - queueMicrotask(() => closeChild(null, typeof signal === 'string' ? signal : null)); - return true; - }); - - const emitResult = () => { - if (closed) return; - if (result.error) { - closed = true; - child.emit('error', result.error); - return; - } - if (result.stdout) { - child.stdout.emit('data', Buffer.from(result.stdout)); - } - if (result.stderr) { - child.stderr.emit('data', Buffer.from(result.stderr)); - } - result.beforeClose?.(); - closeChild(result.code ?? 0, result.signal ?? null); - }; - - if (result.delayMs && result.delayMs > 0) { - delayTimer = setTimeout(emitResult, result.delayMs); - } else { - queueMicrotask(emitResult); - } - - return child; -}); - -vi.mock('node:child_process', async () => { - const actual = await vi.importActual('node:child_process'); - return { - ...actual, - spawn: mockSubprocessSpawn, - }; -}); - -const mockHuman = { - sendMessage: vi.fn().mockResolvedValue(undefined), -}; - -const mockRelayInstance = { - spawnPty: vi.fn(), - human: vi.fn().mockReturnValue(mockHuman), - shutdown: vi.fn().mockResolvedValue(undefined), - onBrokerStderr: vi.fn().mockReturnValue(() => {}), - listAgentsRaw: vi.fn().mockResolvedValue([]), - listAgents: vi.fn().mockResolvedValue([]), - addListener: vi.fn(() => () => {}), -}; - -vi.mock('@relaycast/sdk', () => ({ - RelayCast: vi.fn(), - RelayError: class RelayError extends Error {}, -})); - -vi.mock('../../relay.js', () => ({ - AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance), -})); - -const { workflow } = await import('../builder.js'); -const { WorkflowRunner } = await import('../runner.js'); - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (runRow: WorkflowRunRow) => { - runs.set(runRow.id, { ...runRow }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) { - runs.set(id, { ...existing, ...patch }); - } - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (stepRow: WorkflowStepRow) => { - steps.set(stepRow.id, { ...stepRow }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) { - steps.set(id, { ...existing, ...patch }); - } - }), - getStepsByRunId: vi.fn(async (runId: string) => { - return [...steps.values()].filter((step) => step.runId === runId).map((step) => ({ ...step })); - }), - }; -} - -function createWorkspace(subdirs: string[] = []): string { - const dir = mkdtempSync(path.join(os.tmpdir(), 'relay-verification-traceback-')); - tempDirs.push(dir); - for (const subdir of subdirs) { - mkdirSync(path.join(dir, subdir), { recursive: true }); - } - return dir; -} - -function makeRunner(cwd: string): InstanceType { - return new WorkflowRunner({ - cwd, - db: makeDb(), - workspaceId: 'ws-test', - relay: { - env: { - AGENT_RELAY_WORKFLOW_DISABLE_RELAYCAST: '1', - }, - }, - }); -} - -function makeReport( - tokens: Partial>, - overrides: Partial = {} -): CliSessionReport { - return { - cli: 'claude', - sessionId: 'session-1', - model: 'claude-sonnet-4', - provider: 'anthropic', - durationMs: 1_000, - cost: null, - tokens: { - input: tokens.input ?? 0, - output: tokens.output ?? 0, - cacheRead: tokens.cacheRead ?? 0, - }, - turns: 1, - toolCalls: [], - errors: [], - finalStatus: 'completed', - summary: 'done', - ...overrides, - }; -} - -function makeConfig(input: { - workspace: string; - verification: WorkflowConfigStep['verification']; - retries?: number; - swarm?: Partial; - includeDiagnosticAgent?: boolean; -}): RelayYamlConfig { - const workerCwd = path.join(input.workspace, 'worker'); - const diagCwd = path.join(input.workspace, 'diag'); - - return { - version: '1', - name: 'verification-traceback', - swarm: { - pattern: 'dag', - ...input.swarm, - }, - errorHandling: { - strategy: 'retry', - retryDelayMs: 0, - }, - agents: [ - { - name: 'worker', - cli: 'claude', - interactive: false, - cwd: workerCwd, - }, - ...(input.includeDiagnosticAgent === false - ? [] - : [ - { - name: 'diag', - cli: 'claude', - interactive: false, - cwd: diagCwd, - } satisfies AgentDefinition, - ]), - ], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'implement', - agent: 'worker', - task: 'Implement the requested change', - retries: input.retries ?? 1, - verification: input.verification, - }, - ], - }, - ], - trajectories: false, - }; -} - -function verificationCommand(): string { - return ( - `sh -c 'if [ -f ready.txt ]; then exit 0; ` + - `else echo "compile error: missing semicolon" >&2; exit 1; fi'` - ); -} - -function taskFromExecCall(execSpy: ReturnType, callIndex: number): string { - const call = execSpy.mock.calls[callIndex] as [AgentDefinition, { task?: string }] | undefined; - return String(call?.[1]?.task ?? ''); -} - -function getBudgetTracker(runner: InstanceType): BudgetTracker | undefined { - return (runner as any).budgetTracker as BudgetTracker | undefined; -} - -describe('verification traceback retry handling', () => { - beforeEach(() => { - vi.clearAllMocks(); - queuedSubprocessResults = []; - queuedCollectorResults = []; - mockRelayInstance.shutdown.mockResolvedValue(undefined); - mockRelayInstance.onBrokerStderr.mockReturnValue(() => {}); - mockRelayInstance.listAgents.mockResolvedValue([]); - mockRelayInstance.listAgentsRaw.mockResolvedValue([]); - }); - - afterAll(async () => { - await new Promise((resolve) => setTimeout(resolve, 250)); - while (tempDirs.length > 0) { - rmSync(tempDirs.pop()!, { recursive: true, force: true }); - } - }); - - it('verification failure without diagnosticAgent uses standard retry', async () => { - const workspace = createWorkspace(['worker']); - const runner = makeRunner(workspace); - const execSpy = vi.spyOn(runner as any, 'execNonInteractive'); - - queuedSubprocessResults = [ - { stdout: 'first attempt\n', code: 0 }, - { - stdout: 'second attempt\n', - code: 0, - beforeClose: () => { - writeFileSync(path.join(workspace, 'ready.txt'), 'ok'); - }, - }, - ]; - - const result = await runner.execute( - makeConfig({ - workspace, - includeDiagnosticAgent: false, - verification: { - type: 'custom', - value: verificationCommand(), - }, - }), - 'default' - ); - - expect(result.status, result.error).toBe('completed'); - expect(execSpy).toHaveBeenCalledTimes(2); - - const retryTask = taskFromExecCall(execSpy, 1); - expect(retryTask).toContain('[VERIFICATION FAILED]'); - expect(retryTask).toContain(`Command: ${verificationCommand()}`); - expect(retryTask).toContain('compile error: missing semicolon'); - expect(retryTask).not.toContain('Diagnostic analysis:'); - }); - - it('verification failure with diagnosticAgent runs diagnostic before retry', async () => { - const workspace = createWorkspace(['worker', 'diag']); - const runner = makeRunner(workspace); - const execSpy = vi.spyOn(runner as any, 'execNonInteractive'); - - queuedSubprocessResults = [ - { stdout: 'first attempt\n', code: 0 }, - { stdout: 'The issue is in file X, line Y: missing semicolon\n', code: 0 }, - { stdout: 'second attempt\n', code: 0 }, - ]; - - const result = await runner.execute( - makeConfig({ - workspace, - verification: { - type: 'custom', - value: 'exit 1', - diagnosticAgent: 'diag', - }, - }), - 'default' - ); - - expect(result.status).toBe('failed'); - expect(result.error).toContain('Step "implement" failed after 1 retries'); - - expect(execSpy).toHaveBeenCalledTimes(3); - - const diagnosticCall = execSpy.mock.calls[1] as [AgentDefinition, { task?: string }]; - expect(diagnosticCall[0].name).toBe('diag'); - expect(String(diagnosticCall[1].task)).toContain( - 'Analyze what went wrong. Be specific. Do NOT fix the code.' - ); - - const retryTask = taskFromExecCall(execSpy, 2); - expect(retryTask).toContain('Diagnostic analysis:'); - expect(retryTask).toContain('The issue is in file X, line Y: missing semicolon'); - }); - - it('diagnostic agent timeout falls back to standard retry', async () => { - const workspace = createWorkspace(['worker', 'diag']); - const runner = makeRunner(workspace); - const execSpy = vi.spyOn(runner as any, 'execNonInteractive'); - const logSpy = vi.spyOn(runner as any, 'log').mockImplementation(() => {}); - - queuedSubprocessResults = [ - { stdout: 'first attempt\n', code: 0 }, - { stdout: 'slow diagnostic\n', code: 0, delayMs: 5_000 }, - { - stdout: 'second attempt\n', - code: 0, - beforeClose: () => { - writeFileSync(path.join(workspace, 'ready.txt'), 'ok'); - }, - }, - ]; - - const result = await runner.execute( - makeConfig({ - workspace, - verification: { - type: 'custom', - value: verificationCommand(), - diagnosticAgent: 'diag', - diagnosticTimeout: 100, - }, - }), - 'default' - ); - - expect(result.status, result.error).toBe('completed'); - expect(execSpy).toHaveBeenCalledTimes(3); - - const retryTask = taskFromExecCall(execSpy, 2); - expect(retryTask).toContain('[VERIFICATION FAILED]'); - expect(retryTask).toContain('compile error: missing semicolon'); - expect(retryTask).not.toContain('Diagnostic analysis:'); - expect(logSpy).toHaveBeenCalledWith(expect.stringContaining('Diagnostic timed out')); - }); - - it('diagnostic agent failure falls back to standard retry', async () => { - const workspace = createWorkspace(['worker', 'diag']); - const runner = makeRunner(workspace); - const execSpy = vi.spyOn(runner as any, 'execNonInteractive'); - const logSpy = vi.spyOn(runner as any, 'log').mockImplementation(() => {}); - - queuedSubprocessResults = [ - { stdout: 'first attempt\n', code: 0 }, - { error: new Error('diagnostic exploded') }, - { - stdout: 'second attempt\n', - code: 0, - beforeClose: () => { - writeFileSync(path.join(workspace, 'ready.txt'), 'ok'); - }, - }, - ]; - - const result = await runner.execute( - makeConfig({ - workspace, - verification: { - type: 'custom', - value: verificationCommand(), - diagnosticAgent: 'diag', - }, - }), - 'default' - ); - - expect(result.status, result.error).toBe('completed'); - expect(execSpy).toHaveBeenCalledTimes(3); - - const retryTask = taskFromExecCall(execSpy, 2); - expect(retryTask).toContain('[VERIFICATION FAILED]'); - expect(retryTask).toContain('compile error: missing semicolon'); - expect(retryTask).not.toContain('Diagnostic analysis:'); - expect(logSpy).toHaveBeenCalledWith(expect.stringContaining('Diagnostic failed')); - }); - - it('diagnosticAgent name validated against agent list', () => { - expect(() => { - workflow('traceback') - .agent('worker', { cli: 'claude' }) - .step('implement', { - agent: 'worker', - task: 'Implement the requested change', - retries: 1, - verification: { - type: 'custom', - value: 'exit 1', - diagnosticAgent: 'nonexistent', - }, - }) - .toConfig(); - }).toThrow('Step "implement" references unknown diagnosticAgent "nonexistent"'); - }); - - it('diagnostic token usage recorded in budget tracker', async () => { - const workspace = createWorkspace(['worker', 'diag']); - const runner = makeRunner(workspace); - - queuedSubprocessResults = [ - { stdout: 'first attempt\n', code: 0 }, - { stdout: 'The issue is in file X, line Y: missing semicolon\n', code: 0 }, - { - stdout: 'second attempt\n', - code: 0, - beforeClose: () => { - writeFileSync(path.join(workspace, 'ready.txt'), 'ok'); - }, - }, - ]; - queuedCollectorResults = [null, makeReport({ input: 40, output: 10 }), null]; - - const result = await runner.execute( - makeConfig({ - workspace, - swarm: { tokenBudget: 1_000 }, - verification: { - type: 'custom', - value: verificationCommand(), - diagnosticAgent: 'diag', - }, - }), - 'default' - ); - - const tracker = getBudgetTracker(runner); - - expect(result.status, result.error).toBe('completed'); - expect(mockCollectCliSession).toHaveBeenCalledTimes(3); - expect(tracker?.getTotalUsage()).toEqual({ - input: 40, - output: 10, - cacheRead: 0, - total: 50, - }); - }); - - it('no retries configured with diagnosticAgent logs warning', () => { - const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); - - workflow('traceback') - .agent('worker', { cli: 'claude' }) - .agent('diag', { cli: 'claude' }) - .step('implement', { - agent: 'worker', - task: 'Implement the requested change', - retries: 0, - verification: { - type: 'custom', - value: 'exit 1', - diagnosticAgent: 'diag', - }, - }) - .toConfig(); - - expect(warnSpy).toHaveBeenCalledWith( - 'Step "implement": diagnosticAgent configured but no retries — diagnostic will never run' - ); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/verification.test.ts b/packages/sdk/src/workflows/__tests__/verification.test.ts deleted file mode 100644 index 935451afb..000000000 --- a/packages/sdk/src/workflows/__tests__/verification.test.ts +++ /dev/null @@ -1,381 +0,0 @@ -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import fs from 'node:fs'; -import path from 'node:path'; -import os from 'node:os'; - -// The module under test — does not exist yet (red phase). -import { - runVerification, - stripInjectedTaskEcho, - checkOutputContains, - checkFileExists, - checkCustom, - execCustomVerification, - findPrUrl, - type VerificationCheck, - type VerificationResult, - type VerificationOptions, - WorkflowCompletionError, -} from '../verification.js'; - -// ── helpers ─────────────────────────────────────────────────────────────────── - -const noopSideEffects = { - recordStepToolSideEffect: vi.fn(), - getOrCreateStepEvidenceRecord: vi.fn(() => ({ - evidence: { coordinationSignals: [] }, - })), - log: vi.fn(), -}; - -function run( - check: VerificationCheck, - output: string, - stepName = 'test-step', - options?: VerificationOptions -): VerificationResult { - return runVerification(check, output, stepName, undefined, options, noopSideEffects); -} - -// ── tests ───────────────────────────────────────────────────────────────────── - -describe('verification logic', () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - // 1. exit_code — pass on exit 0 (implicit success) - describe('exit_code', () => { - it('should pass when agent exited successfully (exit 0 implicit)', () => { - const result = run({ type: 'exit_code', value: '0' }, 'some output'); - expect(result.passed).toBe(true); - expect(result.completionReason).toBe('completed_verified'); - }); - - it('should still pass for non-zero value (exit_code is implicitly satisfied)', () => { - // per existing logic, exit_code case is a no-op — always passes if we reach it - const result = run({ type: 'exit_code', value: '1' }, 'output'); - expect(result.passed).toBe(true); - }); - }); - - // 2. output_contains — case-sensitive substring match - describe('output_contains', () => { - it('should pass when output contains the token', () => { - const result = run( - { type: 'output_contains', value: 'BUILD_SUCCESS' }, - 'Starting build...\nBUILD_SUCCESS\nDone.' - ); - expect(result.passed).toBe(true); - expect(result.completionReason).toBe('completed_verified'); - }); - - it('should fail when output does not contain the token', () => { - expect(() => run({ type: 'output_contains', value: 'BUILD_SUCCESS' }, 'build failed')).toThrow( - WorkflowCompletionError - ); - }); - - it('should be case-sensitive', () => { - expect(() => run({ type: 'output_contains', value: 'BUILD_SUCCESS' }, 'build_success')).toThrow( - WorkflowCompletionError - ); - }); - - it('should return failure result instead of throwing when allowFailure is set', () => { - const result = run({ type: 'output_contains', value: 'MISSING' }, 'no match here', 'test-step', { - allowFailure: true, - }); - expect(result.passed).toBe(false); - expect(result.completionReason).toBe('failed_verification'); - expect(result.error).toContain('MISSING'); - }); - }); - - // 3. file_exists — checks file presence at path - describe('file_exists', () => { - let tmpDir: string; - let tmpFile: string; - - beforeEach(() => { - tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'verify-test-')); - tmpFile = path.join(tmpDir, 'artifact.txt'); - }); - - afterEach(() => { - fs.rmSync(tmpDir, { recursive: true, force: true }); - }); - - it('should pass when the file exists', () => { - fs.writeFileSync(tmpFile, 'content'); - // file_exists resolves relative to cwd; pass absolute path as value - const result = run({ type: 'file_exists', value: tmpFile }, ''); - expect(result.passed).toBe(true); - }); - - it('should fail when the file does not exist', () => { - expect(() => run({ type: 'file_exists', value: path.join(tmpDir, 'nope.txt') }, '')).toThrow( - WorkflowCompletionError - ); - }); - }); - - // 4. custom verification — shell command execution - describe('custom', () => { - it('should pass when shell command exits 0', () => { - const result = run({ type: 'custom', value: 'true' }, 'output'); - expect(result.passed).toBe(true); - expect(result.completionReason).toBe('completed_verified'); - }); - - it('should fail when shell command exits non-zero', () => { - expect(() => run({ type: 'custom', value: 'false' }, 'output')).toThrow(WorkflowCompletionError); - }); - - it('should return failure with allowFailure', () => { - const result = run({ type: 'custom', value: 'false' }, 'output', 'test-step', { - allowFailure: true, - }); - expect(result.passed).toBe(false); - expect(result.completionReason).toBe('failed_verification'); - }); - - it('should preserve legacy no-op behavior when no command is provided', () => { - const result = run({ type: 'custom', value: '' }, 'output'); - expect(result).toEqual({ passed: false }); - }); - - it('should include command output in the failure message', () => { - const result = run( - { type: 'custom', value: 'printf "compile failed" >&2; exit 1' }, - 'output', - 'test-step', - { - allowFailure: true, - } - ); - expect(result.error).toContain('custom check "printf "compile failed" >&2; exit 1" failed'); - expect(result.error).toContain('compile failed'); - }); - }); - - describe('execCustomVerification', () => { - it('should return passed true for exit-0 command', () => { - expect(execCustomVerification('true', process.cwd())).toEqual({ passed: true, output: '' }); - }); - - it('should return passed false for exit-1 command', () => { - const result = execCustomVerification('false', process.cwd()); - expect(result.passed).toBe(false); - expect(result.output.length).toBeGreaterThanOrEqual(0); - }); - - it('should capture stdout from command', () => { - const result = execCustomVerification('echo hello', process.cwd()); - expect(result.passed).toBe(true); - expect(result.output).toBe('hello'); - }); - - it('should capture stderr from a failing command', () => { - const result = execCustomVerification('printf "boom" >&2; exit 1', process.cwd()); - expect(result.passed).toBe(false); - expect(result.output).toContain('boom'); - }); - }); - - // 4b. checkCustom unit tests - describe('checkCustom', () => { - it('should return passed true for exit-0 command', () => { - expect(checkCustom('true', 'any')).toEqual({ passed: true, stdout: '' }); - }); - - it('should return passed false for exit-1 command', () => { - const result = checkCustom('false', 'any'); - expect(result.passed).toBe(false); - expect(result.error).toBeDefined(); - }); - - it('should capture stdout from command', () => { - const result = checkCustom('echo hello', 'any'); - expect(result.passed).toBe(true); - expect(result.stdout).toBe('hello'); - }); - - it('should handle regex matching', () => { - expect(checkCustom('regex:^foo', 'foobar')).toEqual({ passed: true }); - expect(checkCustom('regex:^foo', 'barfoo').passed).toBe(false); - }); - - it('should handle invalid regex gracefully', () => { - const result = checkCustom('regex:[', 'any'); - expect(result.passed).toBe(false); - expect(result.error).toContain('invalid regex'); - }); - }); - - // 5. Invalid/unknown verification type — falls through gracefully - describe('unknown type', () => { - it('should fall through and pass for unknown verification types', () => { - const result = run({ type: 'nonexistent' as VerificationCheck['type'], value: 'x' }, 'output'); - // falls through the switch with no match, reaches success path - expect(result.passed).toBe(true); - }); - }); - - // 6. completionMarkerFound option - describe('completionMarkerFound option', () => { - it('should log legacy marker message when completionMarkerFound is false', () => { - const result = run({ type: 'exit_code', value: '0' }, 'output', 'my-step', { - completionMarkerFound: false, - }); - expect(result.passed).toBe(true); - expect(noopSideEffects.log).toHaveBeenCalledWith( - expect.stringContaining('without legacy STEP_COMPLETE marker') - ); - }); - }); - - // 7. stripInjectedTaskEcho - describe('stripInjectedTaskEcho', () => { - it('should return output unchanged when no injectedTaskText', () => { - expect(stripInjectedTaskEcho('hello world')).toBe('hello world'); - expect(stripInjectedTaskEcho('hello world', undefined)).toBe('hello world'); - }); - - it('should strip the injected task text from output', () => { - const task = 'Please run the build'; - const output = 'Starting...\nPlease run the build\nBUILD_SUCCESS'; - expect(stripInjectedTaskEcho(output, task)).toBe('Starting...\n\nBUILD_SUCCESS'); - }); - - it('should handle CRLF normalization', () => { - const task = 'Run task\r\nwith newlines'; - const output = 'prefix Run task\nwith newlines suffix'; - expect(stripInjectedTaskEcho(output, task)).toBe('prefix suffix'); - }); - - it('should handle LF to CRLF normalization', () => { - const task = 'Run task\nwith newlines'; - const output = 'prefix Run task\r\nwith newlines suffix'; - expect(stripInjectedTaskEcho(output, task)).toBe('prefix suffix'); - }); - - it('should return output unchanged when task text is not found', () => { - expect(stripInjectedTaskEcho('output text', 'not present')).toBe('output text'); - }); - - it('should handle empty injected task text', () => { - expect(stripInjectedTaskEcho('output', '')).toBe('output'); - }); - }); - - // 8. checkOutputContains with injectedTaskText - describe('checkOutputContains with injectedTaskText', () => { - it('should not match token that only appears in injected task echo', () => { - const task = 'Verify BUILD_SUCCESS appears'; - const output = 'Verify BUILD_SUCCESS appears\nDone.'; - expect(checkOutputContains(output, 'BUILD_SUCCESS', task)).toBe(false); - }); - - it('should match token that appears outside injected task echo', () => { - const task = 'Run the build'; - const output = 'Run the build\nBUILD_SUCCESS'; - expect(checkOutputContains(output, 'BUILD_SUCCESS', task)).toBe(true); - }); - - it('should return false for empty token', () => { - expect(checkOutputContains('anything', '', undefined)).toBe(false); - }); - }); - - // 9. checkFileExists path traversal protection - describe('checkFileExists path traversal', () => { - let tmpDir: string; - - beforeEach(() => { - tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'verify-traversal-')); - }); - - afterEach(() => { - fs.rmSync(tmpDir, { recursive: true, force: true }); - }); - - it('should reject path traversal with ../', () => { - expect(checkFileExists('../../etc/passwd', tmpDir)).toBe(false); - }); - - it('should reject relative path with .. that resolves outside cwd', () => { - expect(checkFileExists('../../../etc/passwd', tmpDir)).toBe(false); - }); - - it('should allow files within cwd', () => { - const file = path.join(tmpDir, 'ok.txt'); - fs.writeFileSync(file, 'ok'); - expect(checkFileExists('ok.txt', tmpDir)).toBe(true); - }); - }); - - describe('pr_url verification', () => { - it('passes when a github PR URL appears in the worker output', () => { - const result = run( - { type: 'pr_url', value: '' }, - 'shipped: https://github.com/AgentWorkforce/cloud/pull/606 ready for review' - ); - expect(result.passed).toBe(true); - expect(result.completionReason).toBe('completed_verified'); - }); - - it('fails with a WorkflowCompletionError when no PR URL is present', () => { - expect(() => - run( - { type: 'pr_url', value: '' }, - 'All tests pass and the build is clean.\nfiles modified: foo.ts, bar.ts' - ) - ).toThrow(WorkflowCompletionError); - }); - - it('rejects PR URLs for a different repository when a qualifier is provided', () => { - expect(() => - run( - { type: 'pr_url', value: 'AgentWorkforce/relaycast' }, - 'Migration done: https://github.com/AgentWorkforce/cloud/pull/606' - ) - ).toThrow(WorkflowCompletionError); - }); - - it('accepts a PR URL whose repo matches the qualifier case-insensitively', () => { - const result = run( - { type: 'pr_url', value: 'agentworkforce/relaycast' }, - 'See https://github.com/AgentWorkforce/relaycast/pull/128 for the SDK change.' - ); - expect(result.passed).toBe(true); - }); - }); - - describe('findPrUrl', () => { - it('returns the first matching URL when no qualifier is given', () => { - const url = findPrUrl( - 'first https://github.com/foo/bar/pull/1 second https://github.com/foo/bar/pull/2' - ); - expect(url).toBe('https://github.com/foo/bar/pull/1'); - }); - - it('filters by repository qualifier', () => { - const url = findPrUrl( - 'wrong https://github.com/foo/bar/pull/1 right https://github.com/baz/qux/pull/9', - 'baz/qux' - ); - expect(url).toBe('https://github.com/baz/qux/pull/9'); - }); - - it('returns null when no PR URL is present', () => { - expect(findPrUrl('OWNER_DECISION: COMPLETE\nfiles modified: foo.ts')).toBeNull(); - }); - - it('ignores PR URLs echoed inside the injected task text', () => { - const injected = 'Reference: https://github.com/foo/bar/pull/42'; - const output = injected + '\nWorker said: tests pass, no PR opened, all good'; - expect(findPrUrl(output, undefined, injected)).toBeNull(); - }); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts b/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts deleted file mode 100644 index 276027387..000000000 --- a/packages/sdk/src/workflows/__tests__/workflow-reliability-contract.test.ts +++ /dev/null @@ -1,637 +0,0 @@ -import { afterEach, describe, expect, it, vi } from 'vitest'; -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import { workflow } from '../builder.js'; -import { WorkflowRunner, type WorkflowDb } from '../runner.js'; -import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../types.js'; - -afterEach(() => { - vi.unstubAllGlobals(); -}); - -function makeDb(): WorkflowDb { - const runs = new Map(); - const steps = new Map(); - - return { - insertRun: vi.fn(async (run: WorkflowRunRow) => { - runs.set(run.id, { ...run }); - }), - updateRun: vi.fn(async (id: string, patch: Partial) => { - const existing = runs.get(id); - if (existing) runs.set(id, { ...existing, ...patch }); - }), - getRun: vi.fn(async (id: string) => { - const run = runs.get(id); - return run ? { ...run } : null; - }), - insertStep: vi.fn(async (step: WorkflowStepRow) => { - steps.set(step.id, { ...step }); - }), - updateStep: vi.fn(async (id: string, patch: Partial) => { - const existing = steps.get(id); - if (existing) steps.set(id, { ...existing, ...patch }); - }), - getStepsByRunId: vi.fn(async (runId: string) => - [...steps.values()].filter((step) => step.runId === runId).map((step) => ({ ...step })) - ), - }; -} - -function baseConfig(overrides: Partial = {}): RelayYamlConfig { - return { - version: '1', - name: 'workflow-reliability-contract', - swarm: { pattern: 'dag' }, - agents: [{ name: 'fixer', cli: 'claude', role: 'implementation engineer', interactive: false }], - workflows: [ - { - name: 'default', - steps: [ - { - name: 'verify', - type: 'deterministic', - command: 'verify', - captureOutput: true, - }, - ], - }, - ], - trajectories: false, - ...overrides, - }; -} - -describe('workflow reliability contract', () => { - it('makes SDK builder workflows repairable by default', () => { - const config = workflow('default-reliable') - .agent('fixer', { cli: 'claude', role: 'implementation engineer' }) - .step('verify', { type: 'deterministic', command: 'npm test' }) - .toConfig(); - - expect(config.errorHandling).toMatchObject({ - strategy: 'retry', - maxRetries: 2, - retryDelayMs: 1000, - repairRetries: 2, - }); - }); - - it('offers reliable and repairable presets for workflow authors', () => { - const reliable = workflow('reliable') - .agent('fixer', { cli: 'claude', role: 'implementation engineer' }) - .step('verify', { type: 'deterministic', command: 'npm test' }) - .reliable({ repairAgent: 'fixer', repairRetries: 3 }) - .toConfig(); - const repairable = workflow('repairable') - .agent('fixer', { cli: 'claude', role: 'implementation engineer' }) - .step('verify', { type: 'deterministic', command: 'npm test' }) - .repairable({ maxRetries: 4 }) - .toConfig(); - - expect(reliable.errorHandling).toMatchObject({ - strategy: 'retry', - maxRetries: 3, - repairAgent: 'fixer', - repairRetries: 3, - }); - expect(repairable.errorHandling).toMatchObject({ - strategy: 'retry', - maxRetries: 4, - repairRetries: 4, - }); - }); - - it('applies repair-aware defaults to raw runner configs with agents', async () => { - const executeDeterministicStep = vi - .fn() - .mockResolvedValueOnce({ output: 'missing artifact', exitCode: 1 }) - .mockResolvedValueOnce({ output: 'artifact exists', exitCode: 0 }); - const executeAgentStep = vi.fn(async () => 'created artifact'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute(baseConfig(), 'default'); - - expect(run.status, run.error).toBe('completed'); - expect(executeAgentStep).toHaveBeenCalledTimes(1); - expect(executeDeterministicStep).toHaveBeenCalledTimes(2); - }); - - it('routes repairable deterministic failures through a repair agent before retrying', async () => { - const executeDeterministicStep = vi - .fn() - .mockResolvedValueOnce({ output: 'missing generated artifact', exitCode: 1 }) - .mockResolvedValueOnce({ output: 'artifact exists', exitCode: 0 }); - const executeAgentStep = vi.fn(async () => 'created generated artifact'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - errorHandling: { strategy: 'retry', repairRetries: 1, retryDelayMs: 1, repairAgent: 'fixer' }, - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(executeAgentStep).toHaveBeenCalledTimes(1); - expect((executeAgentStep as any).mock.calls[0][2]).toContain('A deterministic workflow gate failed'); - expect(executeDeterministicStep).toHaveBeenCalledTimes(2); - }); - - it('still retries the deterministic gate when the repair agent attempt throws', async () => { - const executeDeterministicStep = vi - .fn() - .mockResolvedValueOnce({ output: 'transient failure', exitCode: 1 }) - .mockResolvedValueOnce({ output: 'passed after retry', exitCode: 0 }); - const executeAgentStep = vi.fn(async () => { - throw new Error('repair model unavailable'); - }); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - errorHandling: { strategy: 'retry', repairRetries: 1, retryDelayMs: 1, repairAgent: 'fixer' }, - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(executeAgentStep).toHaveBeenCalledTimes(1); - expect(executeDeterministicStep).toHaveBeenCalledTimes(2); - }); - - it('fails only after the deterministic repair retry budget is exhausted', async () => { - const executeDeterministicStep = vi.fn(async () => ({ output: 'still broken', exitCode: 1 })); - const executeAgentStep = vi.fn(async () => 'attempted repair'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - errorHandling: { strategy: 'retry', repairRetries: 2, retryDelayMs: 1, repairAgent: 'fixer' }, - }), - 'default' - ); - - expect(run.status).toBe('failed'); - expect(run.error).toContain('verify'); - expect(executeAgentStep).toHaveBeenCalledTimes(2); - expect(executeDeterministicStep).toHaveBeenCalledTimes(3); - }); - - it('keeps soft deterministic checks non-terminal so a later agent step can fix them', async () => { - const executeDeterministicStep = vi.fn(async () => ({ output: 'typecheck failed', exitCode: 1 })); - const executeAgentStep = vi.fn(async () => 'fixed typecheck'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - workflows: [ - { - name: 'default', - steps: [ - { - name: 'soft-validation', - type: 'deterministic', - command: 'npm run typecheck', - captureOutput: true, - failOnError: false, - }, - { - name: 'fix-validation', - agent: 'fixer', - task: 'Fix validation using {{steps.soft-validation.output}}', - dependsOn: ['soft-validation'], - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(executeAgentStep).toHaveBeenCalledTimes(1); - expect((executeAgentStep as any).mock.calls[0][2]).toContain('typecheck failed'); - }); - - it('treats final hard validation as repairable before terminal failure', async () => { - const executeDeterministicStep = vi - .fn() - .mockResolvedValueOnce({ output: 'final typecheck failed', exitCode: 1 }) - .mockResolvedValueOnce({ output: 'final validation passed', exitCode: 0 }); - const executeAgentStep = vi.fn(async () => 'fixed final validation'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - errorHandling: { strategy: 'retry', repairRetries: 1, retryDelayMs: 1, repairAgent: 'fixer' }, - workflows: [ - { - name: 'default', - steps: [ - { - name: 'final-hard-validation', - type: 'deterministic', - command: 'npm run typecheck && npm test', - captureOutput: true, - failOnError: true, - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(executeAgentStep).toHaveBeenCalledTimes(1); - expect((executeAgentStep as any).mock.calls[0][2]).toContain('final-hard-validation'); - expect(executeDeterministicStep).toHaveBeenCalledTimes(2); - }); - - it('keeps sibling branches independent when one branch captures a soft failure for repair', async () => { - const executeDeterministicStep = vi.fn(async (_step, command: string) => { - if (command === 'branch-a-soft-check') return { output: 'branch A needs repair', exitCode: 1 }; - return { output: `${command} ok`, exitCode: 0 }; - }); - const executeAgentStep = vi.fn(async () => 'merged branch evidence and fixed branch A'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - swarm: { pattern: 'fan-out' }, - workflows: [ - { - name: 'default', - steps: [ - { - name: 'branch-a-validation', - type: 'deterministic', - command: 'branch-a-soft-check', - captureOutput: true, - failOnError: false, - }, - { - name: 'branch-b-validation', - type: 'deterministic', - command: 'branch-b-check', - captureOutput: true, - failOnError: true, - }, - { - name: 'merge-and-fix', - agent: 'fixer', - task: 'Use {{steps.branch-a-validation.output}} and {{steps.branch-b-validation.output}}.', - dependsOn: ['branch-a-validation', 'branch-b-validation'], - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(executeDeterministicStep).toHaveBeenCalledTimes(2); - expect(executeAgentStep).toHaveBeenCalledTimes(1); - expect((executeAgentStep as any).mock.calls[0][2]).toContain('branch A needs repair'); - expect((executeAgentStep as any).mock.calls[0][2]).toContain('branch-b-check ok'); - }); - - it('uses the best available workflow agent when no explicit repairAgent is configured', async () => { - const executeDeterministicStep = vi - .fn() - .mockResolvedValueOnce({ output: 'needs repair', exitCode: 1 }) - .mockResolvedValueOnce({ output: 'fixed', exitCode: 0 }); - const executeAgentStep = vi.fn(async () => 'fixed by fallback agent'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - errorHandling: { strategy: 'retry', repairRetries: 1, retryDelayMs: 1 }, - agents: [ - { name: 'reviewer', cli: 'claude', role: 'reviewer' }, - { name: 'implementer', cli: 'claude', role: 'implementation engineer', interactive: false }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect((executeAgentStep as any).mock.calls[0][1]).toMatchObject({ name: 'implementer' }); - }); - - it('falls back to a suitable workflow agent when the configured repairAgent is invalid', async () => { - const executeDeterministicStep = vi - .fn() - .mockResolvedValueOnce({ output: 'needs repair', exitCode: 1 }) - .mockResolvedValueOnce({ output: 'fixed', exitCode: 0 }); - const executeAgentStep = vi.fn(async () => 'fixed by fallback agent'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - errorHandling: { - strategy: 'retry', - repairRetries: 1, - retryDelayMs: 1, - repairAgent: 'missing-repair-agent', - }, - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect((executeAgentStep as any).mock.calls[0][1]).toMatchObject({ name: 'fixer' }); - }); - - it('preserves cached step output when resuming from a later repair step', async () => { - const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-reliability-start-from-')); - const previousRunId = 'previous-run-with-soft-validation'; - const outputDir = path.join(tmpDir, '.agent-relay', 'step-outputs', previousRunId); - mkdirSync(outputDir, { recursive: true }); - writeFileSync(path.join(outputDir, 'soft-validation.md'), 'cached typecheck failure'); - - const executeAgentStep = vi.fn(async () => 'fixed cached validation failure'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: tmpDir, - executor: { executeAgentStep }, - }); - - try { - const run = await runner.execute( - baseConfig({ - workflows: [ - { - name: 'default', - steps: [ - { - name: 'soft-validation', - type: 'deterministic', - command: 'npm run typecheck', - captureOutput: true, - failOnError: false, - }, - { - name: 'fix-validation', - agent: 'fixer', - task: 'Fix this prior output: {{steps.soft-validation.output}}', - dependsOn: ['soft-validation'], - }, - ], - }, - ], - }), - 'default', - undefined, - { startFrom: 'fix-validation', previousRunId } - ); - - expect(run.status, run.error).toBe('completed'); - expect(executeAgentStep).toHaveBeenCalledTimes(1); - expect((executeAgentStep as any).mock.calls[0][2]).toContain('cached typecheck failure'); - } finally { - rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('repairs malformed agent artifacts before retrying the agent step', async () => { - const executeAgentStep = vi.fn(async (step) => { - if (step.name.includes('-repair-')) return 'patched artifact instructions'; - if ( - (executeAgentStep as any).mock.calls.filter(([s]: any[]) => s.name === 'write-artifact').length === 1 - ) { - return 'plain prose without required metadata'; - } - return 'artifact complete\nRICKY_MASTER_CHILD_RUN_VERIFIED'; - }); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - workflows: [ - { - name: 'default', - steps: [ - { - name: 'write-artifact', - agent: 'fixer', - task: 'Write a structured workflow artifact.', - verification: { - type: 'output_contains', - value: 'RICKY_MASTER_CHILD_RUN_VERIFIED', - }, - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(executeAgentStep).toHaveBeenCalledTimes(3); - expect((executeAgentStep as any).mock.calls[1][0]).toMatchObject({ name: 'write-artifact-repair-1' }); - expect((executeAgentStep as any).mock.calls[1][2]).toContain('invalid artifact'); - }); - - it('repairs child INVALID_ARTIFACT failures instead of stopping the master at attempt one', async () => { - const executeAgentStep = vi.fn(async (step) => { - if (step.name.includes('-repair-')) return 'repaired child workflow artifact'; - const childAttempts = (executeAgentStep as any).mock.calls.filter( - ([s]: any[]) => s.name === 'run-update-config-2' - ).length; - if (childAttempts === 1) { - return 'Execution: blocked — INVALID_ARTIFACT at final-hard-validation'; - } - return 'Execution: success — run child-fixed\nRICKY_MASTER_CHILD_RUN_VERIFIED'; - }); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - workflows: [ - { - name: 'default', - steps: [ - { - name: 'run-update-config-2', - agent: 'fixer', - task: 'Run the child workflow and return structured evidence.', - verification: { - type: 'output_contains', - value: 'RICKY_MASTER_CHILD_RUN_VERIFIED', - }, - }, - { - name: 'final-signoff', - type: 'deterministic', - command: 'true', - dependsOn: ['run-update-config-2'], - }, - ], - }, - ], - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(executeAgentStep).toHaveBeenCalledTimes(3); - expect((executeAgentStep as any).mock.calls[1][2]).toContain('INVALID_ARTIFACT'); - }); - - it('keeps retrying the failed gate when a repair agent returns an unusable fix', async () => { - const executeDeterministicStep = vi - .fn() - .mockResolvedValueOnce({ output: 'INVALID_ARTIFACT', exitCode: 1 }) - .mockResolvedValueOnce({ output: 'still INVALID_ARTIFACT', exitCode: 1 }) - .mockResolvedValueOnce({ output: 'artifact valid', exitCode: 0 }); - const executeAgentStep = vi - .fn() - .mockResolvedValueOnce('malformed repair response without fenced artifact') - .mockResolvedValueOnce('valid repair response with metadata'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute( - baseConfig({ - errorHandling: { strategy: 'retry', repairRetries: 2, retryDelayMs: 1, repairAgent: 'fixer' }, - }), - 'default' - ); - - expect(run.status, run.error).toBe('completed'); - expect(executeAgentStep).toHaveBeenCalledTimes(2); - expect(executeDeterministicStep).toHaveBeenCalledTimes(3); - }); - - it('runs supervised api owners without spawning an interactive owner process', async () => { - const fetch = vi.fn(async () => { - return new Response( - JSON.stringify({ - content: [{ type: 'text', text: 'OWNER_DECISION: COMPLETE\nReason: worker output verified' }], - model: 'claude-sonnet-4-20250514', - }), - { status: 200, headers: { 'content-type': 'application/json' } } - ); - }); - vi.stubGlobal('fetch', fetch); - - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - envSecrets: { ANTHROPIC_API_KEY: 'test-api-key' }, - }); - const spawnAndWait = vi.fn(async (agent: any, _step: any, _timeoutMs: any, options: any) => { - options?.onSpawned?.({ actualName: agent.name, agent: { release: async () => undefined } }); - if (agent.name === 'worker') { - return { output: 'DONE', exitCode: 0, promptTaskText: 'worker task' }; - } - throw new Error('api owner should not use spawnAndWait'); - }); - (runner as any).spawnAndWait = spawnAndWait; - - const result = await (runner as any).executeSupervisedAgentStep( - { - name: 'supervised-api-owner', - agent: 'worker', - task: 'produce done', - verification: { type: 'output_contains', value: 'DONE' }, - }, - { - specialist: { name: 'worker', cli: 'claude', role: 'worker' }, - owner: { name: 'owner', cli: 'api', role: 'owner' }, - }, - 'produce done' - ); - - expect(result).toMatchObject({ - specialistOutput: 'DONE', - completionReason: 'completed_by_owner_decision', - }); - expect(fetch).toHaveBeenCalledTimes(1); - expect(spawnAndWait).toHaveBeenCalledTimes(1); - }); - - it('does not run repair agents for fail-fast workflows even when agents are present', async () => { - const executeDeterministicStep = vi.fn(async () => ({ output: 'hard failure', exitCode: 1 })); - const executeAgentStep = vi.fn(async () => 'unexpected repair'); - const runner = new WorkflowRunner({ - db: makeDb(), - workspaceId: 'ws-test', - cwd: process.cwd(), - executor: { executeDeterministicStep, executeAgentStep }, - }); - - const run = await runner.execute(baseConfig({ errorHandling: { strategy: 'fail-fast' } }), 'default'); - - expect(run.status).toBe('failed'); - expect(executeAgentStep).not.toHaveBeenCalled(); - expect(executeDeterministicStep).toHaveBeenCalledTimes(1); - }); -}); diff --git a/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts b/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts deleted file mode 100644 index a9ef17163..000000000 --- a/packages/sdk/src/workflows/__tests__/workflow-reliability-e2e.test.ts +++ /dev/null @@ -1,248 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { execSync } from 'node:child_process'; -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import { WorkflowRunner } from '../runner.js'; -import type { AgentDefinition, RelayYamlConfig, WorkflowStep } from '../types.js'; - -const CHECK_MARKER = - "node -e \"const fs=require('fs');const v=fs.readFileSync('marker.txt','utf8').trim();if(v!=='fixed'){console.log('marker='+v);process.exit(1)}console.log('ok')\""; - -function baseConfig( - name: string, - pattern: RelayYamlConfig['swarm']['pattern'], - steps: NonNullable[number]['steps'] -): RelayYamlConfig { - return { - version: '1', - name, - swarm: { pattern }, - agents: [ - { - name: 'fixer', - cli: 'claude', - role: 'implementation engineer', - interactive: false, - }, - ], - workflows: [{ name: 'default', steps }], - trajectories: false, - }; -} - -function makeWorkspace(): string { - const cwd = mkdtempSync(path.join(os.tmpdir(), 'relay-workflow-reliability-e2e-')); - writeFileSync(path.join(cwd, 'marker.txt'), 'broken\n'); - return cwd; -} - -async function runReliabilityWorkflow(config: RelayYamlConfig, cwd = makeWorkspace()) { - const callsByStep = new Map(); - const executeAgentStep = async ( - step: WorkflowStep, - _agent: AgentDefinition, - resolvedTask: string - ): Promise => { - const count = (callsByStep.get(step.name) ?? 0) + 1; - callsByStep.set(step.name, count); - - if (step.name.includes('-repair-')) { - writeFileSync(path.join(step.cwd ?? cwd, 'marker.txt'), 'fixed\n'); - return `repair complete for ${step.name}`; - } - - if (/invalid[- ]artifact/i.test(step.name) && count === 1) { - return 'Execution: blocked — INVALID_ARTIFACT at final-hard-validation'; - } - - if (/child/i.test(resolvedTask) && count === 1) { - return 'Execution: blocked — INVALID_ARTIFACT at final-hard-validation'; - } - - return `Execution: success\nRICKY_MASTER_CHILD_RUN_VERIFIED\n${resolvedTask.slice(0, 80)}`; - }; - - const runner = new WorkflowRunner({ - workspaceId: 'ws-e2e', - cwd, - executor: { executeAgentStep }, - }); - - try { - const run = await runner.execute(config, 'default'); - return { run, callsByStep }; - } finally { - rmSync(cwd, { recursive: true, force: true }); - } -} - -describe('workflow reliability e2e shapes', () => { - it('repairs a failing deterministic gate in a pipeline workflow', async () => { - const { run, callsByStep } = await runReliabilityWorkflow( - baseConfig('reliable-pipeline', 'pipeline', [ - { name: 'prepare', agent: 'fixer', task: 'Prepare inputs.' }, - { - name: 'verify', - type: 'deterministic', - command: CHECK_MARKER, - dependsOn: ['prepare'], - captureOutput: true, - }, - ]) - ); - - expect(run.status, run.error).toBe('completed'); - expect(callsByStep.has('verify-repair-1')).toBe(true); - }); - - it('repairs a failing deterministic gate in a DAG workflow', async () => { - const { run, callsByStep } = await runReliabilityWorkflow( - baseConfig('reliable-dag', 'dag', [ - { name: 'backend', agent: 'fixer', task: 'Prepare backend evidence.' }, - { name: 'frontend', agent: 'fixer', task: 'Prepare frontend evidence.' }, - { - name: 'integrated-validation', - type: 'deterministic', - command: CHECK_MARKER, - dependsOn: ['backend', 'frontend'], - captureOutput: true, - }, - ]) - ); - - expect(run.status, run.error).toBe('completed'); - expect(callsByStep.has('integrated-validation-repair-1')).toBe(true); - }); - - it('keeps fan-out siblings isolated while repairing the failed branch gate', async () => { - const { run, callsByStep } = await runReliabilityWorkflow( - baseConfig('reliable-fan-out', 'fan-out', [ - { - name: 'branch-a-validation', - type: 'deterministic', - command: CHECK_MARKER, - captureOutput: true, - }, - { - name: 'branch-b-validation', - type: 'deterministic', - command: 'node -e "console.log(\'branch-b-ok\')"', - captureOutput: true, - }, - { - name: 'merge', - agent: 'fixer', - task: 'Merge {{steps.branch-a-validation.output}} and {{steps.branch-b-validation.output}}.', - dependsOn: ['branch-a-validation', 'branch-b-validation'], - }, - ]) - ); - - expect(run.status, run.error).toBe('completed'); - expect(callsByStep.has('branch-a-validation-repair-1')).toBe(true); - expect(callsByStep.has('branch-b-validation-repair-1')).toBe(false); - }); - - it('repairs child workflow INVALID_ARTIFACT output before master final validation', async () => { - const { run, callsByStep } = await runReliabilityWorkflow( - baseConfig('reliable-master-child', 'hierarchical', [ - { - name: 'run-child-workflow', - agent: 'fixer', - task: 'Run child workflow and return RICKY_MASTER_CHILD_RUN_VERIFIED.', - verification: { - type: 'output_contains', - value: 'RICKY_MASTER_CHILD_RUN_VERIFIED', - }, - }, - { - name: 'master-final-validation', - type: 'deterministic', - command: CHECK_MARKER, - dependsOn: ['run-child-workflow'], - captureOutput: true, - }, - ]) - ); - - expect(run.status, run.error).toBe('completed'); - expect(callsByStep.has('run-child-workflow-repair-1')).toBe(true); - expect(callsByStep.has('master-final-validation-repair-1')).toBe(false); - }); - - it('repairs a deterministic-only workflow with a configured repair agent', async () => { - const { run, callsByStep } = await runReliabilityWorkflow( - baseConfig('reliable-deterministic-only', 'pipeline', [ - { - name: 'verify-only', - type: 'deterministic', - command: CHECK_MARKER, - captureOutput: true, - }, - ]) - ); - - expect(run.status, run.error).toBe('completed'); - expect(callsByStep.has('verify-only-repair-1')).toBe(true); - }); - - it('repairs agent artifact retries and then passes deterministic validation', async () => { - const { run, callsByStep } = await runReliabilityWorkflow( - baseConfig('reliable-agent-plus-gates', 'pipeline', [ - { - name: 'invalid-artifact-author', - agent: 'fixer', - task: 'Produce structured artifact metadata.', - verification: { - type: 'output_contains', - value: 'RICKY_MASTER_CHILD_RUN_VERIFIED', - }, - }, - { - name: 'verify-artifact', - type: 'deterministic', - command: CHECK_MARKER, - dependsOn: ['invalid-artifact-author'], - captureOutput: true, - }, - ]) - ); - - expect(run.status, run.error).toBe('completed'); - expect(callsByStep.has('invalid-artifact-author-repair-1')).toBe(true); - expect(callsByStep.has('verify-artifact-repair-1')).toBe(false); - }); - - it('repairs validation inside a git worktree-backed workflow', async () => { - const cwd = makeWorkspace(); - execSync('git init -q', { cwd }); - execSync('git config user.email test@example.com', { cwd }); - execSync('git config user.name "Relay Test"', { cwd }); - execSync('git add marker.txt && git commit -q -m init', { cwd }); - - const { run, callsByStep } = await runReliabilityWorkflow( - baseConfig('reliable-worktree', 'pipeline', [ - { - name: 'make-worktree', - type: 'worktree', - branch: 'reliability-worktree-test', - path: 'child-worktree', - }, - { - name: 'verify-in-worktree', - type: 'deterministic', - command: CHECK_MARKER, - cwd: 'child-worktree', - dependsOn: ['make-worktree'], - captureOutput: true, - }, - ]), - cwd - ); - - expect(run.status, run.error).toBe('completed'); - expect(callsByStep.has('verify-in-worktree-repair-1')).toBe(true); - }); -}); diff --git a/packages/sdk/src/workflows/api-executor.ts b/packages/sdk/src/workflows/api-executor.ts deleted file mode 100644 index 63dceb42f..000000000 --- a/packages/sdk/src/workflows/api-executor.ts +++ /dev/null @@ -1,158 +0,0 @@ -/** - * API Executor — calls LLM provider APIs directly via fetch(). - * Used when agent cli is 'api'. No sandbox, no CLI, no PTY. - */ - -type Provider = 'anthropic' | 'openai' | 'google'; - -function detectProvider(model: string): Provider { - if (model.startsWith('claude')) return 'anthropic'; - if (model.startsWith('gpt') || model.startsWith('o1') || model.startsWith('o3') || model.startsWith('o4')) - return 'openai'; - if (model.startsWith('gemini')) return 'google'; - return 'anthropic'; -} - -function getApiKey(provider: Provider, envSecrets?: Record): string { - const envMap: Record = { - anthropic: ['ANTHROPIC_API_KEY'], - openai: ['OPENAI_API_KEY'], - google: ['GOOGLE_API_KEY', 'GEMINI_API_KEY'], - }; - for (const key of envMap[provider]) { - const value = envSecrets?.[key] ?? process.env[key]; - if (value) return value; - } - throw new Error(`No API key for "${provider}". Set ${envMap[provider].join(' or ')}.`); -} - -interface ApiResponse { - content: string; - model: string; - usage?: { inputTokens: number; outputTokens: number }; -} - -async function callAnthropic( - apiKey: string, - model: string, - task: string, - maxTokens: number, - systemPrompt?: string -): Promise { - const res = await fetch('https://api.anthropic.com/v1/messages', { - method: 'POST', - headers: { 'content-type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' }, - body: JSON.stringify({ - model, - max_tokens: maxTokens, - ...(systemPrompt ? { system: systemPrompt } : {}), - messages: [{ role: 'user', content: task }], - }), - }); - if (!res.ok) throw new Error(`Anthropic API error (${res.status}): ${await res.text()}`); - const data = (await res.json()) as { - content: Array<{ type: string; text?: string }>; - model: string; - usage?: { input_tokens: number; output_tokens: number }; - }; - return { - content: data.content - .filter((c) => c.type === 'text') - .map((c) => c.text ?? '') - .join(''), - model: data.model, - usage: data.usage - ? { inputTokens: data.usage.input_tokens, outputTokens: data.usage.output_tokens } - : undefined, - }; -} - -async function callOpenAI( - apiKey: string, - model: string, - task: string, - maxTokens: number, - systemPrompt?: string -): Promise { - const messages: Array<{ role: string; content: string }> = []; - if (systemPrompt) messages.push({ role: 'system', content: systemPrompt }); - messages.push({ role: 'user', content: task }); - const res = await fetch('https://api.openai.com/v1/chat/completions', { - method: 'POST', - headers: { 'content-type': 'application/json', authorization: `Bearer ${apiKey}` }, - body: JSON.stringify({ model, max_tokens: maxTokens, messages }), - }); - if (!res.ok) throw new Error(`OpenAI API error (${res.status}): ${await res.text()}`); - const data = (await res.json()) as { - choices: Array<{ message: { content: string } }>; - model: string; - usage?: { prompt_tokens: number; completion_tokens: number }; - }; - return { - content: data.choices[0]?.message?.content ?? '', - model: data.model, - usage: data.usage - ? { inputTokens: data.usage.prompt_tokens, outputTokens: data.usage.completion_tokens } - : undefined, - }; -} - -async function callGoogle( - apiKey: string, - model: string, - task: string, - maxTokens: number, - systemPrompt?: string -): Promise { - const res = await fetch( - `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`, - { - method: 'POST', - headers: { 'content-type': 'application/json', 'x-goog-api-key': apiKey }, - body: JSON.stringify({ - ...(systemPrompt ? { systemInstruction: { parts: [{ text: systemPrompt }] } } : {}), - contents: [{ parts: [{ text: task }] }], - generationConfig: { maxOutputTokens: maxTokens }, - }), - } - ); - if (!res.ok) throw new Error(`Google API error (${res.status}): ${await res.text()}`); - const data = (await res.json()) as { - candidates: Array<{ content: { parts: Array<{ text: string }> } }>; - usageMetadata?: { promptTokenCount: number; candidatesTokenCount: number }; - }; - return { - content: data.candidates[0]?.content?.parts?.map((p) => p.text).join('') ?? '', - model, - usage: data.usageMetadata - ? { - inputTokens: data.usageMetadata.promptTokenCount, - outputTokens: data.usageMetadata.candidatesTokenCount, - } - : undefined, - }; -} - -const PROVIDER_CALLERS = { anthropic: callAnthropic, openai: callOpenAI, google: callGoogle } as const; - -export interface ApiExecutorOptions { - envSecrets?: Record; - defaultModel?: string; - defaultMaxTokens?: number; - skills?: string; -} - -export async function executeApiStep( - model: string, - task: string, - options: ApiExecutorOptions = {} -): Promise { - const resolvedModel = model || options.defaultModel || 'claude-sonnet-4-20250514'; - const maxTokens = options.defaultMaxTokens ?? 4096; - const provider = detectProvider(resolvedModel); - const apiKey = getApiKey(provider, options.envSecrets); - const response = await PROVIDER_CALLERS[provider](apiKey, resolvedModel, task, maxTokens, options.skills); - return response.content; -} - -export { detectProvider, getApiKey }; diff --git a/packages/sdk/src/workflows/barrier.ts b/packages/sdk/src/workflows/barrier.ts deleted file mode 100644 index eb9d764d8..000000000 --- a/packages/sdk/src/workflows/barrier.ts +++ /dev/null @@ -1,239 +0,0 @@ -/** - * Barrier Manager — synchronization barriers with all/any/majority semantics. - * - * Barriers gate downstream workflow steps until a set of upstream agents - * or steps have resolved. Supports three resolution modes: - * - * - **all** — every agent in `waitFor` must resolve (default) - * - **any** — at least one agent resolves - * - **majority** — more than half of `waitFor` must resolve - */ - -import { randomBytes } from 'node:crypto'; -import { EventEmitter } from 'node:events'; -import type { DbClient } from './coordinator.js'; - -// ── Types ─────────────────────────────────────────────────────────────────── - -export type BarrierMode = 'all' | 'any' | 'majority'; - -export interface BarrierDefinition { - name: string; - waitFor: string[]; - mode?: BarrierMode; - timeoutMs?: number; -} - -export interface BarrierRow { - id: string; - runId: string; - barrierName: string; - waitFor: string[]; - resolved: string[]; - isSatisfied: boolean; - timeoutMs: number | null; - createdAt: string; - updatedAt: string; -} - -export interface BarrierManagerEvents { - 'barrier:created': (barrier: BarrierRow) => void; - 'barrier:resolved': (barrierName: string, agent: string) => void; - 'barrier:satisfied': (barrier: BarrierRow) => void; - 'barrier:timeout': (barrier: BarrierRow) => void; -} - -// ── Manager ───────────────────────────────────────────────────────────────── - -export class BarrierManager extends EventEmitter { - private db: DbClient; - /** In-memory mode tracking (not persisted — set once at creation). */ - private modes = new Map(); - private timeoutTimers = new Map>(); - - constructor(db: DbClient) { - super(); - this.db = db; - } - - // ── Create ────────────────────────────────────────────────────────────── - - /** - * Create a barrier for a workflow run. - */ - async createBarrier(runId: string, definition: BarrierDefinition): Promise { - const id = `bar_${Date.now()}_${randomBytes(4).toString('hex')}`; - const now = new Date().toISOString(); - const mode = definition.mode ?? 'all'; - - const { rows } = await this.db.query( - `INSERT INTO workflow_barriers (id, run_id, barrier_name, wait_for, resolved, is_satisfied, timeout_ms, created_at, updated_at) - VALUES ($1, $2, $3, $4, '[]'::jsonb, FALSE, $5, $6, $6) - RETURNING *`, - [id, runId, definition.name, JSON.stringify(definition.waitFor), definition.timeoutMs ?? null, now] - ); - - const barrier = rows[0]; - const key = `${runId}:${definition.name}`; - this.modes.set(key, mode); - - if (definition.timeoutMs) { - this.scheduleTimeout(barrier, definition.timeoutMs); - } - - this.emit('barrier:created', barrier); - return barrier; - } - - /** - * Bulk-create barriers from a list of definitions (e.g. from coordination config). - */ - async createBarriers(runId: string, definitions: BarrierDefinition[]): Promise { - const results: BarrierRow[] = []; - for (const def of definitions) { - results.push(await this.createBarrier(runId, def)); - } - return results; - } - - // ── Resolve ───────────────────────────────────────────────────────────── - - /** - * Mark an agent/step as resolved for a barrier. Returns whether the - * barrier is now fully satisfied. - */ - async resolve( - runId: string, - barrierName: string, - agent: string - ): Promise<{ satisfied: boolean; barrier: BarrierRow }> { - const now = new Date().toISOString(); - - // Atomic: append agent to resolved array if not already present. - const { rows } = await this.db.query( - `UPDATE workflow_barriers - SET resolved = CASE - WHEN resolved @> $3::jsonb THEN resolved - ELSE resolved || $3::jsonb - END, - updated_at = $4 - WHERE run_id = $1 AND barrier_name = $2 AND is_satisfied = FALSE - RETURNING *`, - [runId, barrierName, JSON.stringify(agent), now] - ); - - if (rows.length === 0) { - // Barrier may already be satisfied or not exist. - const existing = await this.getBarrier(runId, barrierName); - if (!existing) throw new Error(`Barrier ${barrierName} not found for run ${runId}`); - return { satisfied: existing.isSatisfied, barrier: existing }; - } - - const barrier = rows[0]; - this.emit('barrier:resolved', barrierName, agent); - - const key = `${runId}:${barrierName}`; - const mode = this.modes.get(key) ?? 'all'; - - if (this.checkSatisfied(barrier, mode)) { - return this.markSatisfied(barrier); - } - - return { satisfied: false, barrier }; - } - - // ── Queries ───────────────────────────────────────────────────────────── - - async getBarrier(runId: string, barrierName: string): Promise { - const { rows } = await this.db.query( - `SELECT * FROM workflow_barriers WHERE run_id = $1 AND barrier_name = $2`, - [runId, barrierName] - ); - return rows[0] ?? null; - } - - async getBarriers(runId: string): Promise { - const { rows } = await this.db.query( - `SELECT * FROM workflow_barriers WHERE run_id = $1 ORDER BY created_at ASC`, - [runId] - ); - return rows; - } - - async getUnsatisfiedBarriers(runId: string): Promise { - const { rows } = await this.db.query( - `SELECT * FROM workflow_barriers WHERE run_id = $1 AND is_satisfied = FALSE ORDER BY created_at ASC`, - [runId] - ); - return rows; - } - - /** - * Check if a named barrier is satisfied (useful for gating downstream work). - */ - async isSatisfied(runId: string, barrierName: string): Promise { - const barrier = await this.getBarrier(runId, barrierName); - return barrier?.isSatisfied ?? false; - } - - // ── Cleanup ───────────────────────────────────────────────────────────── - - cleanup(): void { - for (const timer of this.timeoutTimers.values()) clearTimeout(timer); - this.timeoutTimers.clear(); - this.modes.clear(); - } - - // ── Private ───────────────────────────────────────────────────────────── - - private checkSatisfied(barrier: BarrierRow, mode: BarrierMode): boolean { - const waitFor: string[] = Array.isArray(barrier.waitFor) ? barrier.waitFor : []; - const resolved: string[] = Array.isArray(barrier.resolved) ? barrier.resolved : []; - - switch (mode) { - case 'all': - return waitFor.every((w) => resolved.includes(w)); - case 'any': - return resolved.length > 0; - case 'majority': - return resolved.length > waitFor.length / 2; - } - } - - private async markSatisfied(barrier: BarrierRow): Promise<{ satisfied: boolean; barrier: BarrierRow }> { - const now = new Date().toISOString(); - const { rows } = await this.db.query( - `UPDATE workflow_barriers SET is_satisfied = TRUE, updated_at = $2 - WHERE id = $1 - RETURNING *`, - [barrier.id, now] - ); - - const updated = rows[0]; - const key = `${barrier.runId}:${barrier.barrierName}`; - this.clearTimeout(key); - this.emit('barrier:satisfied', updated); - - return { satisfied: true, barrier: updated }; - } - - private scheduleTimeout(barrier: BarrierRow, timeoutMs: number): void { - const key = `${barrier.runId}:${barrier.barrierName}`; - const timer = setTimeout(async () => { - const current = await this.getBarrier(barrier.runId, barrier.barrierName); - if (current && !current.isSatisfied) { - this.emit('barrier:timeout', current); - } - }, timeoutMs); - timer.unref(); - this.timeoutTimers.set(key, timer); - } - - private clearTimeout(key: string): void { - const timer = this.timeoutTimers.get(key); - if (timer) { - globalThis.clearTimeout(timer); - this.timeoutTimers.delete(key); - } - } -} diff --git a/packages/sdk/src/workflows/budget-tracker.ts b/packages/sdk/src/workflows/budget-tracker.ts deleted file mode 100644 index f1ea8f76c..000000000 --- a/packages/sdk/src/workflows/budget-tracker.ts +++ /dev/null @@ -1,274 +0,0 @@ -export interface BudgetTrackerStepConfig { - stepName: string; - agentName: string; - maxTokens?: number; -} - -export interface BudgetTrackerOptions { - perAgent?: number; - perWorkflow?: number; - workflowBudget?: number; - steps?: BudgetTrackerStepConfig[]; -} - -export interface TokenUsage { - input: number; - output: number; - cacheRead: number; - total: number; -} - -export interface BudgetAvailability { - allowed: boolean; - reason?: string; -} - -export interface OverBudgetResult { - over: boolean; - reason?: string; -} - -export interface BudgetStatus { - agentLimitExceeded: boolean; - workflowBudgetExceeded: boolean; - workflowBudgetExhausted: boolean; -} - -export interface StepBudgetStatus { - used?: number; - limit?: number; - over: boolean; -} - -export interface WorkflowBudgetStatus { - used: number; - limit?: number; - exhausted: boolean; -} - -export interface RunSummaryBudgetData { - steps: Map; - workflow?: WorkflowBudgetStatus; -} - -function emptyUsage(): TokenUsage { - return { input: 0, output: 0, cacheRead: 0, total: 0 }; -} - -function toUsage(value: number | Partial>): TokenUsage { - if (typeof value === 'number') { - const input = Number.isFinite(value) ? Math.max(0, Math.round(value)) : 0; - return { input, output: 0, cacheRead: 0, total: input }; - } - - const input = Number.isFinite(value.input) ? Math.max(0, Math.round(value.input ?? 0)) : 0; - const output = Number.isFinite(value.output) ? Math.max(0, Math.round(value.output ?? 0)) : 0; - const cacheRead = Number.isFinite(value.cacheRead) ? Math.max(0, Math.round(value.cacheRead ?? 0)) : 0; - return { - input, - output, - cacheRead, - total: input + output, - }; -} - -function addUsage(left: TokenUsage, right: TokenUsage): TokenUsage { - const input = left.input + right.input; - const output = left.output + right.output; - const cacheRead = left.cacheRead + right.cacheRead; - return { - input, - output, - cacheRead, - total: input + output, - }; -} - -export class BudgetExceededError extends Error { - readonly stepName: string; - readonly budgetType: 'agent' | 'workflow'; - readonly limit: number; - readonly actual: number; - readonly used: number; - - constructor(stepName: string, budgetType: 'agent' | 'workflow', limit: number, actual: number) { - const qualifier = budgetType === 'workflow' ? 'workflow budget exhausted' : 'agent budget exceeded'; - super(`Step "${stepName}" cannot continue: ${qualifier} (${actual}/${limit})`); - this.name = 'BudgetExceededError'; - this.stepName = stepName; - this.budgetType = budgetType; - this.limit = limit; - this.actual = actual; - this.used = actual; - } -} - -export class BudgetTracker { - private readonly defaultAgentBudget?: number; - private readonly workflowBudget?: number; - private readonly stepLimits = new Map(); - private readonly stepUsage = new Map(); - private totalUsage: TokenUsage = emptyUsage(); - private workflowBudgetExhausted = false; - - constructor(options: BudgetTrackerOptions) { - this.defaultAgentBudget = options.perAgent; - this.workflowBudget = options.workflowBudget ?? options.perWorkflow; - - for (const step of options.steps ?? []) { - this.stepLimits.set(step.stepName, step.maxTokens); - } - } - - recordUsage(stepName: string, usage: number | Partial>): void { - const normalized = toUsage(usage); - const current = this.stepUsage.get(stepName) ?? emptyUsage(); - const next = addUsage(current, normalized); - this.stepUsage.set(stepName, next); - this.totalUsage = addUsage(this.totalUsage, normalized); - - if (this.workflowBudget !== undefined && this.totalUsage.total >= this.workflowBudget) { - this.workflowBudgetExhausted = true; - } - } - - getStepUsage(stepName: string): TokenUsage { - return this.stepUsage.get(stepName) ?? emptyUsage(); - } - - getTotalUsage(): TokenUsage { - return this.totalUsage; - } - - getRemainingBudget(): { agent?: number; workflow?: number } { - return { - agent: - this.defaultAgentBudget !== undefined - ? Math.max(0, this.defaultAgentBudget - this.totalUsage.total) - : undefined, - workflow: - this.workflowBudget !== undefined - ? Math.max(0, this.workflowBudget - this.totalUsage.total) - : undefined, - }; - } - - checkCanSpawn(stepName: string): BudgetAvailability { - if (this.workflowBudget !== undefined && this.totalUsage.total >= this.workflowBudget) { - return { - allowed: false, - reason: `Cannot spawn ${stepName}: workflow budget exceeded (${this.totalUsage.total}/${this.workflowBudget})`, - }; - } - - if (this.workflowBudget !== undefined) { - const remainingWorkflowBudget = this.workflowBudget - this.totalUsage.total; - const stepLimit = this.getStepLimit(stepName); - const minimumHeadroom = - stepLimit !== undefined - ? Math.min(stepLimit, this.workflowBudget) - : this.defaultAgentBudget !== undefined - ? Math.ceil(this.defaultAgentBudget * 0.1) - : Math.ceil(this.workflowBudget * 0.1); - - if (remainingWorkflowBudget <= minimumHeadroom) { - return { - allowed: false, - reason: - stepLimit !== undefined - ? `Cannot spawn ${stepName}: remaining workflow budget ${remainingWorkflowBudget} is below step budget ${stepLimit}` - : this.defaultAgentBudget !== undefined - ? `Cannot spawn ${stepName}: remaining workflow budget ${remainingWorkflowBudget} ` + - `is below 10% of per-agent budget ${this.defaultAgentBudget}` - : `Cannot spawn ${stepName}: remaining workflow budget ${remainingWorkflowBudget} ` + - `is below 10% headroom threshold for workflow budget ${this.workflowBudget}`, - }; - } - } - - return { allowed: true }; - } - - isOverBudget(stepName: string): OverBudgetResult { - const stepUsage = this.getStepUsage(stepName); - const stepLimit = this.getStepLimit(stepName); - if (stepLimit !== undefined && stepUsage.total > stepLimit) { - return { - over: true, - reason: `Step "${stepName}" exceeded per-agent budget (${stepUsage.total}/${stepLimit})`, - }; - } - - if (this.workflowBudget !== undefined && this.totalUsage.total > this.workflowBudget) { - return { - over: true, - reason: `Workflow exceeded total budget (${this.totalUsage.total}/${this.workflowBudget})`, - }; - } - - return { over: false }; - } - - getBudgetStatus(stepName: string): BudgetStatus { - const stepUsage = this.getStepUsage(stepName); - const stepLimit = this.getStepLimit(stepName); - return { - agentLimitExceeded: stepLimit !== undefined && stepUsage.total > stepLimit, - workflowBudgetExceeded: - this.workflowBudget !== undefined && this.totalUsage.total > this.workflowBudget, - workflowBudgetExhausted: - this.workflowBudget !== undefined && - (this.workflowBudgetExhausted || this.totalUsage.total >= this.workflowBudget), - }; - } - - getStepBudgetStatus(stepName: string): StepBudgetStatus | undefined { - const usage = this.stepUsage.get(stepName); - const hasExplicitLimit = this.stepLimits.has(stepName); - const limit = this.getStepLimit(stepName); - - if (!usage && !hasExplicitLimit && limit === undefined) { - return undefined; - } - - return { - used: usage?.total, - limit, - over: limit !== undefined && (usage?.total ?? 0) > limit, - }; - } - - getRunSummaryBudgetData(): RunSummaryBudgetData | undefined { - const steps = new Map(); - const stepNames = new Set([...this.stepLimits.keys(), ...this.stepUsage.keys()]); - - for (const stepName of stepNames) { - const status = this.getStepBudgetStatus(stepName); - if (status) { - steps.set(stepName, status); - } - } - - const workflow = - this.workflowBudget !== undefined || this.totalUsage.total > 0 - ? { - used: this.totalUsage.total, - limit: this.workflowBudget, - exhausted: - this.workflowBudget !== undefined && - (this.workflowBudgetExhausted || this.totalUsage.total >= this.workflowBudget), - } - : undefined; - - if (steps.size === 0 && !workflow) { - return undefined; - } - - return { steps, workflow }; - } - - private getStepLimit(stepName: string): number | undefined { - const limit = this.stepLimits.get(stepName); - return limit ?? this.defaultAgentBudget; - } -} diff --git a/packages/sdk/src/workflows/builder.ts b/packages/sdk/src/workflows/builder.ts deleted file mode 100644 index f2eb1f8f7..000000000 --- a/packages/sdk/src/workflows/builder.ts +++ /dev/null @@ -1,594 +0,0 @@ -import path from 'node:path'; -import { stringify as stringifyYaml } from 'yaml'; - -import type { AgentRelayOptions } from '../relay.js'; -import type { - AgentCli, - AgentDefinition, - AgentPreset, - Barrier, - CoordinationConfig, - DryRunReport, - ErrorHandlingConfig, - IdleNudgeConfig, - PathDefinition, - RelayYamlConfig, - StateConfig, - SwarmPattern, - TrajectoryConfig, - VerificationCheck, - WorkflowDefinition, - WorkflowExecuteOptions, - WorkflowRunRow, - WorkflowStep, -} from './types.js'; -import { JsonFileWorkflowDb } from './file-db.js'; -import { WorkflowRunner, type WorkflowEventListener } from './runner.js'; -import type { RunnerStepExecutor } from './types.js'; -import { formatDryRunReport } from './dry-run-format.js'; -import { createDefaultEventLogger, type LogLevel } from './default-logger.js'; -import { runInCloud, type CloudRunOptions } from './cloud-runner.js'; -import type { VariableContext } from './template-resolver.js'; - -// ── Option types for the builder API ──────────────────────────────────────── - -export interface AgentOptions { - cli: AgentCli; - role?: string; - task?: string; - channels?: string[]; - model?: string; - maxTokens?: number; - timeoutMs?: number; - retries?: number; - /** - * Seconds of silence on the agent's PTY before the runtime marks it idle and - * tears it down. Default: 30s. Set to `0` to disable idle detection entirely. - * - * When to override (per-agent): - * - You expect long quiet stretches by design — a long-running reviewer - * waiting for downstream verdicts, a grader watching a file that updates - * every few minutes, or a `@-mention` recipient whose triggering event - * may arrive >30s after spawn. Setting `0` (or a generous N) prevents - * the runtime from killing the agent before the awaited event arrives. - * - * When NOT to override: - * - One-shot worker steps. The default is right; idle-as-complete is what - * makes `OWNER_DECISION: COMPLETE` + clean exit fast. - * - * See the `writing-agent-relay-workflows` skill ("Idle detection beats - * 'wait for X' prompts") for the trade-offs around long-running interactive - * agents and the Per-turn interactive spawn alternative. - */ - idleThresholdSecs?: number; - /** When false, the agent runs as a non-interactive subprocess (no PTY, no relay messaging). - * Default: true. */ - interactive?: boolean; - /** Agent preset: 'lead' (interactive PTY), 'worker' | 'reviewer' | 'analyst' (non-interactive subprocess). */ - preset?: AgentPreset; - /** Skills to make available to the agent (for API-mode agents). */ - skills?: string; -} - -/** Options for agent steps (default). */ -export interface AgentStepOptions { - agent: string; - task: string; - cwd?: string; - dependsOn?: string[]; - verification?: VerificationCheck; - timeoutMs?: number; - retries?: number; -} - -/** Options for deterministic (shell command) steps. */ -export interface DeterministicStepOptions { - type: 'deterministic'; - command: string; - cwd?: string; - /** Capture stdout as step output for downstream steps. Default: true. */ - captureOutput?: boolean; - /** Fail if command exit code is non-zero. Default: true. */ - failOnError?: boolean; - dependsOn?: string[]; - verification?: VerificationCheck; - timeoutMs?: number; -} - -/** Options for worktree steps (create/checkout git worktrees). */ -export interface WorktreeStepOptions { - type: 'worktree'; - branch: string; - baseBranch?: string; - path?: string; - createBranch?: boolean; - dependsOn?: string[]; - timeoutMs?: number; -} - -export type StepOptions = AgentStepOptions | DeterministicStepOptions | WorktreeStepOptions; - -export interface ErrorOptions { - maxRetries?: number; - retryDelayMs?: number; - notifyChannel?: string; - repairAgent?: string; - repairRetries?: number; -} - -export type ReliabilityOptions = ErrorOptions; - -export interface WorkflowRunOptions { - /** Run a specific workflow by name (default: first). */ - workflow?: string; - /** Template variable substitutions. */ - vars?: VariableContext; - /** Working directory (default: process.cwd()). */ - cwd?: string; - /** AgentRelay options (all optional). */ - relay?: AgentRelayOptions; - /** Progress callback. */ - onEvent?: WorkflowEventListener; - /** Validate and print execution plan without spawning agents. */ - dryRun?: boolean; - /** External step executor (e.g. Daytona sandbox backend). */ - executor?: RunnerStepExecutor; - /** Start from a specific step, skipping all predecessors. */ - startFrom?: string; - /** Previous run ID whose cached outputs are used with startFrom. */ - previousRunId?: string; - /** Console log verbosity: "verbose" | "normal" (default) | "quiet" | false (silent). */ - logLevel?: LogLevel; - /** Renderer: "listr" for listr2 UI, "default" for console logger, false to disable. */ - renderer?: 'listr' | 'default' | false; - /** Run the workflow in the cloud instead of locally. */ - cloud?: boolean; - /** Cloud API base URL (or set CLOUD_API_URL env var). */ - cloudApiUrl?: string; - /** Cloud API authentication token (or set CLOUD_API_TOKEN env var). */ - cloudApiToken?: string; - /** Environment secrets to forward to cloud agents. */ - envSecrets?: Record; - /** Polling interval in ms for cloud run status checks. */ - cloudPollIntervalMs?: number; - /** Callback invoked when the cloud run status changes. */ - onCloudStatusChange?: (status: string, runId: string) => void; -} - -// ── WorkflowBuilder ───────────────────────────────────────────────────────── - -/** - * Fluent builder for constructing workflow configurations programmatically. - * - * @example - * ```typescript - * import { workflow } from "@agent-relay/sdk/workflows"; - * - * const result = await workflow("my-workflow") - * .pattern("dag") - * .agent("worker", { cli: "claude", role: "Backend engineer" }) - * .step("build", { agent: "worker", task: "Build the project" }) - * .step("test", { agent: "worker", task: "Run tests", dependsOn: ["build"] }) - * .run(); - * ``` - */ -export class WorkflowBuilder { - private _name: string; - private _description?: string; - private _pattern: SwarmPattern = 'dag'; - private _maxConcurrency?: number; - private _timeoutMs?: number; - private _channel?: string; - private _idleNudge?: IdleNudgeConfig; - private _paths?: PathDefinition[]; - private _agents: AgentDefinition[] = []; - private _steps: WorkflowStep[] = []; - private _errorHandling?: ErrorHandlingConfig; - private _coordination?: CoordinationConfig; - private _state?: StateConfig; - private _trajectories?: TrajectoryConfig | false; - private _startFrom?: string; - private _previousRunId?: string; - - constructor(name: string) { - this._name = name; - } - - /** Set workflow description. */ - description(desc: string): this { - this._description = desc; - return this; - } - - /** Set swarm pattern (default: "dag"). */ - pattern(p: SwarmPattern): this { - this._pattern = p; - return this; - } - - /** Set maximum concurrent agents. */ - maxConcurrency(n: number): this { - this._maxConcurrency = n; - return this; - } - - /** Set global timeout in milliseconds. */ - timeout(ms: number): this { - this._timeoutMs = ms; - return this; - } - - /** Set the relay channel for agent communication. */ - channel(ch: string): this { - const CHANNEL_RE = /^[a-z0-9][a-z0-9-]*$/; - if (!CHANNEL_RE.test(ch)) { - throw new Error( - `Invalid channel name "${ch}". Channel names must be lowercase alphanumeric and hyphens, starting with a letter or number. ` + - `Fix: use .toLowerCase().replace(/[^a-z0-9-]/g, '-').replace(/-+/g, '-').replace(/^-|-$/g, '')` - ); - } - this._channel = ch; - return this; - } - - /** Configure idle agent detection and nudging for interactive agents. */ - idleNudge(config: IdleNudgeConfig): this { - this._idleNudge = config; - return this; - } - - /** Set workflow coordination settings (barriers, voting threshold, consensus strategy). */ - coordination(config: CoordinationConfig): this { - this._coordination = config; - return this; - } - - /** Configure shared workflow state backend settings. */ - state(config: StateConfig): this { - this._state = config; - return this; - } - - /** Configure trajectory recording, or pass `false` to disable it. */ - trajectories(config: TrajectoryConfig | false): this { - this._trajectories = config; - return this; - } - - /** Start execution from a specific step, skipping all predecessor steps. */ - startFrom(stepName: string): this { - this._startFrom = stepName; - return this; - } - - /** Set the previous run ID whose cached step outputs should be used with startFrom. */ - previousRunId(id: string): this { - this._previousRunId = id; - return this; - } - - /** - * Declare named paths to additional directories the workflow needs. - * - * For multi-repo cloud workflows (relay#774, cloud#302), each entry is - * tarballed by the CLI at submit time and mounted at - * `/home/daytona/workspace/{name}/` in the sandbox. Locally, the runner - * resolves `path` relative to the workflow file's parent directory and - * agents reference each entry by its declared `name`. - * - * Calling this is a no-op for the runtime — the runner doesn't need - * `paths` to execute steps. The CLI and the cloud bootstrap consume - * it. Declaring via the builder keeps single-source-of-truth for tools - * that walk the built config (e.g. dashboards, dry-run reports). - */ - paths(paths: PathDefinition[]): this { - if (!Array.isArray(paths)) { - throw new Error('.paths() expects an array of PathDefinition objects'); - } - const seen = new Set(); - for (const p of paths) { - if (!p || typeof p.name !== 'string' || typeof p.path !== 'string') { - throw new Error('.paths() entries must each have string `name` and `path` fields'); - } - if (seen.has(p.name)) { - throw new Error(`.paths() got duplicate entry name "${p.name}"`); - } - seen.add(p.name); - } - this._paths = paths.map((p) => ({ ...p })); - return this; - } - - /** Add an agent definition. */ - agent(name: string, options: AgentOptions): this { - const def: AgentDefinition = { - name, - cli: options.cli, - }; - - if (options.role !== undefined) def.role = options.role; - if (options.task !== undefined) def.task = options.task; - if (options.channels !== undefined) def.channels = options.channels; - if (options.preset !== undefined) def.preset = options.preset; - if (options.interactive !== undefined) def.interactive = options.interactive; - if (options.skills !== undefined) def.skills = options.skills; - - if ( - options.model !== undefined || - options.maxTokens !== undefined || - options.timeoutMs !== undefined || - options.retries !== undefined || - options.idleThresholdSecs !== undefined - ) { - def.constraints = {}; - if (options.model !== undefined) def.constraints.model = options.model; - if (options.maxTokens !== undefined) def.constraints.maxTokens = options.maxTokens; - if (options.timeoutMs !== undefined) def.constraints.timeoutMs = options.timeoutMs; - if (options.retries !== undefined) def.constraints.retries = options.retries; - if (options.idleThresholdSecs !== undefined) - def.constraints.idleThresholdSecs = options.idleThresholdSecs; - } - - this._agents.push(def); - return this; - } - - /** Add a workflow step (agent or deterministic). */ - step(name: string, options: StepOptions): this { - const step: WorkflowStep = { name }; - - if ('type' in options && options.type === 'deterministic') { - if (!options.command) { - throw new Error('deterministic steps must have a command'); - } - if ('agent' in options || 'task' in options) { - throw new Error('deterministic steps must not have agent or task'); - } - step.type = 'deterministic'; - step.command = options.command; - if (options.cwd !== undefined) step.cwd = options.cwd; - if (options.captureOutput !== undefined) step.captureOutput = options.captureOutput; - if (options.failOnError !== undefined) step.failOnError = options.failOnError; - if (options.dependsOn !== undefined) step.dependsOn = options.dependsOn; - if (options.verification !== undefined) step.verification = options.verification; - if (options.timeoutMs !== undefined) step.timeoutMs = options.timeoutMs; - } else if ('type' in options && options.type === 'worktree') { - if ('agent' in options || 'task' in options) { - throw new Error('worktree steps must not have agent or task'); - } - step.type = 'worktree'; - step.branch = options.branch; - if (options.baseBranch !== undefined) step.baseBranch = options.baseBranch; - if (options.path !== undefined) step.path = options.path; - if (options.createBranch !== undefined) step.createBranch = options.createBranch; - if (options.dependsOn !== undefined) step.dependsOn = options.dependsOn; - if (options.timeoutMs !== undefined) step.timeoutMs = options.timeoutMs; - } else { - // Agent step - const agentOpts = options as AgentStepOptions; - if (!agentOpts.agent || !agentOpts.task) { - throw new Error('Agent steps must have both agent and task'); - } - step.agent = agentOpts.agent; - step.task = agentOpts.task; - if (agentOpts.cwd !== undefined) step.cwd = agentOpts.cwd; - if (agentOpts.dependsOn !== undefined) step.dependsOn = agentOpts.dependsOn; - if (agentOpts.verification !== undefined) step.verification = agentOpts.verification; - if (agentOpts.timeoutMs !== undefined) step.timeoutMs = agentOpts.timeoutMs; - if (agentOpts.retries !== undefined) step.retries = agentOpts.retries; - } - - this._steps.push(step); - return this; - } - - /** Set error handling strategy. */ - onError(strategy: 'fail-fast' | 'continue' | 'retry', options?: ErrorOptions): this { - this._errorHandling = { strategy }; - if (options?.maxRetries !== undefined) this._errorHandling.maxRetries = options.maxRetries; - if (options?.retryDelayMs !== undefined) this._errorHandling.retryDelayMs = options.retryDelayMs; - if (options?.notifyChannel !== undefined) this._errorHandling.notifyChannel = options.notifyChannel; - if (options?.repairAgent !== undefined) this._errorHandling.repairAgent = options.repairAgent; - if (options?.repairRetries !== undefined) this._errorHandling.repairRetries = options.repairRetries; - return this; - } - - /** - * Opt into the product reliability contract: repairable workflow failures get - * routed through an agent and retried before the workflow is allowed to fail. - */ - repairable(options: ReliabilityOptions = {}): this { - return this.onError('retry', { - maxRetries: options.maxRetries ?? options.repairRetries ?? 2, - retryDelayMs: options.retryDelayMs ?? 1000, - notifyChannel: options.notifyChannel, - repairAgent: options.repairAgent, - repairRetries: options.repairRetries ?? options.maxRetries ?? 2, - }); - } - - /** Alias for `.repairable()` for workflow authors who think in product terms. */ - reliable(options: ReliabilityOptions = {}): this { - return this.repairable(options); - } - - private validateBuilderState(): void { - const hasAgentSteps = this._steps.some((s) => s.type !== 'deterministic' && s.type !== 'worktree'); - if (hasAgentSteps && this._agents.length === 0) { - throw new Error('Workflow must have at least one agent when using agent steps'); - } - if (this._steps.length === 0) { - throw new Error('Workflow must have at least one step'); - } - - const agentNames = new Set(this._agents.map((agent) => agent.name)); - for (const step of this._steps) { - const diagnosticAgent = step.verification?.diagnosticAgent; - if (!diagnosticAgent) continue; - - if (!agentNames.has(diagnosticAgent)) { - throw new Error(`Step "${step.name}" references unknown diagnosticAgent "${diagnosticAgent}"`); - } - - if (step.retries === undefined || step.retries === 0) { - console.warn( - `Step "${step.name}": diagnosticAgent configured but no retries — diagnostic will never run` - ); - } - } - } - - /** Build and return the RelayYamlConfig object. */ - toConfig(): RelayYamlConfig { - this.validateBuilderState(); - - const wfDef: WorkflowDefinition = { - name: `${this._name}-workflow`, - steps: [...this._steps], - }; - - const config: RelayYamlConfig = { - version: '1.0', - name: this._name, - swarm: { - pattern: this._pattern, - }, - agents: [...this._agents], - workflows: [wfDef], - }; - - if (this._description !== undefined) config.description = this._description; - if (this._paths !== undefined && this._paths.length > 0) { - config.paths = this._paths.map((p) => ({ ...p })); - } - if (this._maxConcurrency !== undefined) config.swarm.maxConcurrency = this._maxConcurrency; - if (this._timeoutMs !== undefined) config.swarm.timeoutMs = this._timeoutMs; - if (this._channel !== undefined) config.swarm.channel = this._channel; - if (this._idleNudge !== undefined) config.swarm.idleNudge = this._idleNudge; - config.errorHandling = this._errorHandling ?? { - strategy: 'retry', - maxRetries: 2, - retryDelayMs: 1000, - repairRetries: 2, - }; - if (this._coordination !== undefined) config.coordination = this._coordination; - if (this._state !== undefined) config.state = this._state; - if (this._trajectories !== undefined) config.trajectories = this._trajectories; - - return config; - } - - /** Serialize the config to a YAML string. */ - toYaml(): string { - return stringifyYaml(this.toConfig()); - } - - /** Build the config and execute it with the WorkflowRunner. */ - async run(options: WorkflowRunOptions & { dryRun: true }): Promise; - async run(options?: WorkflowRunOptions): Promise; - async run(options: WorkflowRunOptions = {}): Promise { - const config = this.toConfig(); - const runnerCwd = options.cwd ?? process.cwd(); - const dbPath = path.join(runnerCwd, '.agent-relay', 'workflow-runs.jsonl'); - const db = new JsonFileWorkflowDb(dbPath); - - const runner = new WorkflowRunner({ - cwd: options.cwd, - relay: options.relay, - executor: options.executor, - envSecrets: options.envSecrets, - db, - }); - - // Auto-detect DRY_RUN env var so existing scripts get dry-run for free - const isDryRun = options.dryRun ?? !!process.env.DRY_RUN; - - if (isDryRun) { - const report = runner.dryRun(config, options.workflow, options.vars); - console.log(formatDryRunReport(report)); - return report; - } - - // Cloud execution path — submit to remote API and poll for completion - if (options.cloud) { - const cloudApiUrl = options.cloudApiUrl ?? process.env.CLOUD_API_URL; - const cloudApiToken = options.cloudApiToken ?? process.env.CLOUD_API_TOKEN; - if (!cloudApiUrl) throw new Error('cloud: true requires cloudApiUrl or CLOUD_API_URL env var'); - if (!cloudApiToken) throw new Error('cloud: true requires cloudApiToken or CLOUD_API_TOKEN env var'); - return runInCloud(config, { - cloudApiUrl, - cloudApiToken, - envSecrets: options.envSecrets, - pollIntervalMs: options.cloudPollIntervalMs, - timeoutMs: this._timeoutMs, - onStatusChange: options.onCloudStatusChange, - }); - } - - // Wire up default console logger unless explicitly disabled - // renderer: "listr" owns the terminal — skip console logger to avoid garbled output - // renderer: false implies no output at all - const logLevel = - options.renderer === 'listr' || options.renderer === false ? false : (options.logLevel ?? 'normal'); - if (logLevel !== false) { - runner.on(createDefaultEventLogger(logLevel)); - } - - // Wire up user-provided event handler (additive — does not replace the default logger) - if (options.onEvent) { - runner.on(options.onEvent); - } - - // Auto-detect RESUME_RUN_ID env var for resuming failed runs - const resumeRunId = process.env.RESUME_RUN_ID; - - const startFrom = this._startFrom ?? options.startFrom ?? process.env.START_FROM; - const previousRunId = this._previousRunId ?? options.previousRunId ?? process.env.PREVIOUS_RUN_ID; - const executeOptions: WorkflowExecuteOptions | undefined = startFrom - ? { startFrom, previousRunId } - : undefined; - - // If listr renderer requested, wire it up and run concurrently - // Must be set up BEFORE the resume check so resume runs also get event output - if (options.renderer === 'listr') { - const { createWorkflowRenderer } = await import('./listr-renderer.js'); - const renderer = createWorkflowRenderer(); - runner.on(renderer.onEvent); - - const runPromise = resumeRunId - ? runner.resume(resumeRunId, options.vars, config) - : runner.execute(config, options.workflow, options.vars, executeOptions); - - try { - const [result] = await Promise.all([runPromise, renderer.start()]); - return result; - } finally { - renderer.unmount(); - } - } - - if (resumeRunId) { - return runner.resume(resumeRunId, options.vars, config); - } - - return runner.execute(config, options.workflow, options.vars, executeOptions); - } -} - -// ── Entry point ───────────────────────────────────────────────────────────── - -/** - * Create a new workflow builder. - * - * @example - * ```typescript - * const result = await workflow("my-task") - * .pattern("fan-out") - * .agent("worker", { cli: "claude" }) - * .step("do-work", { agent: "worker", task: "Build the feature" }) - * .run(); - * ``` - */ -export function workflow(name: string): WorkflowBuilder { - return new WorkflowBuilder(name); -} diff --git a/packages/sdk/src/workflows/builtin-templates/bug-fix.yaml b/packages/sdk/src/workflows/builtin-templates/bug-fix.yaml deleted file mode 100644 index 7396636fa..000000000 --- a/packages/sdk/src/workflows/builtin-templates/bug-fix.yaml +++ /dev/null @@ -1,139 +0,0 @@ -version: '1.0' -name: bug-fix -description: 'Blueprint-style bug investigation and remediation with validation gates.' -swarm: - pattern: hub-spoke - maxConcurrency: 2 - timeoutMs: 2700000 - channel: swarm-bug-fix - idleNudge: - nudgeAfterMs: 120000 - escalateAfterMs: 120000 - maxNudges: 1 -agents: - - name: lead - cli: claude - role: 'Coordinates debugging and release decisions' - permissions: { access: full } - - name: investigator - cli: codex - role: 'Reproduces and scopes the defect' - permissions: { access: readonly } - interactive: false - - name: fixer - cli: codex - role: 'Implements and tests the fix' - permissions: { access: readwrite } - interactive: false - - name: verifier - cli: claude - role: 'Validates risk, regressions, and completion' - permissions: { access: readonly } -workflows: - - name: bug-remediation - description: 'Investigate root cause, patch safely, and verify no regressions.' - onError: retry - preflight: - - command: git status --porcelain - failIf: non-empty - description: 'Ensure working directory is clean' - - command: npm test 2>/dev/null || echo "baseline" - description: 'Capture baseline test state' - steps: - # Agent: Investigate root cause - - name: investigate - type: agent - agent: investigator - task: | - Reproduce the issue, identify root cause, and provide a fix strategy: - {{task}} - verification: - type: output_contains - value: ROOT_CAUSE_IDENTIFIED - - # Deterministic: Create fix branch - - name: create-branch - type: deterministic - dependsOn: [investigate] - command: git checkout -b fix/{{branch-name}} - - # Agent: Implement the fix - - name: patch - type: agent - agent: fixer - dependsOn: [create-branch] - task: | - Implement the fix based on the investigation report: - {{steps.investigate.output}} - retries: 2 - verification: - type: output_contains - value: PATCH_APPLIED - - # Deterministic: Run tests - - name: test - type: deterministic - dependsOn: [patch] - command: npm test - - # Agent: Fix test failures if any (with iteration limit) - - name: fix-if-broken - type: agent - agent: fixer - dependsOn: [test] - task: | - Review test results. If tests failed, fix them. If all passed, output TESTS_PASSED. - Test output: {{steps.test.output}} - maxIterations: 2 - verification: - type: output_contains - value: TESTS_PASSED - - # Deterministic: Commit - - name: commit - type: deterministic - dependsOn: [fix-if-broken] - command: 'git add -A && git commit -m "fix: {{steps.investigate.output | first-line}}"' - - # Agent: Verify no regressions - - name: regression-check - type: agent - agent: verifier - dependsOn: [commit] - task: | - Validate the patch for correctness and regression risk: - {{steps.patch.output}} - verification: - type: output_contains - value: VERIFICATION_COMPLETE - - # Deterministic: Push to remote - - name: push - type: deterministic - dependsOn: [regression-check] - command: git push origin fix/{{branch-name}} - - # Agent: Closeout - - name: closeout - type: agent - agent: lead - dependsOn: [push] - task: | - Prepare final incident summary, residual risk, and deployment notes. - verification: - type: output_contains - value: DONE -coordination: - barriers: - - name: fix-ready - waitFor: [investigate, patch, regression-check] - timeoutMs: 600000 -state: - backend: memory - ttlMs: 43200000 - namespace: bug-fix -errorHandling: - strategy: retry - maxRetries: 3 - retryDelayMs: 3000 - notifyChannel: swarm-bug-fix diff --git a/packages/sdk/src/workflows/builtin-templates/code-review.yaml b/packages/sdk/src/workflows/builtin-templates/code-review.yaml deleted file mode 100644 index f606f786e..000000000 --- a/packages/sdk/src/workflows/builtin-templates/code-review.yaml +++ /dev/null @@ -1,137 +0,0 @@ -version: '1.0' -name: code-review -description: 'Blueprint-style parallel code review with deterministic diff capture.' -swarm: - pattern: fan-out - maxConcurrency: 4 - timeoutMs: 2400000 - channel: swarm-code-review - idleNudge: - nudgeAfterMs: 120000 - escalateAfterMs: 120000 - maxNudges: 1 -agents: - - name: lead - cli: claude - role: 'Aggregates review output and final recommendations' - permissions: { access: full } - - name: reviewer-architecture - cli: codex - role: 'Assesses architecture and maintainability' - permissions: { access: readonly } - interactive: false - - name: reviewer-correctness - cli: claude - role: 'Assesses correctness and testing' - permissions: { access: readonly } - interactive: false - - name: reviewer-security - cli: gemini - role: 'Assesses security posture and abuse resistance' - permissions: { access: readonly } - interactive: false -workflows: - - name: parallel-review - description: 'Run focused reviews in parallel and synthesize final guidance.' - onError: fail - preflight: - - command: git diff --stat HEAD~1 2>/dev/null || git diff --stat 2>/dev/null || echo "No diff available" - description: 'Check there are changes to review' - steps: - # Deterministic: Capture diff for review - - name: capture-diff - type: deterministic - command: git diff HEAD~1 2>/dev/null || git diff 2>/dev/null || echo "No changes" - captureOutput: true - - # Deterministic: Capture file list - - name: capture-files - type: deterministic - dependsOn: [capture-diff] - command: git diff --name-only HEAD~1 2>/dev/null || git diff --name-only 2>/dev/null || echo "No files" - - # Agent: Prepare context - - name: prepare-context - type: agent - agent: lead - dependsOn: [capture-diff, capture-files] - task: | - Summarize change intent, impacted modules, and review priorities: - {{task}} - - Changed files: {{steps.capture-files.output}} - verification: - type: output_contains - value: REVIEW_CONTEXT_READY - - # Agent: Architecture review (parallel) - - name: architecture-pass - type: agent - agent: reviewer-architecture - dependsOn: [prepare-context] - task: | - Review architecture, coupling, and long-term maintainability: - {{steps.prepare-context.output}} - - Diff: - {{steps.capture-diff.output}} - verification: - type: output_contains - value: ARCH_REVIEW_COMPLETE - - # Agent: Correctness review (parallel) - - name: correctness-pass - type: agent - agent: reviewer-correctness - dependsOn: [prepare-context] - task: | - Review behavior, tests, and likely regression paths: - {{steps.prepare-context.output}} - - Diff: - {{steps.capture-diff.output}} - verification: - type: output_contains - value: CORRECTNESS_REVIEW_COMPLETE - - # Agent: Security review (parallel) - - name: security-pass - type: agent - agent: reviewer-security - dependsOn: [prepare-context] - task: | - Review attack surface, secret handling, and input validation: - {{steps.prepare-context.output}} - - Diff: - {{steps.capture-diff.output}} - verification: - type: output_contains - value: SECURITY_REVIEW_COMPLETE - - # Agent: Consolidate findings - - name: consolidate - type: agent - agent: lead - dependsOn: [architecture-pass, correctness-pass, security-pass] - task: | - Produce merged findings, severity levels, and final recommendation. - Architecture review: {{steps.architecture-pass.output}} - Correctness review: {{steps.correctness-pass.output}} - Security review: {{steps.security-pass.output}} - verification: - type: output_contains - value: DONE -coordination: - barriers: - - name: reviews-complete - waitFor: [architecture-pass, correctness-pass, security-pass] - timeoutMs: 900000 - consensusStrategy: majority -state: - backend: memory - ttlMs: 21600000 - namespace: code-review -errorHandling: - strategy: fail-fast - notifyChannel: swarm-code-review diff --git a/packages/sdk/src/workflows/builtin-templates/competitive.yaml b/packages/sdk/src/workflows/builtin-templates/competitive.yaml deleted file mode 100644 index ab8c41583..000000000 --- a/packages/sdk/src/workflows/builtin-templates/competitive.yaml +++ /dev/null @@ -1,107 +0,0 @@ -version: '1.0' -name: competitive -description: 'Multiple agents independently implement solutions, then compare and select the best approach.' -swarm: - pattern: competitive - maxConcurrency: 4 - timeoutMs: 5400000 - channel: swarm-competitive -agents: - - name: lead - cli: claude - role: 'Defines spec, judges implementations, and selects winner' - permissions: { access: readwrite } - - name: team-alpha - cli: claude - role: 'Independent implementation team A' - permissions: { access: readwrite } - - name: team-beta - cli: codex - role: 'Independent implementation team B' - permissions: { access: readwrite } - - name: team-gamma - cli: gemini - role: 'Independent implementation team C' - permissions: { access: readwrite } -workflows: - - name: competitive-build - description: 'Independent parallel implementations followed by comparison and selection.' - onError: fail - steps: - - name: define-spec - agent: lead - task: | - Define clear requirements, acceptance criteria, and evaluation rubric: - {{task}} - verification: - type: output_contains - value: SPEC_COMPLETE - - name: implement-alpha - agent: team-alpha - dependsOn: [define-spec] - task: | - Implement solution independently based on spec: - {{steps.define-spec.output}} - - Do not coordinate with other teams. Focus on your best approach. - verification: - type: output_contains - value: IMPLEMENTATION_COMPLETE - - name: implement-beta - agent: team-beta - dependsOn: [define-spec] - task: | - Implement solution independently based on spec: - {{steps.define-spec.output}} - - Do not coordinate with other teams. Focus on your best approach. - verification: - type: output_contains - value: IMPLEMENTATION_COMPLETE - - name: implement-gamma - agent: team-gamma - dependsOn: [define-spec] - task: | - Implement solution independently based on spec: - {{steps.define-spec.output}} - - Do not coordinate with other teams. Focus on your best approach. - verification: - type: output_contains - value: IMPLEMENTATION_COMPLETE - - name: compare-solutions - agent: lead - dependsOn: [implement-alpha, implement-beta, implement-gamma] - task: | - Compare all implementations against the evaluation rubric. - - Team Alpha: {{steps.implement-alpha.output}} - Team Beta: {{steps.implement-beta.output}} - Team Gamma: {{steps.implement-gamma.output}} - - Analyze trade-offs, strengths, and weaknesses of each approach. - verification: - type: output_contains - value: COMPARISON_COMPLETE - - name: select-winner - agent: lead - dependsOn: [compare-solutions] - task: | - Select the winning implementation or synthesize the best elements. - Provide rationale and integration plan. - verification: - type: output_contains - value: DONE -coordination: - barriers: - - name: implementations-complete - waitFor: [implement-alpha, implement-beta, implement-gamma] - timeoutMs: 3600000 - consensusStrategy: majority -state: - backend: memory - ttlMs: 21600000 - namespace: competitive -errorHandling: - strategy: continue - notifyChannel: swarm-competitive diff --git a/packages/sdk/src/workflows/builtin-templates/documentation.yaml b/packages/sdk/src/workflows/builtin-templates/documentation.yaml deleted file mode 100644 index a67306a86..000000000 --- a/packages/sdk/src/workflows/builtin-templates/documentation.yaml +++ /dev/null @@ -1,128 +0,0 @@ -version: '1.0' -name: documentation -description: 'Blueprint-style documentation workflow with deterministic file operations.' -swarm: - pattern: handoff - maxConcurrency: 1 - timeoutMs: 3000000 - channel: swarm-documentation - idleNudge: - nudgeAfterMs: 120000 - escalateAfterMs: 120000 - maxNudges: 1 -agents: - - name: lead - cli: claude - role: 'Owns final editorial sign-off' - - name: researcher - cli: codex - role: 'Collects technical context and source details' - interactive: false - - name: writer - cli: codex - role: 'Drafts user-facing documentation' - permissions: - access: readwrite - files: - write: ['docs/**', '*.md', 'web/content/**'] - interactive: false - - name: editor - cli: claude - role: 'Edits for accuracy, clarity, and structure' - permissions: - access: readwrite - files: - write: ['docs/**', '*.md', 'web/content/**'] -workflows: - - name: docs-production - description: 'Gather context, draft docs, edit, and publish summary.' - onError: skip - preflight: - - command: git status --porcelain - failIf: non-empty - description: 'Ensure working directory is clean' - steps: - # Deterministic: List existing docs - - name: list-docs - type: deterministic - command: find . -name "*.md" -o -name "*.mdx" 2>/dev/null | head -50 || echo "No docs found" - - # Agent: Gather context - - name: gather-context - type: agent - agent: researcher - dependsOn: [list-docs] - task: | - Collect source context and required updates: - {{task}} - - Existing documentation files: - {{steps.list-docs.output}} - verification: - type: output_contains - value: CONTEXT_COMPLETE - - # Deterministic: Create docs branch - - name: create-branch - type: deterministic - dependsOn: [gather-context] - command: git checkout -b docs/{{branch-name}} - - # Agent: Draft documentation - - name: draft - type: agent - agent: writer - dependsOn: [create-branch] - task: | - Draft documentation updates based on gathered context: - {{steps.gather-context.output}} - verification: - type: output_contains - value: DRAFT_COMPLETE - - # Agent: Edit draft - - name: edit - type: agent - agent: editor - dependsOn: [draft] - task: | - Edit the draft for technical accuracy and readability: - {{steps.draft.output}} - verification: - type: output_contains - value: EDIT_COMPLETE - - # Deterministic: Commit docs - - name: commit - type: deterministic - dependsOn: [edit] - command: 'git add -A && git commit -m "docs: {{steps.gather-context.output | first-line}}"' - - # Deterministic: Push - - name: push - type: deterministic - dependsOn: [commit] - command: git push origin docs/{{branch-name}} - - # Agent: Publish summary - - name: publish-summary - type: agent - agent: lead - dependsOn: [push] - task: | - Publish a final summary of documentation changes and open items. - verification: - type: output_contains - value: DONE -coordination: - barriers: - - name: docs-ready - waitFor: [gather-context, draft, edit] - timeoutMs: 600000 -state: - backend: memory - ttlMs: 259200000 - namespace: documentation -errorHandling: - strategy: continue - notifyChannel: swarm-documentation diff --git a/packages/sdk/src/workflows/builtin-templates/feature-dev.yaml b/packages/sdk/src/workflows/builtin-templates/feature-dev.yaml deleted file mode 100644 index 83d6dd72e..000000000 --- a/packages/sdk/src/workflows/builtin-templates/feature-dev.yaml +++ /dev/null @@ -1,146 +0,0 @@ -version: '1.0' -name: feature-dev -description: 'Blueprint-style feature development with deterministic quality gates.' -swarm: - pattern: hub-spoke - maxConcurrency: 2 - timeoutMs: 3600000 - channel: swarm-feature-dev - idleNudge: - nudgeAfterMs: 120000 - escalateAfterMs: 120000 - maxNudges: 1 -agents: - - name: lead - cli: claude - role: 'Lead engineer coordinating delivery' - permissions: { access: full } - - name: planner - cli: codex - role: 'Plans implementation and acceptance criteria' - permissions: { access: readonly } - interactive: false - - name: developer - cli: codex - role: 'Implements planned changes' - permissions: { access: readwrite } - interactive: false - - name: reviewer - cli: claude - role: 'Reviews code quality and release risk' - permissions: { access: readonly } -workflows: - - name: feature-delivery - description: 'Plan, implement, review, and finalize a feature request with quality gates.' - onError: retry - preflight: - - command: git status --porcelain - failIf: non-empty - description: 'Ensure working directory is clean' - - command: npm run type-check 2>/dev/null || echo "skip" - description: 'Run type checking if available' - steps: - # Agent: Planning - - name: plan - type: agent - agent: planner - task: | - Analyze the feature request and produce a concrete implementation plan: - {{task}} - retries: 1 - verification: - type: output_contains - value: PLAN_COMPLETE - - # Deterministic: Create feature branch - - name: create-branch - type: deterministic - dependsOn: [plan] - command: git checkout -b feature/{{branch-name}} - - # Agent: Implementation - - name: implement - type: agent - agent: developer - dependsOn: [create-branch] - task: | - Implement the approved plan: - {{steps.plan.output}} - retries: 1 - verification: - type: output_contains - value: IMPLEMENTATION_COMPLETE - - # Deterministic: Lint - - name: lint - type: deterministic - dependsOn: [implement] - command: npm run lint:fix 2>/dev/null || npm run lint 2>/dev/null || echo "No lint configured" - - # Deterministic: Run tests - - name: test - type: deterministic - dependsOn: [lint] - command: npm test 2>/dev/null || echo "No tests configured" - - # Agent: Fix any failures (with iteration limit) - - name: fix-failures - type: agent - agent: developer - dependsOn: [test] - task: | - Review test results and fix any failures. If all tests passed, output TESTS_PASSED. - Test output: {{steps.test.output}} - maxIterations: 2 - verification: - type: output_contains - value: TESTS_PASSED - - # Deterministic: Stage and commit - - name: commit - type: deterministic - dependsOn: [fix-failures] - command: 'git add -A && git commit -m "feat: {{steps.plan.output | first-line}}"' - - # Agent: Code review - - name: review - type: agent - agent: reviewer - dependsOn: [commit] - task: | - Review implementation quality, correctness, and test coverage: - {{steps.implement.output}} - verification: - type: output_contains - value: REVIEW_COMPLETE - - # Deterministic: Push to remote - - name: push - type: deterministic - dependsOn: [review] - command: git push origin feature/{{branch-name}} - - # Agent: Finalize - - name: finalize - type: agent - agent: lead - dependsOn: [push] - task: | - Summarize decisions and ship readiness for the feature. - verification: - type: output_contains - value: DONE -coordination: - barriers: - - name: delivery-ready - waitFor: [plan, implement, review] - timeoutMs: 900000 -state: - backend: memory - ttlMs: 86400000 - namespace: feature-dev -errorHandling: - strategy: retry - maxRetries: 2 - retryDelayMs: 5000 - notifyChannel: swarm-feature-dev diff --git a/packages/sdk/src/workflows/builtin-templates/refactor.yaml b/packages/sdk/src/workflows/builtin-templates/refactor.yaml deleted file mode 100644 index c7ca5e713..000000000 --- a/packages/sdk/src/workflows/builtin-templates/refactor.yaml +++ /dev/null @@ -1,145 +0,0 @@ -version: '1.0' -name: refactor -description: 'Blueprint-style refactor workflow with deterministic quality gates.' -swarm: - pattern: hierarchical - maxConcurrency: 2 - timeoutMs: 4500000 - channel: swarm-refactor - idleNudge: - nudgeAfterMs: 120000 - escalateAfterMs: 120000 - maxNudges: 1 -agents: - - name: lead - cli: claude - role: 'Owns scope, sequencing, and acceptance' - permissions: { access: full } - - name: architect - cli: codex - role: 'Designs target architecture and migration plan' - permissions: { access: readwrite } - interactive: false - - name: refactorer - cli: codex - role: 'Executes scoped refactor changes' - permissions: { access: readwrite } - interactive: false - - name: tester - cli: claude - role: 'Validates behavior parity and risk' - permissions: { access: readonly } -workflows: - - name: refactor-execution - description: 'Analyze current system, design approach, refactor, and validate.' - onError: retry - preflight: - - command: git status --porcelain - failIf: non-empty - description: 'Ensure working directory is clean' - - command: npm test 2>/dev/null || echo "baseline" - description: 'Capture baseline test results' - steps: - # Agent: Analyze current design - - name: analyze - type: agent - agent: architect - task: | - Analyze current design and identify refactor opportunities: - {{task}} - verification: - type: output_contains - value: ANALYSIS_COMPLETE - - # Agent: Design refactor plan - - name: design - type: agent - agent: architect - dependsOn: [analyze] - task: | - Provide incremental refactor plan with rollback notes: - {{steps.analyze.output}} - verification: - type: output_contains - value: PLAN_COMPLETE - - # Deterministic: Create refactor branch - - name: create-branch - type: deterministic - dependsOn: [design] - command: git checkout -b refactor/{{branch-name}} - - # Agent: Execute refactor - - name: refactor-code - type: agent - agent: refactorer - dependsOn: [create-branch] - task: | - Execute the refactor plan while preserving behavior: - {{steps.design.output}} - retries: 2 - verification: - type: output_contains - value: REFACTOR_COMPLETE - - # Deterministic: Run linting - - name: lint - type: deterministic - dependsOn: [refactor-code] - command: npm run lint:fix 2>/dev/null || npm run lint 2>/dev/null || echo "No lint configured" - - # Deterministic: Run tests - - name: test - type: deterministic - dependsOn: [lint] - command: npm test - - # Agent: Validate behavior parity - - name: validate - type: agent - agent: tester - dependsOn: [test] - task: | - Validate no regressions and ensure tests/quality checks pass: - {{steps.refactor-code.output}} - Test results: {{steps.test.output}} - verification: - type: output_contains - value: VALIDATION_COMPLETE - - # Deterministic: Commit - - name: commit - type: deterministic - dependsOn: [validate] - command: 'git add -A && git commit -m "refactor: {{steps.design.output | first-line}}"' - - # Deterministic: Push - - name: push - type: deterministic - dependsOn: [commit] - command: git push origin refactor/{{branch-name}} - - # Agent: Handoff - - name: handoff - type: agent - agent: lead - dependsOn: [push] - task: | - Produce final refactor summary and open follow-up items. - verification: - type: output_contains - value: DONE -coordination: - barriers: - - name: refactor-ready - waitFor: [analyze, design, refactor-code, validate] - timeoutMs: 900000 -state: - backend: memory - ttlMs: 604800000 - namespace: refactor -errorHandling: - strategy: retry - maxRetries: 2 - retryDelayMs: 5000 - notifyChannel: swarm-refactor diff --git a/packages/sdk/src/workflows/builtin-templates/review-loop.yaml b/packages/sdk/src/workflows/builtin-templates/review-loop.yaml deleted file mode 100644 index 6a8ef35ef..000000000 --- a/packages/sdk/src/workflows/builtin-templates/review-loop.yaml +++ /dev/null @@ -1,227 +0,0 @@ -version: '1.0' -name: review-loop -description: 'Implement a task with automated multi-perspective code review loop. Inspired by claude-review-loop pattern.' -swarm: - pattern: review-loop - maxConcurrency: 4 - timeoutMs: 3600000 - channel: swarm-review-loop - idleNudge: - nudgeAfterMs: 180000 - escalateAfterMs: 180000 - maxNudges: 2 -agents: - - name: implementer - cli: claude - role: 'Senior developer implementing the task and addressing review feedback' - permissions: { access: full } - - name: reviewer-diff - cli: codex - role: 'Code quality reviewer focusing on git diff, tests, and potential bugs' - permissions: { access: readonly } - interactive: false - - name: reviewer-architecture - cli: claude - role: 'Architecture and design reviewer assessing structure and maintainability' - permissions: { access: readonly } - interactive: false - - name: reviewer-security - cli: codex - role: 'Security reviewer checking for OWASP Top 10 vulnerabilities' - permissions: { access: readonly } - interactive: false -workflows: - - name: review-loop-workflow - description: 'Implement task, run parallel reviews, consolidate feedback, and address issues.' - onError: fail - steps: - # Phase 1: Implementation - - name: implement - type: agent - agent: implementer - task: | - Implement the following task: - {{task}} - - When complete, output: IMPLEMENTATION COMPLETE - verification: - type: output_contains - value: IMPLEMENTATION COMPLETE - - # Deterministic: Capture diff for review - - name: capture-diff - type: deterministic - dependsOn: [implement] - command: git diff HEAD~1 2>/dev/null || git diff 2>/dev/null || echo "No changes" - captureOutput: true - - # Deterministic: Capture file list - - name: capture-files - type: deterministic - dependsOn: [capture-diff] - command: git diff --name-only HEAD~1 2>/dev/null || git diff --name-only 2>/dev/null || echo "No files" - captureOutput: true - - # Phase 2: Parallel Reviews (fan-out) - - name: review-diff - type: agent - agent: reviewer-diff - dependsOn: [capture-files] - task: | - Review the git diff for code quality issues: - - Focus areas: - - Code readability and clarity - - Test coverage (are new features tested?) - - Potential bugs or edge cases - - Error handling completeness - - Changed files: {{steps.capture-files.output}} - - Diff: - {{steps.capture-diff.output}} - - Output format: - - If all looks good: REVIEW:PASS - - If issues found: REVIEW:ISSUES followed by numbered list of issues - verification: - type: output_contains - value: 'REVIEW:' - - - name: review-architecture - type: agent - agent: reviewer-architecture - dependsOn: [capture-files] - task: | - Review the architecture and design: - - Focus areas: - - Design patterns and best practices - - Separation of concerns - - Code organization and maintainability - - API design (if applicable) - - Changed files: {{steps.capture-files.output}} - - Diff: - {{steps.capture-diff.output}} - - Output format: - - If all looks good: REVIEW:PASS - - If issues found: REVIEW:ISSUES followed by numbered list of issues - verification: - type: output_contains - value: 'REVIEW:' - - - name: review-security - type: agent - agent: reviewer-security - dependsOn: [capture-files] - task: | - Security review for OWASP Top 10 vulnerabilities: - - Focus areas: - - Injection vulnerabilities (SQL, command, XSS) - - Authentication and authorization issues - - Sensitive data exposure - - Security misconfiguration - - Input validation - - Changed files: {{steps.capture-files.output}} - - Diff: - {{steps.capture-diff.output}} - - Output format: - - If secure: REVIEW:PASS - - If vulnerabilities found: REVIEW:ISSUES followed by numbered list with severity - verification: - type: output_contains - value: 'REVIEW:' - - # Phase 3: Consolidate reviews - - name: consolidate - type: agent - agent: implementer - dependsOn: [review-diff, review-architecture, review-security] - task: | - Review all feedback from the code reviewers and consolidate findings: - - ## Diff Review - {{steps.review-diff.output}} - - ## Architecture Review - {{steps.review-architecture.output}} - - ## Security Review - {{steps.review-security.output}} - - Tasks: - 1. Analyze each review's findings - 2. Identify which issues are valid and should be addressed - 3. Note any conflicting feedback - 4. Create a prioritized action plan - - Output: CONSOLIDATED with summary of issues to address (or NO_ISSUES if all reviews passed) - verification: - type: output_contains - value: CONSOLIDATED - - # Phase 4: Address feedback (the loop) - - name: address-feedback - type: agent - agent: implementer - dependsOn: [consolidate] - task: | - Address the consolidated review feedback: - - {{steps.consolidate.output}} - - For each valid issue: - 1. Make the necessary code changes - 2. Explain what was fixed and why - - If there were no issues to address, confirm the implementation is complete. - - Output: ADDRESSED followed by summary of changes made (or NO_CHANGES_NEEDED) - verification: - type: output_contains - value: ADDRESSED - retries: 2 - maxIterations: 3 - - # Final step: Completion summary - - name: complete - type: agent - agent: implementer - dependsOn: [address-feedback] - task: | - Provide a final summary of the completed work: - - Original task: {{task}} - - Include: - 1. What was implemented - 2. Key decisions made - 3. Review feedback that was addressed - 4. Any remaining considerations or follow-up items - - Output: DONE - verification: - type: output_contains - value: DONE - -coordination: - barriers: - - name: reviews-complete - waitFor: [review-diff, review-architecture, review-security] - timeoutMs: 900000 - consensusStrategy: majority -state: - backend: memory - ttlMs: 21600000 - namespace: review-loop -errorHandling: - strategy: continue - maxRetries: 2 - notifyChannel: swarm-review-loop diff --git a/packages/sdk/src/workflows/builtin-templates/security-audit.yaml b/packages/sdk/src/workflows/builtin-templates/security-audit.yaml deleted file mode 100644 index 4c2acd226..000000000 --- a/packages/sdk/src/workflows/builtin-templates/security-audit.yaml +++ /dev/null @@ -1,139 +0,0 @@ -version: '1.0' -name: security-audit -description: 'Blueprint-style security assessment with deterministic scanning and agent triage.' -swarm: - pattern: pipeline - maxConcurrency: 1 - timeoutMs: 5400000 - channel: swarm-security-audit - idleNudge: - nudgeAfterMs: 120000 - escalateAfterMs: 120000 - maxNudges: 1 -agents: - - name: lead - cli: claude - role: 'Owns final risk sign-off and recommendations' - permissions: { access: full } - - name: analyst - cli: claude - role: 'Prioritizes findings and recommends mitigations' - permissions: - access: readonly - files: - deny: ['.env', 'secrets/**', '*.pem', '*.key'] - - name: remediator - cli: codex - role: 'Implements approved remediations' - permissions: { access: readwrite } - interactive: false - - name: verifier - cli: gemini - role: 'Verifies fixes and residual exposure' - permissions: - access: readonly - files: - deny: ['.env', 'secrets/**', '*.pem', '*.key'] -workflows: - - name: audit-pipeline - description: 'Scan, triage, remediate, verify, and report security posture.' - onError: fail - preflight: - - command: npm audit --json 2>/dev/null | head -100 || echo "{}" - description: 'Run npm audit preflight check' - - command: git diff --check 2>/dev/null || echo "clean" - description: 'Check for whitespace errors' - steps: - # Deterministic: Run npm audit - - name: scan-npm - type: deterministic - command: npm audit --json 2>/dev/null || echo '{"vulnerabilities":{}}' - captureOutput: true - - # Deterministic: Run additional security scans if available - - name: scan-extra - type: deterministic - dependsOn: [scan-npm] - command: | - if command -v semgrep &> /dev/null; then - semgrep --config auto --json . 2>/dev/null || echo '{"results":[]}' - else - echo '{"results":[],"note":"semgrep not installed"}' - fi - - # Agent: Triage findings - - name: triage - type: agent - agent: analyst - dependsOn: [scan-npm, scan-extra] - task: | - Prioritize security findings by severity and exploitability: - - NPM Audit: {{steps.scan-npm.output}} - Additional Scans: {{steps.scan-extra.output}} - - Task context: {{task}} - verification: - type: output_contains - value: TRIAGE_COMPLETE - - # Agent: Implement remediations - - name: remediate - type: agent - agent: remediator - dependsOn: [triage] - task: | - Implement mitigations for approved findings: - {{steps.triage.output}} - retries: 1 - verification: - type: output_contains - value: REMEDIATION_COMPLETE - - # Deterministic: Re-run tests - - name: test - type: deterministic - dependsOn: [remediate] - command: npm test 2>/dev/null || echo "No tests configured" - - # Agent: Verify fixes - - name: verify - type: agent - agent: verifier - dependsOn: [test] - task: | - Re-test security posture and confirm mitigations hold: - {{steps.remediate.output}} - Test results: {{steps.test.output}} - verification: - type: output_contains - value: VERIFICATION_COMPLETE - - # Deterministic: Commit security fixes - - name: commit - type: deterministic - dependsOn: [verify] - command: 'git add -A && git commit -m "security: address vulnerabilities from audit" 2>/dev/null || echo "No changes to commit"' - - # Agent: Final report - - name: report - type: agent - agent: lead - dependsOn: [commit] - task: | - Produce final audit report with residual risk and next actions. - verification: - type: output_contains - value: DONE -coordination: - barriers: - - name: audit-complete - waitFor: [scan-npm, triage, remediate, verify] - timeoutMs: 1200000 -state: - backend: memory - ttlMs: 86400000 - namespace: security-audit -errorHandling: - strategy: fail-fast - notifyChannel: swarm-security-audit diff --git a/packages/sdk/src/workflows/channel-messenger.ts b/packages/sdk/src/workflows/channel-messenger.ts deleted file mode 100644 index d5a9a14f3..000000000 --- a/packages/sdk/src/workflows/channel-messenger.ts +++ /dev/null @@ -1,335 +0,0 @@ -import { stripAnsi as stripAnsiFn } from '../pty.js'; -import type { StepOutcome } from './trajectory.js'; -import type { AgentDefinition, WorkflowStepRow } from './types.js'; - -type StepStateLike = { - row: Pick; -}; - -export interface ChannelRelayLike { - send(to: string, text: string): Promise; -} - -export interface ChannelMessengerOptions { - postFn?: (text: string) => void; -} - -export async function sendToChannel( - relay: ChannelRelayLike, - channel: string, - message: string -): Promise { - await relay.send(channel, message); -} - -export function truncateMessage(message: string, maxLength: number): string { - if (maxLength <= 0) return ''; - return message.length > maxLength ? message.slice(-maxLength) : message; -} - -export function formatStepOutput(stepName: string, output: string, maxLength = 2000): string { - const scrubbed = scrubForChannel(output); - if (scrubbed.length === 0) { - return `**[${stepName}]** Step completed — output written to disk`; - } - - const preview = truncateMessage(scrubbed, maxLength); - return `**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``; -} - -export function formatError(stepName: string, error: unknown): string { - const raw = error instanceof Error ? error.message : String(error); - // Strip absolute paths that could leak internal directory structure - const message = raw.replace(/(?:\/[\w.-]+){3,}/g, '[path]'); - return `**[${stepName}]** Failed: ${message}`; -} - -// Common secret patterns to redact from channel output. -const SECRET_PATTERNS = [ - /(?:api[_-]?key|apikey|secret[_-]?key|access[_-]?token|auth[_-]?token|bearer)\s*[:=]\s*\S+/gi, - /(?:sk|pk|rk|ak)[-_][a-zA-Z0-9]{20,}/g, - /ghp_[a-zA-Z0-9]{36,}/g, - /gho_[a-zA-Z0-9]{36,}/g, - /github_pat_[a-zA-Z0-9_]{22,}/g, - /xox[bpors]-[a-zA-Z0-9-]+/g, - /-----BEGIN\s+(?:RSA\s+)?PRIVATE\s+KEY-----[\s\S]*?-----END/g, -]; - -// Unicode spinner / ornament characters used by Claude TUI animations. -// Includes block-element chars (▗▖▘▝) used in the Claude Code header bar. -const SPINNER = - '\\u2756\\u2738\\u2739\\u273a\\u273b\\u273c\\u273d\\u2731\\u2732\\u2733\\u2734\\u2735\\u2736\\u2737\\u2743\\u2745\\u2746\\u25d6\\u25d7\\u25d8\\u25d9\\u2022\\u25cf\\u25cb\\u25a0\\u25a1\\u25b6\\u25c0\\u23f5\\u23f6\\u23f7\\u23f8\\u23f9\\u25e2\\u25e3\\u25e4\\u25e5\\u2597\\u2596\\u2598\\u259d\\u2bc8\\u2bc7\\u2bc5\\u2bc6\\u00b7' + - '\\u2590\\u258c\\u2588\\u2584\\u2580\\u259a\\u259e' + - '\\u2b21\\u2b22'; - -// Pre-compiled regex constants — hoisted to module level to avoid recompilation per call. -const SPINNER_RE = new RegExp(`[${SPINNER}]`, 'gu'); -const SPINNER_CLASS_RE = new RegExp(`^[\\s${SPINNER}]*$`, 'u'); -const BOX_DRAWING_ONLY_RE = /^[\s\u2500-\u257f\u2580-\u259f\u25a0-\u25ff\-_=~]{3,}$/u; -const BROKER_LOG_RE = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z\s+(?:INFO|WARN|ERROR|DEBUG)\s/u; -const CLAUDE_HEADER_RE = - /^(?:[\s\u2580-\u259f✢*·▗▖▘▝]+\s*)?(?:Claude\s+Code(?:\s+v?[\d.]+)?|(?:Sonnet|Haiku|Opus)\s*[\d.]+|claude-(?:sonnet|haiku|opus)-[\w.-]+|Running\s+on\s+claude)/iu; -const DIR_BREADCRUMB_RE = /^\s*~[\\/]/u; -const UI_HINT_RE = - /\b(?:Press\s*up\s*to\s*edit|tab\s*to\s*queue|bypass\s*permissions|esc\s*to\s*interrupt|paste\s*again\s*to\s*expand|shift\s*[+]?\s*tab\s*to\s*cycle|running\s+stop\s+hook|fan\s+out\s+subagents)/iu; -const VIM_MODE_RE = - /^[-\s]*--?(?:INSERT|NORMAL|VISUAL|REPLACE)--?[-\s]*$|--?(?:INSERT|NORMAL|VISUAL|REPLACE)--/u; -const CLAUDE_FOOTER_RE = - /(?:Opus|Sonnet|Haiku)\s*\d[\d.]*\s*\(?(?:1M\s*context|context)?\)?\s*ctx\s*:\s*\d+%/iu; -const THINKING_LINE_RE = new RegExp(`^[\\s${SPINNER}]*\\s*\\w[\\w\\s]*\\u2026\\s*$`, 'u'); -const THINKING_STATUS_RE = - /\b(?:thinking\s+(?:with\s+\w+\s+effort|more\s+with|harder)|↓\s*\d+\s*tokens?\b|↑\s*\d+\s*tokens?\b|crunched\s+for\s+\d|sautéed\s+for\s+\d|befuddl|flibbertigib|gitifying|flowing\s*…)/iu; -const CURSOR_ONLY_RE = /^[\s❯⎿›»◀▶←→↑↓⟨⟩⟪⟫·]+$/u; -const CURSOR_AGENT_RE = - /^(?:Cursor Agent|[\s⬡⬢]*Generating[.\s]|\[Pasted text|Auto-run all|Add a follow-up|ctrl\+c to stop|shift\+tab|Auto$|\/\s*commands|@\s*files|!\s*shell|follow-ups?\s|The user ha)/iu; -const SLASH_COMMAND_RE = /^\/\w+\s*$/u; -const MCP_JSON_KV_RE = - /^\s*"(?:type|method|params|result|id|jsonrpc|tool|name|arguments|content|role|metadata)"\s*:/u; -const MEANINGFUL_CONTENT_RE = /[a-zA-Z0-9]/u; -const MALFORMED_PTY_FRAME_RUN_RE = /(?:(?:qW0|q[A-Za-z]?0|[lmjkx]q{2,}|q{2,}[lmjkx]?)[\s|/_=\-~]*){4,}/giu; -const MALFORMED_PTY_FRAME_ONLY_RE = /^[\s|/_=\-~lmjkxqtwuvn0W]{12,}$/iu; - -export function scrubSecrets(text: string): string { - let result = text; - for (const pattern of SECRET_PATTERNS) { - result = result.replace(pattern, '[REDACTED]'); - } - return result; -} - -function stripMalformedPtyFrameGarbage(line: string): string { - const strippedRuns = line.replace(MALFORMED_PTY_FRAME_RUN_RE, ' '); - const compact = strippedRuns.replace(SPINNER_RE, '').replace(/\s+/g, ''); - if (compact.length >= 12 && MALFORMED_PTY_FRAME_ONLY_RE.test(compact)) { - return ''; - } - return strippedRuns; -} - -export function scrubForChannel(text: string): string { - // Strip system-reminder blocks (closed or unclosed) iteratively to avoid - // polynomial backtracking (ReDoS) with [\s\S]*? on adversarial input. - let withoutSystemReminders = text; - const openTag = ''; - const closeTag = ''; - let idx: number; - while ((idx = withoutSystemReminders.toLowerCase().indexOf(openTag)) !== -1) { - const closeIdx = withoutSystemReminders.toLowerCase().indexOf(closeTag, idx + openTag.length); - if (closeIdx !== -1) { - withoutSystemReminders = - withoutSystemReminders.slice(0, idx) + withoutSystemReminders.slice(closeIdx + closeTag.length); - } else { - // Unclosed tag — strip everything from the opening tag onward - withoutSystemReminders = withoutSystemReminders.slice(0, idx); - break; - } - } - - // Normalize CRLF and bare \r before stripping ANSI — PTY output often - // contains \r\r\n which leaves stray \r after stripping that confuse line splitting. - const normalized = withoutSystemReminders.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); - const ansiStripped = stripAnsiFn(normalized); - - // Redact secrets before further processing - const secretsRedacted = scrubSecrets(ansiStripped); - - const countJsonDepth = (line: string): number => { - let depth = 0; - for (const ch of line) { - if (ch === '{' || ch === '[') depth += 1; - if (ch === '}' || ch === ']') depth -= 1; - } - return depth; - }; - - const lines = secretsRedacted.split('\n'); - const meaningful: string[] = []; - let jsonDepth = 0; - - for (const line of lines) { - const cleanedLine = stripMalformedPtyFrameGarbage(line); - const trimmed = cleanedLine.trim(); - - if (jsonDepth > 0) { - jsonDepth += countJsonDepth(cleanedLine); - if (jsonDepth <= 0) jsonDepth = 0; - continue; - } - - if (trimmed.length === 0) continue; - - if (trimmed.startsWith('{') || /^\[\s*\{/.test(trimmed)) { - jsonDepth = Math.max(countJsonDepth(cleanedLine), 0); - continue; - } - - if (MCP_JSON_KV_RE.test(cleanedLine)) continue; - if (SPINNER_CLASS_RE.test(trimmed)) continue; - if (BOX_DRAWING_ONLY_RE.test(trimmed)) continue; - if (BROKER_LOG_RE.test(trimmed)) continue; - if (CLAUDE_HEADER_RE.test(trimmed)) continue; - if (DIR_BREADCRUMB_RE.test(trimmed)) continue; - if (UI_HINT_RE.test(trimmed)) continue; - if (VIM_MODE_RE.test(trimmed)) continue; - if (CLAUDE_FOOTER_RE.test(trimmed)) continue; - if (THINKING_LINE_RE.test(trimmed)) continue; - if (THINKING_STATUS_RE.test(trimmed)) continue; - if (CURSOR_ONLY_RE.test(trimmed)) continue; - if (CURSOR_AGENT_RE.test(trimmed)) continue; - if (SLASH_COMMAND_RE.test(trimmed)) continue; - if (!MEANINGFUL_CONTENT_RE.test(trimmed)) continue; - - const alphanum = trimmed.replace(SPINNER_RE, '').replace(/\s+/g, ''); - if (alphanum.replace(/[^a-zA-Z0-9]/g, '').length <= 3) continue; - - meaningful.push(cleanedLine); - } - - return meaningful - .join('\n') - .replace(/\n{3,}/g, '\n\n') - .trim(); -} - -export class ChannelMessenger { - private readonly postFn?: (text: string) => void; - - constructor(options: ChannelMessengerOptions = {}) { - this.postFn = options.postFn; - } - - buildNonInteractiveAwareness( - agentMap: Map, - stepStates: Map - ): string | undefined { - const nonInteractive = [...agentMap.values()].filter((agent) => agent.interactive === false); - if (nonInteractive.length === 0) return undefined; - - const agentToSteps = new Map(); - for (const [stepName, state] of stepStates) { - const agentName = state.row.agentName; - if (!agentName) continue; - if (!agentToSteps.has(agentName)) agentToSteps.set(agentName, []); - agentToSteps.get(agentName)!.push(stepName); - } - - const lines = nonInteractive.map((agent) => { - const stepRefs = (agentToSteps.get(agent.name) ?? []).map((stepName) => `{{steps.${stepName}.output}}`); - return ( - `- ${agent.name} (${agent.cli}) — will return output when complete` + - (stepRefs.length > 0 ? `. Access via: ${stepRefs.join(', ')}` : '') - ); - }); - - return ( - '\n\n---\n' + - 'Note: The following agents are non-interactive workers and cannot receive messages:\n' + - lines.join('\n') + - '\n' + - 'Do NOT attempt to message these agents. Use the {{steps..output}} references above to access their results.' - ); - } - - buildRelayRegistrationNote(cli: string, agentName: string): string { - if (cli === 'claude') return ''; - return ( - '---\n' + - 'RELAY SETUP — do this FIRST before any other relay tool:\n' + - `1. Call: register_agent(name="${agentName}")\n` + - ' This authenticates you in the Relaycast workspace.\n' + - ' ALL relay tools (mcp__relaycast__send_dm, mcp__relaycast__check_inbox, mcp__relaycast__post_message, etc.) require\n' + - ' registration first — they will fail with "Not registered" otherwise.\n' + - `2. Your agent name is "${agentName}" — use this exact name when registering.` - ); - } - - buildDelegationGuidance(cli: string, timeoutMs?: number): string { - const timeoutNote = timeoutMs - ? `You have approximately ${Math.round(timeoutMs / 60000)} minutes before this step times out. ` + - 'Plan accordingly — delegate early if the work is substantial.\n\n' - : ''; - const subAgentOption = - cli === 'claude' - ? 'Option 2 — Use built-in sub-agents (Task tool) for research or scoped work:\n' + - ' - Good for exploring code, reading files, or making targeted changes\n' + - ' - Can run multiple sub-agents in parallel\n\n' - : ''; - - return ( - '---\n' + - 'AUTONOMOUS DELEGATION — READ THIS BEFORE STARTING:\n' + - timeoutNote + - 'Before diving in, assess whether this task is too large or complex for a single agent. ' + - 'If it involves multiple independent subtasks, touches many files, or could take a long time, ' + - 'you should break it down and delegate to helper agents to avoid timeouts.\n\n' + - 'Option 1 — Spawn relay agents (for real parallel coding work):\n' + - ' - mcp__relaycast__add_agent(name="helper-1", cli="claude", task="Specific subtask description")\n' + - ' - Coordinate via mcp__relaycast__send_dm(to="helper-1", text="...")\n' + - ' - Check on them with mcp__relaycast__check_inbox()\n' + - ' - Clean up when done: mcp__relaycast__remove_agent(name="helper-1")\n\n' + - subAgentOption + - 'Guidelines:\n' + - '- You are the lead — delegate but stay in control, track progress, integrate results\n' + - '- Give each helper a clear, self-contained task with enough context to work independently\n' + - "- For simple or quick work, just do it yourself — don't over-delegate\n" + - '- Always release spawned relay agents when their work is complete\n' + - '- When spawning non-claude agents (codex, gemini, etc.), prepend to their task:\n' + - ' "RELAY SETUP: First call register_agent(name=\'\') before any other relay tool."' - ); - } - - postCompletionReport( - workflowName: string, - outcomes: StepOutcome[], - summary: string, - confidence: number - ): void { - const completed = outcomes.filter((outcome) => outcome.status === 'completed'); - const skipped = outcomes.filter((outcome) => outcome.status === 'skipped'); - const retried = outcomes.filter((outcome) => outcome.attempts > 1); - - const lines: string[] = [ - `## Workflow **${workflowName}** — Complete`, - '', - summary, - `Confidence: ${Math.round(confidence * 100)}%`, - '', - '### Steps', - ...completed.map( - (outcome) => - `- **${outcome.name}** (${outcome.agent}) — passed${outcome.verificationPassed ? ' (verified)' : ''}${outcome.attempts > 1 ? ` after ${outcome.attempts} attempts` : ''}` - ), - ...skipped.map((outcome) => `- **${outcome.name}** — skipped`), - ]; - - if (retried.length > 0) { - lines.push('', '### Retries'); - for (const outcome of retried) { - lines.push(`- ${outcome.name}: ${outcome.attempts} attempts`); - } - } - - this.postFn?.(lines.join('\n')); - } - - postFailureReport(workflowName: string, outcomes: StepOutcome[], errorMsg: string): void { - const completed = outcomes.filter((outcome) => outcome.status === 'completed'); - const failed = outcomes.filter((outcome) => outcome.status === 'failed'); - const skipped = outcomes.filter((outcome) => outcome.status === 'skipped'); - - const lines: string[] = [ - `## Workflow **${workflowName}** — Failed`, - '', - `${completed.length}/${outcomes.length} steps passed. Error: ${errorMsg}`, - '', - '### Steps', - ...completed.map((outcome) => `- **${outcome.name}** (${outcome.agent}) — passed`), - ...failed.map( - (outcome) => `- **${outcome.name}** (${outcome.agent}) — FAILED: ${outcome.error ?? 'unknown'}` - ), - ...skipped.map((outcome) => `- **${outcome.name}** — skipped`), - ]; - - this.postFn?.(lines.join('\n')); - } -} diff --git a/packages/sdk/src/workflows/cli-session-collector.ts b/packages/sdk/src/workflows/cli-session-collector.ts deleted file mode 100644 index 2e7deaf3f..000000000 --- a/packages/sdk/src/workflows/cli-session-collector.ts +++ /dev/null @@ -1,58 +0,0 @@ -import type { AgentCli } from './types.js'; -import { ClaudeCodeCollector } from './collectors/claude.js'; -import { CodexCollector } from './collectors/codex.js'; -import { OpenCodeCollector } from './collectors/opencode.js'; - -export interface CliSessionReport { - cli: AgentCli; - sessionId: string | null; - model: string | null; - provider: string | null; - durationMs: number | null; - cost: number | null; - tokens: { - input: number; - output: number; - cacheRead: number; - } | null; - turns: number; - toolCalls: { name: string; count: number }[]; - errors: { turn: number; text: string }[]; - finalStatus: 'completed' | 'failed' | 'unknown'; - summary: string | null; - raw?: object; -} - -export interface CliSessionQuery { - cli: AgentCli; - cwd: string; - startedAt: number; - completedAt: number; -} - -export interface CliSessionCollector { - canCollect(): boolean; - collect(query: CliSessionQuery): Promise; -} - -export function createCollector(cli: AgentCli): CliSessionCollector | null { - switch (cli) { - case 'opencode': - return new OpenCodeCollector(); - case 'claude': - return new ClaudeCodeCollector(); - case 'codex': - return new CodexCollector(); - default: - return null; - } -} - -export async function collectCliSession(query: CliSessionQuery): Promise { - const collector = createCollector(query.cli); - if (!collector || !collector.canCollect()) { - return null; - } - - return collector.collect(query); -} diff --git a/packages/sdk/src/workflows/cli.ts b/packages/sdk/src/workflows/cli.ts deleted file mode 100644 index 3c2b550b0..000000000 --- a/packages/sdk/src/workflows/cli.ts +++ /dev/null @@ -1,481 +0,0 @@ -#!/usr/bin/env node - -/** - * CLI entry point for running relay.yaml workflows. - * - * Usage: - * relay-workflow [--workflow ] - * relay-workflow --resume - * npx @agent-relay/sdk run [--workflow ] - */ - -import path from 'node:path'; -import chalk from 'chalk'; - -import type { WorkflowEvent } from './runner.js'; -import { WorkflowRunner } from './runner.js'; -import { JsonFileWorkflowDb } from './file-db.js'; - -function printUsage(): void { - console.log( - ` -Usage: relay-workflow [options] - relay-workflow --resume - -Run a relay.yaml workflow file. - -Arguments: - Path to the relay.yaml workflow file - -Options: - --workflow Run a specific workflow by name (default: first) - --resume Resume a failed or interrupted run by its run ID - --start-from Start from a specific step, skipping predecessors - --previous-run-id Use cached outputs from a specific prior run (with --start-from) - --validate Validate workflow YAML for common issues without running - --help Show this help message - -Examples: - relay-workflow workflows/daytona-migration.yaml - relay-workflow workflows/feature-dev.yaml --workflow build-and-test - relay-workflow --resume f409ce1d1788710bcc6abb55 -`.trim() - ); -} - -type RunnerConfig = Awaited>; - -type RunnerResult = Awaited>; - -type ExecuteOptions = { - startFrom: string; - previousRunId?: string; -}; - -/** Flags that consume the next argument as their value. Single source of truth for CLI parsing. */ -const FLAGS_WITH_VALUES = new Set(['--resume', '--workflow', '--start-from', '--previous-run-id']); - -function getYamlPathArg(args: string[]): string | undefined { - for (let i = 0; i < args.length; i += 1) { - const arg = args[i]; - if (arg.startsWith('--')) { - if (FLAGS_WITH_VALUES.has(arg)) i += 1; - continue; - } - return arg; - } - return undefined; -} - -interface RenderableTask { - output?: string; - title: string; -} - -interface StepHandle { - resolve: () => void; - reject: (error: Error) => void; - setOutput: (text: string) => void; - markSkipped: () => void; -} - -// Filter [broker] and [workflow HH:MM] noise while listr owns the terminal, -// but let the observer URL and channel name through. -function installOutputFilter(): () => void { - const orig = console.log.bind(console); - console.log = (...args: unknown[]) => { - const str = String(args[0] ?? ''); - if (str.includes('Observer:') || str.includes('agentrelay.com') || str.includes('Channel: wf-')) { - orig(...args); - return; - } - if (/\[broker\]/.test(str) || /\[workflow\s+\d{2}:\d{2}\]/.test(str)) return; - orig(...args); - }; - return () => { - console.log = orig; - }; -} - -async function runWithListr( - runner: WorkflowRunner, - config: RunnerConfig, - workflowName: string | undefined, - executeOptions: ExecuteOptions | undefined -): Promise { - const stepHandles = new Map(); - const restoreConsole = installOutputFilter(); - - let resolveWorkflow!: () => void; - let rejectWorkflow!: (error: Error) => void; - const workflowDone = new Promise((resolve, reject) => { - resolveWorkflow = resolve; - rejectWorkflow = reject; - }); - workflowDone.catch(() => {}); - - let setHeader: (text: string) => void = () => {}; - - const { Listr } = await import('listr2'); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const listr = new (Listr as any)( - [ - { - title: chalk.dim('Workflow starting...'), - task: async (_ctx: unknown, task: any): Promise => { - setHeader = (text: string): void => { - task.title = text; - }; - await workflowDone; - }, - }, - ], - { - concurrent: true, - renderer: process.stdout.isTTY ? 'default' : 'verbose', - rendererOptions: { - collapseErrors: false, - showErrorMessage: true, - }, - } - ); - - runner.on((event: WorkflowEvent) => { - switch (event.type) { - case 'run:started': { - setHeader(chalk.dim(`[workflow] run ${event.runId.slice(0, 8)}...`)); - break; - } - - case 'step:started': { - let resolveStep!: () => void; - let rejectStep!: (error: Error) => void; - let taskRef: RenderableTask | null = null; - let skipped = false; - - const done = new Promise((resolve, reject) => { - resolveStep = resolve; - rejectStep = reject; - }); - done.catch(() => {}); - - stepHandles.set(event.stepName, { - resolve: resolveStep, - reject: rejectStep, - setOutput: (text: string) => { - if (taskRef) { - taskRef.output = text; - } - }, - markSkipped: () => { - skipped = true; - if (taskRef) { - taskRef.title = chalk.dim(`${event.stepName} (skipped)`); - } - }, - }); - - listr.add({ - title: chalk.white(event.stepName), - task: async (_ctx: unknown, task: any): Promise => { - taskRef = task as RenderableTask; - if (skipped) { - taskRef.title = chalk.dim(`${event.stepName} (skipped)`); - } - await done; - }, - rendererOptions: { - persistentOutput: true, - }, - }); - break; - } - - case 'step:owner-assigned': { - const handle = stepHandles.get(event.stepName); - if (handle) { - handle.setOutput( - chalk.dim(`> Owner: ${event.ownerName}`) + - (event.specialistName ? chalk.dim(` - specialist: ${event.specialistName}`) : '') - ); - } - break; - } - - case 'step:retrying': { - const handle = stepHandles.get(event.stepName); - if (handle) { - handle.setOutput(chalk.yellow(`Retrying (attempt ${event.attempt})`)); - } - break; - } - - case 'step:nudged': { - const handle = stepHandles.get(event.stepName); - if (handle) { - handle.setOutput(chalk.dim(`> Nudge #${event.nudgeCount}`)); - } - break; - } - - case 'step:force-released': { - const handle = stepHandles.get(event.stepName); - if (handle) { - handle.setOutput(chalk.yellow('> Force-released')); - } - break; - } - - case 'step:review-completed': { - const handle = stepHandles.get(event.stepName); - if (handle) { - handle.setOutput(chalk.dim(`> Review: ${event.decision} by ${event.reviewerName}`)); - } - break; - } - - case 'step:owner-timeout': { - const handle = stepHandles.get(event.stepName); - if (handle) { - handle.setOutput(chalk.red(`> Owner ${event.ownerName} timed out`)); - } - break; - } - - case 'step:agent-report': { - const handle = stepHandles.get(event.stepName); - if (handle) { - const model = event.report.model ? `:${event.report.model}` : ''; - handle.setOutput(chalk.dim(`> Report collected (${event.report.cli}${model})`)); - } - break; - } - - case 'step:completed': { - stepHandles.get(event.stepName)?.resolve(); - break; - } - - case 'step:skipped': { - const handle = stepHandles.get(event.stepName); - if (handle) { - handle.markSkipped(); - handle.resolve(); - } else { - // Step was skipped without ever being started (downstream of a failure). - // Add an already-resolved task so it shows in the listr output. - listr.add({ - title: chalk.dim(`${event.stepName} (skipped)`), - task: async (): Promise => {}, - rendererOptions: { persistentOutput: true }, - }); - } - break; - } - - case 'step:failed': { - stepHandles.get(event.stepName)?.reject(new Error(event.error ?? 'Step failed')); - break; - } - - case 'run:completed': { - setHeader(chalk.green('Workflow completed')); - resolveWorkflow(); - break; - } - - case 'run:failed': { - setHeader(chalk.red(`Workflow failed: ${event.error}`)); - rejectWorkflow(new Error(event.error ?? 'Workflow failed')); - break; - } - - case 'run:cancelled': { - setHeader(chalk.yellow('Workflow cancelled')); - resolveWorkflow(); - break; - } - - case 'broker:event': - break; - - default: { - const _exhaustive: never = event; - void _exhaustive; - } - } - }); - - const [result] = await Promise.all([ - runner.execute(config, workflowName, undefined, executeOptions), - listr.run().catch(() => { - // Step failures are already represented in runner result. - }), - ]); - - restoreConsole(); - return result; -} - -async function main(): Promise { - const args = process.argv.slice(2); - const yamlPath = getYamlPathArg(args); - - if (args.length === 0 || args.includes('--help')) { - printUsage(); - process.exit(args.includes('--help') ? 0 : 1); - } - - // Use a file-backed DB so runs survive process restarts and --resume works. - const dbPath = path.join(process.cwd(), '.agent-relay', 'workflow-runs.jsonl'); - const fileDb = new JsonFileWorkflowDb(dbPath); - if (!fileDb.isWritable()) { - console.warn( - `[workflow] warning: cannot write to ${dbPath} — run state will not be persisted (--resume unavailable)` - ); - } - - const runner = new WorkflowRunner({ db: fileDb }); - let shuttingDown = false; - const shutdown = async (signal: string): Promise => { - if (shuttingDown) return; - shuttingDown = true; - console.log(`\n[workflow] ${signal} received — shutting down broker...`); - await runner.relay?.shutdown().catch(() => undefined); - process.exit(130); - }; - process.on('SIGINT', () => void shutdown('SIGINT')); - process.on('SIGTERM', () => void shutdown('SIGTERM')); - - // ── Resume mode ──────────────────────────────────────────────────────────── - const resumeIdx = args.indexOf('--resume'); - if (resumeIdx !== -1) { - const runId = args[resumeIdx + 1]; - if (!runId) { - console.error(chalk.red('Error: --resume requires a run ID')); - process.exit(1); - } - - console.log(chalk.dim(`Resuming run ${runId}...`)); - runner.on((event: WorkflowEvent) => { - const ts = new Date().toISOString().slice(11, 19); - switch (event.type) { - case 'step:started': - console.log(chalk.dim(`[${ts}]`), chalk.white(event.stepName), chalk.dim('started')); - break; - case 'step:completed': - console.log(chalk.dim(`[${ts}]`), chalk.green('✔'), event.stepName); - break; - case 'step:failed': - console.log(chalk.dim(`[${ts}]`), chalk.red('✗'), event.stepName, chalk.red(event.error ?? '')); - break; - case 'step:skipped': - console.log(chalk.dim(`[${ts}]`), chalk.dim('⊘'), chalk.dim(event.stepName)); - break; - default: - break; - } - }); - let result: RunnerResult; - try { - const resumeConfig = yamlPath ? await runner.parseYamlFile(yamlPath) : undefined; - if (resumeConfig) { - console.warn( - chalk.yellow( - '[workflow] warning: resuming with current config from disk — ' + - 'if the workflow YAML changed since the original run, behaviour may differ' - ) - ); - } - result = await runner.resume(runId, undefined, resumeConfig); - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - const isRunNotFound = message.startsWith(`Run "${runId}" not found`); - if (isRunNotFound) { - if (fileDb.hasStepOutputs(runId)) { - console.error( - chalk.red( - `Error: ${message}. Step outputs exist for this run, but persisted run state is missing from ${dbPath}. ` + - `Use --start-from with --previous-run-id ${runId} to recover from the cached step outputs instead.` - ) - ); - } else { - console.error(chalk.red(`Error: ${message}`)); - } - } else { - console.error(chalk.red(`Error: ${message}`)); - } - process.exit(1); - } - - if (result.status === 'completed') { - console.log(chalk.green('\nWorkflow completed successfully.')); - process.exit(0); - } else { - console.error(chalk.red(`\nWorkflow ${result.status}${result.error ? `: ${result.error}` : ''}`)); - process.exit(1); - } - return; - } - - // ── Normal / validate / dry-run mode ────────────────────────────────────── - let workflowName: string | undefined; - - const workflowIdx = args.indexOf('--workflow'); - if (workflowIdx !== -1 && args[workflowIdx + 1]) { - workflowName = args[workflowIdx + 1]; - } - - let startFromStep: string | undefined; - const startFromIdx = args.indexOf('--start-from'); - if (startFromIdx !== -1 && args[startFromIdx + 1]) { - startFromStep = args[startFromIdx + 1]; - } - - let previousRunId: string | undefined; - const prevRunIdx = args.indexOf('--previous-run-id'); - if (prevRunIdx !== -1 && args[prevRunIdx + 1]) { - previousRunId = args[prevRunIdx + 1]; - } - - if (!yamlPath) { - console.error(chalk.red('Error: workflow YAML path is required')); - printUsage(); - process.exit(1); - } - - const isValidate = args.includes('--validate'); - const isDryRun = !!process.env.DRY_RUN; - - const config = await runner.parseYamlFile(yamlPath); - - if (isValidate) { - const { validateWorkflow, formatValidationReport } = await import('./validator.js'); - const issues = validateWorkflow(config); - console.log(formatValidationReport(issues, yamlPath)); - process.exit(issues.some((issue) => issue.severity === 'error') ? 1 : 0); - } - - if (isDryRun) { - const { formatDryRunReport } = await import('./dry-run-format.js'); - const report = runner.dryRun(config, workflowName); - console.log(formatDryRunReport(report)); - process.exit(report.valid ? 0 : 1); - } - - const executeOptions = startFromStep ? { startFrom: startFromStep, previousRunId } : undefined; - const result = await runWithListr(runner, config, workflowName, executeOptions); - - if (result.status === 'completed') { - console.log(chalk.green('\nWorkflow completed successfully.')); - process.exit(0); - } else { - console.error(chalk.red(`\nWorkflow ${result.status}${result.error ? `: ${result.error}` : ''}`)); - process.exit(1); - } -} - -main().catch((err: Error) => { - console.error(chalk.red(`Error: ${err.message}`)); - process.exit(1); -}); diff --git a/packages/sdk/src/workflows/cloud-runner.ts b/packages/sdk/src/workflows/cloud-runner.ts deleted file mode 100644 index de653f3f1..000000000 --- a/packages/sdk/src/workflows/cloud-runner.ts +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Cloud workflow runner — submits workflows to AgentWorkforce cloud API - * and polls for completion. - */ -import type { RelayYamlConfig, WorkflowRunRow, WorkflowRunStatus } from './types.js'; - -export interface CloudRunOptions { - cloudApiUrl: string; - cloudApiToken: string; - envSecrets?: Record; - pollIntervalMs?: number; - timeoutMs?: number; - onStatusChange?: (status: WorkflowRunStatus, runId: string) => void; -} - -export async function runInCloud(config: RelayYamlConfig, options: CloudRunOptions): Promise { - const { cloudApiUrl, cloudApiToken, envSecrets, pollIntervalMs = 3000, timeoutMs = 1800000 } = options; - const baseUrl = cloudApiUrl.replace(/\/$/, ''); - - const { stringify: stringifyYaml } = await import('yaml'); - const yamlStr = stringifyYaml(config); - - const submitRes = await fetch(`${baseUrl}/api/v1/workflows/run`, { - method: 'POST', - headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${cloudApiToken}` }, - body: JSON.stringify({ - workflow: yamlStr, - fileType: 'yaml' as const, - ...(envSecrets ? { envSecrets } : {}), - }), - }); - if (!submitRes.ok) throw new Error(`Cloud submit failed (${submitRes.status}): ${await submitRes.text()}`); - - const { runId } = (await submitRes.json()) as { runId: string; sandboxId: string; status: string }; - const deadline = Date.now() + timeoutMs; - let lastStatus: WorkflowRunStatus = 'pending'; - - while (Date.now() < deadline) { - await new Promise((r) => setTimeout(r, pollIntervalMs)); - const statusRes = await fetch(`${baseUrl}/api/v1/workflows/runs/${runId}`, { - headers: { Authorization: `Bearer ${cloudApiToken}` }, - }); - if (!statusRes.ok) continue; - - const data = (await statusRes.json()) as { - runId: string; - status: WorkflowRunStatus; - error?: string; - createdAt?: string; - updatedAt?: string; - }; - if (data.status !== lastStatus) { - lastStatus = data.status; - options.onStatusChange?.(lastStatus, runId); - } - - if (data.status === 'completed' || data.status === 'failed') { - return { - id: runId, - workspaceId: '', - workflowName: config.name ?? 'cloud-workflow', - pattern: (config.swarm?.pattern as any) ?? 'dag', - status: data.status, - config, - startedAt: data.createdAt ?? new Date().toISOString(), - completedAt: data.updatedAt ?? new Date().toISOString(), - error: data.error, - createdAt: data.createdAt ?? new Date().toISOString(), - updatedAt: data.updatedAt ?? new Date().toISOString(), - }; - } - } - throw new Error(`Cloud workflow timed out after ${timeoutMs}ms (runId: ${runId})`); -} diff --git a/packages/sdk/src/workflows/cloud-schedules.ts b/packages/sdk/src/workflows/cloud-schedules.ts deleted file mode 100644 index 49b8ae0b4..000000000 --- a/packages/sdk/src/workflows/cloud-schedules.ts +++ /dev/null @@ -1,3 +0,0 @@ -export { listWorkflowSchedules, scheduleWorkflow } from '@agent-relay/cloud'; - -export type { ScheduleWorkflowOptions, WorkflowSchedule } from '@agent-relay/cloud'; diff --git a/packages/sdk/src/workflows/collectors/claude.ts b/packages/sdk/src/workflows/collectors/claude.ts deleted file mode 100644 index 950c0c986..000000000 --- a/packages/sdk/src/workflows/collectors/claude.ts +++ /dev/null @@ -1,410 +0,0 @@ -import { createReadStream, existsSync, statSync } from 'node:fs'; -import { access } from 'node:fs/promises'; -import { homedir } from 'node:os'; -import path from 'node:path'; -import { createInterface } from 'node:readline'; - -import type { CliSessionCollector, CliSessionQuery, CliSessionReport } from '../cli-session-collector.js'; - -const CLAUDE_HOME = path.join(homedir(), '.claude'); -const HISTORY_PATH = path.join(CLAUDE_HOME, 'history.jsonl'); -const PROJECTS_PATH = path.join(CLAUDE_HOME, 'projects'); -const HISTORY_LOOKBACK_MS = 5_000; - -type JsonRecord = Record; - -interface ClaudeHistoryEntry { - display?: string; - timestamp: number; - project: string; - sessionId: string; -} - -export class ClaudeCodeCollector implements CliSessionCollector { - canCollect(): boolean { - return isReadableFile(HISTORY_PATH) && isReadableDirectory(PROJECTS_PATH); - } - - async collect(query: CliSessionQuery): Promise { - const historyEntry = await findMatchingHistoryEntry(query); - if (!historyEntry) { - return null; - } - - const sessionPath = path.join( - PROJECTS_PATH, - encodeProjectPath(historyEntry.project), - `${historyEntry.sessionId}.jsonl` - ); - if (!(await isReadableFileAsync(sessionPath))) { - return null; - } - - return parseSessionLog(sessionPath, query, historyEntry.sessionId); - } -} - -async function findMatchingHistoryEntry(query: CliSessionQuery): Promise { - const history = createInterface({ - input: createReadStream(HISTORY_PATH, { encoding: 'utf8' }), - crlfDelay: Infinity, - }); - let match: ClaudeHistoryEntry | null = null; - - try { - for await (const line of history) { - const parsed = safeParseRecord(line); - if (!parsed) { - continue; - } - - const entry = toHistoryEntry(parsed); - if (!entry) { - continue; - } - - if (entry.project !== query.cwd) { - continue; - } - - if (entry.timestamp < query.startedAt - HISTORY_LOOKBACK_MS || entry.timestamp > query.completedAt) { - continue; - } - - match = entry; - } - } finally { - history.close(); - } - - return match; -} - -async function parseSessionLog( - sessionPath: string, - query: CliSessionQuery, - sessionId: string -): Promise { - const session = createInterface({ - input: createReadStream(sessionPath, { encoding: 'utf8' }), - crlfDelay: Infinity, - }); - - const rawLines: JsonRecord[] = []; - const toolCalls = new Map(); - const errors: { turn: number; text: string }[] = []; - const tokens = { input: 0, output: 0, cacheRead: 0 }; - let turns = 0; - let summary: string | null = null; - let model: string | null = null; - let provider: string | null = null; - let finalStatus: CliSessionReport['finalStatus'] = 'unknown'; - - try { - for await (const line of session) { - const parsed = safeParseRecord(line); - if (!parsed) { - continue; - } - - rawLines.push(parsed); - - const entryType = getString(parsed.type); - if (entryType === 'user') { - turns += 1; - continue; - } - - if (entryType === 'assistant') { - const usage = extractUsage(parsed); - if (usage) { - tokens.input += usage.input; - tokens.output += usage.output; - tokens.cacheRead += usage.cacheRead; - } - - model ??= extractModel(parsed); - provider ??= extractProvider(parsed); - - const assistantText = extractText(parsed); - if (assistantText) { - summary = assistantText; - if (finalStatus !== 'failed') { - finalStatus = 'completed'; - } - } - continue; - } - - if (entryType === 'tool_use') { - const toolName = extractToolName(parsed); - if (toolName) { - toolCalls.set(toolName, (toolCalls.get(toolName) ?? 0) + 1); - } - continue; - } - - if (entryType === 'tool_result') { - const errorText = extractErrorText(parsed); - if (errorText) { - errors.push({ turn: Math.max(turns, 1), text: errorText }); - finalStatus = 'failed'; - } - } - } - } finally { - session.close(); - } - - if (rawLines.length === 0) { - return null; - } - - return { - cli: 'claude', - sessionId, - model, - provider, - durationMs: Math.max(query.completedAt - query.startedAt, 0), - cost: null, - tokens: tokens.input || tokens.output || tokens.cacheRead ? tokens : null, - turns, - toolCalls: Array.from(toolCalls, ([name, count]) => ({ name, count })), - errors, - finalStatus, - summary, - raw: { - historyPath: HISTORY_PATH, - sessionPath, - lines: rawLines, - }, - }; -} - -function toHistoryEntry(record: JsonRecord): ClaudeHistoryEntry | null { - const timestamp = getNumber(record.timestamp); - const project = getString(record.project); - const sessionId = getString(record.sessionId); - if (timestamp === null || !project || !sessionId) { - return null; - } - - return { - display: getString(record.display) ?? undefined, - timestamp, - project, - sessionId, - }; -} - -function encodeProjectPath(projectPath: string): string { - return projectPath.replace(/\//g, '--').replace(/^-+/, ''); -} - -function extractUsage(record: JsonRecord): { input: number; output: number; cacheRead: number } | null { - const usage = findNestedRecord(record, [ - 'usage', - 'message.usage', - 'metadata.usage', - 'message.metadata.usage', - ]); - if (!usage) { - return null; - } - - return { - input: firstNumber(usage, ['input_tokens', 'inputTokens']) ?? 0, - output: firstNumber(usage, ['output_tokens', 'outputTokens']) ?? 0, - cacheRead: - firstNumber(usage, ['cache_read_input_tokens', 'cacheReadInputTokens', 'cache_read_tokens']) ?? 0, - }; -} - -function extractModel(record: JsonRecord): string | null { - return ( - getString(record.model) ?? - getString(record.modelId) ?? - getString(findNestedValue(record, ['message.model', 'message.modelId', 'metadata.model'])) - ); -} - -function extractProvider(record: JsonRecord): string | null { - return ( - getString(record.provider) ?? - getString(record.providerId) ?? - getString(findNestedValue(record, ['message.provider', 'message.providerId', 'metadata.provider'])) ?? - 'anthropic' - ); -} - -function extractToolName(record: JsonRecord): string | null { - return ( - getString(record.name) ?? - getString(record.tool_name) ?? - getString(findNestedValue(record, ['tool.name', 'content.name'])) - ); -} - -function extractErrorText(record: JsonRecord): string | null { - const candidates = [ - getString(record.error), - getString(findNestedValue(record, ['content.error', 'result.error', 'data.error', 'payload.error'])), - extractText(record), - ]; - - for (const candidate of candidates) { - const normalized = normalizeError(candidate); - if (normalized) { - return normalized; - } - } - - return null; -} - -function extractText(value: unknown): string | null { - if (typeof value === 'string') { - return value.trim() || null; - } - - if (!value || typeof value !== 'object') { - return null; - } - - if (Array.isArray(value)) { - const texts = value.map((entry) => extractText(entry)).filter((entry): entry is string => Boolean(entry)); - return texts.length > 0 ? texts.join('\n').trim() : null; - } - - const record = value as JsonRecord; - - if (typeof record.text === 'string' && record.text.trim()) { - return record.text.trim(); - } - - if (typeof record.content === 'string' && record.content.trim()) { - return record.content.trim(); - } - - if (Array.isArray(record.content)) { - const texts = record.content - .map((entry) => extractText(entry)) - .filter((entry): entry is string => Boolean(entry)); - if (texts.length > 0) { - return texts.join('\n').trim(); - } - } - - if (record.message && typeof record.message === 'object') { - return extractText(record.message); - } - - return null; -} - -function normalizeError(text: string | null): string | null { - if (!text) { - return null; - } - - const line = text - .split('\n') - .map((entry) => entry.trim()) - .find((entry) => /(?:^error\b|^error:|^command failed\b|^fail\b|exception|traceback)/i.test(entry)); - - return line ?? null; -} - -function safeParseRecord(line: string): JsonRecord | null { - const trimmed = line.trim(); - if (!trimmed) { - return null; - } - - try { - const parsed = JSON.parse(trimmed); - if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { - return null; - } - - return parsed as JsonRecord; - } catch { - return null; - } -} - -function findNestedRecord(root: JsonRecord, paths: string[]): JsonRecord | null { - for (const candidate of paths) { - const value = findNestedValue(root, [candidate]); - if (value && typeof value === 'object' && !Array.isArray(value)) { - return value as JsonRecord; - } - } - - return null; -} - -function findNestedValue(root: JsonRecord, paths: string[]): unknown { - for (const candidate of paths) { - let current: unknown = root; - let found = true; - - for (const segment of candidate.split('.')) { - if (!current || typeof current !== 'object' || Array.isArray(current) || !(segment in current)) { - found = false; - break; - } - current = (current as JsonRecord)[segment]; - } - - if (found) { - return current; - } - } - - return undefined; -} - -function firstNumber(record: JsonRecord, keys: string[]): number | null { - for (const key of keys) { - const value = record[key]; - if (typeof value === 'number' && Number.isFinite(value)) { - return value; - } - } - - return null; -} - -function getString(value: unknown): string | null { - return typeof value === 'string' && value.trim() ? value.trim() : null; -} - -function getNumber(value: unknown): number | null { - return typeof value === 'number' && Number.isFinite(value) ? value : null; -} - -function isReadableFile(filePath: string): boolean { - try { - return existsSync(filePath) && statSync(filePath).isFile(); - } catch { - return false; - } -} - -function isReadableDirectory(dirPath: string): boolean { - try { - return existsSync(dirPath) && statSync(dirPath).isDirectory(); - } catch { - return false; - } -} - -async function isReadableFileAsync(filePath: string): Promise { - try { - await access(filePath); - return true; - } catch { - return false; - } -} diff --git a/packages/sdk/src/workflows/collectors/codex.ts b/packages/sdk/src/workflows/collectors/codex.ts deleted file mode 100644 index 5f0ffc966..000000000 --- a/packages/sdk/src/workflows/collectors/codex.ts +++ /dev/null @@ -1,355 +0,0 @@ -import fs from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import { createRequire } from 'node:module'; - -import type { CliSessionCollector, CliSessionQuery, CliSessionReport } from '../cli-session-collector.js'; - -const require = createRequire(import.meta.url); -const CODEX_HOME = path.join(os.homedir(), '.codex'); -const DEFAULT_HISTORY_PATH = path.join(CODEX_HOME, 'history.jsonl'); -const DEFAULT_STATE_PATH = path.join(CODEX_HOME, 'state_5.sqlite'); - -type DatabaseInstance = { - prepare(sql: string): { - all(params?: unknown): T[]; - }; - close?: () => void; -}; - -type DatabaseConstructor = new ( - filename: string, - options?: { readonly?: boolean; fileMustExist?: boolean } -) => DatabaseInstance; - -interface DatabaseSyncModule { - DatabaseSync: new (filename: string, options?: { readOnly?: boolean; open?: boolean }) => DatabaseInstance; -} - -interface CodexCollectorOptions { - historyPath?: string; - statePath?: string; -} - -interface HistoryEntry { - session_id?: string; - ts?: number; - text?: string; -} - -interface ThreadRow { - id: string; - cwd: string; - model_provider: string; - tokens_used: number; - created_at: number; - updated_at: number; - [key: string]: unknown; -} - -interface LogRow { - ts?: number; - level?: string; - message?: string | null; - line?: number | null; -} - -function loadBetterSqlite3(): DatabaseConstructor | null { - try { - return require('better-sqlite3') as DatabaseConstructor; - } catch { - return null; - } -} - -async function openDatabase(dbPath: string): Promise { - const BetterSqlite = loadBetterSqlite3(); - if (BetterSqlite) { - try { - return new BetterSqlite(dbPath, { readonly: true, fileMustExist: true }); - } catch { - // Fall through to node:sqlite. - } - } - - try { - const sqlite = (await import('node:sqlite')) as DatabaseSyncModule; - return new sqlite.DatabaseSync(dbPath, { readOnly: true, open: true }); - } catch { - return null; - } -} - -function normalizeTimestamp(value: unknown): number | null { - const numeric = - typeof value === 'number' && Number.isFinite(value) - ? value - : typeof value === 'string' && value.trim() - ? Number(value) - : null; - if (numeric === null || !Number.isFinite(numeric)) { - return null; - } - - return numeric < 10_000_000_000 ? numeric * 1000 : numeric; -} - -function parseJsonLine(line: string): T | null { - try { - return JSON.parse(line) as T; - } catch { - return null; - } -} - -function parseModelProvider(value: string | null | undefined): { - provider: string | null; - model: string | null; -} { - if (!value) { - return { provider: null, model: null }; - } - - if (value.includes('/')) { - const [provider, ...rest] = value.split('/'); - return { - provider: provider || null, - model: rest.join('/') || null, - }; - } - - if (value.includes(':')) { - const [provider, ...rest] = value.split(':'); - return { - provider: provider || null, - model: rest.join(':') || null, - }; - } - - return { - provider: value, - model: null, - }; -} - -function getNumericField(row: ThreadRow, fieldNames: string[]): number | null { - for (const fieldName of fieldNames) { - const value = row[fieldName]; - if (typeof value === 'number' && Number.isFinite(value)) { - return value; - } - } - - return null; -} - -function extractTokens(row: ThreadRow): CliSessionReport['tokens'] { - const input = getNumericField(row, ['input_tokens', 'prompt_tokens', 'tokens_input']); - const output = getNumericField(row, ['output_tokens', 'completion_tokens', 'tokens_output']); - const cacheRead = getNumericField(row, ['cache_read_tokens', 'tokens_cache_read', 'cached_input_tokens']); - - if (input !== null || output !== null || cacheRead !== null) { - return { - input: input ?? 0, - output: output ?? 0, - cacheRead: cacheRead ?? 0, - }; - } - - return typeof row.tokens_used === 'number' - ? { - input: row.tokens_used, - output: 0, - cacheRead: 0, - } - : null; -} - -export class CodexCollector implements CliSessionCollector { - private readonly historyPath: string; - private readonly statePath: string; - - constructor(options: CodexCollectorOptions = {}) { - this.historyPath = options.historyPath ?? DEFAULT_HISTORY_PATH; - this.statePath = options.statePath ?? DEFAULT_STATE_PATH; - } - - canCollect(): boolean { - return fs.existsSync(this.statePath) || fs.existsSync(this.historyPath); - } - - async collect(query: CliSessionQuery): Promise { - const historyEntries = this.readHistoryEntries(); - const matchedThread = await this.findMatchingThread(query); - - if (matchedThread) { - const errors = await this.readThreadErrors(matchedThread.id); - const { provider, model } = parseModelProvider(matchedThread.model_provider); - const createdAtMs = normalizeTimestamp(matchedThread.created_at); - const updatedAtMs = normalizeTimestamp(matchedThread.updated_at); - - return { - cli: 'codex', - sessionId: matchedThread.id, - model, - provider, - durationMs: - createdAtMs !== null && updatedAtMs !== null && updatedAtMs >= createdAtMs - ? updatedAtMs - createdAtMs - : Math.max(query.completedAt - query.startedAt, 0), - cost: null, - tokens: extractTokens(matchedThread), - turns: historyEntries.filter((entry) => entry.session_id === matchedThread.id).length, - toolCalls: [], - errors, - finalStatus: errors.length > 0 ? 'failed' : 'unknown', - summary: null, - raw: { - matchedVia: 'threads', - thread: matchedThread, - }, - }; - } - - const historyMatch = this.findMatchingHistoryEntry(query, historyEntries); - if (!historyMatch) { - return null; - } - - return { - cli: 'codex', - sessionId: historyMatch.session_id ?? null, - model: null, - provider: null, - durationMs: Math.max(query.completedAt - query.startedAt, 0), - cost: null, - tokens: null, - turns: historyMatch.session_id - ? historyEntries.filter((entry) => entry.session_id === historyMatch.session_id).length - : 0, - toolCalls: [], - errors: [], - finalStatus: 'unknown', - summary: null, - raw: { - matchedVia: 'history', - entry: historyMatch, - }, - }; - } - - private readHistoryEntries(): HistoryEntry[] { - if (!fs.existsSync(this.historyPath)) { - return []; - } - - try { - return fs - .readFileSync(this.historyPath, 'utf8') - .split(/\r?\n/) - .map((line) => line.trim()) - .filter(Boolean) - .flatMap((line) => { - const parsed = parseJsonLine(line); - return parsed ? [parsed] : []; - }); - } catch { - return []; - } - } - - private findMatchingHistoryEntry(query: CliSessionQuery, entries: HistoryEntry[]): HistoryEntry | null { - for (let index = entries.length - 1; index >= 0; index -= 1) { - const entry = entries[index]; - const timestamp = normalizeTimestamp(entry.ts); - if (timestamp === null) { - continue; - } - - if (timestamp >= query.startedAt && timestamp <= query.completedAt) { - return entry; - } - } - - return null; - } - - private async findMatchingThread(query: CliSessionQuery): Promise { - if (!fs.existsSync(this.statePath)) { - return null; - } - - const db = await openDatabase(this.statePath); - if (!db) { - return null; - } - - try { - const threads = db - .prepare( - ` - SELECT * - FROM threads - WHERE cwd = ? - ORDER BY created_at DESC - LIMIT 100 - ` - ) - .all(query.cwd); - - return ( - threads.find((thread) => { - const createdAt = normalizeTimestamp(thread.created_at); - return createdAt !== null && createdAt >= query.startedAt && createdAt <= query.completedAt; - }) ?? null - ); - } catch { - return null; - } finally { - db.close?.(); - } - } - - private async readThreadErrors(threadId: string): Promise { - if (!fs.existsSync(this.statePath)) { - return []; - } - - const db = await openDatabase(this.statePath); - if (!db) { - return []; - } - - try { - const rows = db - .prepare( - ` - SELECT ts, level, message, line - FROM logs - WHERE thread_id = ? - AND lower(level) = 'error' - ORDER BY ts ASC - ` - ) - .all(threadId); - - return rows - .map((row, index) => { - const message = typeof row.message === 'string' ? row.message.trim() : ''; - if (!message) { - return null; - } - - return { - turn: index + 1, - text: message, - }; - }) - .filter((row): row is { turn: number; text: string } => row !== null); - } catch { - return []; - } finally { - db.close?.(); - } - } -} diff --git a/packages/sdk/src/workflows/collectors/opencode.ts b/packages/sdk/src/workflows/collectors/opencode.ts deleted file mode 100644 index 4ceb7884b..000000000 --- a/packages/sdk/src/workflows/collectors/opencode.ts +++ /dev/null @@ -1,321 +0,0 @@ -import fs from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; -import { createRequire } from 'node:module'; - -import type { CliSessionCollector, CliSessionQuery, CliSessionReport } from '../cli-session-collector.js'; - -const require = createRequire(import.meta.url); -const OPENCODE_DB_PATH = path.join(os.homedir(), '.local', 'share', 'opencode', 'opencode.db'); -const MATCH_WINDOW_GRACE_MS = 5_000; -const ERROR_LINE_PATTERN = /^(Error|error:|Command failed|FAIL)\b/; - -type DatabaseInstance = { - prepare(sql: string): { - get(params?: unknown): T | undefined; - all(params?: unknown): T[]; - }; - pragma(source: string): unknown; - close(): void; -}; - -type DatabaseConstructor = new ( - filename: string, - options?: { readonly?: boolean; fileMustExist?: boolean } -) => DatabaseInstance; - -interface SessionRow { - id: string; - directory: string; - time_created: number; -} - -interface MessageRow { - id: string; - session_id: string; - time_created: number; - data: string; -} - -interface PartRow { - id: string; - message_id: string; - session_id: string; - time_created: number; - data: string; -} - -interface OpenCodeMessageData { - role?: string; - modelID?: string; - providerID?: string; - cost?: number; - finish?: string; - tokens?: { - input?: number; - output?: number; - cache?: { - read?: number; - }; - }; -} - -interface OpenCodePartData { - type?: string; - text?: string; - name?: string; -} - -function loadDatabaseConstructor(): DatabaseConstructor | null { - try { - return require('better-sqlite3') as DatabaseConstructor; - } catch { - // fall through - } - - // Fall back to Node 22+ native node:sqlite (experimental) - try { - // eslint-disable-next-line @typescript-eslint/no-require-imports - const { DatabaseSync } = require('node:sqlite'); - return function NativeSqliteWrapper( - filename: string, - options?: { readonly?: boolean; fileMustExist?: boolean } - ) { - const db = new DatabaseSync(filename, { open: true, readOnly: options?.readonly ?? false }); - return { - prepare(sql: string) { - const stmt = db.prepare(sql); - return { - get(params?: unknown): T | undefined { - return params != null ? (stmt.get(params) as T | undefined) : (stmt.get() as T | undefined); - }, - all(params?: unknown): T[] { - return (params != null ? stmt.all(params) : stmt.all()) as T[]; - }, - }; - }, - pragma(source: string) { - db.exec(`PRAGMA ${source}`); - return undefined; - }, - close() { - db.close(); - }, - }; - } as unknown as DatabaseConstructor; - } catch { - return null; - } -} - -function parseJson(value: string): T | null { - try { - return JSON.parse(value) as T; - } catch { - return null; - } -} - -function toNumber(value: unknown): number { - return typeof value === 'number' && Number.isFinite(value) ? value : 0; -} - -function normalizeStatus(finish: string | undefined, hasErrors: boolean): CliSessionReport['finalStatus'] { - if (finish === 'stop' || finish === 'completed') { - return 'completed'; - } - - if (finish === 'error' || finish === 'failed' || hasErrors) { - return 'failed'; - } - - return 'unknown'; -} - -function isToolPart(part: OpenCodePartData | null): part is OpenCodePartData { - return !!part?.type && part.type.toLowerCase().includes('tool'); -} - -export class OpenCodeCollector implements CliSessionCollector { - canCollect(): boolean { - if (!fs.existsSync(OPENCODE_DB_PATH)) { - return false; - } - - const Database = loadDatabaseConstructor(); - if (!Database) { - return false; - } - - let db: DatabaseInstance | null = null; - - try { - db = new Database(OPENCODE_DB_PATH, { readonly: true, fileMustExist: true }); - db.pragma('query_only = ON'); - db.prepare('SELECT 1').get(); - return true; - } catch { - return false; - } finally { - db?.close(); - } - } - - async collect(query: CliSessionQuery): Promise { - const Database = loadDatabaseConstructor(); - if (!Database) { - return null; - } - - let db: DatabaseInstance | null = null; - - try { - db = new Database(OPENCODE_DB_PATH, { readonly: true, fileMustExist: true }); - db.pragma('query_only = ON'); - - const session = db - .prepare( - ` - SELECT id, directory, time_created - FROM session - WHERE directory = @cwd - AND time_created BETWEEN @startedAt AND @completedAt - ORDER BY time_created DESC - LIMIT 1 - ` - ) - .get({ - cwd: query.cwd, - startedAt: query.startedAt - MATCH_WINDOW_GRACE_MS, - completedAt: query.completedAt, - }); - - if (!session) { - return null; - } - - const messages = db - .prepare( - ` - SELECT id, session_id, time_created, data - FROM message - WHERE session_id = ? - ORDER BY time_created ASC - ` - ) - .all(session.id); - - const parts = db - .prepare( - ` - SELECT id, message_id, session_id, time_created, data - FROM part - WHERE session_id = ? - ORDER BY time_created ASC - ` - ) - .all(session.id); - - const parsedMessages = messages.map((message) => ({ - ...message, - parsed: parseJson(message.data), - })); - const parsedParts = parts.map((part) => ({ - ...part, - parsed: parseJson(part.data), - })); - - const lastMessageWithMetadata = [...parsedMessages] - .reverse() - .find((message) => message.parsed?.modelID || message.parsed?.providerID || message.parsed?.finish); - - const tokenTotals = parsedMessages.reduce( - (totals, message) => { - const tokens = message.parsed?.tokens; - totals.input += toNumber(tokens?.input); - totals.output += toNumber(tokens?.output); - totals.cacheRead += toNumber(tokens?.cache?.read); - return totals; - }, - { input: 0, output: 0, cacheRead: 0 } - ); - - const hasCostData = parsedMessages.some( - (message) => typeof message.parsed?.cost === 'number' && Number.isFinite(message.parsed.cost) - ); - const totalCost = parsedMessages.reduce((sum, message) => sum + toNumber(message.parsed?.cost), 0); - - const toolCallCounts = new Map(); - for (const part of parsedParts) { - if (!isToolPart(part.parsed)) { - continue; - } - - const name = part.parsed.name?.trim(); - if (!name) { - continue; - } - - toolCallCounts.set(name, (toolCallCounts.get(name) ?? 0) + 1); - } - - const errors: CliSessionReport['errors'] = []; - for (const [index, part] of parsedParts.entries()) { - const text = part.parsed?.type === 'text' ? part.parsed.text : undefined; - if (!text) { - continue; - } - - for (const line of text.split(/\r?\n/)) { - const trimmed = line.trim(); - if (!trimmed || !ERROR_LINE_PATTERN.test(trimmed)) { - continue; - } - - errors.push({ turn: index + 1, text: trimmed }); - } - } - - const summary = - [...parsedParts] - .reverse() - .find((part) => part.parsed?.type === 'text' && part.parsed.text?.trim()) - ?.parsed?.text?.trim() ?? null; - - const turns = - parsedMessages.filter( - (message) => message.parsed?.role === 'assistant' || message.parsed?.role === 'user' - ).length || parsedMessages.length; - - return { - cli: 'opencode', - sessionId: session.id, - model: lastMessageWithMetadata?.parsed?.modelID ?? null, - provider: lastMessageWithMetadata?.parsed?.providerID ?? null, - durationMs: - parsedMessages.length > 0 - ? Math.max(0, parsedMessages[parsedMessages.length - 1].time_created - session.time_created) - : null, - cost: hasCostData ? totalCost : null, - tokens: tokenTotals, - turns, - toolCalls: [...toolCallCounts.entries()].map(([name, count]) => ({ name, count })), - errors, - finalStatus: normalizeStatus(lastMessageWithMetadata?.parsed?.finish, errors.length > 0), - summary, - raw: { - session, - messages: parsedMessages.map(({ parsed, ...message }) => ({ - ...message, - data: parsed ?? message.data, - })), - parts: parsedParts.map(({ parsed, ...part }) => ({ ...part, data: parsed ?? part.data })), - }, - }; - } catch { - return null; - } finally { - db?.close(); - } - } -} diff --git a/packages/sdk/src/workflows/coordinator.ts b/packages/sdk/src/workflows/coordinator.ts deleted file mode 100644 index b04e880dc..000000000 --- a/packages/sdk/src/workflows/coordinator.ts +++ /dev/null @@ -1,834 +0,0 @@ -/** - * Swarm Coordinator — pattern selection, agent topology, and workflow lifecycle. - * - * Orchestrates workflow runs: picks the right swarm pattern (or auto-selects), - * resolves agent topology from the config, and drives the run through its - * lifecycle states (pending → running → completed / failed / cancelled). - */ - -import { randomBytes } from 'node:crypto'; -import { EventEmitter } from 'node:events'; -import type { - AgentDefinition, - RelayYamlConfig, - SwarmPattern, - WorkflowRunRow, - WorkflowRunStatus, - WorkflowStepRow, - WorkflowStepStatus, -} from './types.js'; - -// ── Database interface ────────────────────────────────────────────────────── - -/** Minimal database client contract accepted by all services. */ -export interface DbClient { - query>(sql: string, params?: unknown[]): Promise<{ rows: T[] }>; -} - -// ── Topology types ────────────────────────────────────────────────────────── - -/** Describes the communication graph for a set of agents. */ -export interface AgentTopology { - pattern: SwarmPattern; - agents: AgentDefinition[]; - /** Agent name → names it can send messages to. */ - edges: Map; - /** Optional hub agent for hub-spoke / hierarchical. */ - hub?: string; - /** Ordered pipeline stages (pipeline pattern only). */ - pipelineOrder?: string[]; -} - -// ── Pattern auto-selection ────────────────────────────────────────────────── - -/** - * Mapping used when auto-selecting a pattern from config heuristics. - * The coordinator checks the config shape and picks the best match. - */ -const PATTERN_HEURISTICS: Array<{ - test: (config: RelayYamlConfig) => boolean; - pattern: SwarmPattern; -}> = [ - // ── Dependency-based patterns (highest priority) ────────────────────── - { - test: (c) => - Array.isArray(c.workflows) && c.workflows.some((w) => w.steps.some((s) => s.dependsOn?.length)), - pattern: 'dag', - }, - { - test: (c) => c.coordination?.consensusStrategy !== undefined, - pattern: 'consensus', - }, - - // ── Specific role-based patterns (check before generic hub patterns) ── - { - // Map-reduce: requires BOTH mapper AND reducer roles - test: (c) => c.agents.some((a) => a.role === 'mapper') && c.agents.some((a) => a.role === 'reducer'), - pattern: 'map-reduce', - }, - { - // Red-team: requires BOTH attacker/red-team AND defender/blue-team - test: (c) => - c.agents.some((a) => a.role === 'attacker' || a.role === 'red-team') && - c.agents.some((a) => a.role === 'defender' || a.role === 'blue-team'), - pattern: 'red-team', - }, - { - // Reflection: requires critic role (not just reviewer, which is too common) - test: (c) => c.agents.some((a) => a.role === 'critic'), - pattern: 'reflection', - }, - { - // Escalation: has tier-N roles - test: (c) => c.agents.some((a) => a.role?.startsWith('tier-')), - pattern: 'escalation', - }, - { - // Auction: has auctioneer role - test: (c) => c.agents.some((a) => a.role === 'auctioneer'), - pattern: 'auction', - }, - { - // Saga: has saga-orchestrator or compensate-handler roles - test: (c) => c.agents.some((a) => a.role === 'saga-orchestrator' || a.role === 'compensate-handler'), - pattern: 'saga', - }, - { - // Circuit-breaker: has fallback or backup roles - test: (c) => c.agents.some((a) => a.role === 'fallback' || a.role === 'backup' || a.role === 'primary'), - pattern: 'circuit-breaker', - }, - { - // Blackboard: has blackboard or shared-workspace role - test: (c) => c.agents.some((a) => a.role === 'blackboard' || a.role === 'shared-workspace'), - pattern: 'blackboard', - }, - { - // Swarm: has hive-mind or swarm-agent roles - test: (c) => c.agents.some((a) => a.role === 'hive-mind' || a.role === 'swarm-agent'), - pattern: 'swarm', - }, - { - // Verifier: has verifier role - test: (c) => c.agents.some((a) => a.role === 'verifier'), - pattern: 'verifier', - }, - { - // Supervisor: has supervisor role - test: (c) => c.agents.some((a) => a.role === 'supervisor'), - pattern: 'supervisor', - }, - { - // Review-loop: implementer + multiple reviewers (code review with feedback loop) - test: (c) => { - const hasImplementer = c.agents.some( - (a) => a.role?.toLowerCase().includes('implement') || a.name.toLowerCase().includes('implement') - ); - const reviewerCount = c.agents.filter( - (a) => a.role?.toLowerCase().includes('reviewer') || a.name.toLowerCase().includes('reviewer') - ).length; - return hasImplementer && reviewerCount >= 2; - }, - pattern: 'review-loop', - }, - - // ── Generic hub-based patterns ──────────────────────────────────────── - { - test: (c) => c.agents.length > 3 && c.agents.some((a) => a.role === 'lead'), - pattern: 'hierarchical', - }, - { - test: (c) => c.agents.some((a) => a.role === 'hub' || a.role === 'coordinator'), - pattern: 'hub-spoke', - }, - - // ── Structural patterns ─────────────────────────────────────────────── - { - test: (c) => - Array.isArray(c.workflows) && - c.workflows.some((w) => { - // Filter to only agent steps - const names = w.steps.filter((s) => s.agent).map((s) => s.agent!); - return new Set(names).size === names.length && names.length > 2; - }), - pattern: 'pipeline', - }, - - // ── Default fallback ────────────────────────────────────────────────── - { - test: () => true, - pattern: 'fan-out', - }, -]; - -// ── Coordinator events ────────────────────────────────────────────────────── - -export interface SwarmCoordinatorEvents { - 'run:created': (run: WorkflowRunRow) => void; - 'run:started': (run: WorkflowRunRow) => void; - 'run:completed': (run: WorkflowRunRow) => void; - 'run:failed': (run: WorkflowRunRow) => void; - 'run:cancelled': (run: WorkflowRunRow) => void; - 'step:started': (step: WorkflowStepRow) => void; - 'step:completed': (step: WorkflowStepRow) => void; - 'step:failed': (step: WorkflowStepRow) => void; -} - -// ── Coordinator ───────────────────────────────────────────────────────────── - -export class SwarmCoordinator extends EventEmitter { - private db: DbClient; - - constructor(db: DbClient) { - super(); - this.db = db; - } - - // ── Pattern selection ─────────────────────────────────────────────────── - - /** - * Select the swarm pattern to use for a config. If the config already - * specifies a pattern, it is returned as-is. Otherwise heuristics apply. - */ - selectPattern(config: RelayYamlConfig): SwarmPattern { - if (config.swarm.pattern) { - return config.swarm.pattern; - } - for (const h of PATTERN_HEURISTICS) { - if (h.test(config)) return h.pattern; - } - return 'fan-out'; - } - - // ── Topology resolution ───────────────────────────────────────────────── - - /** - * Build the agent communication topology for a given config and pattern. - * Non-interactive agents are excluded from message edges — they only communicate - * through step output chaining ({{steps.X.output}}). - */ - resolveTopology(config: RelayYamlConfig, pattern?: SwarmPattern): AgentTopology { - const p = pattern ?? this.selectPattern(config); - const agents = config.agents; - const edges = new Map(); - - // Non-interactive agents have no inbound or outbound message edges - const nonInteractiveNames = new Set(agents.filter((a) => a.interactive === false).map((a) => a.name)); - const names = agents.map((a) => a.name).filter((n) => !nonInteractiveNames.has(n)); - - const topology = this.resolveInteractiveTopology(p, config, agents, edges, names); - - // Apply non-interactive filtering to the actual topology edges (not the local - // `edges` variable, which may not be the same map — e.g., DAG creates its own). - const topologyEdges = topology.edges; - - // Ensure non-interactive agents have empty edge entries (no messaging) - for (const name of nonInteractiveNames) { - topologyEdges.set(name, []); - } - // Also filter out non-interactive agents from any edge targets - for (const [agent, targets] of topologyEdges) { - topologyEdges.set( - agent, - targets.filter((t) => !nonInteractiveNames.has(t)) - ); - } - - return topology; - } - - /** Internal: resolve topology edges for interactive agents only. */ - private resolveInteractiveTopology( - p: SwarmPattern, - config: RelayYamlConfig, - agents: AgentDefinition[], - edges: Map, - names: string[] - ): AgentTopology { - switch (p) { - case 'fan-out': { - // Hub (first agent or role=lead) fans out to all others; no inter-worker edges. - const hub = this.pickHub(agents); - const others = names.filter((n) => n !== hub); - edges.set(hub, others); - for (const o of others) edges.set(o, [hub]); - return { pattern: p, agents, edges, hub }; - } - - case 'pipeline': { - // Linear chain following workflow step order or agent list order. - const order = this.resolvePipelineOrder(config, names); - for (let i = 0; i < order.length; i++) { - edges.set(order[i], i < order.length - 1 ? [order[i + 1]] : []); - } - return { pattern: p, agents, edges, pipelineOrder: order }; - } - - case 'hub-spoke': { - const hub = this.pickHub(agents); - const spokes = names.filter((n) => n !== hub); - edges.set(hub, spokes); - for (const s of spokes) edges.set(s, [hub]); - return { pattern: p, agents, edges, hub }; - } - - case 'consensus': - case 'debate': - case 'mesh': { - // Full mesh — every agent can talk to every other. - for (const n of names) { - edges.set( - n, - names.filter((o) => o !== n) - ); - } - return { pattern: p, agents, edges }; - } - - case 'handoff': { - // Chain with explicit handoff: each agent passes to the next. - const order = this.resolvePipelineOrder(config, names); - for (let i = 0; i < order.length; i++) { - edges.set(order[i], i < order.length - 1 ? [order[i + 1]] : []); - } - return { pattern: p, agents, edges, pipelineOrder: order }; - } - - case 'cascade': { - // Primary tries first; on failure, falls through to next. - for (let i = 0; i < names.length; i++) { - edges.set(names[i], i < names.length - 1 ? [names[i + 1]] : []); - } - return { pattern: p, agents, edges, pipelineOrder: names }; - } - - case 'dag': { - // Edges derived from workflow step dependencies. - const stepEdges = this.resolveDAGEdges(config); - for (const n of names) { - if (!stepEdges.has(n)) stepEdges.set(n, []); - } - return { pattern: p, agents, edges: stepEdges }; - } - - case 'hierarchical': { - const hub = this.pickHub(agents); - const subordinates = names.filter((n) => n !== hub); - edges.set(hub, subordinates); - for (const s of subordinates) edges.set(s, [hub]); - return { pattern: p, agents, edges, hub }; - } - - // ── Additional patterns ──────────────────────────────────────────── - - case 'map-reduce': { - // Mappers fan out from coordinator, all feed into reducer(s) - const coordinator = this.pickHub(agents); - const mappers = agents.filter((a) => a.role === 'mapper').map((a) => a.name); - const reducers = agents.filter((a) => a.role === 'reducer').map((a) => a.name); - const others = names.filter( - (n) => n !== coordinator && !mappers.includes(n) && !reducers.includes(n) - ); - - // Coordinator → mappers (excluding self if coordinator is also a mapper) - edges.set(coordinator, [...mappers.filter((m) => m !== coordinator), ...others]); - // Mappers → reducers (skip coordinator to avoid overwriting its edges) - for (const m of mappers) { - if (m === coordinator) continue; - edges.set(m, reducers.length > 0 ? reducers : [coordinator]); - } - // Reducers → coordinator - for (const r of reducers) edges.set(r, [coordinator]); - // Others → coordinator - for (const o of others) edges.set(o, [coordinator]); - - return { pattern: p, agents, edges, hub: coordinator }; - } - - case 'scatter-gather': { - // Hub scatters to all workers, gathers responses back - const hub = this.pickHub(agents); - const workers = names.filter((n) => n !== hub); - edges.set(hub, workers); - for (const w of workers) edges.set(w, [hub]); - return { pattern: p, agents, edges, hub }; - } - - case 'supervisor': { - // Supervisor monitors all workers; workers report to supervisor - const supervisor = agents.find((a) => a.role === 'supervisor')?.name ?? this.pickHub(agents); - const workers = names.filter((n) => n !== supervisor); - edges.set(supervisor, workers); - for (const w of workers) edges.set(w, [supervisor]); - return { pattern: p, agents, edges, hub: supervisor }; - } - - case 'reflection': { - // Agent produces output, critic reviews and sends feedback - // Linear: producer → critic → producer (loop-capable) - const critic = agents.find((a) => a.role === 'critic' || a.role === 'reviewer')?.name; - const producers = names.filter((n) => n !== critic); - if (critic) { - for (const prod of producers) { - edges.set(prod, [critic]); - } - edges.set(critic, producers); - } else { - // Fallback: self-reflection via mesh - for (const n of names) - edges.set( - n, - names.filter((o) => o !== n) - ); - } - return { pattern: p, agents, edges }; - } - - case 'red-team': { - // Attacker ↔ Defender adversarial communication - const attackers = agents - .filter((a) => a.role === 'attacker' || a.role === 'red-team') - .map((a) => a.name); - const defenders = agents - .filter((a) => a.role === 'defender' || a.role === 'blue-team') - .map((a) => a.name); - const judges = names.filter((n) => !attackers.includes(n) && !defenders.includes(n)); - - // Attackers → defenders and judges - for (const a of attackers) edges.set(a, [...defenders, ...judges]); - // Defenders → attackers and judges - for (const d of defenders) edges.set(d, [...attackers, ...judges]); - // Judges receive from both, can communicate with all - for (const j of judges) edges.set(j, [...attackers, ...defenders]); - - return { pattern: p, agents, edges }; - } - - case 'verifier': { - // Producer → Verifier chain; verifier can reject back to producer - const verifiers = agents.filter((a) => a.role === 'verifier').map((a) => a.name); - const producers = names.filter((n) => !verifiers.includes(n)); - - for (const prod of producers) edges.set(prod, verifiers.length > 0 ? verifiers : []); - for (const v of verifiers) edges.set(v, producers); // Can send rejections back - - return { pattern: p, agents, edges }; - } - - case 'auction': { - // Auctioneer broadcasts tasks; bidders respond to auctioneer only - const auctioneer = agents.find((a) => a.role === 'auctioneer')?.name ?? this.pickHub(agents); - const bidders = names.filter((n) => n !== auctioneer); - edges.set(auctioneer, bidders); - for (const b of bidders) edges.set(b, [auctioneer]); - return { pattern: p, agents, edges, hub: auctioneer }; - } - - case 'escalation': { - // Tiered chain: each level can escalate to the next - // Uses agent order or tier role numbers - const order = this.resolveEscalationOrder(agents); - for (let i = 0; i < order.length; i++) { - // Each tier can escalate up and report down - const canEscalateTo = i < order.length - 1 ? [order[i + 1]] : []; - const canReportTo = i > 0 ? [order[i - 1]] : []; - edges.set(order[i], [...canEscalateTo, ...canReportTo]); - } - // Ensure non-tiered agents still have edge entries (prevents undefined) - for (const n of names) { - if (!edges.has(n)) edges.set(n, []); - } - return { pattern: p, agents, edges, pipelineOrder: order }; - } - - case 'saga': { - // Orchestrator coordinates saga steps; each step can trigger compensate - const orchestrator = agents.find((a) => a.role === 'saga-orchestrator')?.name ?? this.pickHub(agents); - const participants = names.filter((n) => n !== orchestrator); - // Orchestrator → all participants (for commands) - edges.set(orchestrator, participants); - // Participants → orchestrator (for completion/failure signals) - for (const part of participants) edges.set(part, [orchestrator]); - return { pattern: p, agents, edges, hub: orchestrator }; - } - - case 'circuit-breaker': { - // Primary agent with fallback chain - const order = names; // First agent is primary, rest are fallbacks - for (let i = 0; i < order.length; i++) { - // Each can trigger next fallback - edges.set(order[i], i < order.length - 1 ? [order[i + 1]] : []); - } - return { pattern: p, agents, edges, pipelineOrder: order }; - } - - case 'blackboard': { - // All agents can read/write to shared blackboard (full mesh) - // Plus optional moderator - const moderator = agents.find((a) => a.role === 'moderator')?.name; - for (const n of names) { - edges.set( - n, - names.filter((o) => o !== n) - ); - } - return { pattern: p, agents, edges, hub: moderator }; - } - - case 'swarm': { - // Emergent swarm: agents communicate with nearest neighbors - // For simplicity, partial mesh based on agent index proximity - const hiveMind = agents.find((a) => a.role === 'hive-mind')?.name; - for (let i = 0; i < names.length; i++) { - const neighbors: string[] = []; - if (i > 0) neighbors.push(names[i - 1]); - if (i < names.length - 1) neighbors.push(names[i + 1]); - // Also connect to hive mind if present (avoid duplicates if already adjacent) - if (hiveMind && hiveMind !== names[i] && !neighbors.includes(hiveMind)) neighbors.push(hiveMind); - edges.set(names[i], neighbors); - } - return { pattern: p, agents, edges, hub: hiveMind }; - } - - case 'review-loop': { - // Implementer is hub; reviewers can communicate with implementer AND each other - // This enables collaborative review where reviewers can discuss findings - const implementer = - agents.find( - (a) => a.role?.toLowerCase().includes('implement') || a.name.toLowerCase().includes('implement') - )?.name ?? this.pickHub(agents); - const reviewers = agents - .filter( - (a) => - a.name !== implementer && - (a.role?.toLowerCase().includes('reviewer') || a.name.toLowerCase().includes('reviewer')) - ) - .map((a) => a.name); - const others = names.filter((n) => n !== implementer && !reviewers.includes(n)); - - // Implementer → all reviewers and others - edges.set(implementer, [...reviewers, ...others]); - // Reviewers → implementer + other reviewers (collaborative review) - for (const r of reviewers) { - const otherReviewers = reviewers.filter((or) => or !== r); - edges.set(r, [implementer, ...otherReviewers]); - } - // Others → implementer - for (const o of others) edges.set(o, [implementer]); - - return { pattern: p, agents, edges, hub: implementer }; - } - - default: { - // Fallback: full mesh. - for (const n of names) { - edges.set( - n, - names.filter((o) => o !== n) - ); - } - return { pattern: p, agents, edges }; - } - } - } - - // ── Lifecycle: create run ─────────────────────────────────────────────── - - async createRun(workspaceId: string, config: RelayYamlConfig): Promise { - const id = `run_${Date.now()}_${randomBytes(4).toString('hex')}`; - const pattern = this.selectPattern(config); - const now = new Date().toISOString(); - - const { rows } = await this.db.query( - `INSERT INTO workflow_runs (id, workspace_id, workflow_name, pattern, status, config, started_at, created_at, updated_at) - VALUES ($1, $2, $3, $4, 'pending', $5, $6, $6, $6) - RETURNING *`, - [id, workspaceId, config.name, pattern, JSON.stringify(config), now] - ); - - const run = rows[0]; - this.emit('run:created', run); - return run; - } - - // ── Lifecycle: start run ──────────────────────────────────────────────── - - async startRun(runId: string): Promise { - const now = new Date().toISOString(); - const { rows } = await this.db.query( - `UPDATE workflow_runs SET status = 'running', started_at = $2, updated_at = $2 - WHERE id = $1 AND status = 'pending' - RETURNING *`, - [runId, now] - ); - - if (rows.length === 0) { - throw new Error(`Run ${runId} not found or not in pending state`); - } - - const run = rows[0]; - this.emit('run:started', run); - return run; - } - - // ── Lifecycle: complete / fail / cancel ───────────────────────────────── - - async completeRun(runId: string, stateSnapshot?: Record): Promise { - return this.transitionRun(runId, 'completed', undefined, stateSnapshot); - } - - async failRun(runId: string, error: string): Promise { - return this.transitionRun(runId, 'failed', error); - } - - async cancelRun(runId: string): Promise { - return this.transitionRun(runId, 'cancelled'); - } - - // ── Step management ───────────────────────────────────────────────────── - - async createSteps(runId: string, config: RelayYamlConfig): Promise { - const workflows = config.workflows ?? []; - const created: WorkflowStepRow[] = []; - - for (const wf of workflows) { - for (const step of wf.steps) { - const id = `step_${Date.now()}_${randomBytes(4).toString('hex')}`; - const now = new Date().toISOString(); - - const { rows } = await this.db.query( - `INSERT INTO workflow_steps (id, run_id, step_name, agent_name, status, task, depends_on, created_at, updated_at) - VALUES ($1, $2, $3, $4, 'pending', $5, $6, $7, $7) - RETURNING *`, - [ - id, - runId, - step.name, - step.agent ?? null, - step.task ?? step.command ?? '', - JSON.stringify(step.dependsOn ?? []), - now, - ] - ); - - created.push(rows[0]); - } - } - - return created; - } - - async startStep(stepId: string): Promise { - const now = new Date().toISOString(); - const { rows } = await this.db.query( - `UPDATE workflow_steps SET status = 'running', started_at = $2, updated_at = $2 - WHERE id = $1 AND status = 'pending' - RETURNING *`, - [stepId, now] - ); - - if (rows.length === 0) { - throw new Error(`Step ${stepId} not found or not in pending state`); - } - - const step = rows[0]; - this.emit('step:started', step); - return step; - } - - async completeStep(stepId: string, output?: string): Promise { - const now = new Date().toISOString(); - const { rows } = await this.db.query( - `UPDATE workflow_steps SET status = 'completed', output = $2, completed_at = $3, updated_at = $3 - WHERE id = $1 AND status = 'running' - RETURNING *`, - [stepId, output ?? null, now] - ); - - if (rows.length === 0) { - throw new Error(`Step ${stepId} not found or not in running state`); - } - - const step = rows[0]; - this.emit('step:completed', step); - return step; - } - - async failStep(stepId: string, error: string): Promise { - const now = new Date().toISOString(); - const { rows } = await this.db.query( - `UPDATE workflow_steps SET status = 'failed', error = $2, completed_at = $3, updated_at = $3 - WHERE id = $1 AND status = 'running' - RETURNING *`, - [stepId, error, now] - ); - - if (rows.length === 0) { - throw new Error(`Step ${stepId} not found or not in running state`); - } - - const step = rows[0]; - this.emit('step:failed', step); - return step; - } - - async skipStep(stepId: string): Promise { - const now = new Date().toISOString(); - const { rows } = await this.db.query( - `UPDATE workflow_steps SET status = 'skipped', completed_at = $2, updated_at = $2 - WHERE id = $1 - RETURNING *`, - [stepId, now] - ); - - if (rows.length === 0) { - throw new Error(`Step ${stepId} not found`); - } - - return rows[0]; - } - - // ── Queries ───────────────────────────────────────────────────────────── - - async getRun(runId: string): Promise { - const { rows } = await this.db.query(`SELECT * FROM workflow_runs WHERE id = $1`, [ - runId, - ]); - return rows[0] ?? null; - } - - async getSteps(runId: string): Promise { - const { rows } = await this.db.query( - `SELECT * FROM workflow_steps WHERE run_id = $1 ORDER BY created_at ASC`, - [runId] - ); - return rows; - } - - async getReadySteps(runId: string): Promise { - const steps = await this.getSteps(runId); - const completedNames = new Set(steps.filter((s) => s.status === 'completed').map((s) => s.stepName)); - - return steps.filter((s) => { - if (s.status !== 'pending') return false; - const deps: string[] = Array.isArray(s.dependsOn) ? s.dependsOn : []; - return deps.every((d) => completedNames.has(d)); - }); - } - - async getRunsByWorkspace(workspaceId: string, status?: WorkflowRunStatus): Promise { - if (status) { - const { rows } = await this.db.query( - `SELECT * FROM workflow_runs WHERE workspace_id = $1 AND status = $2 ORDER BY created_at DESC`, - [workspaceId, status] - ); - return rows; - } - const { rows } = await this.db.query( - `SELECT * FROM workflow_runs WHERE workspace_id = $1 ORDER BY created_at DESC`, - [workspaceId] - ); - return rows; - } - - // ── Private helpers ───────────────────────────────────────────────────── - - private async transitionRun( - runId: string, - status: WorkflowRunStatus, - error?: string, - stateSnapshot?: Record - ): Promise { - const now = new Date().toISOString(); - const { rows } = await this.db.query( - `UPDATE workflow_runs - SET status = $2, completed_at = $3, error = $4, state_snapshot = $5, updated_at = $3 - WHERE id = $1 - RETURNING *`, - [runId, status, now, error ?? null, stateSnapshot ? JSON.stringify(stateSnapshot) : null] - ); - - if (rows.length === 0) { - throw new Error(`Run ${runId} not found`); - } - - const run = rows[0]; - const eventName = `run:${status}` as keyof SwarmCoordinatorEvents; - this.emit(eventName, run); - return run; - } - - private pickHub(agents: AgentDefinition[]): string { - // Prefer interactive agents as hub — non-interactive agents cannot receive messages - const interactiveAgents = agents.filter((a) => a.interactive !== false); - const pool = interactiveAgents.length > 0 ? interactiveAgents : agents; - const lead = pool.find((a) => a.role === 'lead' || a.role === 'hub' || a.role === 'coordinator'); - return lead?.name ?? pool[0].name; - } - - private resolvePipelineOrder(config: RelayYamlConfig, fallback: string[]): string[] { - const workflow = config.workflows?.[0]; - if (!workflow) return fallback; - - // Use step order — each step's agent in sequence, deduped. - const seen = new Set(); - const order: string[] = []; - for (const step of workflow.steps) { - // Skip deterministic steps (no agent) - if (!step.agent) continue; - if (!seen.has(step.agent)) { - seen.add(step.agent); - order.push(step.agent); - } - } - return order.length > 0 ? order : fallback; - } - - private resolveEscalationOrder(agents: AgentDefinition[]): string[] { - // Sort by tier role (e.g., "tier-1", "tier-2") or by agent order - const tiered = agents.filter((a) => a.role?.startsWith('tier-')); - if (tiered.length > 0) { - return tiered - .sort((a, b) => { - const tierA = parseInt(a.role?.replace('tier-', '') ?? '0', 10); - const tierB = parseInt(b.role?.replace('tier-', '') ?? '0', 10); - return tierA - tierB; - }) - .map((a) => a.name); - } - // Fallback: use agent order - return agents.map((a) => a.name); - } - - private resolveDAGEdges(config: RelayYamlConfig): Map { - const edges = new Map(); - const workflows = config.workflows ?? []; - - for (const wf of workflows) { - // Build step-name → agent-name mapping (skip deterministic steps) - const stepAgent = new Map(); - for (const step of wf.steps) { - if (step.agent) { - stepAgent.set(step.name, step.agent); - } - } - - for (const step of wf.steps) { - // Skip deterministic steps - if (!step.agent) continue; - if (!step.dependsOn?.length) continue; - for (const dep of step.dependsOn) { - const fromAgent = stepAgent.get(dep); - if (!fromAgent) continue; - const existing = edges.get(fromAgent) ?? []; - if (!existing.includes(step.agent)) { - existing.push(step.agent); - } - edges.set(fromAgent, existing); - } - } - } - - return edges; - } -} diff --git a/packages/sdk/src/workflows/custom-steps.ts b/packages/sdk/src/workflows/custom-steps.ts deleted file mode 100644 index 5ac686513..000000000 --- a/packages/sdk/src/workflows/custom-steps.ts +++ /dev/null @@ -1,442 +0,0 @@ -/** - * Custom Steps Loader - * - * Loads and resolves custom step definitions from .relay/steps.yaml - */ - -import { existsSync, readFileSync } from 'node:fs'; -import path from 'node:path'; -import { parse as parseYaml } from 'yaml'; -import type { CustomStepsConfig, CustomStepDefinition, CustomStepParam, WorkflowStep } from './types.js'; - -/** Default location for custom steps configuration. */ -export const CUSTOM_STEPS_FILE = '.relay/steps.yaml'; - -/** Result of validating custom steps usage in a workflow. */ -export interface CustomStepsValidationResult { - valid: boolean; - errors: string[]; - warnings: string[]; - /** Custom steps that were referenced but not found. */ - missingSteps: string[]; - /** Parameters that were required but not provided. */ - missingParams: Array<{ step: string; use: string; param: string }>; - /** Unreferenced variables in step definitions. */ - unresolvedVariables: Array<{ step: string; variable: string }>; -} - -/** - * Load custom step definitions from .relay/steps.yaml. - * Returns an empty map if the file doesn't exist. - */ -export function loadCustomSteps(cwd: string): Map { - const stepsPath = path.join(cwd, CUSTOM_STEPS_FILE); - const steps = new Map(); - - if (!existsSync(stepsPath)) { - return steps; - } - - try { - const content = readFileSync(stepsPath, 'utf-8'); - - // Handle empty file - if (!content.trim()) { - return steps; - } - - const config = parseYaml(content) as CustomStepsConfig; - - if (!config || typeof config !== 'object') { - throw new CustomStepsParseError( - 'Invalid file format', - 'The file must contain a valid YAML object with a "steps" key', - stepsPath - ); - } - - if (!config.steps) { - throw new CustomStepsParseError( - 'Missing "steps" key', - 'Add a "steps" object containing your custom step definitions:\n\n' + - 'steps:\n' + - ' my-step:\n' + - ' command: "echo hello"', - stepsPath - ); - } - - if (typeof config.steps !== 'object' || Array.isArray(config.steps)) { - throw new CustomStepsParseError( - 'Invalid "steps" format', - 'The "steps" key must be an object (not an array) mapping step names to definitions', - stepsPath - ); - } - - for (const [name, definition] of Object.entries(config.steps)) { - validateCustomStepDefinition(name, definition, stepsPath); - steps.set(name, definition); - } - - return steps; - } catch (err) { - if (err instanceof CustomStepsParseError) { - throw err; - } - const message = err instanceof Error ? err.message : String(err); - throw new CustomStepsParseError('Failed to parse file', message, stepsPath); - } -} - -/** - * Custom error class for parse errors with helpful context. - */ -export class CustomStepsParseError extends Error { - constructor( - public readonly issue: string, - public readonly suggestion: string, - public readonly filePath: string - ) { - super(`${filePath}: ${issue}\n\n${suggestion}`); - this.name = 'CustomStepsParseError'; - } -} - -/** - * Custom error class for step resolution errors. - */ -export class CustomStepResolutionError extends Error { - constructor( - public readonly stepName: string, - public readonly issue: string, - public readonly suggestion: string - ) { - super(`Step "${stepName}": ${issue}\n\n${suggestion}`); - this.name = 'CustomStepResolutionError'; - } -} - -/** - * Validate a custom step definition with clear error messages. - */ -function validateCustomStepDefinition( - name: string, - def: unknown, - filePath: string -): asserts def is CustomStepDefinition { - if (!def || typeof def !== 'object') { - throw new CustomStepsParseError( - `Invalid step "${name}"`, - 'Each step must be an object with at least a "command" or "branch" field:\n\n' + - `steps:\n` + - ` ${name}:\n` + - ` command: "your-command-here"`, - filePath - ); - } - - const stepDef = def as Record; - - // Validate type if specified - if (stepDef.type !== undefined) { - if (stepDef.type !== 'deterministic' && stepDef.type !== 'worktree') { - throw new CustomStepsParseError( - `Invalid type "${stepDef.type}" for step "${name}"`, - 'Step type must be either "deterministic" or "worktree"', - filePath - ); - } - } - - // Determine step type (default to deterministic if command is provided) - const hasCommand = typeof stepDef.command === 'string'; - const hasBranch = typeof stepDef.branch === 'string'; - const explicitType = stepDef.type as string | undefined; - const stepType = explicitType ?? (hasCommand ? 'deterministic' : hasBranch ? 'worktree' : undefined); - - if (!stepType) { - throw new CustomStepsParseError( - `Step "${name}" is missing required fields`, - 'Deterministic steps need "command", worktree steps need "branch":\n\n' + - '# Deterministic step:\n' + - ` ${name}:\n` + - ' command: "your-command {{param}}"\n\n' + - '# Worktree step:\n' + - ` ${name}:\n` + - ' type: worktree\n' + - ' branch: "{{branch-name}}"', - filePath - ); - } - - if (stepType === 'deterministic' && !hasCommand) { - throw new CustomStepsParseError( - `Deterministic step "${name}" is missing "command"`, - 'Add a command field:\n\n' + ` ${name}:\n` + ' command: "your-shell-command"', - filePath - ); - } - - if (stepType === 'worktree' && !hasBranch) { - throw new CustomStepsParseError( - `Worktree step "${name}" is missing "branch"`, - 'Add a branch field:\n\n' + - ` ${name}:\n` + - ' type: worktree\n' + - ' branch: "feature/{{branch-name}}"', - filePath - ); - } - - // Validate params if present - if (stepDef.params !== undefined) { - if (!Array.isArray(stepDef.params)) { - throw new CustomStepsParseError( - `Invalid params for step "${name}"`, - 'Params must be an array:\n\n' + - ` ${name}:\n` + - ' params:\n' + - ' - name: myParam\n' + - ' required: true\n' + - ' - name: optionalParam\n' + - ' default: "value"', - filePath - ); - } - - for (let i = 0; i < stepDef.params.length; i++) { - const param = stepDef.params[i] as Record; - if (!param || typeof param !== 'object') { - throw new CustomStepsParseError( - `Invalid param at index ${i} for step "${name}"`, - 'Each param must be an object with at least a "name" field', - filePath - ); - } - if (!param.name || typeof param.name !== 'string') { - throw new CustomStepsParseError( - `Param at index ${i} for step "${name}" is missing "name"`, - 'Add a name to the parameter:\n\n' + ' params:\n' + ' - name: myParam', - filePath - ); - } - if (param.required !== undefined && typeof param.required !== 'boolean') { - throw new CustomStepsParseError( - `Invalid "required" value for param "${param.name}" in step "${name}"`, - 'The "required" field must be true or false', - filePath - ); - } - } - } -} - -/** - * Extract all variable references ({{varName}}) from a string. - */ -function extractVariables(text: string): string[] { - const matches = text.match(/\{\{(\w+)\}\}/g) ?? []; - return matches.map((m) => m.slice(2, -2)); -} - -/** - * Validate custom step usage in workflow steps without resolving. - * Returns validation errors and warnings for dry-run. - */ -export function validateCustomStepsUsage( - steps: WorkflowStep[], - customSteps: Map -): CustomStepsValidationResult { - const errors: string[] = []; - const warnings: string[] = []; - const missingSteps: string[] = []; - const missingParams: Array<{ step: string; use: string; param: string }> = []; - const unresolvedVariables: Array<{ step: string; variable: string }> = []; - - for (const step of steps) { - if (!step.use) continue; - - const customDef = customSteps.get(step.use); - if (!customDef) { - missingSteps.push(step.use); - errors.push( - `Step "${step.name}" uses undefined custom step "${step.use}". ` + - `Add it to .relay/steps.yaml or check for typos.` - ); - continue; - } - - // Check required parameters - const stepAny = step as unknown as Record; - const providedParams = new Set(); - - if (customDef.params) { - for (const param of customDef.params) { - const providedValue = stepAny[param.name]; - if (providedValue !== undefined) { - providedParams.add(param.name); - } else if (param.default !== undefined) { - providedParams.add(param.name); - } else if (param.required) { - missingParams.push({ step: step.name, use: step.use, param: param.name }); - errors.push( - `Step "${step.name}" is missing required parameter "${param.name}" for custom step "${step.use}".` - ); - } - } - } - - // Check for unresolved variables in the resolved command/branch - const textToCheck = customDef.command ?? customDef.branch ?? ''; - const variables = extractVariables(textToCheck); - for (const variable of variables) { - if (!providedParams.has(variable)) { - // Check if it's a known param with a default - const paramDef = customDef.params?.find((p) => p.name === variable); - if (!paramDef) { - unresolvedVariables.push({ step: step.name, variable }); - warnings.push( - `Step "${step.name}": Variable "{{${variable}}}" in custom step "${step.use}" ` + - `is not defined as a parameter. It will not be interpolated.` - ); - } - } - } - - // Check for extra parameters that aren't defined - const definedParams = new Set((customDef.params ?? []).map((p) => p.name)); - const stepKeys = Object.keys(stepAny).filter( - (k) => !['name', 'use', 'dependsOn', 'timeoutMs'].includes(k) - ); - for (const key of stepKeys) { - if (!definedParams.has(key)) { - warnings.push( - `Step "${step.name}": Parameter "${key}" is not defined in custom step "${step.use}" and will be ignored.` - ); - } - } - } - - return { - valid: errors.length === 0, - errors, - warnings, - missingSteps, - missingParams, - unresolvedVariables, - }; -} - -/** - * Resolve a workflow step that uses a custom step definition. - * Returns a new step with the custom definition merged in. - */ -export function resolveCustomStep( - step: WorkflowStep, - customSteps: Map -): WorkflowStep { - if (!step.use) { - return step; - } - - const customDef = customSteps.get(step.use); - if (!customDef) { - throw new CustomStepResolutionError( - step.name, - `Custom step "${step.use}" not found`, - `Make sure "${step.use}" is defined in .relay/steps.yaml:\n\n` + - 'steps:\n' + - ` ${step.use}:\n` + - ' command: "your-command"' - ); - } - - // Build parameter values from step properties and defaults - const paramValues: Record = {}; - const missingRequired: string[] = []; - - if (customDef.params) { - // Cast step to access arbitrary parameters (custom step params are passed as extra properties) - const stepAny = step as unknown as Record; - for (const param of customDef.params) { - // Check if value provided in step - const providedValue = stepAny[param.name]; - if (providedValue !== undefined) { - paramValues[param.name] = String(providedValue); - } else if (param.default !== undefined) { - paramValues[param.name] = param.default; - } else if (param.required) { - missingRequired.push(param.name); - } - } - } - - if (missingRequired.length > 0) { - const paramList = missingRequired.map((p) => ` - ${p}`).join('\n'); - throw new CustomStepResolutionError( - step.name, - `Missing required parameter(s) for custom step "${step.use}"`, - `Add the following to your step:\n\n` + - `- name: ${step.name}\n` + - ` use: ${step.use}\n` + - missingRequired.map((p) => ` ${p}: `).join('\n') - ); - } - - // Determine step type - const stepType = customDef.type ?? (customDef.command ? 'deterministic' : 'worktree'); - - // Interpolate parameter values into the definition - const interpolate = (value: string | undefined): string | undefined => { - if (!value) return value; - return value.replace(/\{\{(\w+)\}\}/g, (match, paramName) => { - return paramValues[paramName] ?? match; - }); - }; - - // Build resolved step - const resolvedStep: WorkflowStep = { - name: step.name, - type: stepType as 'deterministic' | 'worktree', - dependsOn: step.dependsOn, - timeoutMs: step.timeoutMs ?? customDef.timeoutMs, - }; - - if (stepType === 'deterministic') { - resolvedStep.command = interpolate(customDef.command); - resolvedStep.failOnError = customDef.failOnError; - resolvedStep.captureOutput = customDef.captureOutput; - } else if (stepType === 'worktree') { - resolvedStep.branch = interpolate(customDef.branch); - resolvedStep.baseBranch = interpolate(customDef.baseBranch); - resolvedStep.path = interpolate(customDef.path); - resolvedStep.createBranch = customDef.createBranch; - } - - return resolvedStep; -} - -/** - * Resolve all custom steps in a workflow's steps array. - */ -export function resolveAllCustomSteps( - steps: WorkflowStep[], - customSteps: Map -): WorkflowStep[] { - return steps.map((step) => resolveCustomStep(step, customSteps)); -} - -/** - * Check if .relay/steps.yaml exists. - */ -export function customStepsFileExists(cwd: string): boolean { - return existsSync(path.join(cwd, CUSTOM_STEPS_FILE)); -} - -/** - * Get the full path to the custom steps file. - */ -export function getCustomStepsPath(cwd: string): string { - return path.join(cwd, CUSTOM_STEPS_FILE); -} diff --git a/packages/sdk/src/workflows/default-logger.ts b/packages/sdk/src/workflows/default-logger.ts deleted file mode 100644 index 11a3a93e2..000000000 --- a/packages/sdk/src/workflows/default-logger.ts +++ /dev/null @@ -1,122 +0,0 @@ -import chalk from 'chalk'; -import type { WorkflowEvent, WorkflowEventListener } from './runner.js'; - -export type LogLevel = 'verbose' | 'normal' | 'quiet' | false; - -const noop: WorkflowEventListener = () => {}; - -/** - * Create a default event logger that writes workflow progress to the console. - * - * @param level - Log verbosity: "verbose" | "normal" (default) | "quiet" | false (no-op) - */ -export function createDefaultEventLogger(level: LogLevel = 'normal'): WorkflowEventListener { - if (level === false) return noop; - - return (event: WorkflowEvent) => { - switch (event.type) { - // ── Run lifecycle ── - case 'run:started': - if (level !== 'quiet') { - console.log(chalk.cyan(`[workflow] run ${event.runId}`)); - } - break; - - case 'run:completed': - console.log(chalk.green(`[workflow] completed`)); - break; - - case 'run:failed': - console.log(chalk.red(`[workflow] FAILED: ${event.error}`)); - break; - - case 'run:cancelled': - if (level !== 'quiet') { - console.log(chalk.yellow(`[workflow] cancelled`)); - } - break; - - // ── Step lifecycle ── - case 'step:started': - if (level !== 'quiet') { - console.log(chalk.blue(` ● ${event.stepName} — started`)); - } - break; - - case 'step:completed': - if (level !== 'quiet') { - console.log(chalk.green(` ✓ ${event.stepName} — completed`)); - } - break; - - case 'step:failed': - console.log(chalk.red(` ✗ ${event.stepName} — FAILED: ${event.error}`)); - break; - - case 'step:skipped': - if (level !== 'quiet') { - console.log(chalk.gray(` ○ ${event.stepName} — skipped`)); - } - break; - - case 'step:retrying': - if (level !== 'quiet') { - console.log(chalk.yellow(` ↻ ${event.stepName} — retrying (attempt ${event.attempt})`)); - } - break; - - case 'step:nudged': - if (level !== 'quiet') { - console.log(chalk.yellow(` ⚡ ${event.stepName} — nudged (${event.nudgeCount})`)); - } - break; - - case 'step:agent-report': { - if (level !== 'quiet') { - const r = event.report; - const parts: string[] = []; - if (r.model) parts.push(r.model); - if (r.cost != null) parts.push(`$${r.cost.toFixed(2)}`); - if (r.tokens) parts.push(`${r.tokens.input}+${r.tokens.output} tokens`); - parts.push(`${r.errors.length} errors`); - console.log(chalk.dim(` 📊 ${event.stepName} — ${parts.join(' · ')}`)); - } - break; - } - - // ── Broker-level events (verbose only) ── - case 'broker:event': - if (level === 'verbose') { - console.log(chalk.dim(` [broker] ${JSON.stringify(event.event)}`)); - } - break; - - // ── Other events (verbose only) ── - case 'step:owner-assigned': - if (level === 'verbose') { - console.log( - chalk.dim(` ${event.stepName} — owner: ${event.ownerName}, specialist: ${event.specialistName}`) - ); - } - break; - - case 'step:review-completed': - if (level === 'verbose') { - console.log(chalk.dim(` ${event.stepName} — review: ${event.decision} by ${event.reviewerName}`)); - } - break; - - case 'step:owner-timeout': - if (level !== 'quiet') { - console.log(chalk.yellow(` ⏱ ${event.stepName} — owner timeout (${event.ownerName})`)); - } - break; - - case 'step:force-released': - if (level === 'verbose') { - console.log(chalk.dim(` ${event.stepName} — force-released`)); - } - break; - } - }; -} diff --git a/packages/sdk/src/workflows/dry-run-format.ts b/packages/sdk/src/workflows/dry-run-format.ts deleted file mode 100644 index aadb97c9b..000000000 --- a/packages/sdk/src/workflows/dry-run-format.ts +++ /dev/null @@ -1,88 +0,0 @@ -import type { DryRunReport } from './types.js'; - -/** - * Format a DryRunReport as human-readable text for terminal output. - */ -export function formatDryRunReport(report: DryRunReport): string { - const lines: string[] = []; - - // Header - lines.push(`Dry Run: ${report.name}`); - const meta: string[] = [`Pattern: ${report.pattern}`]; - if (report.maxConcurrency !== undefined) { - meta.push(`Max Concurrency: ${report.maxConcurrency}`); - } - lines.push(meta.join(' | ')); - if (report.description) { - lines.push(report.description); - } - lines.push(''); - - // Agents - if (report.agents.length > 0) { - lines.push(`Agents (${report.agents.length}):`); - const maxNameLen = Math.max(...report.agents.map((a) => a.name.length)); - const maxCliLen = Math.max(...report.agents.map((a) => a.cli.length)); - for (const agent of report.agents) { - const stepLabel = agent.stepCount === 1 ? '1 step' : `${agent.stepCount} steps`; - const cwdInfo = agent.cwd ? ` [cwd: ${agent.cwd}]` : ''; - lines.push( - ` ${agent.name.padEnd(maxNameLen)} ${agent.cli.padEnd(maxCliLen)} ${stepLabel}${cwdInfo}` - ); - } - lines.push(''); - } - - // Permissions - if (report.permissions && report.permissions.length > 0) { - lines.push(`Permissions (${report.permissions.length} agents):`); - for (const perm of report.permissions) { - lines.push( - ` ${perm.agent}: ${perm.access} (read: ${perm.readPaths}, write: ${perm.writePaths}, deny: ${perm.denyPaths}, scopes: ${perm.scopes}) [${perm.source}]` - ); - } - lines.push(''); - } - - // Execution Plan - if (report.waves.length > 0) { - lines.push(`Execution Plan (${report.totalSteps} steps, ${report.estimatedWaves} waves):`); - lines.push(''); - for (const wave of report.waves) { - for (let i = 0; i < wave.steps.length; i++) { - const step = wave.steps[i]; - const prefix = i === 0 ? ` Wave ${String(wave.wave).padStart(2)}:` : ' '; - lines.push(`${prefix} ${step.name} (${step.agent})`); - } - } - lines.push(''); - } - - // Resource estimation - if (report.estimatedPeakConcurrency !== undefined) { - lines.push(`Resource Estimate:`); - lines.push(` Peak Concurrency: ${report.estimatedPeakConcurrency} agents`); - if (report.estimatedTotalAgentSteps !== undefined) { - lines.push(` Total Agent Steps: ${report.estimatedTotalAgentSteps}`); - } - lines.push(''); - } - - // Validation summary - if (report.errors.length > 0) { - lines.push(`Validation: FAIL (${report.errors.length} errors, ${report.warnings.length} warnings)`); - for (const err of report.errors) { - lines.push(` ERROR: ${err}`); - } - } else { - lines.push(`Validation: PASS (0 errors, ${report.warnings.length} warnings)`); - } - - if (report.warnings.length > 0) { - for (const warn of report.warnings) { - lines.push(` WARNING: ${warn}`); - } - } - - return lines.join('\n'); -} diff --git a/packages/sdk/src/workflows/file-db.ts b/packages/sdk/src/workflows/file-db.ts deleted file mode 100644 index 12b43fe11..000000000 --- a/packages/sdk/src/workflows/file-db.ts +++ /dev/null @@ -1,277 +0,0 @@ -import { - accessSync, - appendFileSync, - constants as fsConstants, - existsSync, - mkdirSync, - readdirSync, - readFileSync, -} from 'node:fs'; -import os from 'node:os'; -import path from 'node:path'; - -import type { WorkflowRunRow, WorkflowStepRow } from './types.js'; -import type { WorkflowDb } from './runner.js'; - -type DbEntry = { kind: 'run'; row: WorkflowRunRow } | { kind: 'step'; row: WorkflowStepRow }; - -/** - * Optional hook: fired whenever a persistence write fails (e.g. EACCES, - * ENOSPC). Surfaced so the CLI, dashboard, or bootstrap can decide how - * to react beyond the single console.warn. Not called for the initial - * "directory unwritable" detection — that's stored in {@link isWritable}. - */ -export type DbWriteFailureListener = (err: unknown, filePath: string) => void; - -export interface JsonFileWorkflowDbOptions { - /** Override the resolved filePath. Kept for tests / advanced callers. */ - filePath?: string; - /** Notified on every underlying write error. */ - onWriteFailure?: DbWriteFailureListener; - /** - * When true, if the preferred file path is unwritable, fall back to - * `$HOME/.agent-relay/workflow-runs-.jsonl` so `--resume` - * still works in environments where the workflow cwd is read-only - * (cloud sandboxes with restrictive workspace ACLs). - * - * Defaults to `false` — strict "write to this path or run in-memory" - * semantics, matching the pre-cache behavior. Opt-in via `true`. - */ - homeFallback?: boolean; -} - -/** - * JSONL-backed WorkflowDb for the CLI. - * - * Design: the **in-memory cache is the single source of truth** for the - * process lifetime. Every mutation updates the cache synchronously and - * then best-effort appends to the jsonl file for durability / `--resume`. - * - * This matters because the runtime correctness of a running workflow - * must not depend on disk writes succeeding. If the storage path is - * unwritable (ACL-restricted workspace, full disk, ENOSPC), the workflow - * still progresses through its state machine correctly — we just lose - * the ability to resume a future process from that run. - * - * Read paths used to re-snapshot the jsonl on every call, which meant - * a failed `updateRun(..., { status: 'completed' })` would leave a - * subsequent `getRun` returning the stale 'running' row from disk. - * That bug surfaced as workflows passing per-step but reporting - * `status: 'running'` to callers. - * - * Storage path resolution: - * 1. Try the caller-supplied file path. If the parent directory is - * writable, use it. - * 2. If (1) fails and `homeFallback` is true (opt-in, default false), - * try `$HOME/.agent-relay/workflow-runs-.jsonl`. This is - * outside any workspace mount in cloud sandboxes and almost always - * writable by the agent. - * 3. If both fail, run in memory-only mode. The workflow still - * executes correctly; `--resume` won't be available for this run. - * - * File: `.agent-relay/workflow-runs.jsonl` in the workflow cwd by default. - */ -export class JsonFileWorkflowDb implements WorkflowDb { - private readonly filePath: string; - - /** Whether persistence is active. False = in-memory-only mode. */ - private readonly writable: boolean; - private appendFailedOnce = false; - private readonly onWriteFailure?: DbWriteFailureListener; - - /** - * Authoritative in-memory mirror. Every mutation updates this; reads - * return from here. The jsonl file is only consulted at construction - * (to replay prior state for `--resume`) and is otherwise write-only. - */ - private readonly cache: { - runs: Map; - steps: Map; - }; - - constructor(filePathOrOptions: string | JsonFileWorkflowDbOptions) { - const options: JsonFileWorkflowDbOptions = - typeof filePathOrOptions === 'string' ? { filePath: filePathOrOptions } : filePathOrOptions; - this.onWriteFailure = options.onWriteFailure; - - const requestedPath = options.filePath ?? path.join('.agent-relay', 'workflow-runs.jsonl'); - const homeFallback = options.homeFallback ?? false; - - const { resolvedPath, writable } = JsonFileWorkflowDb.resolveStoragePath(requestedPath, homeFallback); - this.filePath = resolvedPath; - this.writable = writable; - - // Load existing state from disk (for --resume) once at construction. - // From this point on, the cache is authoritative. - this.cache = JsonFileWorkflowDb.loadSnapshot(this.filePath); - } - - /** Returns false if persistence is not active (in-memory-only mode). */ - isWritable(): boolean { - return this.writable; - } - - /** Resolved path on disk. For tests + diagnostics. */ - getStoragePath(): string { - return this.filePath; - } - - hasStepOutputs(runId: string): boolean { - try { - const dir = path.join(path.dirname(this.filePath), 'step-outputs', runId); - return existsSync(dir) && readdirSync(dir).length > 0; - } catch { - return false; - } - } - - // ── Private helpers ───────────────────────────────────────────────────── - - private static resolveStoragePath( - requestedPath: string, - homeFallback: boolean - ): { resolvedPath: string; writable: boolean } { - const candidates: string[] = [requestedPath]; - if (homeFallback) { - const base = path.basename(requestedPath) || 'workflow-runs.jsonl'; - candidates.push(path.join(os.homedir(), '.agent-relay', `workflow-runs-${base}`)); - } - - for (let i = 0; i < candidates.length; i++) { - const candidate = candidates[i]; - const isLastCandidate = i === candidates.length - 1; - try { - mkdirSync(path.dirname(candidate), { recursive: true }); - // If there's a later fallback to try, actively probe writability - // so we know whether to move on. Two levels matter: - // 1. Directory must be writable to create the jsonl file. - // 2. If the jsonl file already exists, IT must also be writable - // — a writable directory does not guarantee a writable file. - // Relayfile-mount, for example, can sync a file and chmod it - // to 0o444 while leaving the parent dir at 0o755; the old - // dir-only check would accept the path and every append would - // then lazy-fail, bypassing the fallback. - // If this is already the last candidate, skip the probe and be - // optimistic — an unwritable path will surface as a lazy append() - // failure handled by the cache + onWriteFailure path. Matches the - // pre-cache "warn on first failure" semantic callers expect. - if (!isLastCandidate) { - accessSync(path.dirname(candidate), fsConstants.W_OK); - if (existsSync(candidate)) { - accessSync(candidate, fsConstants.W_OK); - } - } - return { resolvedPath: candidate, writable: true }; - } catch { - // Try the next candidate; if this was the last, fall through - // to memory-only. - } - } - - // Memory-only mode. Path is reported for diagnostics but nothing - // is written to it. - return { resolvedPath: requestedPath, writable: false }; - } - - private static loadSnapshot(filePath: string): { - runs: Map; - steps: Map; - } { - const runs = new Map(); - const steps = new Map(); - let raw = ''; - try { - raw = readFileSync(filePath, 'utf8'); - } catch { - return { runs, steps }; - } - for (const line of raw.split('\n')) { - const trimmed = line.trim(); - if (!trimmed) continue; - try { - const entry = JSON.parse(trimmed) as DbEntry; - if (entry.kind === 'run') { - runs.set(entry.row.id, entry.row); - } else { - steps.set(entry.row.id, entry.row); - } - } catch { - // Skip malformed lines - } - } - return { runs, steps }; - } - - private append(entry: DbEntry): void { - if (!this.writable) return; - try { - appendFileSync(this.filePath, JSON.stringify(entry) + '\n', 'utf8'); - } catch (err) { - // Notify every failure so callers can aggregate / surface. - this.onWriteFailure?.(err, this.filePath); - // Warn to console once per process — subsequent failures are noise. - if (!this.appendFailedOnce) { - this.appendFailedOnce = true; - console.warn( - '[workflow] warning: failed to write run state to ' + - this.filePath + - ' — --resume will not be available for this run. Use --start-from instead. ' + - 'Error: ' + - (err instanceof Error ? err.message : String(err)) - ); - } - } - } - - // ── WorkflowDb interface ───────────────────────────────────────────────── - - async insertRun(run: WorkflowRunRow): Promise { - // Shallow-copy so later mutations on the caller's object don't silently - // alias into the cache. Matches InMemoryWorkflowDb semantics. The runner - // keeps inserted rows in its own stepStates map and occasionally mutates - // state.row.status directly before calling updateRun — without this copy - // the mutation would land in the cache and bypass updateRun's - // updatedAt + append path, causing exactly the observability hazard this - // cache is meant to prevent. - this.cache.runs.set(run.id, { ...run }); - this.append({ kind: 'run', row: run }); - } - - async updateRun(id: string, patch: Partial): Promise { - const existing = this.cache.runs.get(id); - if (!existing) return; - const updated: WorkflowRunRow = { - ...existing, - ...patch, - updatedAt: new Date().toISOString(), - }; - this.cache.runs.set(id, updated); - this.append({ kind: 'run', row: updated }); - } - - async getRun(id: string): Promise { - return this.cache.runs.get(id) ?? null; - } - - async insertStep(step: WorkflowStepRow): Promise { - // Shallow-copy to prevent caller-mutation aliasing — see insertRun. - this.cache.steps.set(step.id, { ...step }); - this.append({ kind: 'step', row: step }); - } - - async updateStep(id: string, patch: Partial): Promise { - const existing = this.cache.steps.get(id); - if (!existing) return; - const updated: WorkflowStepRow = { - ...existing, - ...patch, - updatedAt: new Date().toISOString(), - }; - this.cache.steps.set(id, updated); - this.append({ kind: 'step', row: updated }); - } - - async getStepsByRunId(runId: string): Promise { - return Array.from(this.cache.steps.values()).filter((s) => s.runId === runId); - } -} diff --git a/packages/sdk/src/workflows/index.ts b/packages/sdk/src/workflows/index.ts deleted file mode 100644 index 95a8ed6dd..000000000 --- a/packages/sdk/src/workflows/index.ts +++ /dev/null @@ -1,64 +0,0 @@ -export * from './types.js'; -export * from './runner.js'; -export * from './custom-steps.js'; -export * from './cli-session-collector.js'; -export * from './channel-messenger.js'; -export * from './process-spawner.js'; -export { - createProcessBackendExecutor, - type ProcessBackendExecutorOptions, -} from './process-backend-executor.js'; -export * from './run-summary-table.js'; -export * from './template-resolver.js'; -export * from './verification.js'; -export { - StepExecutor, - /** @deprecated Use {@link StepExecutor} instead. */ - StepExecutor as WorkflowStepLifecycleExecutor, - type StepExecutorDeps, - type StepResult, - type StepSchedule, -} from './step-executor.js'; -export { - Models, - ClaudeModels, - CodexModels, - GeminiModels, - CursorModels, - CLIs, - CLIVersions, - CLIRegistry, - SwarmPatterns, -} from '../models.js'; -export * from './memory-db.js'; -export * from './file-db.js'; -export * from './run.js'; -export * from './cloud-schedules.js'; -export * from './builder.js'; -export * from './coordinator.js'; -export * from './barrier.js'; -export * from './state.js'; -export * from './templates.js'; -export { WorkflowTrajectory, type StepOutcome } from './trajectory.js'; -export { formatDryRunReport } from './dry-run-format.js'; -export { createWorkflowRenderer, type WorkflowRenderer } from './listr-renderer.js'; -export { createDefaultEventLogger } from './default-logger.js'; -export { executeApiStep, type ApiExecutorOptions } from './api-executor.js'; -export type { CloudRunOptions } from './cloud-runner.js'; -export * from './proxy-env.js'; -export * from './budget-tracker.js'; -export { applySiblingLinks, buildSiblingLinkScript } from './sibling-links.js'; -export type { SiblingLink, SiblingLinkOptions } from './sibling-links.js'; -export { - runScriptWorkflow, - parseTsxStderr, - formatWorkflowParseError, - findLocalSdkWorkspace, - ensureLocalSdkWorkflowRuntime, -} from './run-script.js'; -export type { - RunScriptWorkflowOptions, - ParsedWorkflowError, - LocalSdkWorkspace, - ExecFileSyncLike, -} from './run-script.js'; diff --git a/packages/sdk/src/workflows/listr-renderer.ts b/packages/sdk/src/workflows/listr-renderer.ts deleted file mode 100644 index f88a8be23..000000000 --- a/packages/sdk/src/workflows/listr-renderer.ts +++ /dev/null @@ -1,276 +0,0 @@ -import chalk from 'chalk'; -import type { ListrTask } from 'listr2'; -import type { WorkflowEvent, WorkflowEventListener } from './runner.js'; - -// Filter console.log while listr owns the terminal. -// Blocks [broker] noise and [workflow HH:MM] timing lines, but lets the -// observer URL and channel name through so users can track the run. -function installOutputFilter(): () => void { - const orig = console.log.bind(console); - console.log = (...args: unknown[]) => { - const str = String(args[0] ?? ''); - // Always show the observer URL and channel so users can follow the run - if (str.includes('Observer:') || str.includes('agentrelay.com') || str.includes('Channel: wf-')) { - orig(...args); - return; - } - // Block [broker] lines and [workflow HH:MM] timing lines - if (/\[broker\]/.test(str) || /\[workflow\s+\d{2}:\d{2}\]/.test(str)) return; - orig(...args); - }; - return () => { - console.log = orig; - }; -} - -interface RenderableTask { - title: string; - output: string; -} - -interface StepHandle { - resolve: () => void; - reject: (error: Error) => void; - setOutput: (text: string) => void; - markSkipped: () => void; -} - -export interface WorkflowRenderer { - /** Pass this to `.run({ onEvent })` in your TypeScript workflow. */ - onEvent: WorkflowEventListener; - /** Start the listr renderer. Run this concurrently with your workflow. */ - start: () => Promise; - /** Restore console.log after the workflow finishes. */ - unmount: () => void; -} - -/** - * Creates a listr2-based renderer for TypeScript workflows. - * - * @example - * ```typescript - * import { workflow, createWorkflowRenderer } from '@agent-relay/sdk/workflows'; - * - * const renderer = createWorkflowRenderer(); - * const [result] = await Promise.all([ - * workflow('my-workflow').step(...).run({ onEvent: renderer.onEvent }), - * renderer.start(), - * ]); - * renderer.unmount(); - * ``` - */ -export function createWorkflowRenderer(): WorkflowRenderer { - const stepHandles = new Map(); - - let resolveWorkflow!: () => void; - let rejectWorkflow!: (error: Error) => void; - const workflowDone = new Promise((resolve, reject) => { - resolveWorkflow = resolve; - rejectWorkflow = reject; - }); - // Prevent unhandled rejection if run:failed fires before the header task - // reaches `await workflowDone`. - workflowDone.catch(() => {}); - - let setHeader: (text: string) => void = () => {}; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - let listr: any = null; - const pendingAdds: ListrTask[] = []; - - async function ensureListr(): Promise { - if (listr) return listr; - const { Listr } = await import('listr2'); - listr = new (Listr as any)( - [ - { - title: chalk.dim('Workflow starting...'), - task: async (_ctx, task): Promise => { - setHeader = (text: string): void => { - task.title = text; - }; - await workflowDone; - }, - } as ListrTask, - ], - { - concurrent: true, - renderer: process.stdout.isTTY ? 'default' : 'verbose', - rendererOptions: { - collapseErrors: false, - showErrorMessage: true, - }, - } - ); - for (const task of pendingAdds) listr.add(task); - pendingAdds.length = 0; - return listr; - } - - const addTask = (task: ListrTask): void => { - if (listr) listr.add(task); - else pendingAdds.push(task); - }; - - const onEvent: WorkflowEventListener = (event: WorkflowEvent) => { - switch (event.type) { - case 'run:started': { - setHeader(chalk.dim(`[workflow] run ${event.runId.slice(0, 8)}...`)); - break; - } - - case 'step:started': { - let resolveStep!: () => void; - let rejectStep!: (error: Error) => void; - let taskRef: RenderableTask | null = null; - let skipped = false; - - const done = new Promise((resolve, reject) => { - resolveStep = resolve; - rejectStep = reject; - }); - // Prevent unhandled rejection if the step fails before the listr - // task function has started and reached `await done`. - done.catch(() => {}); - - stepHandles.set(event.stepName, { - resolve: resolveStep, - reject: rejectStep, - setOutput: (text: string) => { - if (taskRef) taskRef.output = text; - }, - markSkipped: () => { - skipped = true; - if (taskRef) taskRef.title = chalk.dim(`${event.stepName} (skipped)`); - }, - }); - - addTask({ - title: chalk.white(event.stepName), - task: async (_ctx, task): Promise => { - taskRef = task as RenderableTask; - if (skipped) taskRef.title = chalk.dim(`${event.stepName} (skipped)`); - await done; - }, - rendererOptions: { persistentOutput: true }, - } as ListrTask); - break; - } - - case 'step:owner-assigned': { - const handle = stepHandles.get(event.stepName); - if (handle) { - handle.setOutput( - chalk.dim(`> Owner: ${event.ownerName}`) + - (event.specialistName ? chalk.dim(` · specialist: ${event.specialistName}`) : '') - ); - } - break; - } - - case 'step:retrying': { - stepHandles.get(event.stepName)?.setOutput(chalk.yellow(`Retrying (attempt ${event.attempt})`)); - break; - } - - case 'step:nudged': { - stepHandles.get(event.stepName)?.setOutput(chalk.dim(`> Nudge #${event.nudgeCount}`)); - break; - } - - case 'step:force-released': { - stepHandles.get(event.stepName)?.setOutput(chalk.yellow('> Force-released')); - break; - } - - case 'step:review-completed': { - stepHandles - .get(event.stepName) - ?.setOutput(chalk.dim(`> Review: ${event.decision} by ${event.reviewerName}`)); - break; - } - - case 'step:owner-timeout': { - stepHandles.get(event.stepName)?.setOutput(chalk.red(`> Owner ${event.ownerName} timed out`)); - break; - } - - case 'step:agent-report': { - const handle = stepHandles.get(event.stepName); - if (handle) { - const model = event.report.model ? `:${event.report.model}` : ''; - handle.setOutput(chalk.dim(`> Report collected (${event.report.cli}${model})`)); - } - break; - } - - case 'step:completed': { - stepHandles.get(event.stepName)?.resolve(); - break; - } - - case 'step:skipped': { - const handle = stepHandles.get(event.stepName); - if (handle) { - handle.markSkipped(); - handle.resolve(); - } else { - // Step was skipped without ever being started (downstream of a failure). - addTask({ - title: chalk.dim(`${event.stepName} (skipped)`), - task: async (): Promise => {}, - rendererOptions: { persistentOutput: true }, - } as ListrTask); - } - break; - } - - case 'step:failed': { - stepHandles.get(event.stepName)?.reject(new Error(event.error ?? 'Step failed')); - break; - } - - case 'run:completed': { - setHeader(chalk.green('Workflow completed')); - resolveWorkflow(); - break; - } - - case 'run:failed': { - setHeader(chalk.red(`Workflow failed: ${event.error ?? 'unknown error'}`)); - rejectWorkflow(new Error(event.error ?? 'Workflow failed')); - break; - } - - case 'run:cancelled': { - setHeader(chalk.yellow('Workflow cancelled')); - resolveWorkflow(); - break; - } - - case 'broker:event': - break; - - default: { - const _exhaustive: never = event; - void _exhaustive; - } - } - }; - - let restoreConsole: (() => void) | undefined; - - return { - onEvent, - start: async () => { - restoreConsole = installOutputFilter(); - const l = await ensureListr(); - return l.run().catch(() => { - // Step failures are already represented in the workflow result. - }); - }, - unmount: () => { - restoreConsole?.(); - restoreConsole = undefined; - }, - }; -} diff --git a/packages/sdk/src/workflows/memory-db.ts b/packages/sdk/src/workflows/memory-db.ts deleted file mode 100644 index 980bace38..000000000 --- a/packages/sdk/src/workflows/memory-db.ts +++ /dev/null @@ -1,39 +0,0 @@ -import type { WorkflowRunRow, WorkflowStepRow } from './types.js'; -import type { WorkflowDb } from './runner.js'; - -/** - * In-memory implementation of WorkflowDb for local workflow runs. - * No persistence — state lives only for the duration of the process. - */ -export class InMemoryWorkflowDb implements WorkflowDb { - private runs = new Map(); - private steps = new Map(); - - async insertRun(run: WorkflowRunRow): Promise { - this.runs.set(run.id, { ...run }); - } - - async updateRun(id: string, patch: Partial): Promise { - const existing = this.runs.get(id); - if (!existing) return; - this.runs.set(id, { ...existing, ...patch, updatedAt: new Date().toISOString() }); - } - - async getRun(id: string): Promise { - return this.runs.get(id) ?? null; - } - - async insertStep(step: WorkflowStepRow): Promise { - this.steps.set(step.id, { ...step }); - } - - async updateStep(id: string, patch: Partial): Promise { - const existing = this.steps.get(id); - if (!existing) return; - this.steps.set(id, { ...existing, ...patch, updatedAt: new Date().toISOString() }); - } - - async getStepsByRunId(runId: string): Promise { - return Array.from(this.steps.values()).filter((s) => s.runId === runId); - } -} diff --git a/packages/sdk/src/workflows/process-backend-executor.ts b/packages/sdk/src/workflows/process-backend-executor.ts deleted file mode 100644 index d578fcea7..000000000 --- a/packages/sdk/src/workflows/process-backend-executor.ts +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Adapter that implements {@link RunnerStepExecutor} on top of a - * {@link ProcessBackend}. Relay owns command construction (CLI flags, env, - * cwd, timeout); the backend only provides "where to run" — create an - * isolated environment, exec the command, destroy. - * - * The WorkflowRunner synthesizes one of these when a caller passes - * `processBackend` without an explicit `executor`, so every existing - * `executor.executeAgentStep(...)` call site transparently flows through - * the backend (e.g. a cloud sandbox) without any further plumbing. - */ - -import { buildCommand } from './process-spawner.js'; -import type { ProcessBackend, AgentDefinition, WorkflowStep, RunnerStepExecutor } from './types.js'; - -function shellEscape(value: string): string { - if (value === '') return "''"; - if (/^[A-Za-z0-9_\/.:,=+@%-]+$/.test(value)) return value; - return `'${value.replace(/'/g, `'\\''`)}'`; -} - -function commandToShell(argv: string[]): string { - return argv.map(shellEscape).join(' '); -} - -export interface ProcessBackendExecutorOptions { - /** Env vars injected into every step (e.g. auth tokens, relayfile config). */ - env?: Record; -} - -export function createProcessBackendExecutor( - backend: ProcessBackend, - options: ProcessBackendExecutorOptions = {} -): RunnerStepExecutor { - const baseEnv = options.env ?? {}; - - return { - async executeAgentStep( - step: WorkflowStep, - agentDef: AgentDefinition, - resolvedTask: string, - timeoutMs?: number - ): Promise { - if (agentDef.cli === 'api') { - throw new Error( - `processBackend cannot execute cli "api" agents — api agents call the Anthropic API directly. ` + - `Route agent "${agentDef.name}" through a subprocess CLI (claude, codex, etc.) or omit processBackend.` - ); - } - - const extraArgs = agentDef.constraints?.model ? ['--model', agentDef.constraints.model] : []; - const argv = buildCommand(agentDef.cli, extraArgs, resolvedTask); - const commandString = commandToShell(argv); - - const env = await backend.createEnvironment(step.name); - try { - const execOpts: { - cwd?: string; - env?: Record; - timeoutSeconds?: number; - } = {}; - if (agentDef.cwd) execOpts.cwd = agentDef.cwd; - if (Object.keys(baseEnv).length > 0) execOpts.env = baseEnv; - // timeoutSeconds is ceil-rounded from the caller's timeoutMs; a 500ms - // timeout becomes 1s because the backend protocol uses seconds. - if (timeoutMs && timeoutMs > 0) { - execOpts.timeoutSeconds = Math.max(1, Math.ceil(timeoutMs / 1000)); - } - const result = await env.exec(commandString, execOpts); - if (result.exitCode !== 0) { - const tail = result.output.slice(-2000); - throw new Error(`Agent step "${step.name}" exited with code ${result.exitCode}: ${tail}`); - } - return result.output; - } finally { - await env.destroy().catch(() => undefined); - } - }, - - async executeDeterministicStep( - step: WorkflowStep, - resolvedCommand: string, - cwd: string - ): Promise<{ output: string; exitCode: number }> { - const env = await backend.createEnvironment(step.name); - try { - const execOpts: { - cwd?: string; - env?: Record; - timeoutSeconds?: number; - } = { cwd }; - if (Object.keys(baseEnv).length > 0) execOpts.env = baseEnv; - if (step.timeoutMs && step.timeoutMs > 0) { - execOpts.timeoutSeconds = Math.max(1, Math.ceil(step.timeoutMs / 1000)); - } - return await env.exec(resolvedCommand, execOpts); - } finally { - await env.destroy().catch(() => undefined); - } - }, - }; -} diff --git a/packages/sdk/src/workflows/process-spawner.ts b/packages/sdk/src/workflows/process-spawner.ts deleted file mode 100644 index 2c3c74fd7..000000000 --- a/packages/sdk/src/workflows/process-spawner.ts +++ /dev/null @@ -1,201 +0,0 @@ -import { spawn as cpSpawn } from 'node:child_process'; -import type { ChildProcess, SpawnOptions } from 'node:child_process'; - -import { getCliDefinition } from '../cli-registry.js'; -import { resolveCliSync } from '../cli-resolver.js'; -import { runVerification } from './verification.js'; -import type { AgentCli, AgentDefinition, VerificationCheck } from './types.js'; - -export interface SpawnOutcome { - output: string; - exitCode?: number; - exitSignal?: string; -} - -export interface SpawnCommand { - bin: string; - args: string[]; - env?: Record; -} - -export interface ShellOpts { - cwd: string; - timeoutMs?: number; -} - -// eslint-disable-next-line @typescript-eslint/no-empty-object-type -export interface AgentOpts extends ShellOpts {} - -// eslint-disable-next-line @typescript-eslint/no-empty-object-type -export interface InteractiveOpts extends ShellOpts {} - -export interface ProcessSpawnerDeps { - cwd: string; -} - -export interface ProcessSpawner { - spawnShell(command: string, opts: ShellOpts): Promise; - spawnAgent(agent: AgentDefinition, task: string, opts: AgentOpts): Promise; - spawnInteractive(agent: AgentDefinition, task: string, opts: InteractiveOpts): Promise; - buildCommand(agent: AgentDefinition, task: string): SpawnCommand; -} - -function resolveNonInteractiveCli(cli: AgentCli): AgentCli { - if (cli !== 'cursor') { - return cli; - } - - const resolved = resolveCliSync('cursor'); - return (resolved?.binary as 'cursor-agent' | 'agent' | undefined) ?? 'agent'; -} - -export function buildCommand(cli: AgentCli, extraArgs: string[] = [], task: string): string[] { - if (cli === 'api') { - throw new Error('cli "api" uses direct API calls, not a subprocess command'); - } - - const resolvedCli = resolveNonInteractiveCli(cli); - const definition = getCliDefinition(resolvedCli); - if (!definition || definition.binaries.length === 0) { - throw new Error(`Unknown or non-executable CLI: ${resolvedCli}`); - } - - return [definition.binaries[0], ...definition.nonInteractiveArgs(task, extraArgs)]; -} - -export function spawnProcess(command: string[], options: SpawnOptions): ChildProcess { - const [bin, ...args] = command; - return cpSpawn(bin, args, options); -} - -export function collectOutput(process: ChildProcess): Promise { - return new Promise((resolve, reject) => { - let settled = false; - const stdout: string[] = []; - const stderr: string[] = []; - - process.stdout?.on('data', (chunk: Buffer | string) => { - stdout.push(chunk.toString()); - }); - - process.stderr?.on('data', (chunk: Buffer | string) => { - stderr.push(chunk.toString()); - }); - - process.once('error', (err) => { - if (!settled) { - settled = true; - reject(err); - } - }); - process.once('close', () => { - if (!settled) { - settled = true; - resolve(`${stdout.join('')}${stderr.join('')}`); - } - }); - }); -} - -export function detectCompletion(output: string, verification?: VerificationCheck): boolean { - if (/OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(output)) { - return false; - } - - if (/OWNER_DECISION:\s*COMPLETE\b/i.test(output)) { - return true; - } - - if (/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/.test(output)) { - return true; - } - - if (!verification) { - return false; - } - - return runVerification(verification, output, 'process', undefined, { allowFailure: true }).passed; -} - -async function runCommand(command: SpawnCommand, opts: ShellOpts): Promise { - const child = spawnProcess([command.bin, ...command.args], { - cwd: opts.cwd, - env: { ...process.env, ...command.env }, - stdio: 'pipe', - }); - - const outputPromise = collectOutput(child); - const exitPromise = new Promise<{ exitCode?: number; exitSignal?: string }>((resolve, reject) => { - let timedOut = false; - let timer: ReturnType | undefined; - let killTimer: ReturnType | undefined; - - if (opts.timeoutMs) { - timer = setTimeout(() => { - timedOut = true; - child.kill('SIGTERM'); - killTimer = setTimeout(() => child.kill('SIGKILL'), 5000); - }, opts.timeoutMs); - } - - const clearTimer = () => { - if (timer) clearTimeout(timer); - if (killTimer) clearTimeout(killTimer); - }; - - child.once('error', (error) => { - clearTimer(); - reject(error); - }); - - child.once('close', (exitCode, exitSignal) => { - clearTimer(); - - if (timedOut) { - reject(new Error(`Process timed out after ${opts.timeoutMs ?? 'unknown'}ms`)); - return; - } - - resolve({ - exitCode: exitCode ?? undefined, - exitSignal: exitSignal ?? undefined, - }); - }); - }); - - const [outputResult, exitResult] = await Promise.allSettled([outputPromise, exitPromise]); - const output = outputResult.status === 'fulfilled' ? outputResult.value : ''; - if (exitResult.status === 'rejected') { - const err = exitResult.reason instanceof Error ? exitResult.reason : new Error(String(exitResult.reason)); - (err as Error & { partialOutput?: string }).partialOutput = output; - throw err; - } - return { - output, - exitCode: exitResult.value.exitCode, - exitSignal: exitResult.value.exitSignal, - }; -} - -export function createProcessSpawner(deps: ProcessSpawnerDeps): ProcessSpawner { - const buildAgentCommand = (agent: AgentDefinition, task: string): SpawnCommand => { - const extraArgs = agent.constraints?.model ? ['--model', agent.constraints.model] : []; - const [bin, ...args] = buildCommand(agent.cli, extraArgs, task); - return { bin, args }; - }; - - return { - async spawnShell(command, opts) { - return runCommand({ bin: 'sh', args: ['-c', command] }, { ...opts, cwd: opts.cwd ?? deps.cwd }); - }, - async spawnAgent(agent, task, opts) { - return runCommand(buildAgentCommand(agent, task), { ...opts, cwd: opts.cwd ?? deps.cwd }); - }, - async spawnInteractive(agent, task, opts) { - return runCommand(buildAgentCommand(agent, task), { ...opts, cwd: opts.cwd ?? deps.cwd }); - }, - buildCommand(agent, task) { - return buildAgentCommand(agent, task); - }, - }; -} diff --git a/packages/sdk/src/workflows/proxy-env.ts b/packages/sdk/src/workflows/proxy-env.ts deleted file mode 100644 index 29fb641e7..000000000 --- a/packages/sdk/src/workflows/proxy-env.ts +++ /dev/null @@ -1,133 +0,0 @@ -import { getCliDefinition } from '../cli-registry.js'; -import type { AgentDefinition, SwarmConfig } from './types.js'; - -export interface ProxyEnvBinding { - baseUrlVar: string; - apiKeyVar: string; -} - -export type ProxyEnvRegistry = Record; - -export const RELAY_PROXY_URL_ENV = 'RELAY_LLM_PROXY' as const; -export const RELAY_PROXY_URL_ENV_ALIAS = 'RELAY_LLM_PROXY_URL' as const; -export const RELAY_PROXY_TOKEN_ENV = 'CREDENTIAL_PROXY_TOKEN' as const; -export const RELAY_PROXY_TOKEN_ENV_ALIAS = 'RELAY_LLM_PROXY_TOKEN' as const; - -const OPENAI_COMPATIBLE_BINDINGS = [ - { baseUrlVar: 'OPENAI_BASE_URL', apiKeyVar: 'OPENAI_API_KEY' }, -] as const satisfies readonly ProxyEnvBinding[]; - -const ANTHROPIC_BINDINGS = [ - { baseUrlVar: 'ANTHROPIC_BASE_URL', apiKeyVar: 'ANTHROPIC_API_KEY' }, -] as const satisfies readonly ProxyEnvBinding[]; - -const AIDER_BINDINGS = [ - { baseUrlVar: 'OPENAI_API_BASE', apiKeyVar: 'OPENAI_API_KEY' }, -] as const satisfies readonly ProxyEnvBinding[]; - -const GEMINI_BINDINGS = [ - { baseUrlVar: 'GOOGLE_API_BASE', apiKeyVar: 'GOOGLE_API_KEY' }, -] as const satisfies readonly ProxyEnvBinding[]; - -const GENERIC_FALLBACK_BINDINGS = [ - ...OPENAI_COMPATIBLE_BINDINGS, - ...ANTHROPIC_BINDINGS, -] as const satisfies readonly ProxyEnvBinding[]; - -const STRIPPED_API_KEY_VARS = [ - 'OPENAI_API_KEY', - 'ANTHROPIC_API_KEY', - 'OPENROUTER_API_KEY', - 'GOOGLE_API_KEY', - 'OPENAI_BASE_URL', - 'ANTHROPIC_BASE_URL', - 'OPENAI_API_BASE', - 'GOOGLE_API_BASE', -] as const; - -const CLI_ALIASES: Record = { - agent: 'cursor', - 'cursor-agent': 'cursor', -}; - -export const DEFAULT_PROXY_ENV_REGISTRY = { - claude: ANTHROPIC_BINDINGS, - codex: OPENAI_COMPATIBLE_BINDINGS, - opencode: OPENAI_COMPATIBLE_BINDINGS, - aider: AIDER_BINDINGS, - gemini: GEMINI_BINDINGS, - goose: OPENAI_COMPATIBLE_BINDINGS, - droid: OPENAI_COMPATIBLE_BINDINGS, - cursor: OPENAI_COMPATIBLE_BINDINGS, -} as const satisfies ProxyEnvRegistry; - -function normalizeCli(cli: string): string { - const baseCli = cli.includes(':') ? cli.split(':')[0] : cli; - return CLI_ALIASES[baseCli] ?? baseCli; -} - -function buildProxyEnv( - bindings: readonly ProxyEnvBinding[], - proxyUrl: string, - proxyToken: string -): Record { - return bindings.reduce>((env, binding) => { - env[binding.baseUrlVar] = proxyUrl; - env[binding.apiKeyVar] = proxyToken; - return env; - }, {}); -} - -export function createProxyEnvResolver(registry: ProxyEnvRegistry = DEFAULT_PROXY_ENV_REGISTRY) { - return (cli: string, proxyUrl: string, proxyToken: string): Record => { - const normalizedCli = normalizeCli(cli); - const bindings = registry[normalizedCli]; - - if (bindings) { - return buildProxyEnv(bindings, proxyUrl, proxyToken); - } - - const knownCli = getCliDefinition(normalizedCli); - const warningPrefix = knownCli ? 'No proxy env registry entry' : 'Unknown CLI'; - console.warn( - `[proxy-env] ${warningPrefix} for "${normalizedCli}". ` + - 'Falling back to generic OpenAI/Anthropic proxy env overrides.' - ); - - return buildProxyEnv(GENERIC_FALLBACK_BINDINGS, proxyUrl, proxyToken); - }; -} - -export const resolveProxyEnv = createProxyEnvResolver(); - -export function resolveProxyUrlFromEnv( - env: Record = process.env -): string | undefined { - return env[RELAY_PROXY_URL_ENV] ?? env[RELAY_PROXY_URL_ENV_ALIAS]; -} - -export function resolveProxyTokenFromEnv( - env: Record = process.env -): string | undefined { - return env[RELAY_PROXY_TOKEN_ENV] ?? env[RELAY_PROXY_TOKEN_ENV_ALIAS]; -} - -export function buildNormalizedProxyEnv(proxyUrl: string, proxyToken: string): Record { - return { - [RELAY_PROXY_URL_ENV]: proxyUrl, - [RELAY_PROXY_URL_ENV_ALIAS]: proxyUrl, - [RELAY_PROXY_TOKEN_ENV]: proxyToken, - [RELAY_PROXY_TOKEN_ENV_ALIAS]: proxyToken, - }; -} - -export function getStrippedApiKeyVars(): string[] { - return [...STRIPPED_API_KEY_VARS]; -} - -export function isProxyEnabled( - agentDef?: Pick | null, - swarmConfig?: Pick | null -): boolean { - return Boolean(agentDef?.credentials?.proxy && swarmConfig?.credentialProxy); -} diff --git a/packages/sdk/src/workflows/run-script.ts b/packages/sdk/src/workflows/run-script.ts deleted file mode 100644 index 2ca667b39..000000000 --- a/packages/sdk/src/workflows/run-script.ts +++ /dev/null @@ -1,630 +0,0 @@ -/** - * Programmatic local runner for `.ts` / `.tsx` / `.py` workflow scripts. - * - * This is the body of the `agent-relay run