diff --git a/.github/workflows/ricky-evals.yml b/.github/workflows/ricky-evals.yml
new file mode 100644
index 00000000..3f5e9116
--- /dev/null
+++ b/.github/workflows/ricky-evals.yml
@@ -0,0 +1,114 @@
+name: Ricky Evals
+
+on:  # pull requests and pushes to main that touch the paths below, plus manual dispatch
+  pull_request:
+    paths:
+      - ".github/workflows/ricky-evals.yml"
+      - "evals/**"
+      - "scripts/evals/**"
+      - "AGENTS.md"
+      - "README.md"
+      - "SPEC.md"
+      - "docs/**"
+      - "specs/**"
+      - "src/cloud/**"
+      - "src/local/**"
+      - "src/product/**"
+      - "src/runtime/**"
+      - "src/surfaces/**"
+      - "src/shared/**"
+      - "workflows/shared/**"
+      - "workflows/meta/spec/**"
+      - "package.json"
+      - "package-lock.json"
+  push:
+    branches:
+      - main
+    paths:  # same list as pull_request.paths above
+      - ".github/workflows/ricky-evals.yml"
+      - "evals/**"
+      - "scripts/evals/**"
+      - "AGENTS.md"
+      - "README.md"
+      - "SPEC.md"
+      - "docs/**"
+      - "specs/**"
+      - "src/cloud/**"
+      - "src/local/**"
+      - "src/product/**"
+      - "src/runtime/**"
+      - "src/surfaces/**"
+      - "src/shared/**"
+      - "workflows/shared/**"
+      - "workflows/meta/spec/**"
+      - "package.json"
+      - "package-lock.json"
+  workflow_dispatch:
+
+concurrency:  # one active run per ref; a newer run cancels the one in progress
+  group: ricky-evals-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+  pull-requests: write  # the "Comment human-review cases" step posts with github.token
+
+env:
+  NODE_VERSION: "22.14.0"
+  NPM_CONFIG_FUND: "false"
+  RICKY_EVAL_OPENROUTER_MODEL: openai/gpt-oss-120b:free  # read by the openrouter executor in scripts/evals/run-ricky-evals.mjs
+
+jobs:
+  evals:
+    name: Provider-backed evals
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: npm
+          cache-dependency-path: package-lock.json
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Require OpenRouter API key  # fails fast when the secret is empty
+        if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
+        env:
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+        run: |
+          if [ -z "${OPENROUTER_API_KEY}" ]; then
+            echo "OPENROUTER_API_KEY GitHub secret is required for provider-backed Ricky evals."
+            exit 1
+          fi
+
+      - name: Run provider evals  # skipped for pull requests whose head repo is not this repository
+        if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
+        env:
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+        run: npm run evals:provider -- --trials 1
+
+      - name: Summarize evals  # always(): still runs after an earlier step fails
+        if: always()
+        run: node scripts/evals/ci-summary.mjs
+
+      - name: Comment human-review cases  # pull requests only
+        if: always() && github.event_name == 'pull_request'
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: node scripts/evals/ci-review-comment.mjs
+
+      - name: Upload eval artifacts  # a missing runs directory is not an error
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ricky-eval-run
+          path: .ricky/evals/runs/
+          retention-days: 14
+          if-no-files-found: ignore
diff --git a/package.json b/package.json
index 9cef880d..3bb6d170 100644
--- a/package.json
+++ b/package.json
@@ -44,6 +44,7 @@
     "test": "npm run bundle && vitest run",
     "evals:compile": "node scripts/evals/compile-ricky-evals.mjs",
     "evals": "npm run evals:compile && node scripts/evals/run-ricky-evals.mjs",
+    "evals:provider": "npm run build && npm run evals -- --provider --executor openrouter",
     "evals:opencode": "npm run evals:compile && node scripts/evals/run-ricky-evals.mjs --provider --executor opencode",
     "evals:list": "npm run evals:compile && node scripts/evals/run-ricky-evals.mjs --list",
     "evals:summary": "node scripts/evals/summarize-ricky-evals.mjs",
diff --git a/scripts/evals/ci-review-comment.mjs b/scripts/evals/ci-review-comment.mjs
new file mode 100644
index 00000000..9b13ff90
--- /dev/null
+++ b/scripts/evals/ci-review-comment.mjs
@@ -0,0 +1,262 @@
+#!/usr/bin/env node
+
+import { appendFileSync, existsSync, readdirSync, readFileSync } from 'node:fs';
+import path from 'node:path';
+import process from 'node:process';
+import { fileURLToPath } from 'node:url';
+
+const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../..'); // two levels above this script's directory
+const RUNS_DIR = path.join(ROOT, '.ricky', 'evals', 'runs');
+const MARKER = '<!-- ricky-eval-human-review -->'; // hidden tag used to find a previously posted comment
+const MAX_COMMENT_CHARS = 60000; // longer bodies are truncated by renderComment
+const MAX_OUTPUT_CHARS = 1200; // per-case cap on quoted output and error text
+
+const runDir = findLatestRunDir(); // newest run directory with a parseable result.json, or null
+if (!runDir) {
+  console.log('No Ricky eval run found; skipping PR comment.');
+  process.exit(0);
+}
+
+const result = readResultJson(path.join(runDir, 'result.json'));
+const comment = renderComment({ result, runDir });
+
+if (process.env.GITHUB_STEP_SUMMARY) { // note the generated comment in the step summary file
+  appendFileSync(process.env.GITHUB_STEP_SUMMARY, [
+    '',
+    '## Ricky Eval Review Comment',
+    '',
+    'A detailed human-review comment was generated for this PR.',
+    '',
+  ].join('\n'));
+}
+
+if (process.env.GITHUB_TOKEN && process.env.GITHUB_REPOSITORY && process.env.PR_NUMBER) { // post only with full credentials
+  await upsertPullRequestComment(comment);
+} else {
+  console.log(comment); // otherwise print the body instead of posting it
+}
+
+/**
+ * Build the Markdown body of the PR review comment for one eval run.
+ *
+ * Layout: marker and header with counts, blocking (failed/skipped) cases, reviewable
+ * needs-human cases, then needs-human cases that have no captured output. A body longer
+ * than MAX_COMMENT_CHARS is cut short and closed with a pointer to the run artifact.
+ */
+function renderComment({ result, runDir }) {
+  const withStatus = (status) => result.tests.filter((test) => test.status === status);
+  const blocking = [...withStatus('failed'), ...withStatus('skipped')];
+  const reviewable = [];
+  const withoutOutput = [];
+  for (const test of withStatus('needs-human')) {
+    (hasCapturedOutput(test) ? reviewable : withoutOutput).push(test);
+  }
+
+  const out = [
+    MARKER,
+    '# Ricky Eval Review',
+    '',
+    `Run: \`${path.relative(ROOT, runDir)}\``,
+    `Mode: \`${result.mode}\``,
+    `Git SHA: \`${result.git_sha}\``,
+    '',
+    `**Passed:** ${result.passed} | **Needs human:** ${result.needs_human} | **Reviewable:** ${reviewable.length} | **Missing output:** ${withoutOutput.length} | **Failed:** ${result.failed} | **Skipped:** ${result.skipped}`,
+    '',
+  ];
+
+  if (blocking.length > 0) {
+    out.push('## Blocking Cases', '');
+    for (const test of blocking) appendCaseDetails(out, test, { forceOpen: true });
+  }
+
+  out.push('## Human Review Cases', '');
+  if (reviewable.length === 0) {
+    out.push('No reviewable human-review cases captured Ricky output.', '');
+  } else {
+    out.push('These cases passed deterministic checks and include captured Ricky output for a human verdict against their `Must` / `Must Not` rubric.', '');
+    for (const test of reviewable) appendCaseDetails(out, test, { forceOpen: false });
+  }
+
+  if (withoutOutput.length > 0) {
+    out.push(
+      '## Cases Missing Ricky Output',
+      '',
+      'These cases are not expanded because there is no candidate Ricky response to judge. Change them to `Executor: openrouter`, run with `--executor openrouter`, or provide `Candidate Output`, before treating them as human-review evidence.',
+      '',
+    );
+    for (const test of withoutOutput) out.push(`- \`${test.id}\` (${test.suite}/${test.executor})`);
+    out.push('');
+  }
+
+  const body = `${out.join('\n')}\n`;
+  const notice = '\n\n---\n\n_Comment truncated to stay within GitHub limits. Download the `ricky-eval-run` artifact for the full `human-review.md`._\n';
+  return body.length > MAX_COMMENT_CHARS ? `${body.slice(0, MAX_COMMENT_CHARS - 2000)}${notice}` : body;
+}
+
+// Render one eval case as a collapsible <details> element: a status/id summary line,
+// then the user message, Ricky output, rubric lists, non-human checks and error text.
+// `forceOpen` emits the element with the `open` attribute.
+function appendCaseDetails(lines, test, { forceOpen }) {
+  let label = 'REVIEW';
+  if (test.status === 'failed') label = 'FAIL';
+  else if (test.status === 'skipped') label = 'SKIP';
+  const caseRef = `<code>${escapeHtml(test.id)}</code> (${escapeHtml(test.suite)}/${escapeHtml(test.executor)})`;
+  lines.push(`<details${forceOpen ? ' open' : ''}>`, `<summary><strong>${label}</strong> ${caseRef}</summary>`, '');
+
+  const userMessage = test.input?.message;
+  if (userMessage) {
+    lines.push('**User message**', '', blockquote(String(userMessage)), '');
+  }
+
+  appendRickyOutput(lines, test);
+  appendRubricList(lines, 'Must', test.expected?.must);
+  appendRubricList(lines, 'Must Not', test.expected?.mustNot);
+
+  // Checks named "human:*" are left out of the deterministic list.
+  const machineChecks = (test.checks ?? []).filter((check) => !String(check.name).startsWith('human:'));
+  if (machineChecks.length > 0) {
+    lines.push('**Deterministic checks**', '');
+    for (const { passed, name, message } of machineChecks) {
+      lines.push(`- ${passed ? 'PASS' : 'FAIL'} \`${name}\`: ${message ?? ''}`);
+    }
+    lines.push('');
+  }
+
+  if (test.error) {
+    lines.push('**Error**', '', '```text', String(test.error).slice(0, MAX_OUTPUT_CHARS), '```', '');
+  }
+  lines.push('</details>', '');
+}
+
+// Append the "Ricky output" section: a fenced preview capped at MAX_OUTPUT_CHARS, or a note when nothing was captured.
+function appendRickyOutput(lines, test) {
+  const actualContent = getCapturedOutput(test).trim();
+  lines.push('**Ricky output**', '');
+  if (actualContent.length === 0) {
+    lines.push(`_No Ricky output captured for this case. Executor: \`${test.executor}\`._`, '');
+    return;
+  }
+  const preview = actualContent.length > MAX_OUTPUT_CHARS ? `${actualContent.slice(0, MAX_OUTPUT_CHARS)}\n...[truncated]` : actualContent;
+  // Model output often contains ``` fences; use a longer fence so the content cannot close ours early.
+  const longestRun = Math.max(0, ...(preview.match(/`+/g) ?? []).map((run) => run.length));
+  const fence = '`'.repeat(Math.max(3, longestRun + 1));
+  lines.push(`${fence}text`, preview, fence, '');
+}
+
+// Report whether a case carries candidate output worth showing to a reviewer,
+// i.e. anything other than whitespace.
+function hasCapturedOutput(test) { return getCapturedOutput(test).trim() !== ''; }
+
+// Return the captured candidate output of a case as a string.
+// The first field that is neither null nor undefined wins, checked in this order:
+// actual.content, candidate_output, candidateOutput, candidate.content; '' if none is set.
+function getCapturedOutput(test) {
+  const { actual, candidate_output: snakeCase, candidateOutput: camelCase, candidate } = test;
+  const fields = [actual?.content, snakeCase, camelCase, candidate?.content];
+  const firstSet = fields.find((field) => field !== null && field !== undefined);
+  return String(firstSet ?? '');
+}
+
+// Append a bold `title` followed by one bullet per rubric item; does nothing unless
+// `items` is a non-empty array.
+function appendRubricList(lines, title, items) {
+  const hasItems = Array.isArray(items) && items.length > 0;
+  if (!hasItems) return;
+  const bullets = Array.from(items, (item) => `- ${String(item)}`);
+  lines.push(`**${title}**`, '', ...bullets, '');
+}
+
+// Quote `text` for Markdown by starting every newline-separated line with "> "
+// (an empty string yields a single "> ").
+function blockquote(text) { return `> ${text.split('\n').join('\n> ')}`; }
+
+// Escape the characters &, < and > so a value can be placed inside HTML element
+// content; the value is stringified first.
+function escapeHtml(value) {
+  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
+  return String(value).replace(/[&<>]/g, (char) => entities[char]);
+}
+
+// Newest run directory under RUNS_DIR (by result.json `timestamp`, compared as strings) with a parseable result, else null.
+function findLatestRunDir() {
+  if (!existsSync(RUNS_DIR)) return null;
+  const candidates = [];
+  for (const entry of readdirSync(RUNS_DIR)) {
+    const dir = path.join(RUNS_DIR, entry);
+    const result = existsSync(path.join(dir, 'result.json')) ? safeReadResultJson(path.join(dir, 'result.json')) : null;
+    if (result) candidates.push({ dir, result });
+  }
+  candidates.sort((a, b) => String(b.result.timestamp).localeCompare(String(a.result.timestamp)));
+  return candidates[0]?.dir ?? null;
+}
+
+// Load a result.json that is required to be readable and valid; throws an Error naming
+// the repo-relative path when it is not.
+function readResultJson(filePath) {
+  const parsed = safeReadResultJson(filePath);
+  if (parsed) return parsed;
+  throw new Error(`Could not parse Ricky eval result: ${path.relative(ROOT, filePath)}`);
+}
+
+function safeReadResultJson(filePath) { // best-effort read: returns null instead of throwing
+  try {
+    return JSON.parse(readFileSync(filePath, 'utf8'));
+  } catch (error) { // covers read failures as well as invalid JSON
+    console.warn(`Skipping malformed Ricky eval result ${path.relative(ROOT, filePath)}: ${error instanceof Error ? error.message : String(error)}`);
+    return null;
+  }
+}
+
+// Create the eval review comment on the PR, or update the one already carrying MARKER.
+// Uses GITHUB_REPOSITORY, PR_NUMBER and GITHUB_TOKEN; throws on a bad slug or non-OK response.
+async function upsertPullRequestComment(body) {
+  const slug = process.env.GITHUB_REPOSITORY;
+  const [owner, repo] = slug.split('/');
+  if (!owner || !repo) {
+    throw new Error(`Invalid GITHUB_REPOSITORY format: expected "owner/repo", got "${slug}"`);
+  }
+  const headers = {
+    accept: 'application/vnd.github+json',
+    authorization: `Bearer ${process.env.GITHUB_TOKEN}`,
+    'content-type': 'application/json',
+    'x-github-api-version': '2022-11-28',
+  };
+  const commentsUrl = `https://api.github.com/repos/${owner}/${repo}/issues/${process.env.PR_NUMBER}/comments`;
+  const existing = await findExistingReviewComment(commentsUrl, headers);
+  const isUpdate = Boolean(existing?.url);
+  const response = await globalThis.fetch(existing?.url ?? commentsUrl, {
+    method: isUpdate ? 'PATCH' : 'POST',
+    headers,
+    body: JSON.stringify({ body }),
+  });
+  if (!response.ok) {
+    throw new Error(`Failed to ${isUpdate ? 'update' : 'create'} PR comment: ${response.status} ${await response.text()}`);
+  }
+  console.log(`${isUpdate ? 'Updated' : 'Created'} Ricky eval review comment.`);
+}
+
+// Walk the paginated comment list (100 per page, following the `link` response header)
+// and return the first comment whose body contains MARKER, or undefined if none does.
+async function findExistingReviewComment(commentsUrl, headers) {
+  let pageUrl = `${commentsUrl}?per_page=100`;
+  do {
+    const response = await globalThis.fetch(pageUrl, { headers });
+    if (!response.ok) {
+      throw new Error(`Failed to list PR comments: ${response.status} ${await response.text()}`);
+    }
+    const page = await response.json();
+    const match = Array.isArray(page) ? page.find(({ body }) => typeof body === 'string' && body.includes(MARKER)) : undefined;
+    if (match) return match;
+    pageUrl = getNextLink(response.headers.get('link'));
+  } while (pageUrl);
+  return undefined;
+}
+
+// Return the URL tagged rel="next" in a comma-separated Link header value, or null
+// when the header is missing or names no next page.
+function getNextLink(linkHeader) {
+  if (!linkHeader) return null;
+  const nextPattern = /<([^>]+)>;\s*rel="next"/;
+  const hit = linkHeader.split(',').map((part) => nextPattern.exec(part.trim())).find(Boolean);
+  return hit ? hit[1] : null;
+}
diff --git a/scripts/evals/ci-summary.mjs b/scripts/evals/ci-summary.mjs
new file mode 100644
index 00000000..0cf6d236
--- /dev/null
+++ b/scripts/evals/ci-summary.mjs
@@ -0,0 +1,161 @@
+#!/usr/bin/env node
+
+import { existsSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../..'); // two levels above this script's directory
+const RUNS_DIR = path.join(ROOT, '.ricky', 'evals', 'runs');
+
+const runDir = findLatestRunDir();
+if (!runDir) { // no parseable result.json under RUNS_DIR: report that and exit successfully
+  const summary = '# Ricky Eval CI Summary\n\nNo Ricky eval run found; provider evals may have been skipped for an untrusted fork PR.\n';
+  console.log(summary);
+  if (process.env.GITHUB_STEP_SUMMARY) {
+    writeFileSync(process.env.GITHUB_STEP_SUMMARY, summary, { flag: 'a' });
+  }
+  process.exit(0);
+}
+
+const resultPath = path.join(runDir, 'result.json');
+const reviewPath = path.join(runDir, 'human-review.md'); // only its path is printed in the summary; the file is not read here
+const result = readResultJson(resultPath);
+
+const failed = result.tests.filter((test) => test.status === 'failed');
+const skipped = result.tests.filter((test) => test.status === 'skipped');
+const needsHuman = result.tests.filter((test) => test.status === 'needs-human');
+const reviewableNeedsHuman = needsHuman.filter(hasCapturedOutput); // needs-human cases with captured output
+const missingOutputNeedsHuman = needsHuman.filter((test) => !hasCapturedOutput(test));
+
+const lines = [
+  '# Ricky Eval CI Summary',
+  '',
+  `- Run directory: \`${path.relative(ROOT, runDir)}\``,
+  `- Mode: \`${result.mode}\``,
+  `- Git SHA: \`${result.git_sha}\``,
+  `- Passed: ${result.passed}`,
+  `- Needs human review: ${result.needs_human}`,
+  `- Reviewable human cases: ${reviewableNeedsHuman.length}`,
+  `- Human cases missing Ricky output: ${missingOutputNeedsHuman.length}`,
+  `- Failed: ${result.failed}`,
+  `- Skipped: ${result.skipped}`,
+  '',
+];
+
+appendStatusSection(lines, 'Failed', failed);
+appendStatusSection(lines, 'Skipped', skipped);
+appendHumanReviewSection(lines, reviewableNeedsHuman, missingOutputNeedsHuman);
+
+const summary = `${lines.join('\n')}\n`;
+console.log(summary);
+
+if (process.env.GITHUB_STEP_SUMMARY) {
+  writeFileSync(process.env.GITHUB_STEP_SUMMARY, summary, { flag: 'a' }); // flag 'a' appends to the step summary file
+}
+
+if (failed.length > 0 || skipped.length > 0 || missingOutputNeedsHuman.length > 0) {
+  process.exitCode = 1; // non-zero exit on failed, skipped, or output-less needs-human cases
+}
+
+// Append a "## <title>" section: one bullet per test plus its error and failing checks
+// (a check with no message renders an empty detail, not "undefined"). No-op for [].
+function appendStatusSection(lines, title, tests) {
+  if (tests.length === 0) return;
+  lines.push(`## ${title}`, '');
+  for (const test of tests) {
+    lines.push(`- \`${test.id}\` (${test.suite}/${test.executor})`);
+    if (test.error) lines.push(`  - ${test.error}`);
+    for (const check of test.checks ?? []) {
+      if (!check.passed) lines.push(`  - FAIL ${check.name}: ${check.message ?? ''}`);
+    }
+  }
+  lines.push('');
+}
+
+// Append the "## Human Review" section. With nothing to review it is a one-line note;
+// otherwise it lists reviewable case ids grouped under their suite (suites sorted by
+// name, missing suite shown as "unknown") and then any cases lacking Ricky output.
+function appendHumanReviewSection(lines, reviewableTests, missingOutputTests) {
+  lines.push('## Human Review', '');
+  if (reviewableTests.length === 0 && missingOutputTests.length === 0) {
+    lines.push('No cases require human review.', '');
+    return;
+  }
+
+  lines.push(
+    `These ${reviewableTests.length} cases passed deterministic checks and include captured Ricky output for human review.`,
+    '',
+    `Review worksheet: \`${path.relative(ROOT, reviewPath)}\``,
+    '',
+  );
+
+  // Group case ids per suite, keeping the order in which cases were reported.
+  const idsBySuite = new Map();
+  for (const test of reviewableTests) {
+    const suiteName = test.suite ?? 'unknown';
+    const ids = idsBySuite.get(suiteName) ?? [];
+    ids.push(test.id);
+    idsBySuite.set(suiteName, ids);
+  }
+
+  const suiteNames = [...idsBySuite.keys()].sort((a, b) => a.localeCompare(b));
+  for (const suiteName of suiteNames) {
+    lines.push(`### ${suiteName}`, '');
+    for (const id of idsBySuite.get(suiteName)) lines.push(`- \`${id}\``);
+    lines.push('');
+  }
+
+  if (missingOutputTests.length === 0) return;
+  lines.push(
+    '### Missing Ricky Output',
+    '',
+    'These cases cannot be reviewed because no candidate Ricky response was captured. Use `Executor: openrouter`, run with `--executor openrouter`, or provide `Candidate Output`.',
+    '',
+  );
+  for (const test of missingOutputTests) lines.push(`- \`${test.id}\` (${test.suite}/${test.executor})`);
+  lines.push('');
+}
+
+// True when the case's captured output contains at least one non-whitespace
+// character.
+function hasCapturedOutput(test) { return getCapturedOutput(test).trim() !== ''; }
+
+// Resolve the candidate output recorded for a case and coerce it to a string.
+// Fields are tried in order (actual.content, candidate_output, candidateOutput,
+// candidate.content); the first one that is not null/undefined is used, else ''.
+function getCapturedOutput(test) {
+  const { actual, candidate_output: snakeCase, candidateOutput: camelCase, candidate } = test;
+  const fields = [actual?.content, snakeCase, camelCase, candidate?.content];
+  const firstSet = fields.find((field) => field !== null && field !== undefined);
+  return String(firstSet ?? '');
+}
+
+// Pick the run directory under RUNS_DIR whose parseable result.json has the greatest
+// `timestamp` (compared as strings); null when RUNS_DIR is missing or holds no such run.
+function findLatestRunDir() {
+  if (!existsSync(RUNS_DIR)) return null;
+  const candidates = [];
+  for (const entry of readdirSync(RUNS_DIR)) {
+    const dir = path.join(RUNS_DIR, entry);
+    const result = existsSync(path.join(dir, 'result.json')) ? safeReadResultJson(path.join(dir, 'result.json')) : null;
+    if (result) candidates.push({ dir, result });
+  }
+  candidates.sort((a, b) => String(b.result.timestamp).localeCompare(String(a.result.timestamp)));
+  return candidates[0]?.dir ?? null;
+}
+
+// Strict counterpart of safeReadResultJson: returns the parsed result or throws an
+// Error that names the file relative to ROOT.
+function readResultJson(filePath) {
+  const parsed = safeReadResultJson(filePath);
+  if (parsed) return parsed;
+  throw new Error(`Could not parse Ricky eval result: ${path.relative(ROOT, filePath)}`);
+}
+
+function safeReadResultJson(filePath) { // best-effort read: returns null instead of throwing
+  try {
+    return JSON.parse(readFileSync(filePath, 'utf8'));
+  } catch (error) { // covers read failures as well as invalid JSON
+    console.warn(`Skipping malformed Ricky eval result ${path.relative(ROOT, filePath)}: ${error instanceof Error ? error.message : String(error)}`);
+    return null;
+  }
+}
diff --git a/scripts/evals/run-ricky-evals.mjs b/scripts/evals/run-ricky-evals.mjs
index 4c841e10..32a98753 100644
--- a/scripts/evals/run-ricky-evals.mjs
+++ b/scripts/evals/run-ricky-evals.mjs
@@ -15,6 +15,8 @@ import {
 
 const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../..');
 const DEFAULT_OPENCODE_MODEL = 'opencode/minimax-m2.5-free';
+const DEFAULT_OPENROUTER_MODEL = 'openai/gpt-oss-120b:free';
+const OPENROUTER_CHAT_COMPLETIONS_ENDPOINT = 'https://openrouter.ai/api/v1/chat/completions';
 const { argv: evalArgv, executorOverride } = parseRickyEvalArgs(process.argv.slice(2));
 const defaultExecutors = createDefaultHumanEvalExecutors(ROOT);
 
@@ -25,6 +27,7 @@ const exitCode = await runHumanEvalCli({
   runsDir: path.join(ROOT, '.ricky', 'evals', 'runs'),
   executors: {
     manual: executeManual,
+    openrouter: executeOpenRouter,
     opencode: executeOpenCode,
     'ricky-cli': executeRickyCli,
   },
@@ -41,12 +44,167 @@ const exitCode = await runHumanEvalCli({
 process.exitCode = exitCode;
 
 function executeManual(testCase, context) {
-  if (context.providerMode && executorOverride === 'opencode') {
+  if (executorOverride === 'openrouter') {
+    return executeOpenRouter(testCase, context);
+  }
+  if (executorOverride === 'opencode') {
     return executeOpenCode(testCase, context);
   }
   return defaultExecutors.manual(testCase, context);
 }
 
+// Run one eval case against OpenRouter, retrying empty replies and transient errors.
+// Throws the createSkippedEvalError result when provider mode or OPENROUTER_API_KEY is
+// missing. A retryable error is followed by an exponential backoff (base delay from
+// RICKY_EVAL_OPENROUTER_RETRY_DELAY_MS, doubled per attempt) so a rate-limited provider is
+// not retried instantly; non-retryable errors and the final attempt's error are rethrown.
+async function executeOpenRouter(testCase, context) {
+  if (!context.providerMode) {
+    throw createSkippedEvalError('openrouter executor skipped; rerun with --provider or HUMAN_EVAL_PROVIDER=1');
+  }
+  const apiKey = process.env.OPENROUTER_API_KEY;
+  if (!apiKey) {
+    throw createSkippedEvalError('openrouter executor skipped; OPENROUTER_API_KEY is missing');
+  }
+  const model = process.env.RICKY_EVAL_OPENROUTER_MODEL ?? DEFAULT_OPENROUTER_MODEL;
+  const timeoutMs = readPositiveInt(process.env.RICKY_EVAL_OPENROUTER_TIMEOUT_MS, 120_000);
+  const maxAttempts = readPositiveInt(process.env.RICKY_EVAL_OPENROUTER_MAX_ATTEMPTS, 3);
+  const maxTokens = readPositiveInt(process.env.RICKY_EVAL_OPENROUTER_MAX_TOKENS, 1200);
+  const retryDelayMs = readPositiveInt(process.env.RICKY_EVAL_OPENROUTER_RETRY_DELAY_MS, 1_000);
+  const startedAt = Date.now();
+  const emptyAttempts = [];
+
+  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+    try {
+      const { content, note } = await runOpenRouterAttempt({ apiKey, model, timeoutMs, maxTokens, testCase });
+      if (content) {
+        const durationMs = Date.now() - startedAt;
+        return {
+          ok: true,
+          status: 'completed',
+          content,
+          model,
+          toolCalls: [],
+          notes: `Ran OpenRouter eval with model ${model}; attempts=${attempt}; durationMs=${durationMs}.${note ? ` ${note}` : ''}`,
+        };
+      }
+      emptyAttempts.push(`attempt ${attempt}: ${note || 'empty content'}`);
+    } catch (error) {
+      if (attempt >= maxAttempts || !isRetryableOpenRouterError(error)) {
+        throw error;
+      }
+      emptyAttempts.push(`attempt ${attempt}: ${error instanceof Error ? error.message : String(error)}`);
+      // Back off before retrying a transient failure: 1x, 2x, 4x ... the base delay.
+      await new Promise((resolve) => setTimeout(resolve, retryDelayMs * 2 ** (attempt - 1)));
+    }
+  }
+
+  return {
+    ok: false,
+    status: 'completed',
+    content: [
+      `OpenRouter returned an empty response after ${maxAttempts} attempts for ${testCase.id}.`,
+      'This provider response is reviewable as an infrastructure-quality signal, but it is not a Ricky product answer.',
+      '',
+      'Attempts:',
+      ...emptyAttempts.map((attempt) => `- ${attempt}`),
+    ].join('\n'),
+    model,
+    toolCalls: [],
+    notes: `OpenRouter empty response fallback after ${maxAttempts} attempts; durationMs=${Date.now() - startedAt}.`,
+  };
+}
+
+// Perform a single chat-completion request for `testCase`, aborted after `timeoutMs`.
+// Resolves to { content, note }: the reply text ('' when the model returned none) and an
+// optional `finish_reason=...` note. Throws an Error carrying `status` for non-OK
+// responses and an Error flagged `retryable` when the request times out.
+async function runOpenRouterAttempt({ apiKey, model, timeoutMs, maxTokens, testCase }) {
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), timeoutMs);
+  try {
+    const response = await fetch(OPENROUTER_CHAT_COMPLETIONS_ENDPOINT, {
+      method: 'POST',
+      signal: controller.signal,
+      headers: {
+        authorization: `Bearer ${apiKey}`,
+        'content-type': 'application/json',
+        'http-referer': process.env.GITHUB_SERVER_URL
+          ? `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY ?? ''}`
+          : 'https://github.com/AgentWorkforce/ricky',
+        'x-title': 'Ricky Evals',
+      },
+      body: JSON.stringify({
+        model,
+        temperature: 0,
+        max_tokens: maxTokens,
+        messages: [
+          {
+            role: 'system',
+            content: [
+              'You are Ricky, the AgentWorkforce workflow reliability, coordination, and authoring assistant.',
+              'Follow Ricky repository conventions from AGENTS.md, workflow standards, shared authoring rules, and product specs.',
+              'Answer the user request directly. Keep the answer concise and under 700 words.',
+              'Do not mention this eval harness or hidden rubric.',
+            ].join(' '),
+          },
+          { role: 'user', content: buildProviderPrompt(testCase) },
+        ],
+      }),
+    });
+    // A body that is not JSON is treated as an empty payload, but an abort raised while
+    // the body is still being read must surface as a timeout rather than an empty reply.
+    const payload = await response.json().catch((error) => {
+      if (error?.name === 'AbortError') throw error;
+      return {};
+    });
+    if (!response.ok) {
+      const detail = typeof payload?.error?.message === 'string' ? payload.error.message : JSON.stringify(payload);
+      const error = new Error(`OpenRouter eval failed: ${response.status} ${detail}`);
+      error.status = response.status;
+      throw error;
+    }
+    const choice = payload?.choices?.[0];
+    const finishReason = typeof choice?.finish_reason === 'string' ? choice.finish_reason : undefined;
+    return { content: contentFromOpenRouterChoice(choice), note: finishReason ? `finish_reason=${finishReason}` : undefined };
+  } catch (error) {
+    if (error instanceof Error && error.name === 'AbortError') {
+      const timeoutError = new Error(`OpenRouter eval timed out after ${timeoutMs}ms.`, { cause: error });
+      timeoutError.retryable = true;
+      throw timeoutError;
+    }
+    throw error;
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
+// Extract the assistant text from one chat-completion choice.
+//
+// A string `message.content` is returned trimmed. An array `message.content` is reduced
+// to text part by part (a string part itself, else its `text`, else its `content`, else
+// nothing), joined with newlines and trimmed. Anything else yields ''.
+function contentFromOpenRouterChoice(choice) {
+  const raw = choice?.message?.content;
+  if (typeof raw === 'string') return raw.trim();
+  if (!Array.isArray(raw)) return '';
+
+  const pieces = [];
+  for (const part of raw) {
+    // First string among: the part itself, part.text, part.content.
+    const candidates = [part, part?.text, part?.content];
+    pieces.push(candidates.find((value) => typeof value === 'string') ?? '');
+  }
+
+  return pieces.join('\n').trim();
+}
+
+// Retry on: `retryable` flag, aborts, HTTP 408/409/429/5xx, and fetch network failures (a TypeError carrying a `cause`).
+function isRetryableOpenRouterError(error) {
+  if (!(error instanceof Error)) return false;
+  return error.retryable === true || error.name === 'AbortError' || (error instanceof TypeError && error.cause !== undefined) || (typeof error.status === 'number' && (error.status >= 500 || [408, 409, 429].includes(error.status)));
+}
+
 function executeOpenCode(testCase, context) {
   if (!context.providerMode) {
     throw createSkippedEvalError('opencode executor skipped; rerun with --provider or HUMAN_EVAL_PROVIDER=1');
@@ -188,6 +346,10 @@ function decodeMockText(value) {
 }
 
 function buildOpenCodePrompt(testCase) {
+  return buildProviderPrompt(testCase);
+}
+
+function buildProviderPrompt(testCase) {
   const systemPrompt = stringValue(testCase.input.systemPrompt);
   const threadHistory = Array.isArray(testCase.input.threadHistory)
     ? testCase.input.threadHistory