diff --git a/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts
new file mode 100644
index 000000000..fb2d4d866
--- /dev/null
+++ b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts
@@ -0,0 +1,89 @@
+#!/usr/bin/env bun
+/**
+ * trigger-judge: detects whether the agent invoked a named Claude Code skill.
+ *
+ * Mirrors the post-hoc fallback detection in skill-creator's run_eval.py:
+ *   - Only the FIRST tool call matters. Any non-Skill/Read tool as the first
+ *     call means the skill was not triggered (mirrors run_eval.py's early-exit).
+ *   - Skill tool: checks input.skill contains the skill name (case-sensitive).
+ *   - Read tool: checks input.file_path contains the skill name (case-sensitive).
+ *   - Supports negative cases via should_trigger: false.
+ *
+ * Usage in eval YAML:
+ *   assertions:
+ *     - type: trigger-judge          # discovered from .agentv/judges/
+ *       skill: my-skill-name         # required: exact name as installed in .claude/commands/
+ *       should_trigger: true         # optional: expected behaviour (default: true)
+ *
+ * Positive case (should_trigger: true):  passes when skill fires.
+ * Negative case (should_trigger: false): passes when skill does NOT fire.
+ */
+import { defineCodeJudge } from '@agentv/eval';
+
+/**
+ * Code judge that scores 1 when the observed trigger state of the configured
+ * skill matches `config.should_trigger` (default: true) and 0 otherwise.
+ *
+ * Only the very first tool call in the transcript is inspected: a `Skill` call
+ * whose `input.skill` contains the skill name, or a `Read` call whose
+ * `input.file_path` contains it, counts as a trigger. Matching is a
+ * case-sensitive substring test.
+ */
+export default defineCodeJudge(({ output, config }) => {
+  const skillName = config?.skill as string | undefined;
+  if (!skillName) {
+    return {
+      score: 0,
+      misses: ['config.skill is required'],
+      reasoning: 'No skill name configured',
+    };
+  }
+
+  const shouldTrigger = (config?.should_trigger ?? true) as boolean;
+
+  // The first tool call overall is the first entry of the first message that
+  // carries any tool calls; later calls are irrelevant (run_eval.py exits early).
+  const firstTool = (output ?? []).find((msg) => (msg.toolCalls ?? []).length > 0)
+    ?.toolCalls?.[0];
+  const input = (firstTool?.input ?? {}) as Record<string, unknown>;
+
+  // `evidence` stays empty unless the first call is a matching Skill/Read call.
+  let evidence = '';
+  switch (firstTool?.tool) {
+    case 'Skill': {
+      const requestedSkill = String(input.skill ?? '');
+      if (requestedSkill.includes(skillName)) {
+        evidence = `Skill tool invoked with skill="${requestedSkill}"`;
+      }
+      break;
+    }
+    case 'Read': {
+      const readPath = String(input.file_path ?? '');
+      if (readPath.includes(skillName)) {
+        evidence = `Read tool loaded skill file: ${readPath}`;
+      }
+      break;
+    }
+    default:
+      // Any other first tool (or no tool at all) means the skill did not fire.
+      break;
+  }
+  const triggered = evidence !== '';
+
+  if (triggered === shouldTrigger) {
+    const hit = shouldTrigger
+      ? evidence || `Skill "${skillName}" triggered as expected`
+      : `Skill "${skillName}" correctly did not trigger`;
+    return {
+      score: 1,
+      hits: [hit],
+      reasoning: shouldTrigger ? 'Skill triggered correctly' : 'No false trigger',
+    };
+  }
+
+  let miss: string;
+  if (!shouldTrigger) {
+    miss = evidence || `Skill "${skillName}" triggered unexpectedly`;
+  } else if (firstTool) {
+    miss = `First tool was "${firstTool.tool}" — not Skill/Read for "${skillName}"`;
+  } else {
+    miss = `No tool calls recorded`;
+  }
+
+  return {
+    score: 0,
+    misses: [miss],
+    reasoning: shouldTrigger
+      ? `Skill "${skillName}" was not triggered`
+      : `False trigger: skill fired when it should not have`,
+  };
+});
diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts
index 17d0f2abb..12047bb8c 100644
--- a/packages/core/src/evaluation/orchestrator.ts
+++ b/packages/core/src/evaluation/orchestrator.ts
@@ -28,7 +28,7 @@ import type {
   TargetDefinition,
 } from './providers/types.js';
 import { extractLastAssistantContent, isAgentProvider } from './providers/types.js';
-import { createBuiltinRegistry, discoverAssertions } from './registry/index.js';
+import { createBuiltinRegistry, discoverAssertions, discoverJudges } from './registry/index.js';
 import {
   type TokenUsage,
   type TraceSummary,
@@ -375,6 +375,7 @@ export async function runEvaluation(
   // Directory containing the eval YAML file, used as default cwd for workspace scripts
   const evalDir = discoveryBaseDir;
   await discoverAssertions(typeRegistry, discoveryBaseDir);
+  await discoverJudges(typeRegistry, discoveryBaseDir);
 
   // Discover custom providers from .agentv/providers/ directory
   const providerRegistry = createBuiltinProviderRegistry();
diff --git a/packages/core/src/evaluation/providers/claude-cli.ts b/packages/core/src/evaluation/providers/claude-cli.ts
new file mode 100644
index 000000000..659d82097
--- /dev/null
+++ b/packages/core/src/evaluation/providers/claude-cli.ts
@@ -0,0 +1,597 @@
+import { spawn } from 'node:child_process';
+import { randomUUID } from 'node:crypto';
+import { createWriteStream } from 'node:fs';
+import type { WriteStream } from 'node:fs';
+import { mkdir } from 'node:fs/promises';
+import path from 'node:path';
+
+import { recordClaudeLogEntry } from './claude-log-tracker.js';
+import { buildPromptDocument, normalizeInputFiles } from './preread.js';
+import type { ClaudeResolvedConfig } from './targets.js';
+import type {
+  Message,
+  Provider,
+  ProviderRequest,
+  ProviderResponse,
+  ProviderTokenUsage,
+  ToolCall,
+} from './types.js';
+
+/**
+ * Claude CLI provider that spawns `claude -p` as a subprocess.
+ * Uses --output-format stream-json --include-partial-messages for structured output.
+ * This is the default `claude` provider. Use `claude-sdk` for SDK-based invocation.
+ */
+export class ClaudeCliProvider implements Provider {
+  readonly id: string;
+  readonly kind = 'claude-cli' as const;
+  readonly targetName: string;
+  readonly supportsBatch = false;
+
+  private readonly config: ClaudeResolvedConfig;
+
+  /**
+   * @param targetName Name of the eval target; also embedded in the provider id
+   *   as `claude-cli:<targetName>`.
+   * @param config Resolved Claude settings stored for later use by this provider.
+   */
+  constructor(targetName: string, config: ClaudeResolvedConfig) {
+    this.id = `claude-cli:${targetName}`;
+    this.targetName = targetName;
+    this.config = config;
+  }
+
+  /**
+   * Runs a single request through the `claude` CLI and converts its
+   * stream-json output into a ProviderResponse.
+   *
+   * Every `assistant` event becomes one output message (text plus tool calls);
+   * the `result` event supplies cost, duration and token usage. Stream logging
+   * is best-effort: neither a logger that cannot be created nor a failure while
+   * closing it affects the outcome of the request.
+   *
+   * @param request Prompt, input files, optional cwd/abort signal and callbacks.
+   * @returns The collected messages and run metadata.
+   * @throws Error if the request is already aborted, the CLI times out, or the
+   *   CLI exits with a non-zero code (stderr/stdout is included as detail).
+   */
+  async invoke(request: ProviderRequest): Promise<ProviderResponse> {
+    if (request.signal?.aborted) {
+      throw new Error('Claude CLI request was aborted before execution');
+    }
+
+    const startTime = new Date().toISOString();
+    const startMs = Date.now();
+
+    const logger = await this.createStreamLogger(request).catch(() => undefined);
+
+    // Build the prompt
+    const inputFiles = normalizeInputFiles(request.inputFiles);
+    const prompt = buildPromptDocument(request, inputFiles);
+
+    const args = this.buildArgs();
+    const cwd = this.resolveCwd(request.cwd);
+    const env = sanitizeEnvForClaude(request.braintrustSpanIds);
+
+    // Track state from stream events
+    const output: Message[] = [];
+    let tokenUsage: ProviderTokenUsage | undefined;
+    let costUsd: number | undefined;
+    let durationMs: number | undefined;
+
+    // Usage counters come from parsed JSON, so check the runtime type instead of
+    // trusting a cast: a missing or non-numeric field counts as zero.
+    const tokenCount = (value: unknown): number => (typeof value === 'number' ? value : 0);
+
+    try {
+      const result = await this.runClaude({
+        args,
+        cwd,
+        prompt,
+        env,
+        signal: request.signal,
+        onLine: (line) => {
+          logger?.handleLine(line);
+          const event = tryParseJson(line);
+          if (!event) return;
+
+          if (event.type === 'assistant') {
+            const betaMessage = event.message;
+            if (betaMessage && typeof betaMessage === 'object') {
+              const msg = betaMessage as Record<string, unknown>;
+              const content = msg.content;
+              const textContent = extractTextContent(content);
+              const toolCalls = extractToolCalls(content);
+
+              const outputMsg: Message = {
+                role: 'assistant',
+                content: textContent,
+                toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+              };
+              output.push(outputMsg);
+
+              // Stream callbacks for real-time observability
+              if (request.streamCallbacks) {
+                for (const tc of toolCalls) {
+                  request.streamCallbacks.onToolCallEnd?.(
+                    tc.tool,
+                    tc.input,
+                    tc.output,
+                    tc.durationMs ?? 0,
+                    tc.id,
+                  );
+                }
+              }
+            }
+          }
+
+          if (event.type === 'result') {
+            if (typeof event.total_cost_usd === 'number') {
+              costUsd = event.total_cost_usd;
+            }
+            if (typeof event.duration_ms === 'number') {
+              durationMs = event.duration_ms;
+            }
+            const usage = event.usage as Record<string, unknown> | undefined;
+            if (usage) {
+              // Input total = fresh input + cache reads + cache writes.
+              const inputTokens =
+                tokenCount(usage.input_tokens) +
+                tokenCount(usage.cache_read_input_tokens) +
+                tokenCount(usage.cache_creation_input_tokens);
+              tokenUsage = {
+                input: inputTokens,
+                output: tokenCount(usage.output_tokens),
+                cached:
+                  typeof usage.cache_read_input_tokens === 'number'
+                    ? usage.cache_read_input_tokens
+                    : undefined,
+              };
+
+              // Stream callback for LLM usage
+              request.streamCallbacks?.onLlmCallEnd?.(this.config.model ?? 'claude', tokenUsage);
+            }
+          }
+        },
+      });
+
+      if (result.timedOut) {
+        throw new Error(
+          `Claude CLI timed out${formatTimeoutSuffix(this.config.timeoutMs ?? undefined)}`,
+        );
+      }
+
+      if (result.exitCode !== 0) {
+        const detail = result.stderr.trim() || result.stdout.trim();
+        const prefix = `Claude CLI exited with code ${result.exitCode}`;
+        throw new Error(detail ? `${prefix}: ${detail}` : prefix);
+      }
+
+      const endTime = new Date().toISOString();
+      // Prefer the duration reported by the CLI; fall back to wall-clock time.
+      const totalDurationMs = durationMs ?? Date.now() - startMs;
+
+      return {
+        raw: {
+          model: this.config.model,
+          logFile: logger?.filePath,
+          args,
+          exitCode: result.exitCode,
+        },
+        output,
+        tokenUsage,
+        costUsd,
+        durationMs: totalDurationMs,
+        startTime,
+        endTime,
+      };
+    } finally {
+      // An exception thrown from `finally` would replace the real result or
+      // error, so a failure to flush the (best-effort) log is only reported.
+      await logger?.close().catch((error: unknown) => {
+        const message = error instanceof Error ? error.message : String(error);
+        console.warn(`Failed to close Claude CLI stream log: ${message}`);
+      });
+    }
+  }
+
+  /**
+   * Assembles the argument list for `claude`: the fixed print-mode/stream-json
+   * flags, followed by `--model` and `--max-turns` when configured.
+   */
+  private buildArgs(): string[] {
+    const { model, maxTurns } = this.config;
+
+    return [
+      '-p',
+      '--output-format',
+      'stream-json',
+      '--include-partial-messages',
+      // --verbose is required when combining -p with --output-format stream-json
+      '--verbose',
+      ...(model ? ['--model', model] : []),
+      ...(maxTurns !== undefined ? ['--max-turns', String(maxTurns)] : []),
+    ];
+  }
+
+  /**
+   * Picks the working directory for the subprocess: the per-request override
+   * wins over the configured cwd; the result is made absolute. Returns
+   * undefined when neither is set.
+   */
+  private resolveCwd(cwdOverride?: string): string | undefined {
+    const chosen = cwdOverride || this.config.cwd;
+    return chosen ? path.resolve(chosen) : undefined;
+  }
+
+  /**
+   * Returns the directory for stream logs, or undefined when stream logging is
+   * disabled. A configured `logDir` is made absolute; otherwise logs go to
+   * `.agentv/logs/claude-cli` under the current working directory.
+   */
+  private resolveLogDirectory(): string | undefined {
+    if (isClaudeCliLogStreamingDisabled()) {
+      return undefined;
+    }
+    const configuredDir = this.config.logDir;
+    return configuredDir
+      ? path.resolve(configuredDir)
+      : path.join(process.cwd(), '.agentv', 'logs', 'claude-cli');
+  }
+
+  /**
+   * Creates the per-request stream logger and registers its file with the
+   * log tracker. Returns undefined (after a console warning where applicable)
+   * when logging is disabled, the directory cannot be created, or the logger
+   * cannot be set up.
+   */
+  private async createStreamLogger(
+    request: ProviderRequest,
+  ): Promise<ClaudeCliStreamLogger | undefined> {
+    const logDir = this.resolveLogDirectory();
+    if (!logDir) {
+      return undefined;
+    }
+
+    const describe = (error: unknown): string =>
+      error instanceof Error ? error.message : String(error);
+
+    try {
+      await mkdir(logDir, { recursive: true });
+    } catch (error) {
+      console.warn(
+        `Skipping Claude CLI stream logging (could not create ${logDir}): ${describe(error)}`,
+      );
+      return undefined;
+    }
+
+    const filePath = path.join(logDir, buildLogFilename(request, this.targetName));
+    const { targetName } = this;
+    const { evalCaseId, attempt } = request;
+
+    try {
+      const logger = await ClaudeCliStreamLogger.create({
+        filePath,
+        targetName,
+        evalCaseId,
+        attempt,
+        format: this.config.logFormat ?? 'summary',
+      });
+      // Only record the log file once the logger actually exists.
+      recordClaudeLogEntry({ filePath, targetName, evalCaseId, attempt });
+      return logger;
+    } catch (error) {
+      console.warn(`Skipping Claude CLI stream logging for ${filePath}: ${describe(error)}`);
+      return undefined;
+    }
+  }
+
+  /**
+   * Spawns `claude` with the given arguments, writes the prompt to its stdin
+   * and streams stdout to `onLine` one trimmed, non-empty line at a time.
+   *
+   * The child is sent SIGTERM when the abort signal fires or when the
+   * configured `timeoutMs` elapses (the latter also sets `timedOut`).
+   *
+   * @returns Full stdout/stderr text, the exit code (-1 when the process ended
+   *   without a numeric code, e.g. killed by a signal) and the timeout flag.
+   * @throws Error (via rejection) when the process cannot be spawned; a missing
+   *   executable is reported with an installation hint.
+   */
+  private async runClaude(options: {
+    readonly args: string[];
+    readonly cwd: string | undefined;
+    readonly prompt: string;
+    readonly env: Record<string, string | undefined>;
+    readonly signal?: AbortSignal;
+    readonly onLine: (line: string) => void;
+  }): Promise<{ stdout: string; stderr: string; exitCode: number; timedOut: boolean }> {
+    return new Promise((resolve, reject) => {
+      const spawnOptions: Parameters<typeof spawn>[2] = {
+        stdio: ['pipe', 'pipe', 'pipe'],
+        env: options.env as NodeJS.ProcessEnv,
+      };
+      if (options.cwd) {
+        spawnOptions.cwd = options.cwd;
+      }
+
+      const child = spawn('claude', options.args, spawnOptions);
+
+      let stdout = '';
+      let stderr = '';
+      let timedOut = false;
+      // Holds the trailing partial line between stdout chunks.
+      let stdoutBuffer = '';
+
+      const onAbort = (): void => {
+        child.kill('SIGTERM');
+      };
+
+      if (options.signal) {
+        if (options.signal.aborted) {
+          onAbort();
+        } else {
+          options.signal.addEventListener('abort', onAbort, { once: true });
+        }
+      }
+
+      let timeoutHandle: NodeJS.Timeout | undefined;
+      if (this.config.timeoutMs && this.config.timeoutMs > 0) {
+        timeoutHandle = setTimeout(() => {
+          timedOut = true;
+          child.kill('SIGTERM');
+        }, this.config.timeoutMs);
+        // Do not let the pending timer keep the event loop alive on its own.
+        timeoutHandle.unref?.();
+      }
+
+      if (child.stdout) {
+        child.stdout.setEncoding('utf8');
+        child.stdout.on('data', (chunk: string) => {
+          stdout += chunk;
+          stdoutBuffer += chunk;
+          // Process complete lines
+          const lines = stdoutBuffer.split(/\r?\n/);
+          stdoutBuffer = lines.pop() ?? '';
+          for (const line of lines) {
+            const trimmed = line.trim();
+            if (trimmed.length > 0) {
+              options.onLine(trimmed);
+            }
+          }
+        });
+      }
+
+      if (child.stderr) {
+        child.stderr.setEncoding('utf8');
+        child.stderr.on('data', (chunk: string) => {
+          stderr += chunk;
+        });
+      }
+
+      // Writing the prompt can fail (EPIPE / destroyed stream) when the process
+      // could not be spawned or exits before reading stdin. Without a listener
+      // that stream error would be an uncaught exception; the failure itself is
+      // already reported through the child's 'error'/'close' events below.
+      child.stdin?.on('error', () => {
+        // Intentionally ignored: see comment above.
+      });
+
+      // Send prompt via stdin
+      child.stdin?.end(options.prompt);
+
+      const cleanup = (): void => {
+        if (timeoutHandle) {
+          clearTimeout(timeoutHandle);
+        }
+        if (options.signal) {
+          options.signal.removeEventListener('abort', onAbort);
+        }
+      };
+
+      child.on('error', (error) => {
+        cleanup();
+        const err = error as NodeJS.ErrnoException;
+        if (err.code === 'ENOENT') {
+          reject(
+            new Error(
+              `Claude CLI executable 'claude' was not found on PATH. Install claude-code or ensure it is in PATH.`,
+            ),
+          );
+        } else {
+          reject(error);
+        }
+      });
+
+      child.on('close', (code) => {
+        cleanup();
+        // Flush remaining buffer
+        if (stdoutBuffer.trim().length > 0) {
+          options.onLine(stdoutBuffer.trim());
+        }
+        resolve({
+          stdout,
+          stderr,
+          exitCode: typeof code === 'number' ? code : -1,
+          timedOut,
+        });
+      });
+    });
+  }
+}
+
+/**
+ * Appends a trace of the CLI's stream output to a log file, either as one JSON
+ * object per line (`json`) or as short human-readable entries (`summary`).
+ *
+ * Logging is best-effort: once the underlying file stream reports an error the
+ * logger warns once, drops all further lines, and `close()` still resolves.
+ */
+class ClaudeCliStreamLogger {
+  readonly filePath: string;
+  private readonly stream: WriteStream;
+  private readonly startedAt = Date.now();
+  private readonly format: 'summary' | 'json';
+  // Set when the file stream emitted 'error'; further writes are skipped.
+  private failed = false;
+
+  private constructor(filePath: string, format: 'summary' | 'json') {
+    this.filePath = filePath;
+    this.format = format;
+    this.stream = createWriteStream(filePath, { flags: 'a' });
+    // A stream 'error' with no listener is thrown as an uncaught exception, and
+    // the file is opened asynchronously, so try/catch around create() cannot
+    // catch it. Listen for the lifetime of the stream instead.
+    this.stream.on('error', (error: Error) => {
+      if (!this.failed) {
+        this.failed = true;
+        console.warn(`Claude CLI stream logging stopped for ${filePath}: ${error.message}`);
+      }
+    });
+  }
+
+  /**
+   * Opens the log file in append mode and writes the `#`-prefixed header
+   * (target, optional eval id and 1-based attempt, start time) followed by a
+   * blank separator line.
+   */
+  static async create(options: {
+    readonly filePath: string;
+    readonly targetName: string;
+    readonly evalCaseId?: string;
+    readonly attempt?: number;
+    readonly format: 'summary' | 'json';
+  }): Promise<ClaudeCliStreamLogger> {
+    const logger = new ClaudeCliStreamLogger(options.filePath, options.format);
+    // Drop only the omitted optional entries; the trailing '' must survive so
+    // the header is separated from the first log entry by an empty line.
+    const header = [
+      '# Claude CLI stream log',
+      `# target: ${options.targetName}`,
+      options.evalCaseId ? `# eval: ${options.evalCaseId}` : undefined,
+      options.attempt !== undefined ? `# attempt: ${options.attempt + 1}` : undefined,
+      `# started: ${new Date().toISOString()}`,
+      '',
+    ].filter((line): line is string => line !== undefined);
+    for (const line of header) {
+      logger.stream.write(`${line}\n`);
+    }
+    return logger;
+  }
+
+  /**
+   * Records one raw stdout line, prefixed with the time elapsed since the
+   * logger was created. In `summary` format, parsed events without a summary
+   * are omitted; lines that are not JSON objects are written verbatim.
+   */
+  handleLine(line: string): void {
+    if (this.failed) {
+      return;
+    }
+    const elapsed = formatElapsed(this.startedAt);
+    const event = tryParseJson(line);
+
+    if (this.format === 'json') {
+      if (event) {
+        this.stream.write(`${JSON.stringify({ time: elapsed, data: event })}\n`);
+      } else {
+        this.stream.write(`${JSON.stringify({ time: elapsed, raw: line })}\n`);
+      }
+    } else {
+      if (event) {
+        const summary = summarizeEvent(event);
+        if (summary) {
+          const type = typeof event.type === 'string' ? event.type : 'unknown';
+          this.stream.write(`[+${elapsed}] [${type}] ${summary}\n`);
+        }
+      } else {
+        this.stream.write(`[+${elapsed}] ${line}\n`);
+      }
+    }
+  }
+
+  /**
+   * Flushes and closes the log file. Never rejects: a stream failure has
+   * already been reported by the constructor's error listener and must not
+   * mask the outcome of the provider call that owns this logger.
+   */
+  async close(): Promise<void> {
+    if (this.failed || this.stream.destroyed) {
+      return;
+    }
+    await new Promise<void>((resolve) => {
+      // Resolve on whichever comes first; a second resolve() is a no-op.
+      this.stream.once('error', () => resolve());
+      this.stream.end(() => resolve());
+    });
+  }
+}
+
+/**
+ * Produces the short text used for an event in `summary` log format.
+ *
+ * Only the first content block of an assistant/user message is considered.
+ * Returns undefined for event types that are not summarized.
+ */
+function summarizeEvent(event: Record<string, unknown>): string | undefined {
+  // First block of event.message.content, if that is a non-empty array.
+  const leadBlock = (): Record<string, unknown> | undefined => {
+    const message = event.message as Record<string, unknown> | undefined;
+    const content = message?.content;
+    return Array.isArray(content) && content.length > 0
+      ? (content[0] as Record<string, unknown> | undefined)
+      : undefined;
+  };
+
+  const kind = event.type as string;
+
+  if (kind === 'assistant') {
+    const block = leadBlock();
+    if (block?.type === 'tool_use') {
+      return `tool_use (${block.name})`;
+    }
+    if (block?.type === 'text' && typeof block.text === 'string') {
+      // Keep log lines short: at most 50 characters of text.
+      return block.text.length > 50 ? `${block.text.slice(0, 50)}...` : block.text;
+    }
+    return 'message';
+  }
+
+  if (kind === 'user') {
+    const block = leadBlock();
+    return block?.type === 'tool_result' ? `tool_result (${block.tool_use_id})` : 'user';
+  }
+
+  if (kind === 'result') {
+    const { total_cost_usd: cost, duration_ms: duration } = event;
+    return typeof cost === 'number' && typeof duration === 'number'
+      ? `$${cost.toFixed(4)}, ${Math.round(duration)}ms`
+      : 'result';
+  }
+
+  return kind === 'system' ? 'init' : undefined;
+}
+
+/**
+ * Collects the text of a Claude message.
+ *
+ * A plain string is returned unchanged. For a content array, the `text` of
+ * every `{ type: 'text' }` block is joined with newlines. Returns undefined
+ * for any other input or when the array holds no text blocks.
+ */
+function extractTextContent(content: unknown): string | undefined {
+  if (typeof content === 'string') {
+    return content;
+  }
+  if (!Array.isArray(content)) {
+    return undefined;
+  }
+
+  const texts = content
+    .filter(
+      (part): part is { type: 'text'; text: string } =>
+        Boolean(part) &&
+        typeof part === 'object' &&
+        part.type === 'text' &&
+        typeof part.text === 'string',
+    )
+    .map((part) => part.text);
+
+  return texts.length > 0 ? texts.join('\n') : undefined;
+}
+
+/**
+ * Converts the `tool_use` blocks of a Claude content array into ToolCall
+ * records (`tool` = block name, `input` passed through, `id` only when it is a
+ * string). Non-array content and blocks without a string `name` yield nothing.
+ */
+function extractToolCalls(content: unknown): readonly ToolCall[] {
+  if (!Array.isArray(content)) {
+    return [];
+  }
+
+  return content
+    .filter(
+      (part) =>
+        Boolean(part) &&
+        typeof part === 'object' &&
+        part.type === 'tool_use' &&
+        typeof part.name === 'string',
+    )
+    .map(
+      (part): ToolCall => ({
+        tool: part.name,
+        input: part.input,
+        id: typeof part.id === 'string' ? part.id : undefined,
+      }),
+    );
+}
+
+/**
+ * Returns a copy of process.env prepared for a nested Claude CLI process.
+ *
+ * The Claude Code session markers (CLAUDECODE, CLAUDE_CODE_ENTRYPOINT) are set
+ * to undefined so the spawned CLI doesn't refuse to run inside another session.
+ * When Braintrust span ids are given they are exposed as CC_PARENT_SPAN_ID and
+ * CC_ROOT_SPAN_ID so the trace-claude-code plugin can attach Claude Code
+ * session traces to the AgentV eval span.
+ */
+function sanitizeEnvForClaude(braintrustSpanIds?: {
+  readonly parentSpanId: string;
+  readonly rootSpanId: string;
+}): Record<string, string | undefined> {
+  const traceVars = braintrustSpanIds
+    ? {
+        CC_PARENT_SPAN_ID: braintrustSpanIds.parentSpanId,
+        CC_ROOT_SPAN_ID: braintrustSpanIds.rootSpanId,
+      }
+    : {};
+
+  return {
+    ...process.env,
+    CLAUDECODE: undefined,
+    CLAUDE_CODE_ENTRYPOINT: undefined,
+    ...traceVars,
+  };
+}
+
+/**
+ * Reports whether AGENTV_CLAUDE_STREAM_LOGS turns stream logging off.
+ * Only `false`, `0` and `off` (trimmed, case-insensitive) disable it; an unset
+ * or empty variable leaves logging enabled.
+ */
+function isClaudeCliLogStreamingDisabled(): boolean {
+  const setting = process.env.AGENTV_CLAUDE_STREAM_LOGS?.trim().toLowerCase();
+  return setting === 'false' || setting === '0' || setting === 'off';
+}
+
+/**
+ * Builds a unique log file name of the form
+ * `<timestamp>_<target>_<eval>[_attempt-N]_<8 hex chars>.log`, where the
+ * timestamp is ISO-8601 with `:` and `.` replaced by `-` and N is 1-based.
+ */
+function buildLogFilename(request: ProviderRequest, targetName: string): string {
+  const { evalCaseId, attempt } = request;
+  const evalPart =
+    sanitizeForFilename(evalCaseId ?? 'claude-cli') +
+    (attempt !== undefined ? `_attempt-${attempt + 1}` : '');
+
+  const segments = [
+    new Date().toISOString().replace(/[:.]/g, '-'),
+    sanitizeForFilename(targetName),
+    evalPart,
+    // A short random suffix keeps concurrent runs from colliding.
+    randomUUID().slice(0, 8),
+  ];
+  return `${segments.join('_')}.log`;
+}
+
+/**
+ * Replaces every run of characters other than letters, digits, `.`, `_` and
+ * `-` with a single underscore. An empty result falls back to `claude-cli`.
+ */
+function sanitizeForFilename(value: string): string {
+  return value.replace(/[^A-Za-z0-9._-]+/g, '_') || 'claude-cli';
+}
+
+/**
+ * Formats the whole seconds elapsed since `startedAt` (epoch milliseconds) as
+ * `MM:SS`, or `HH:MM:SS` once at least one hour has passed.
+ */
+function formatElapsed(startedAt: number): string {
+  const twoDigits = (value: number): string => value.toString().padStart(2, '0');
+
+  const totalSeconds = Math.floor((Date.now() - startedAt) / 1000);
+  const hours = Math.floor(totalSeconds / 3600);
+  const fields = [Math.floor((totalSeconds % 3600) / 60), totalSeconds % 60];
+  if (hours > 0) {
+    fields.unshift(hours);
+  }
+  return fields.map((field) => twoDigits(field)).join(':');
+}
+
+/**
+ * Returns ` after <n>s` (seconds rounded up) for a positive timeout, or an
+ * empty string when no positive timeout is given.
+ */
+function formatTimeoutSuffix(timeoutMs: number | undefined): string {
+  return timeoutMs && timeoutMs > 0 ? ` after ${Math.ceil(timeoutMs / 1000)}s` : '';
+}
+
+/**
+ * Parses a line as JSON and returns it only when the result is a plain JSON
+ * object. Invalid JSON, arrays, primitives and null all yield undefined.
+ */
+function tryParseJson(line: string): Record<string, unknown> | undefined {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(line);
+  } catch {
+    return undefined;
+  }
+
+  const isJsonObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
+  return isJsonObject ? (parsed as Record<string, unknown>) : undefined;
+}
diff --git a/packages/core/src/evaluation/providers/claude-sdk.ts b/packages/core/src/evaluation/providers/claude-sdk.ts
new file mode 100644
index 000000000..d4a768b4e
--- /dev/null
+++ b/packages/core/src/evaluation/providers/claude-sdk.ts
@@ -0,0 +1,495 @@
+import { randomUUID } from 'node:crypto';
+import { createWriteStream } from 'node:fs';
+import type { WriteStream } from 'node:fs';
+import { mkdir } from 'node:fs/promises';
+import path from 'node:path';
+
+import { recordClaudeLogEntry } from './claude-log-tracker.js';
+import { buildPromptDocument, normalizeInputFiles } from './preread.js';
+import type { ClaudeResolvedConfig } from './targets.js';
+import type {
+  Message,
+  Provider,
+  ProviderRequest,
+  ProviderResponse,
+  ProviderTokenUsage,
+  ToolCall,
+} from './types.js';
+
+// Cache for the lazily imported SDK module; stays null until the first
+// successful load. Importing on demand avoids bundling issues with dynamic requires.
+// biome-ignore lint/suspicious/noExplicitAny: dynamic import type
+let claudeSdkModule: any = null;
+
+/**
+ * Imports @anthropic-ai/claude-agent-sdk on first use and returns the cached
+ * module afterwards.
+ *
+ * @throws Error with installation instructions when the import fails.
+ */
+async function loadClaudeSdk(): Promise<typeof import('@anthropic-ai/claude-agent-sdk')> {
+  if (claudeSdkModule) {
+    return claudeSdkModule;
+  }
+
+  try {
+    claudeSdkModule = await import('@anthropic-ai/claude-agent-sdk');
+  } catch (error) {
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new Error(
+      `Failed to load @anthropic-ai/claude-agent-sdk. Please install it:\n  npm install @anthropic-ai/claude-agent-sdk\n\nOriginal error: ${reason}`,
+    );
+  }
+  return claudeSdkModule;
+}
+
+/**
+ * Claude Agent SDK provider using the @anthropic-ai/claude-agent-sdk library directly.
+ * This provides typed SDK access for structured tool calls, token usage, and clean
+ * session lifecycle. Use `claude-cli` for subprocess-based invocation.
+ *
+ * Note: The SDK is loaded lazily on first use to avoid bundling issues.
+ * Users must install @anthropic-ai/claude-agent-sdk separately.
+ */
+export class ClaudeSdkProvider implements Provider {
+  readonly id: string;
+  readonly kind = 'claude-sdk' as const;
+  readonly targetName: string;
+  readonly supportsBatch = false;
+
+  private readonly config: ClaudeResolvedConfig;
+
+  /**
+   * @param targetName Name of the eval target; also embedded in the provider id
+   *   as `claude-sdk:<targetName>`.
+   * @param config Resolved Claude settings stored for later use by this provider.
+   */
+  constructor(targetName: string, config: ClaudeResolvedConfig) {
+    this.id = `claude-sdk:${targetName}`;
+    this.targetName = targetName;
+    this.config = config;
+  }
+
+  /**
+   * Runs a single request through the Claude Agent SDK and converts the
+   * streamed SDK messages into a ProviderResponse.
+   *
+   * Every `assistant` message becomes one output message (text plus tool
+   * calls); the `result` message supplies cost, duration and token usage.
+   * Stream logging is best-effort: neither a logger that cannot be created nor
+   * a failure while closing it affects the outcome of the request.
+   *
+   * @param request Prompt, input files, optional cwd/abort signal and callbacks.
+   * @returns The collected messages and run metadata.
+   * @throws Error if the request is already aborted or the SDK cannot be
+   *   loaded; errors raised while iterating the SDK query propagate unchanged.
+   */
+  async invoke(request: ProviderRequest): Promise<ProviderResponse> {
+    if (request.signal?.aborted) {
+      throw new Error('Claude SDK request was aborted before execution');
+    }
+
+    const sdk = await loadClaudeSdk();
+
+    const startTime = new Date().toISOString();
+    const startMs = Date.now();
+
+    const logger = await this.createStreamLogger(request).catch(() => undefined);
+
+    // Build the prompt
+    const inputFiles = normalizeInputFiles(request.inputFiles);
+    const prompt = buildPromptDocument(request, inputFiles);
+
+    // Optional system prompt from the target configuration
+    const systemPrompt = this.config.systemPrompt;
+
+    // Build query options
+    // biome-ignore lint/suspicious/noExplicitAny: SDK options type is dynamically loaded
+    const queryOptions: any = {
+      permissionMode: 'bypassPermissions',
+      allowDangerouslySkipPermissions: true,
+      // The SDK spawns a Claude Code subprocess. When AgentV itself runs inside
+      // a Claude Code session the CLAUDECODE env var is set, which causes the
+      // subprocess to refuse to start ("cannot be launched inside another Claude
+      // Code session"). Passing a sanitized env removes that guard.
+      env: sanitizeEnvForClaudeSdk(request.braintrustSpanIds),
+    };
+
+    if (this.config.model) {
+      queryOptions.model = this.config.model;
+    }
+
+    const cwd = this.resolveCwd(request.cwd);
+    if (cwd) {
+      queryOptions.cwd = cwd;
+    }
+
+    if (systemPrompt) {
+      queryOptions.systemPrompt = systemPrompt;
+    }
+
+    if (this.config.maxTurns !== undefined) {
+      queryOptions.maxTurns = this.config.maxTurns;
+    }
+
+    if (this.config.maxBudgetUsd !== undefined) {
+      queryOptions.maxBudgetUsd = this.config.maxBudgetUsd;
+    }
+
+    // Hand the SDK a real AbortController that mirrors the caller's signal. An
+    // object literal cast to AbortController has no abort() method and would
+    // throw as soon as anything tried to call it.
+    let detachAbort: (() => void) | undefined;
+    if (request.signal) {
+      const callerSignal = request.signal;
+      const controller = new AbortController();
+      const forwardAbort = (): void => controller.abort();
+      if (callerSignal.aborted) {
+        // The signal may have fired during the awaits above.
+        forwardAbort();
+      } else {
+        callerSignal.addEventListener('abort', forwardAbort, { once: true });
+        detachAbort = () => callerSignal.removeEventListener('abort', forwardAbort);
+      }
+      queryOptions.abortController = controller;
+    }
+
+    // Track state from messages
+    const output: Message[] = [];
+    let tokenUsage: ProviderTokenUsage | undefined;
+    let costUsd: number | undefined;
+    let durationMs: number | undefined;
+
+    // Usage counters come from SDK data typed as unknown here, so check the
+    // runtime type instead of trusting a cast: non-numeric fields count as zero.
+    const tokenCount = (value: unknown): number => (typeof value === 'number' ? value : 0);
+
+    try {
+      const q = sdk.query({ prompt, options: queryOptions });
+
+      // Set up timeout if configured
+      // NOTE(review): on timeout the query is ended via return(), so the loop
+      // below finishes normally and whatever was collected so far is returned
+      // as a successful response. The CLI provider throws on timeout instead;
+      // confirm which behaviour the orchestrator expects.
+      let timeoutTimer: ReturnType<typeof setTimeout> | undefined;
+      if (this.config.timeoutMs) {
+        timeoutTimer = setTimeout(() => {
+          q.return(undefined as never).catch(() => {});
+        }, this.config.timeoutMs);
+        timeoutTimer.unref?.();
+      }
+
+      try {
+        for await (const message of q) {
+          logger?.handleMessage(message);
+
+          if (message.type === 'assistant') {
+            const betaMessage = (message as { message?: unknown }).message;
+            if (betaMessage && typeof betaMessage === 'object') {
+              const msg = betaMessage as Record<string, unknown>;
+              const content = msg.content;
+              const textContent = extractTextContent(content);
+              const toolCalls = extractToolCalls(content);
+
+              const outputMsg: Message = {
+                role: 'assistant',
+                content: textContent,
+                toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+              };
+              output.push(outputMsg);
+
+              // Stream callbacks for real-time observability
+              if (request.streamCallbacks) {
+                for (const tc of toolCalls) {
+                  request.streamCallbacks.onToolCallEnd?.(
+                    tc.tool,
+                    tc.input,
+                    tc.output,
+                    tc.durationMs ?? 0,
+                    tc.id,
+                  );
+                }
+              }
+            }
+          }
+
+          if (message.type === 'result') {
+            const result = message as Record<string, unknown>;
+            if (typeof result.total_cost_usd === 'number') {
+              costUsd = result.total_cost_usd;
+            }
+            if (typeof result.duration_ms === 'number') {
+              durationMs = result.duration_ms;
+            }
+            const usage = result.usage as Record<string, unknown> | undefined;
+            if (usage) {
+              // Input total = fresh input + cache reads + cache writes.
+              const inputTokens =
+                tokenCount(usage.input_tokens) +
+                tokenCount(usage.cache_read_input_tokens) +
+                tokenCount(usage.cache_creation_input_tokens);
+              tokenUsage = {
+                input: inputTokens,
+                output: tokenCount(usage.output_tokens),
+                cached:
+                  typeof usage.cache_read_input_tokens === 'number'
+                    ? usage.cache_read_input_tokens
+                    : undefined,
+              };
+
+              // Stream callback for LLM usage
+              request.streamCallbacks?.onLlmCallEnd?.(this.config.model ?? 'claude', tokenUsage);
+            }
+          }
+        }
+      } finally {
+        if (timeoutTimer) {
+          clearTimeout(timeoutTimer);
+        }
+      }
+
+      const endTime = new Date().toISOString();
+      // Prefer the duration reported by the SDK; fall back to wall-clock time.
+      const totalDurationMs = durationMs ?? Date.now() - startMs;
+
+      return {
+        raw: {
+          model: this.config.model,
+          logFile: logger?.filePath,
+        },
+        output,
+        tokenUsage,
+        costUsd,
+        durationMs: totalDurationMs,
+        startTime,
+        endTime,
+      };
+    } finally {
+      detachAbort?.();
+      // An exception thrown from `finally` would replace the real result or
+      // error, so a failure to flush the (best-effort) log is only reported.
+      await logger?.close().catch((error: unknown) => {
+        const message = error instanceof Error ? error.message : String(error);
+        console.warn(`Failed to close Claude stream log: ${message}`);
+      });
+    }
+  }
+
+  /** Absolute working directory; a per-request override wins over `config.cwd`. */
+  private resolveCwd(cwdOverride?: string): string | undefined {
+    // `||` rather than `??`: an empty override falls through to the config value.
+    const chosen = cwdOverride || this.config.cwd;
+    if (!chosen) {
+      return undefined;
+    }
+    return path.resolve(chosen);
+  }
+
+  /** Directory for stream logs, or undefined when stream logging is disabled. */
+  private resolveLogDirectory(): string | undefined {
+    if (isClaudeLogStreamingDisabled()) {
+      return undefined;
+    }
+    const configured = this.config.logDir;
+    return configured
+      ? path.resolve(configured)
+      : path.join(process.cwd(), '.agentv', 'logs', 'claude');
+  }
+
+  /**
+   * Open a stream log file for this request. Returns undefined when logging is
+   * disabled, or (after a console warning) when the directory or file cannot be set up.
+   */
+  private async createStreamLogger(
+    request: ProviderRequest,
+  ): Promise<ClaudeStreamLogger | undefined> {
+    const reason = (err: unknown): string => (err instanceof Error ? err.message : String(err));
+    const dir = this.resolveLogDirectory();
+    if (!dir) {
+      return undefined;
+    }
+    try {
+      await mkdir(dir, { recursive: true });
+    } catch (err) {
+      console.warn(`Skipping Claude stream logging (could not create ${dir}): ${reason(err)}`);
+      return undefined;
+    }
+
+    // The same identifying fields go to the logger and to the log-entry record.
+    const { evalCaseId, attempt } = request;
+    const targetName = this.targetName;
+    const filePath = path.join(dir, buildLogFilename(request, targetName));
+    try {
+      const streamLogger = await ClaudeStreamLogger.create({
+        filePath,
+        targetName,
+        evalCaseId,
+        attempt,
+        format: this.config.logFormat ?? 'summary',
+      });
+      recordClaudeLogEntry({ filePath, targetName, evalCaseId, attempt });
+      return streamLogger;
+    } catch (err) {
+      console.warn(`Skipping Claude stream logging for ${filePath}: ${reason(err)}`);
+      return undefined;
+    }
+  }
+}
+
+/**
+ * Pull the plain text out of a Claude message body.
+ * A string body is returned as-is; an array body contributes the `text` of
+ * every `{ type: "text", text: string }` part, joined with newlines.
+ * Returns undefined for any other shape, or when no text part is present.
+ */
+function extractTextContent(content: unknown): string | undefined {
+  if (typeof content === 'string') {
+    return content;
+  }
+  if (!Array.isArray(content)) {
+    return undefined;
+  }
+  const pieces = content.flatMap((entry: unknown): string[] => {
+    // Parts that are not objects contribute nothing.
+    if (!entry || typeof entry !== 'object') {
+      return [];
+    }
+    const block = entry as Record<string, unknown>;
+    return block.type === 'text' && typeof block.text === 'string' ? [block.text] : [];
+  });
+  return pieces.length > 0 ? pieces.join('\n') : undefined;
+}
+
+/**
+ * Collect the tool invocations found in a Claude message body.
+ * Only array bodies are inspected; anything else yields an empty list. Each
+ * `{ type: "tool_use", name: string }` part becomes a ToolCall carrying its
+ * name, its input as received, and its id when that is a string.
+ */
+function extractToolCalls(content: unknown): readonly ToolCall[] {
+  if (!Array.isArray(content)) {
+    return [];
+  }
+  return content.flatMap((entry: unknown): ToolCall[] => {
+    if (!entry || typeof entry !== 'object') {
+      return [];
+    }
+    const block = entry as Record<string, unknown>;
+    // Only tool_use parts with a string name are kept.
+    if (block.type !== 'tool_use' || typeof block.name !== 'string') {
+      return [];
+    }
+    // A non-string id is dropped rather than coerced.
+    const id = typeof block.id === 'string' ? block.id : undefined;
+    return [{ tool: block.name, input: block.input, id }];
+  });
+}
+
+/** Writes a header, then one line per SDK message, to an append-mode log file. */
+class ClaudeStreamLogger {
+  readonly filePath: string;
+  private readonly stream: WriteStream;
+  private readonly startedAt = Date.now();
+  private readonly format: 'summary' | 'json';
+
+  private constructor(filePath: string, format: 'summary' | 'json') {
+    this.filePath = filePath;
+    this.format = format;
+    this.stream = createWriteStream(filePath, { flags: 'a' });
+  }
+
+  static async create(options: {
+    readonly filePath: string;
+    readonly targetName: string;
+    readonly evalCaseId?: string;
+    readonly attempt?: number;
+    readonly format: 'summary' | 'json';
+  }): Promise<ClaudeStreamLogger> {
+    const logger = new ClaudeStreamLogger(options.filePath, options.format);
+    // Only absent optional lines are dropped; the trailing '' is the blank line after the header.
+    const header = [
+      '# Claude Agent SDK stream log',
+      `# target: ${options.targetName}`,
+      options.evalCaseId ? `# eval: ${options.evalCaseId}` : undefined,
+      options.attempt !== undefined ? `# attempt: ${options.attempt + 1}` : undefined,
+      `# started: ${new Date().toISOString()}`,
+      '',
+    ].filter((line): line is string => line !== undefined);
+    logger.stream.write(`${header.join('\n')}\n`);
+    return logger;
+  }
+
+  /** Log one message: a raw JSON line in 'json' format, else a summary line if there is one. */
+  handleMessage(message: unknown): void {
+    if (!message || typeof message !== 'object') {
+      return;
+    }
+    const elapsed = formatElapsed(this.startedAt);
+    const msg = message as Record<string, unknown>;
+    const type = typeof msg.type === 'string' ? msg.type : 'unknown';
+    if (this.format === 'json') {
+      this.stream.write(`${JSON.stringify({ time: elapsed, type, data: message })}\n`);
+    } else {
+      const summary = summarizeMessage(msg);
+      if (summary) {
+        this.stream.write(`[+${elapsed}] [${type}] ${summary}\n`);
+      }
+    }
+  }
+
+  async close(): Promise<void> {
+    await new Promise<void>((resolve, reject) => {
+      this.stream.once('error', reject); // a stream error while ending rejects close()
+      this.stream.end(() => resolve()); // resolves once the stream has finished
+    });
+  }
+}
+
+/** First element of `msg.message.content` when that is a non-empty array, else undefined. */
+function firstContentPart(msg: Record<string, unknown>): Record<string, unknown> | undefined {
+  const inner = msg.message as Record<string, unknown> | undefined;
+  const parts = inner?.content;
+  return Array.isArray(parts) && parts.length > 0 ? parts[0] : undefined;
+}
+
+/**
+ * One-line description of an SDK stream message, used by the summary log format.
+ *
+ * - assistant: `tool_use (<name>)`, or the first 50 characters of the leading text
+ *   part (followed by `...` when cut), otherwise `message`
+ * - user: `tool_result (<tool_use_id>)`, otherwise `user`
+ * - result: `$<cost>, <duration>ms` when both numbers are present, otherwise `result`
+ * - system: always `init`
+ *
+ * Any other message type yields undefined. Only the first content part is inspected.
+ */
+function summarizeMessage(msg: Record<string, unknown>): string | undefined {
+  const kind = msg.type as string;
+  if (kind === 'assistant') {
+    const head = firstContentPart(msg);
+    if (head?.type === 'tool_use') {
+      return `tool_use (${head.name})`;
+    }
+    if (head?.type === 'text' && typeof head.text === 'string') {
+      return head.text.length > 50 ? `${head.text.slice(0, 50)}...` : head.text;
+    }
+    return 'message';
+  }
+
+  if (kind === 'user') {
+    const head = firstContentPart(msg);
+    return head?.type === 'tool_result' ? `tool_result (${head.tool_use_id})` : 'user';
+  }
+
+  if (kind === 'result') {
+    const { total_cost_usd: cost, duration_ms: duration } = msg;
+    if (typeof cost === 'number' && typeof duration === 'number') {
+      // Cost with four decimals, duration rounded to whole milliseconds.
+      return `$${cost.toFixed(4)}, ${Math.round(duration)}ms`;
+    }
+    return 'result';
+  }
+
+  if (kind === 'system') {
+    return 'init';
+  }
+  return undefined;
+}
+
+/**
+ * Copy of process.env for the Claude Code child process that the SDK spawns.
+ * CLAUDECODE and CLAUDE_CODE_ENTRYPOINT are set to undefined, because a child
+ * that finds CLAUDECODE exits with "cannot be launched inside another Claude
+ * Code session". When Braintrust span IDs are given they are exposed as
+ * CC_PARENT_SPAN_ID / CC_ROOT_SPAN_ID so the trace-claude-code plugin can attach
+ * the session's traces to the AgentV eval span. process.env is not modified.
+ */
+function sanitizeEnvForClaudeSdk(braintrustSpanIds?: {
+  readonly parentSpanId: string;
+  readonly rootSpanId: string;
+}): Record<string, string | undefined> {
+  const sessionMarkers = { CLAUDECODE: undefined, CLAUDE_CODE_ENTRYPOINT: undefined };
+  const traceIds = braintrustSpanIds
+    ? {
+        CC_PARENT_SPAN_ID: braintrustSpanIds.parentSpanId,
+        CC_ROOT_SPAN_ID: braintrustSpanIds.rootSpanId,
+      }
+    : {};
+  // Later spreads win, so these entries replace whatever was inherited.
+  return { ...process.env, ...sessionMarkers, ...traceIds };
+}
+
+/** True when AGENTV_CLAUDE_STREAM_LOGS is `false`, `0` or `off` (trimmed, any letter case). */
+function isClaudeLogStreamingDisabled(): boolean {
+  const raw = process.env.AGENTV_CLAUDE_STREAM_LOGS;
+  if (raw === undefined || raw === '') {
+    return false;
+  }
+  return ['false', '0', 'off'].includes(raw.trim().toLowerCase());
+}
+
+/** Log name: <timestamp>_<target>_<eval id>[_attempt-<n+1>]_<first 8 chars of a UUID>.log */
+function buildLogFilename(request: ProviderRequest, targetName: string): string {
+  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
+  const labels = [targetName, request.evalCaseId ?? 'claude'].map(sanitizeForFilename).join('_');
+  const attemptPart = request.attempt === undefined ? '' : `_attempt-${request.attempt + 1}`;
+  return `${stamp}_${labels}${attemptPart}_${randomUUID().slice(0, 8)}.log`;
+}
+
+/** Turn runs of characters outside [A-Za-z0-9._-] into "_"; empty input yields "claude". */
+function sanitizeForFilename(value: string): string {
+  return value.replace(/[^A-Za-z0-9._-]+/g, '_') || 'claude';
+}
+
+/** Time elapsed since `startedAt` (epoch ms) as MM:SS, or HH:MM:SS from one hour onwards. */
+function formatElapsed(startedAt: number): string {
+  const pad = (n: number): string => n.toString().padStart(2, '0');
+  const totalSeconds = Math.floor((Date.now() - startedAt) / 1000);
+  const hours = Math.floor(totalSeconds / 3600);
+  const minutes = Math.floor((totalSeconds % 3600) / 60);
+  const seconds = totalSeconds % 60;
+  const clock = `${pad(minutes)}:${pad(seconds)}`;
+  return hours > 0 ? `${pad(hours)}:${clock}` : clock;
+}
diff --git a/packages/core/src/evaluation/providers/index.ts b/packages/core/src/evaluation/providers/index.ts
index 547183776..62cd8eef8 100644
--- a/packages/core/src/evaluation/providers/index.ts
+++ b/packages/core/src/evaluation/providers/index.ts
@@ -1,4 +1,6 @@
 import { AnthropicProvider, AzureProvider, GeminiProvider } from './ai-sdk.js';
+import { ClaudeCliProvider } from './claude-cli.js';
+import { ClaudeSdkProvider } from './claude-sdk.js';
 import { ClaudeProvider } from './claude.js';
 import { CliProvider } from './cli.js';
 import { CodexProvider } from './codex.js';
@@ -87,7 +89,11 @@ export function createBuiltinProviderRegistry(): ProviderRegistry {
     .register('copilot-cli', (t) => new CopilotCliProvider(t.name, t.config as never))
     .register('pi-coding-agent', (t) => new PiCodingAgentProvider(t.name, t.config as never))
     .register('pi-agent-sdk', (t) => new PiAgentSdkProvider(t.name, t.config as never))
-    .register('claude', (t) => new ClaudeProvider(t.name, t.config as never))
+    // claude-cli is the new default subprocess provider; claude is an alias
+    .register('claude-cli', (t) => new ClaudeCliProvider(t.name, t.config as never))
+    .register('claude', (t) => new ClaudeCliProvider(t.name, t.config as never))
+    // claude-sdk is the explicit SDK provider (requires @anthropic-ai/claude-agent-sdk)
+    .register('claude-sdk', (t) => new ClaudeSdkProvider(t.name, t.config as never))
     .register('mock', (t) => new MockProvider(t.name, t.config as never))
     .register('vscode', (t) => new VSCodeProvider(t.name, t.config as never, 'vscode'))
     .register(
diff --git a/packages/core/src/evaluation/providers/targets.ts b/packages/core/src/evaluation/providers/targets.ts
index 1fb331d6c..aa30b06b6 100644
--- a/packages/core/src/evaluation/providers/targets.ts
+++ b/packages/core/src/evaluation/providers/targets.ts
@@ -596,6 +596,22 @@ export type ResolvedTarget =
       readonly providerBatching?: boolean;
       readonly config: ClaudeResolvedConfig;
     }
+  | {
+      readonly kind: 'claude-cli';
+      readonly name: string;
+      readonly judgeTarget?: string;
+      readonly workers?: number;
+      readonly providerBatching?: boolean;
+      readonly config: ClaudeResolvedConfig;
+    }
+  | {
+      readonly kind: 'claude-sdk';
+      readonly name: string;
+      readonly judgeTarget?: string;
+      readonly workers?: number;
+      readonly providerBatching?: boolean;
+      readonly config: ClaudeResolvedConfig;
+    }
   | {
       readonly kind: 'mock';
       readonly name: string;
@@ -788,9 +804,18 @@ export function resolveTargetDefinition(
       };
     case 'claude':
     case 'claude-code':
+    case 'claude-cli':
+      return {
+        kind: 'claude-cli',
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolveClaudeConfig(parsed, env, evalFilePath),
+      };
     case 'claude-sdk':
       return {
-        kind: 'claude',
+        kind: 'claude-sdk',
         name: parsed.name,
         judgeTarget: parsed.judge_target,
         workers: parsed.workers,
diff --git a/packages/core/src/evaluation/providers/types.ts b/packages/core/src/evaluation/providers/types.ts
index a30108d5b..af5e3b6a1 100644
--- a/packages/core/src/evaluation/providers/types.ts
+++ b/packages/core/src/evaluation/providers/types.ts
@@ -20,6 +20,8 @@ export type ProviderKind =
   | 'pi-coding-agent'
   | 'pi-agent-sdk'
   | 'claude'
+  | 'claude-cli'
+  | 'claude-sdk'
   | 'cli'
   | 'mock'
   | 'vscode'
@@ -35,6 +37,8 @@ export const AGENT_PROVIDER_KINDS: readonly ProviderKind[] = [
   'copilot-cli',
   'pi-coding-agent',
   'claude',
+  'claude-cli',
+  'claude-sdk',
   'vscode',
   'vscode-insiders',
 ] as const;
@@ -53,6 +57,8 @@ export const KNOWN_PROVIDERS: readonly ProviderKind[] = [
   'pi-coding-agent',
   'pi-agent-sdk',
   'claude',
+  'claude-cli',
+  'claude-sdk',
   'cli',
   'mock',
   'vscode',
@@ -73,7 +79,6 @@ export const PROVIDER_ALIASES: readonly string[] = [
 
   'pi', // alias for "pi-coding-agent"
   'claude-code', // alias for "claude" (legacy)
-  'claude-sdk', // alias for "claude"
   'openai', // legacy/future support
   'bedrock', // legacy/future support
   'vertex', // legacy/future support
diff --git a/packages/core/src/evaluation/registry/index.ts b/packages/core/src/evaluation/registry/index.ts
index fc60bb177..75c8332dc 100644
--- a/packages/core/src/evaluation/registry/index.ts
+++ b/packages/core/src/evaluation/registry/index.ts
@@ -7,3 +7,4 @@ export { EvaluatorRegistry, DeterministicAssertionEvaluator } from './evaluator-
 export type { EvaluatorDispatchContext, EvaluatorFactoryFn } from './evaluator-registry.js';
 export { createBuiltinRegistry } from './builtin-evaluators.js';
 export { discoverAssertions } from './assertion-discovery.js';
+export { discoverJudges } from './judge-discovery.js';
diff --git a/packages/core/src/evaluation/registry/judge-discovery.ts b/packages/core/src/evaluation/registry/judge-discovery.ts
new file mode 100644
index 000000000..c4a843565
--- /dev/null
+++ b/packages/core/src/evaluation/registry/judge-discovery.ts
@@ -0,0 +1,78 @@
+/**
+ * Convention-based discovery of custom judge scripts.
+ *
+ * Scans `.agentv/judges/` for TypeScript/JavaScript files and registers
+ * them as code-judge evaluators in the registry. The file name (without
+ * extension) becomes the evaluator type name.
+ *
+ * Example: `.agentv/judges/trigger-judge.ts` → type "trigger-judge" in EVAL.yaml
+ */
+
+import path from 'node:path';
+import fg from 'fast-glob';
+
+import { CodeEvaluator } from '../evaluators/code-evaluator.js';
+import type { EvaluatorFactoryFn } from './evaluator-registry.js';
+import type { EvaluatorRegistry } from './evaluator-registry.js';
+
+/**
+ * Register every script found in a `.agentv/judges/` directory as an evaluator type.
+ *
+ * Looks in `baseDir` and each of its ancestors (the filesystem root itself is
+ * not visited), nearest directory first. A script's type name is its file name
+ * without the extension; a name the registry already has is left untouched, so
+ * built-in types and nearer judges take precedence.
+ *
+ * @param registry - The evaluator registry to register discovered judges into
+ * @param baseDir - Directory where the upward search starts
+ * @returns Type names registered by this call, in registration order
+ */
+export async function discoverJudges(
+  registry: EvaluatorRegistry,
+  baseDir: string,
+): Promise<string[]> {
+  // Walk from baseDir up to, but not including, the filesystem root.
+  const start = path.resolve(baseDir);
+  const fsRoot = path.parse(start).root;
+  const judgeDirs: string[] = [];
+  for (let current = start; current !== fsRoot; current = path.dirname(current)) {
+    judgeDirs.push(path.join(current, '.agentv', 'judges'));
+  }
+
+  // Glob every candidate directory before touching the registry.
+  const scripts: string[] = [];
+  for (const cwd of judgeDirs) {
+    try {
+      const matches = await fg(['*.ts', '*.js', '*.mts', '*.mjs'], {
+        cwd,
+        absolute: true,
+        onlyFiles: true,
+      });
+      scripts.push(...matches);
+    } catch {
+      // Best effort: a directory that cannot be globbed contributes nothing.
+    }
+  }
+
+  const registered: string[] = [];
+  for (const scriptPath of scripts) {
+    const typeName = path.basename(scriptPath).replace(/\.(ts|js|mts|mjs)$/, '');
+    // Never replace a type that is already registered.
+    if (registry.has(typeName)) {
+      continue;
+    }
+
+    // NOTE(review): the factory's first argument (the evaluator config) is not
+    // passed on to CodeEvaluator — confirm the script still receives its settings.
+    const factory: EvaluatorFactoryFn = (_config, context) =>
+      new CodeEvaluator({
+        command: ['bun', 'run', scriptPath],
+        agentTimeoutMs: context.agentTimeoutMs,
+      });
+
+    registry.register(typeName, factory);
+    registered.push(typeName);
+  }
+
+  return registered;
+}
diff --git a/packages/core/src/evaluation/validation/targets-validator.ts b/packages/core/src/evaluation/validation/targets-validator.ts
index c507308e9..068848e00 100644
--- a/packages/core/src/evaluation/validation/targets-validator.ts
+++ b/packages/core/src/evaluation/validation/targets-validator.ts
@@ -217,6 +217,7 @@ function getKnownSettings(provider: string): Set<string> | null {
       return COPILOT_CLI_SETTINGS;
     case 'claude':
     case 'claude-code':
+    case 'claude-cli':
     case 'claude-sdk':
       return CLAUDE_SETTINGS;
     case 'vscode':
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 7df57f3f2..514f7acae 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -57,6 +57,7 @@ export type {
 } from './evaluation/registry/evaluator-registry.js';
 export { createBuiltinRegistry } from './evaluation/registry/builtin-evaluators.js';
 export { discoverAssertions } from './evaluation/registry/assertion-discovery.js';
+export { discoverJudges } from './evaluation/registry/judge-discovery.js';
 
 export type AgentKernel = {
   status: string;
diff --git a/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts b/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts
new file mode 100644
index 000000000..4834d02c5
--- /dev/null
+++ b/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts
@@ -0,0 +1,63 @@
+import { describe, expect, it } from 'bun:test';
+
+import { ClaudeCliProvider } from '../../../src/evaluation/providers/claude-cli.js';
+import { ClaudeSdkProvider } from '../../../src/evaluation/providers/claude-sdk.js';
+import { ClaudeProvider } from '../../../src/evaluation/providers/claude.js';
+import { createBuiltinProviderRegistry } from '../../../src/evaluation/providers/index.js';
+
+const mockClaudeConfig = {
+  model: undefined, // shared config object handed to every provider built in this file
+  cwd: undefined,
+  timeoutMs: undefined,
+  logDir: undefined,
+  logFormat: 'summary' as const, // the only field given a concrete value
+  systemPrompt: undefined,
+  maxTurns: undefined,
+  maxBudgetUsd: undefined,
+};
+
+describe('Claude provider alias resolution', () => {
+  const registry = createBuiltinProviderRegistry();
+  // Each registry case asks for one Claude kind and checks which provider class comes back.
+  it('creates a ClaudeCliProvider for claude-cli kind', () => {
+    const provider = registry.create({
+      name: 'test-target',
+      kind: 'claude-cli',
+      config: mockClaudeConfig,
+    });
+    expect(provider).toBeInstanceOf(ClaudeCliProvider);
+    expect(provider.kind).toBe('claude-cli');
+    expect(provider.id).toBe('claude-cli:test-target');
+  });
+  // 'claude' is expected to build the same class and to report kind 'claude-cli', not 'claude'.
+  it('creates a ClaudeCliProvider for claude kind (alias for claude-cli)', () => {
+    const provider = registry.create({
+      name: 'test-target',
+      kind: 'claude',
+      config: mockClaudeConfig,
+    });
+    expect(provider).toBeInstanceOf(ClaudeCliProvider);
+    expect(provider.kind).toBe('claude-cli');
+  });
+  // 'claude-sdk' is expected to build ClaudeSdkProvider with its own kind and id prefix.
+  it('creates a ClaudeSdkProvider for claude-sdk kind', () => {
+    const provider = registry.create({
+      name: 'test-target',
+      kind: 'claude-sdk',
+      config: mockClaudeConfig,
+    });
+    expect(provider).toBeInstanceOf(ClaudeSdkProvider);
+    expect(provider.kind).toBe('claude-sdk');
+    expect(provider.id).toBe('claude-sdk:test-target');
+  });
+  // Constructs both classes directly (no registry) and compares the kinds they report.
+  it('ClaudeCliProvider and ClaudeProvider are different classes', () => {
+    // ClaudeProvider is the legacy SDK provider kept for reference
+    const cliProvider = new ClaudeCliProvider('target', mockClaudeConfig);
+    const sdkProvider = new ClaudeProvider('target', mockClaudeConfig as never);
+    expect(cliProvider).toBeInstanceOf(ClaudeCliProvider);
+    expect(sdkProvider).toBeInstanceOf(ClaudeProvider);
+    expect(cliProvider.kind).toBe('claude-cli');
+    expect(sdkProvider.kind).toBe('claude');
+  });
+});