From 2cb3a005e880cc1c6a8b443ec31ea2c95863a3a5 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 12:39:08 +0000
Subject: [PATCH 01/13] feat: add agentv to ProviderKind

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 packages/core/src/evaluation/providers/types.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/evaluation/providers/types.ts b/packages/core/src/evaluation/providers/types.ts
index af5e3b6a1..e0106071a 100644
--- a/packages/core/src/evaluation/providers/types.ts
+++ b/packages/core/src/evaluation/providers/types.ts
@@ -25,7 +25,8 @@ export type ProviderKind =
   | 'cli'
   | 'mock'
   | 'vscode'
-  | 'vscode-insiders';
+  | 'vscode-insiders'
+  | 'agentv';
 
 /**
  * Agent providers that have filesystem access and don't need unwrapped guidelines.
@@ -63,6 +64,7 @@ export const KNOWN_PROVIDERS: readonly ProviderKind[] = [
   'mock',
   'vscode',
   'vscode-insiders',
+  'agentv',
 ] as const;
 
 /**

From 80a20c1ea250c66d740ceb36e1429dc86ace1c12 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 12:42:43 +0000
Subject: [PATCH 02/13] feat: add agentv provider to target resolution

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../core/src/evaluation/providers/targets.ts  | 30 +++++++++++
 .../test/evaluation/providers/targets.test.ts | 51 +++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/packages/core/src/evaluation/providers/targets.ts b/packages/core/src/evaluation/providers/targets.ts
index aa30b06b6..26f827eae 100644
--- a/packages/core/src/evaluation/providers/targets.ts
+++ b/packages/core/src/evaluation/providers/targets.ts
@@ -514,6 +514,11 @@ export interface VSCodeResolvedConfig {
   readonly timeoutMs?: number;
 }
 
+export interface AgentVResolvedConfig {
+  readonly model: string;
+  readonly temperature: number;
+}
+
 /**
  * Healthcheck configuration type derived from CliHealthcheckSchema.
  * Supports both HTTP and command-based healthchecks.
@@ -628,6 +633,14 @@ export type ResolvedTarget =
       readonly providerBatching?: boolean;
       readonly config: VSCodeResolvedConfig;
     }
+  | {
+      readonly kind: 'agentv';
+      readonly name: string;
+      readonly judgeTarget?: string;
+      readonly workers?: number;
+      readonly providerBatching?: boolean;
+      readonly config: AgentVResolvedConfig;
+    }
   | {
       readonly kind: 'cli';
       readonly name: string;
@@ -841,6 +854,23 @@ export function resolveTargetDefinition(
         providerBatching,
         config: resolveVSCodeConfig(parsed, env, provider === 'vscode-insiders', evalFilePath),
       };
+    case 'agentv': {
+      const model = typeof parsed.model === 'string' ? parsed.model : undefined;
+      if (!model) {
+        throw new Error(
+          `Target "${parsed.name}" (provider: agentv) requires a "model" field (e.g., "openai:gpt-5-mini")`,
+        );
+      }
+      const temperature = typeof parsed.temperature === 'number' ? parsed.temperature : 0;
+      return {
+        kind: 'agentv',
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: typeof parsed.workers === 'number' ? parsed.workers : undefined,
+        providerBatching,
+        config: { model, temperature },
+      };
+    }
     case 'cli':
       return {
         kind: 'cli',
diff --git a/packages/core/test/evaluation/providers/targets.test.ts b/packages/core/test/evaluation/providers/targets.test.ts
index eacd573b2..7c7d2b0c2 100644
--- a/packages/core/test/evaluation/providers/targets.test.ts
+++ b/packages/core/test/evaluation/providers/targets.test.ts
@@ -559,6 +559,57 @@ describe('resolveTargetDefinition', () => {
       ),
     ).toThrow(/workspace_template has been removed/i);
   });
+
+  it('resolves agentv target with model and default temperature', () => {
+    const target = resolveTargetDefinition(
+      {
+        name: 'agentv-judge',
+        provider: 'agentv',
+        model: 'openai:gpt-5-mini',
+      },
+      {},
+    );
+
+    expect(target.kind).toBe('agentv');
+    if (target.kind !== 'agentv') {
+      throw new Error('expected agentv target');
+    }
+
+    expect(target.config.model).toBe('openai:gpt-5-mini');
+    expect(target.config.temperature).toBe(0);
+  });
+
+  it('resolves agentv target with explicit temperature', () => {
+    const target = resolveTargetDefinition(
+      {
+        name: 'agentv-warm',
+        provider: 'agentv',
+        model: 'anthropic:claude-haiku-4.5',
+        temperature: 0.7,
+      },
+      {},
+    );
+
+    expect(target.kind).toBe('agentv');
+    if (target.kind !== 'agentv') {
+      throw new Error('expected agentv target');
+    }
+
+    expect(target.config.model).toBe('anthropic:claude-haiku-4.5');
+    expect(target.config.temperature).toBe(0.7);
+  });
+
+  it('throws when agentv target is missing model', () => {
+    expect(() =>
+      resolveTargetDefinition(
+        {
+          name: 'agentv-no-model',
+          provider: 'agentv',
+        },
+        {},
+      ),
+    ).toThrow(/model/i);
+  });
 });
 
 describe('createProvider', () => {

From 64d8d6d37c2c9cfe72feb21ebab84e56e0f913e2 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 12:51:21 +0000
Subject: [PATCH 03/13] feat: add agentv provider implementation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bun.lock                                      |   7 +-
 packages/core/package.json                    |   3 +-
 .../evaluation/providers/agentv-provider.ts   |  74 ++++++++++
 .../core/src/evaluation/providers/index.ts    |   3 +
 .../providers/agentv-provider.test.ts         | 132 ++++++++++++++++++
 5 files changed, 215 insertions(+), 4 deletions(-)
 create mode 100644 packages/core/src/evaluation/providers/agentv-provider.ts
 create mode 100644 packages/core/test/evaluation/providers/agentv-provider.test.ts

diff --git a/bun.lock b/bun.lock
index 200a436cc..70471cff6 100644
--- a/bun.lock
+++ b/bun.lock
@@ -24,7 +24,7 @@
     },
     "apps/cli": {
       "name": "agentv",
-      "version": "2.12.0",
+      "version": "2.19.0",
       "bin": {
         "agentv": "./dist/cli.js",
       },
@@ -61,13 +61,14 @@
     },
     "packages/core": {
       "name": "@agentv/core",
-      "version": "2.12.0",
+      "version": "2.19.0",
       "dependencies": {
         "@agentclientprotocol/sdk": "^0.14.1",
         "@agentv/eval": "workspace:*",
         "@ai-sdk/anthropic": "^2.0.53",
         "@ai-sdk/azure": "^2.0.78",
         "@ai-sdk/google": "^2.0.44",
+        "@ai-sdk/openai": "^2.0.0",
         "@anthropic-ai/claude-agent-sdk": "^0.2.49",
         "@github/copilot-sdk": "^0.1.25",
         "@mariozechner/pi-agent-core": "^0.54.2",
@@ -95,7 +96,7 @@
     },
     "packages/eval": {
       "name": "@agentv/eval",
-      "version": "2.12.0",
+      "version": "2.19.0",
       "dependencies": {
         "zod": "^3.23.8",
       },
diff --git a/packages/core/package.json b/packages/core/package.json
index d0c0a031e..600890177 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -39,11 +39,12 @@
   },
   "files": ["dist", "README.md"],
   "dependencies": {
-    "@agentv/eval": "workspace:*",
     "@agentclientprotocol/sdk": "^0.14.1",
+    "@agentv/eval": "workspace:*",
     "@ai-sdk/anthropic": "^2.0.53",
     "@ai-sdk/azure": "^2.0.78",
     "@ai-sdk/google": "^2.0.44",
+    "@ai-sdk/openai": "^2.0.0",
     "@anthropic-ai/claude-agent-sdk": "^0.2.49",
     "@github/copilot-sdk": "^0.1.25",
     "@mariozechner/pi-agent-core": "^0.54.2",
diff --git a/packages/core/src/evaluation/providers/agentv-provider.ts b/packages/core/src/evaluation/providers/agentv-provider.ts
new file mode 100644
index 000000000..8e5e2c589
--- /dev/null
+++ b/packages/core/src/evaluation/providers/agentv-provider.ts
@@ -0,0 +1,74 @@
+import { createAnthropic } from '@ai-sdk/anthropic';
+import { createAzure } from '@ai-sdk/azure';
+import { createGoogleGenerativeAI } from '@ai-sdk/google';
+import { createOpenAI } from '@ai-sdk/openai';
+import { type LanguageModel, createProviderRegistry } from 'ai';
+
+import type { AgentVResolvedConfig } from './targets.js';
+import type { Provider, ProviderRequest, ProviderResponse } from './types.js';
+
+/**
+ * Lazily-created singleton provider registry for resolving AI SDK model strings.
+ * Maps provider prefixes (e.g., "openai", "anthropic") to their AI SDK provider
+ * implementations so that model strings like "openai:gpt-5-mini" can be resolved
+ * to LanguageModel instances.
+ */
+let _registry: { languageModel: (id: string) => LanguageModel } | null = null;
+
+function getAiSdkRegistry(): { languageModel: (id: string) => LanguageModel } {
+  if (!_registry) {
+    // Cast through unknown: the registry's languageModel signature uses narrowed
+    // literal types, but we need to accept arbitrary model strings at runtime.
+    _registry = createProviderRegistry({
+      openai: createOpenAI(),
+      anthropic: createAnthropic(),
+      azure: createAzure(),
+      google: createGoogleGenerativeAI(),
+    }) as unknown as { languageModel: (id: string) => LanguageModel };
+  }
+  return _registry;
+}
+
+/**
+ * AgentV built-in provider for LLM judge evaluation.
+ *
+ * Resolves an AI SDK model string (e.g., "openai:gpt-5-mini", "anthropic:claude-sonnet-4-20250514")
+ * to a Vercel AI SDK LanguageModel using createProviderRegistry. This provider is used
+ * exclusively for judge evaluation — it does not support direct agent invocation.
+ *
+ * Usage: `--judge-target agentv --model openai:gpt-5-mini`
+ */
+export class AgentvProvider implements Provider {
+  readonly id: string;
+  readonly kind = 'agentv' as const;
+  readonly targetName: string;
+
+  private readonly model: LanguageModel;
+  private readonly config: AgentVResolvedConfig;
+
+  constructor(targetName: string, config: AgentVResolvedConfig) {
+    this.id = `agentv:${targetName}`;
+    this.targetName = targetName;
+    this.config = config;
+
+    const registry = getAiSdkRegistry();
+    this.model = registry.languageModel(config.model);
+  }
+
+  /**
+   * Direct invoke is not supported for the agentv provider.
+   * Use asLanguageModel() with generateText() instead.
+   */
+  async invoke(_request: ProviderRequest): Promise<ProviderResponse> {
+    throw new Error(
+      'AgentvProvider does not support direct invoke(). Use asLanguageModel() with generateText() instead.',
+    );
+  }
+
+  /**
+   * Returns the resolved AI SDK LanguageModel for use with generateText/generateObject.
+   */
+  asLanguageModel(): LanguageModel {
+    return this.model;
+  }
+}
diff --git a/packages/core/src/evaluation/providers/index.ts b/packages/core/src/evaluation/providers/index.ts
index 62cd8eef8..6ec6e2dfa 100644
--- a/packages/core/src/evaluation/providers/index.ts
+++ b/packages/core/src/evaluation/providers/index.ts
@@ -1,3 +1,4 @@
+import { AgentvProvider } from './agentv-provider.js';
 import { AnthropicProvider, AzureProvider, GeminiProvider } from './ai-sdk.js';
 import { ClaudeCliProvider } from './claude-cli.js';
 import { ClaudeSdkProvider } from './claude-sdk.js';
@@ -30,6 +31,7 @@ export type {
 } from './types.js';
 
 export type {
+  AgentVResolvedConfig,
   AnthropicResolvedConfig,
   AzureResolvedConfig,
   ClaudeResolvedConfig,
@@ -95,6 +97,7 @@ export function createBuiltinProviderRegistry(): ProviderRegistry {
     // claude-sdk is the explicit SDK provider (requires @anthropic-ai/claude-agent-sdk)
     .register('claude-sdk', (t) => new ClaudeSdkProvider(t.name, t.config as never))
     .register('mock', (t) => new MockProvider(t.name, t.config as never))
+    .register('agentv', (t) => new AgentvProvider(t.name, t.config as never))
     .register('vscode', (t) => new VSCodeProvider(t.name, t.config as never, 'vscode'))
     .register(
       'vscode-insiders',
diff --git a/packages/core/test/evaluation/providers/agentv-provider.test.ts b/packages/core/test/evaluation/providers/agentv-provider.test.ts
new file mode 100644
index 000000000..b2b3be2c2
--- /dev/null
+++ b/packages/core/test/evaluation/providers/agentv-provider.test.ts
@@ -0,0 +1,132 @@
+import { describe, expect, it, vi } from 'vitest';
+
+// Mock AI SDK provider packages before importing the provider
+vi.mock('@ai-sdk/openai', () => ({
+  createOpenAI: () => {
+    const provider = (modelId: string) => ({
+      modelId,
+      specificationVersion: 'v2',
+      provider: 'openai',
+    });
+    provider.languageModel = (modelId: string) => ({
+      modelId,
+      specificationVersion: 'v2',
+      provider: 'openai',
+    });
+    provider.chatModel = provider.languageModel;
+    provider.textEmbeddingModel = () => ({});
+    return provider;
+  },
+}));
+
+vi.mock('@ai-sdk/anthropic', () => ({
+  createAnthropic: () => {
+    const provider = (modelId: string) => ({
+      modelId,
+      specificationVersion: 'v2',
+      provider: 'anthropic',
+    });
+    provider.languageModel = (modelId: string) => ({
+      modelId,
+      specificationVersion: 'v2',
+      provider: 'anthropic',
+    });
+    provider.chatModel = provider.languageModel;
+    provider.textEmbeddingModel = () => ({});
+    return provider;
+  },
+}));
+
+vi.mock('@ai-sdk/azure', () => ({
+  createAzure: () => {
+    const provider = (modelId: string) => ({
+      modelId,
+      specificationVersion: 'v2',
+      provider: 'azure',
+    });
+    provider.languageModel = (modelId: string) => ({
+      modelId,
+      specificationVersion: 'v2',
+      provider: 'azure',
+    });
+    provider.chatModel = provider.languageModel;
+    provider.textEmbeddingModel = () => ({});
+    return provider;
+  },
+}));
+
+vi.mock('@ai-sdk/google', () => ({
+  createGoogleGenerativeAI: () => {
+    const provider = (modelId: string) => ({
+      modelId,
+      specificationVersion: 'v2',
+      provider: 'google',
+    });
+    provider.languageModel = (modelId: string) => ({
+      modelId,
+      specificationVersion: 'v2',
+      provider: 'google',
+    });
+    provider.chatModel = provider.languageModel;
+    provider.textEmbeddingModel = () => ({});
+    return provider;
+  },
+}));
+
+import { AgentvProvider } from '../../../src/evaluation/providers/agentv-provider.js';
+
+describe('AgentvProvider', () => {
+  it('has kind "agentv"', () => {
+    const provider = new AgentvProvider('test-judge', {
+      model: 'openai:gpt-5-mini',
+      temperature: 0,
+    });
+    expect(provider.kind).toBe('agentv');
+  });
+
+  it('has correct targetName', () => {
+    const provider = new AgentvProvider('my-judge', {
+      model: 'openai:gpt-5-mini',
+      temperature: 0,
+    });
+    expect(provider.targetName).toBe('my-judge');
+  });
+
+  it('has correct id format', () => {
+    const provider = new AgentvProvider('test-judge', {
+      model: 'openai:gpt-5-mini',
+      temperature: 0,
+    });
+    expect(provider.id).toBe('agentv:test-judge');
+  });
+
+  it('asLanguageModel() returns a defined LanguageModel', () => {
+    const provider = new AgentvProvider('test-judge', {
+      model: 'openai:gpt-5-mini',
+      temperature: 0,
+    });
+    const model = provider.asLanguageModel();
+    expect(model).toBeDefined();
+    expect(model.modelId).toBe('gpt-5-mini');
+  });
+
+  it('asLanguageModel() works with anthropic model strings', () => {
+    const provider = new AgentvProvider('test-judge', {
+      model: 'anthropic:claude-sonnet-4-20250514',
+      temperature: 0,
+    });
+    const model = provider.asLanguageModel();
+    expect(model).toBeDefined();
+    expect(model.modelId).toBe('claude-sonnet-4-20250514');
+  });
+
+  it('invoke() throws an error', async () => {
+    const provider = new AgentvProvider('test-judge', {
+      model: 'openai:gpt-5-mini',
+      temperature: 0,
+    });
+    await expect(provider.invoke({ question: 'test' })).rejects.toThrow(
+      'AgentvProvider does not support direct invoke()',
+    );
+  });
+});

From d6dbacdd89781ac842e8d90860dd4a403e30e393 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 12:54:54 +0000
Subject: [PATCH 04/13] fix: rewrite agentv provider to use direct SDK calls
 instead of registry

Replace createProviderRegistry with direct createOpenAI/createAnthropic/
createAzure/createGoogleGenerativeAI calls to work around type
incompatibilities between the v2 and v3 language-model specification
versions. Split "provider:model" strings on the first colon and dispatch
on the provider prefix with a switch statement. Simplify the test mocks
and add coverage for google, azure, and error cases.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../evaluation/providers/agentv-provider.ts   |  62 +++++----
 .../providers/agentv-provider.test.ts         | 123 +++++++++---------
 2 files changed, 100 insertions(+), 85 deletions(-)

diff --git a/packages/core/src/evaluation/providers/agentv-provider.ts b/packages/core/src/evaluation/providers/agentv-provider.ts
index 8e5e2c589..c9b1484a2 100644
--- a/packages/core/src/evaluation/providers/agentv-provider.ts
+++ b/packages/core/src/evaluation/providers/agentv-provider.ts
@@ -2,39 +2,57 @@ import { createAnthropic } from '@ai-sdk/anthropic';
 import { createAzure } from '@ai-sdk/azure';
 import { createGoogleGenerativeAI } from '@ai-sdk/google';
 import { createOpenAI } from '@ai-sdk/openai';
-import { type LanguageModel, createProviderRegistry } from 'ai';
+import type { LanguageModel } from 'ai';
 
 import type { AgentVResolvedConfig } from './targets.js';
 import type { Provider, ProviderRequest, ProviderResponse } from './types.js';
 
 /**
- * Lazily-created singleton provider registry for resolving AI SDK model strings.
- * Maps provider prefixes (e.g., "openai", "anthropic") to their AI SDK provider
- * implementations so that model strings like "openai:gpt-5-mini" can be resolved
- * to LanguageModel instances.
+ * Parse a model string like "openai:gpt-5-mini" into provider prefix and model name.
  */
-let _registry: { languageModel: (id: string) => LanguageModel } | null = null;
+function parseModelString(model: string): { provider: string; modelName: string } {
+  const colonIndex = model.indexOf(':');
+  if (colonIndex === -1) {
+    throw new Error(
+      `Invalid model string "${model}". Expected format "provider:model" (e.g., "openai:gpt-5-mini")`,
+    );
+  }
+  return {
+    provider: model.slice(0, colonIndex),
+    modelName: model.slice(colonIndex + 1),
+  };
+}
 
-function getAiSdkRegistry(): { languageModel: (id: string) => LanguageModel } {
-  if (!_registry) {
-    // Cast through unknown: the registry's languageModel signature uses narrowed
-    // literal types, but we need to accept arbitrary model strings at runtime.
-    _registry = createProviderRegistry({
-      openai: createOpenAI(),
-      anthropic: createAnthropic(),
-      azure: createAzure(),
-      google: createGoogleGenerativeAI(),
-    }) as unknown as { languageModel: (id: string) => LanguageModel };
+/**
+ * Create a LanguageModel from a model string using the appropriate AI SDK provider.
+ */
+function createLanguageModel(modelString: string): LanguageModel {
+  const { provider, modelName } = parseModelString(modelString);
+
+  switch (provider) {
+    case 'openai':
+      return createOpenAI()(modelName);
+    case 'anthropic':
+      return createAnthropic()(modelName);
+    case 'azure':
+      return createAzure()(modelName);
+    case 'google':
+      return createGoogleGenerativeAI()(modelName);
+    default:
+      throw new Error(
+        `Unsupported AI SDK provider "${provider}" in model string "${modelString}". ` +
+          'Supported providers: openai, anthropic, azure, google',
+      );
   }
-  return _registry;
 }
 
 /**
  * AgentV built-in provider for LLM judge evaluation.
  *
  * Resolves an AI SDK model string (e.g., "openai:gpt-5-mini", "anthropic:claude-sonnet-4-20250514")
- * to a Vercel AI SDK LanguageModel using createProviderRegistry. This provider is used
- * exclusively for judge evaluation — it does not support direct agent invocation.
+ * to a Vercel AI SDK LanguageModel by parsing the provider prefix and creating the appropriate
+ * AI SDK provider directly. This provider is used exclusively for judge evaluation — it does not
+ * support direct agent invocation.
  *
  * Usage: `--judge-target agentv --model openai:gpt-5-mini`
  */
@@ -44,15 +62,11 @@ export class AgentvProvider implements Provider {
   readonly targetName: string;
 
   private readonly model: LanguageModel;
-  private readonly config: AgentVResolvedConfig;
 
   constructor(targetName: string, config: AgentVResolvedConfig) {
     this.id = `agentv:${targetName}`;
     this.targetName = targetName;
-    this.config = config;
-
-    const registry = getAiSdkRegistry();
-    this.model = registry.languageModel(config.model);
+    this.model = createLanguageModel(config.model);
   }
 
   /**
diff --git a/packages/core/test/evaluation/providers/agentv-provider.test.ts b/packages/core/test/evaluation/providers/agentv-provider.test.ts
index b2b3be2c2..f2c66a523 100644
--- a/packages/core/test/evaluation/providers/agentv-provider.test.ts
+++ b/packages/core/test/evaluation/providers/agentv-provider.test.ts
@@ -1,76 +1,37 @@
 import { describe, expect, it, vi } from 'vitest';
 
-// Mock AI SDK provider packages before importing the provider
+// Mock AI SDK provider packages before importing the provider.
+// Each createXxx() returns a callable factory: createXxx()(modelName) => model stub.
 vi.mock('@ai-sdk/openai', () => ({
-  createOpenAI: () => {
-    const provider = (modelId: string) => ({
-      modelId,
-      specificationVersion: 'v2',
-      provider: 'openai',
-    });
-    provider.languageModel = (modelId: string) => ({
-      modelId,
-      specificationVersion: 'v2',
-      provider: 'openai',
-    });
-    provider.chatModel = provider.languageModel;
-    provider.textEmbeddingModel = () => ({});
-    return provider;
-  },
+  createOpenAI: () => (modelId: string) => ({
+    modelId,
+    specificationVersion: 'v2',
+    provider: 'openai',
+  }),
 }));
 
 vi.mock('@ai-sdk/anthropic', () => ({
-  createAnthropic: () => {
-    const provider = (modelId: string) => ({
-      modelId,
-      specificationVersion: 'v2',
-      provider: 'anthropic',
-    });
-    provider.languageModel = (modelId: string) => ({
-      modelId,
-      specificationVersion: 'v2',
-      provider: 'anthropic',
-    });
-    provider.chatModel = provider.languageModel;
-    provider.textEmbeddingModel = () => ({});
-    return provider;
-  },
+  createAnthropic: () => (modelId: string) => ({
+    modelId,
+    specificationVersion: 'v2',
+    provider: 'anthropic',
+  }),
 }));
 
 vi.mock('@ai-sdk/azure', () => ({
-  createAzure: () => {
-    const provider = (modelId: string) => ({
-      modelId,
-      specificationVersion: 'v2',
-      provider: 'azure',
-    });
-    provider.languageModel = (modelId: string) => ({
-      modelId,
-      specificationVersion: 'v2',
-      provider: 'azure',
-    });
-    provider.chatModel = provider.languageModel;
-    provider.textEmbeddingModel = () => ({});
-    return provider;
-  },
+  createAzure: () => (modelId: string) => ({
+    modelId,
+    specificationVersion: 'v2',
+    provider: 'azure',
+  }),
 }));
 
 vi.mock('@ai-sdk/google', () => ({
-  createGoogleGenerativeAI: () => {
-    const provider = (modelId: string) => ({
-      modelId,
-      specificationVersion: 'v2',
-      provider: 'google',
-    });
-    provider.languageModel = (modelId: string) => ({
-      modelId,
-      specificationVersion: 'v2',
-      provider: 'google',
-    });
-    provider.chatModel = provider.languageModel;
-    provider.textEmbeddingModel = () => ({});
-    return provider;
-  },
+  createGoogleGenerativeAI: () => (modelId: string) => ({
+    modelId,
+    specificationVersion: 'v2',
+    provider: 'google',
+  }),
 }));
 
 import { AgentvProvider } from '../../../src/evaluation/providers/agentv-provider.js';
@@ -120,6 +81,46 @@ describe('AgentvProvider', () => {
     expect(model.modelId).toBe('claude-sonnet-4-20250514');
   });
 
+  it('asLanguageModel() works with google model strings', () => {
+    const provider = new AgentvProvider('test-judge', {
+      model: 'google:gemini-2.5-flash',
+      temperature: 0,
+    });
+    const model = provider.asLanguageModel();
+    expect(model).toBeDefined();
+    expect(model.modelId).toBe('gemini-2.5-flash');
+  });
+
+  it('asLanguageModel() works with azure model strings', () => {
+    const provider = new AgentvProvider('test-judge', {
+      model: 'azure:gpt-4o-deployment',
+      temperature: 0,
+    });
+    const model = provider.asLanguageModel();
+    expect(model).toBeDefined();
+    expect(model.modelId).toBe('gpt-4o-deployment');
+  });
+
+  it('throws for unsupported provider prefix', () => {
+    expect(
+      () =>
+        new AgentvProvider('test-judge', {
+          model: 'unsupported:some-model',
+          temperature: 0,
+        }),
+    ).toThrow('Unsupported AI SDK provider "unsupported"');
+  });
+
+  it('throws for model string without colon separator', () => {
+    expect(
+      () =>
+        new AgentvProvider('test-judge', {
+          model: 'gpt-5-mini',
+          temperature: 0,
+        }),
+    ).toThrow('Invalid model string "gpt-5-mini"');
+  });
+
   it('invoke() throws an error', async () => {
     const provider = new AgentvProvider('test-judge', {
       model: 'openai:gpt-5-mini',

From d58b34cd5eddb2d7c9a6fd10687e6bcfdfdb3337 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 12:56:29 +0000
Subject: [PATCH 05/13] fix: cast openai v3 model to LanguageModel, fix test
 assertions

---
 packages/core/src/evaluation/providers/agentv-provider.ts | 4 +++-
 .../test/evaluation/providers/agentv-provider.test.ts     | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/packages/core/src/evaluation/providers/agentv-provider.ts b/packages/core/src/evaluation/providers/agentv-provider.ts
index c9b1484a2..06abd9cfe 100644
--- a/packages/core/src/evaluation/providers/agentv-provider.ts
+++ b/packages/core/src/evaluation/providers/agentv-provider.ts
@@ -31,7 +31,9 @@ function createLanguageModel(modelString: string): LanguageModel {
 
   switch (provider) {
     case 'openai':
-      return createOpenAI()(modelName);
+      // Cast: @ai-sdk/openai may return LanguageModelV3 while the rest of the
+      // codebase uses LanguageModelV2. The runtime API is compatible.
+      return createOpenAI()(modelName) as unknown as LanguageModel;
     case 'anthropic':
       return createAnthropic()(modelName);
     case 'azure':
diff --git a/packages/core/test/evaluation/providers/agentv-provider.test.ts b/packages/core/test/evaluation/providers/agentv-provider.test.ts
index f2c66a523..8670f4ec3 100644
--- a/packages/core/test/evaluation/providers/agentv-provider.test.ts
+++ b/packages/core/test/evaluation/providers/agentv-provider.test.ts
@@ -68,7 +68,7 @@ describe('AgentvProvider', () => {
     });
     const model = provider.asLanguageModel();
     expect(model).toBeDefined();
-    expect(model.modelId).toBe('gpt-5-mini');
+    expect((model as any).modelId).toBe('gpt-5-mini');
   });
 
   it('asLanguageModel() works with anthropic model strings', () => {
@@ -78,7 +78,7 @@ describe('AgentvProvider', () => {
     });
     const model = provider.asLanguageModel();
     expect(model).toBeDefined();
-    expect(model.modelId).toBe('claude-sonnet-4-20250514');
+    expect((model as any).modelId).toBe('claude-sonnet-4-20250514');
   });
 
   it('asLanguageModel() works with google model strings', () => {
@@ -88,7 +88,7 @@ describe('AgentvProvider', () => {
     });
     const model = provider.asLanguageModel();
     expect(model).toBeDefined();
-    expect(model.modelId).toBe('gemini-2.5-flash');
+    expect((model as any).modelId).toBe('gemini-2.5-flash');
   });
 
   it('asLanguageModel() works with azure model strings', () => {
@@ -98,7 +98,7 @@ describe('AgentvProvider', () => {
     });
     const model = provider.asLanguageModel();
     expect(model).toBeDefined();
-    expect(model.modelId).toBe('gpt-4o-deployment');
+    expect((model as any).modelId).toBe('gpt-4o-deployment');
   });
 
   it('throws for unsupported provider prefix', () => {

From 4f8f9f0bda18e0a244b917ccb1b4c3a32fd0bca3 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 13:14:27 +0000
Subject: [PATCH 06/13] feat: absorb agent-judge into llm-judge with
 auto-detection

Remove agent-judge as a separate evaluator type. LlmJudgeEvaluator now
auto-detects mode based on the resolved judge provider:
- LLM providers (azure, anthropic, gemini): structured JSON mode
- Agent providers (claude-cli, copilot, etc.): delegate mode
- agentv provider: built-in AI SDK agent mode with filesystem tools

Closes #614
---
 .../src/evaluation/evaluators/agent-judge.ts  |  598 ---
 .../core/src/evaluation/evaluators/index.ts   |    3 -
 .../src/evaluation/evaluators/llm-judge.ts    |  674 ++-
 .../evaluation/loaders/evaluator-parser.ts    |   33 +-
 .../evaluation/registry/builtin-evaluators.ts |   55 +-
 packages/core/src/evaluation/types.ts         |   35 +-
 .../evaluation/validation/eval-file.schema.ts |    2 +
 packages/eval/src/assertion.ts                |    2 -
 .../references/eval-schema.json               | 3870 ++++++++++++++---
 9 files changed, 3865 insertions(+), 1407 deletions(-)
 delete mode 100644 packages/core/src/evaluation/evaluators/agent-judge.ts

diff --git a/packages/core/src/evaluation/evaluators/agent-judge.ts b/packages/core/src/evaluation/evaluators/agent-judge.ts
deleted file mode 100644
index 2dc00f769..000000000
--- a/packages/core/src/evaluation/evaluators/agent-judge.ts
+++ /dev/null
@@ -1,598 +0,0 @@
-import fs from 'node:fs/promises';
-import path from 'node:path';
-
-import { generateText, stepCountIs, tool } from 'ai';
-import { z } from 'zod';
-
-import { extractLastAssistantContent } from '../providers/types.js';
-import type { Provider } from '../providers/types.js';
-import { TEMPLATE_VARIABLES } from '../template-variables.js';
-import type { JsonObject, RubricItem } from '../types.js';
-import {
-  buildOutputSchema,
-  buildRubricOutputSchema,
-  calculateRubricScore,
-  freeformEvaluationSchema,
-  rubricEvaluationSchema,
-  substituteVariables,
-} from './llm-judge.js';
-import { clampScore, isNonEmptyString, parseJsonFromText, scoreToVerdict } from './scoring.js';
-import type { EvaluationContext, EvaluationScore, Evaluator } from './types.js';
-
-const DEFAULT_MAX_STEPS = 10;
-const MAX_STEPS_LIMIT = 50;
-const MAX_FILE_SIZE = 50 * 1024; // 50KB
-const MAX_SEARCH_MATCHES = 20;
-
-/**
- * Directories/patterns to skip during file search.
- */
-const SEARCH_SKIP_DIRS = new Set([
-  'node_modules',
-  '.git',
-  '.next',
-  'dist',
-  '__pycache__',
-  '.cache',
-]);
-
-/**
- * Binary file extensions to skip during search.
- */
-const BINARY_EXTENSIONS = new Set([
-  '.png',
-  '.jpg',
-  '.jpeg',
-  '.gif',
-  '.ico',
-  '.svg',
-  '.woff',
-  '.woff2',
-  '.ttf',
-  '.eot',
-  '.mp3',
-  '.mp4',
-  '.wav',
-  '.zip',
-  '.tar',
-  '.gz',
-  '.pdf',
-  '.exe',
-  '.dll',
-  '.so',
-  '.dylib',
-]);
-
-export interface AgentJudgeEvaluatorOptions {
-  readonly resolveJudgeProvider: (ctx: EvaluationContext) => Promise<Provider | undefined>;
-  readonly maxSteps?: number;
-  readonly temperature?: number;
-  readonly evaluatorTemplate?: string;
-  readonly judgeTargetProvider?: Provider;
-}
-
-export class AgentJudgeEvaluator implements Evaluator {
-  readonly kind = 'agent-judge';
-
-  private readonly resolveJudgeProvider: (ctx: EvaluationContext) => Promise<Provider | undefined>;
-  private readonly maxSteps: number;
-  private readonly temperature: number;
-  private readonly evaluatorTemplate?: string;
-  private readonly judgeTargetProvider?: Provider;
-
-  constructor(options: AgentJudgeEvaluatorOptions) {
-    this.resolveJudgeProvider = options.resolveJudgeProvider;
-    this.maxSteps = Math.min(options.maxSteps ?? DEFAULT_MAX_STEPS, MAX_STEPS_LIMIT);
-    this.temperature = options.temperature ?? 0;
-    this.evaluatorTemplate = options.evaluatorTemplate;
-    this.judgeTargetProvider = options.judgeTargetProvider;
-  }
-
-  async evaluate(context: EvaluationContext): Promise<EvaluationScore> {
-    if (this.judgeTargetProvider) {
-      return this.evaluateWithJudgeTarget(context);
-    }
-    return this.evaluateBuiltIn(context);
-  }
-
-  /**
-   * Built-in mode: Uses Vercel AI SDK generateText() with sandboxed filesystem tools.
-   */
-  private async evaluateBuiltIn(context: EvaluationContext): Promise<EvaluationScore> {
-    const judgeProvider = await this.resolveJudgeProvider(context);
-    if (!judgeProvider) {
-      throw new Error('No judge provider available for agent-judge evaluation');
-    }
-
-    const model = judgeProvider.asLanguageModel?.();
-    if (!model) {
-      throw new Error(
-        `Judge provider '${judgeProvider.targetName}' does not support asLanguageModel() — required for built-in agent-judge mode`,
-      );
-    }
-
-    const workspacePath = context.workspacePath;
-    if (!workspacePath) {
-      throw new Error(
-        'agent-judge evaluator requires a workspace_template target (workspacePath is not set)',
-      );
-    }
-
-    const systemPrompt = this.buildSystemPrompt(context);
-    const userPrompt = this.buildUserPrompt(context);
-
-    const config = context.evaluator;
-    const rubrics = config?.type === 'agent-judge' ? config.rubrics : undefined;
-
-    const fsTools = createFilesystemTools(workspacePath);
-
-    const evaluatorRawRequest: JsonObject = {
-      mode: 'built-in',
-      systemPrompt,
-      userPrompt,
-      target: judgeProvider.targetName,
-      maxSteps: this.maxSteps,
-    };
-
-    try {
-      const { text, steps } = await generateText({
-        model,
-        system: systemPrompt,
-        prompt: userPrompt,
-        tools: fsTools,
-        stopWhen: stepCountIs(this.maxSteps),
-        temperature: this.temperature,
-      });
-
-      const toolCallCount = steps.reduce((count, step) => count + (step.toolCalls?.length ?? 0), 0);
-
-      const details: JsonObject = {
-        mode: 'built-in',
-        steps: steps.length,
-        tool_calls: toolCallCount,
-      };
-
-      return this.parseResult(text, rubrics, evaluatorRawRequest, details);
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      return {
-        score: 0,
-        verdict: 'fail',
-        hits: [],
-        misses: [`agent-judge built-in evaluation failed: ${message}`],
-        expectedAspectCount: 1,
-        evaluatorRawRequest,
-        details: { mode: 'built-in', error: message },
-      };
-    }
-  }
-
-  /**
-   * Judge target mode: Delegates to an external agent provider via Provider.invoke().
-   */
-  private async evaluateWithJudgeTarget(context: EvaluationContext): Promise<EvaluationScore> {
-    const provider = this.judgeTargetProvider as Provider;
-
-    const workspacePath = context.workspacePath;
-    const prompt = this.buildDelegatedPrompt(context);
-
-    const evaluatorRawRequest: JsonObject = {
-      mode: 'judge_target',
-      judge_target: provider.targetName,
-      prompt,
-    };
-
-    try {
-      const response = await provider.invoke({
-        question: prompt,
-        cwd: workspacePath,
-        evalCaseId: context.evalCase.id,
-        attempt: context.attempt,
-      });
-
-      const assistantContent = extractLastAssistantContent(response.output);
-      if (!assistantContent) {
-        return {
-          score: 0,
-          verdict: 'fail',
-          hits: [],
-          misses: ['agent-judge judge_target returned no assistant response'],
-          expectedAspectCount: 1,
-          evaluatorRawRequest,
-          details: { mode: 'judge_target', judge_target: provider.targetName },
-        };
-      }
-
-      const config = context.evaluator;
-      const rubrics = config?.type === 'agent-judge' ? config.rubrics : undefined;
-
-      const details: JsonObject = {
-        mode: 'judge_target',
-        judge_target: provider.targetName,
-      };
-
-      return this.parseResult(assistantContent, rubrics, evaluatorRawRequest, details);
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      return {
-        score: 0,
-        verdict: 'fail',
-        hits: [],
-        misses: [`agent-judge judge_target evaluation failed: ${message}`],
-        expectedAspectCount: 1,
-        evaluatorRawRequest,
-        details: {
-          mode: 'judge_target',
-          judge_target: provider.targetName,
-          error: message,
-        },
-      };
-    }
-  }
-
-  /**
-   * Parse the agent's response text into an EvaluationScore.
-   * Supports both freeform and rubric modes.
-   */
-  private parseResult(
-    text: string,
-    rubrics: readonly RubricItem[] | undefined,
-    evaluatorRawRequest: JsonObject,
-    details: JsonObject,
-  ): EvaluationScore {
-    try {
-      const parsed = parseJsonFromText(text);
-
-      if (rubrics && rubrics.length > 0) {
-        const data = rubricEvaluationSchema.parse(parsed);
-        const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
-        return {
-          score,
-          verdict,
-          hits,
-          misses,
-          expectedAspectCount: rubrics.length,
-          reasoning: data.overall_reasoning,
-          evaluatorRawRequest,
-          details,
-        };
-      }
-
-      const data = freeformEvaluationSchema.parse(parsed);
-      const score = clampScore(data.score);
-      const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
-      const misses = Array.isArray(data.misses)
-        ? data.misses.filter(isNonEmptyString).slice(0, 4)
-        : [];
-
-      return {
-        score,
-        verdict: scoreToVerdict(score),
-        hits,
-        misses,
-        expectedAspectCount: Math.max(hits.length + misses.length, 1),
-        reasoning: data.reasoning,
-        evaluatorRawRequest,
-        details,
-      };
-    } catch {
-      return {
-        score: 0,
-        verdict: 'fail',
-        hits: [],
-        misses: ['Failed to parse agent-judge response as valid evaluation JSON'],
-        expectedAspectCount: 1,
-        evaluatorRawRequest,
-        details,
-      };
-    }
-  }
-
-  /**
-   * Build system prompt for built-in mode.
-   * Includes output format instructions.
-   */
-  private buildSystemPrompt(context: EvaluationContext): string {
-    const config = context.evaluator;
-    const rubrics = config?.type === 'agent-judge' ? config.rubrics : undefined;
-
-    const parts: string[] = [
-      'You are an expert evaluator with access to the workspace filesystem.',
-      'Use the provided tools to investigate the workspace and verify the criteria are met.',
-      'Thoroughly examine relevant files before making your assessment.',
-      '',
-    ];
-
-    if (rubrics && rubrics.length > 0) {
-      parts.push(buildRubricOutputSchema());
-    } else {
-      parts.push(buildOutputSchema());
-    }
-
-    return parts.join('\n');
-  }
-
-  /**
-   * Build user prompt for built-in mode.
-   * Uses custom template if provided, otherwise builds default prompt.
-   */
-  private buildUserPrompt(context: EvaluationContext): string {
-    const formattedQuestion =
-      context.promptInputs.question && context.promptInputs.question.trim().length > 0
-        ? context.promptInputs.question
-        : context.evalCase.question;
-
-    const variables: Record<string, string> = {
-      [TEMPLATE_VARIABLES.ANSWER]: context.candidate.trim(),
-      [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? '').trim(),
-      [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
-      [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
-      [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? '',
-    };
-
-    if (this.evaluatorTemplate) {
-      return substituteVariables(this.evaluatorTemplate, variables);
-    }
-
-    const config = context.evaluator;
-    const rubrics = config?.type === 'agent-judge' ? config.rubrics : undefined;
-
-    const parts: string[] = [
-      'Evaluate the candidate answer by investigating the workspace.',
-      '',
-      '[[ ## question ## ]]',
-      formattedQuestion,
-      '',
-      '[[ ## criteria ## ]]',
-      context.evalCase.criteria,
-      '',
-    ];
-
-    if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
-      parts.push('[[ ## reference_answer ## ]]', context.evalCase.reference_answer, '');
-    }
-
-    parts.push('[[ ## answer ## ]]', context.candidate, '');
-
-    if (context.fileChanges) {
-      parts.push('[[ ## file_changes ## ]]', context.fileChanges, '');
-    }
-
-    if (rubrics && rubrics.length > 0) {
-      parts.push('[[ ## rubrics ## ]]');
-      for (const rubric of rubrics) {
-        const requiredLabel = rubric.required ? ' (REQUIRED)' : '';
-        const weightLabel = rubric.weight !== 1.0 ? ` (weight: ${rubric.weight})` : '';
-        parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.outcome}`);
-      }
-      parts.push(
-        '',
-        'For each rubric, investigate the workspace to determine if it is satisfied. Provide brief reasoning.',
-      );
-    } else {
-      parts.push(
-        'Investigate the workspace to verify the criteria. Provide a score between 0.0 and 1.0.',
-      );
-    }
-
-    return parts.join('\n');
-  }
-
-  /**
-   * Build the full evaluation prompt for judge target mode (delegation).
-   * Combines task context, criteria, candidate info, and output format instructions.
-   */
-  private buildDelegatedPrompt(context: EvaluationContext): string {
-    const formattedQuestion =
-      context.promptInputs.question && context.promptInputs.question.trim().length > 0
-        ? context.promptInputs.question
-        : context.evalCase.question;
-
-    const config = context.evaluator;
-    const rubrics = config?.type === 'agent-judge' ? config.rubrics : undefined;
-
-    if (this.evaluatorTemplate) {
-      const variables: Record<string, string> = {
-        [TEMPLATE_VARIABLES.ANSWER]: context.candidate.trim(),
-        [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? '').trim(),
-        [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
-        [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
-        [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? '',
-      };
-      const customPrompt = substituteVariables(this.evaluatorTemplate, variables);
-
-      const outputSchema =
-        rubrics && rubrics.length > 0 ? buildRubricOutputSchema() : buildOutputSchema();
-
-      return `${customPrompt}\n\n${outputSchema}`;
-    }
-
-    const parts: string[] = [
-      'You are an expert evaluator. Investigate the workspace to verify the criteria are met.',
-      '',
-      '[[ ## question ## ]]',
-      formattedQuestion,
-      '',
-      '[[ ## criteria ## ]]',
-      context.evalCase.criteria,
-      '',
-    ];
-
-    if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
-      parts.push('[[ ## reference_answer ## ]]', context.evalCase.reference_answer, '');
-    }
-
-    parts.push('[[ ## answer ## ]]', context.candidate, '');
-
-    if (context.fileChanges) {
-      parts.push('[[ ## file_changes ## ]]', context.fileChanges, '');
-    }
-
-    if (rubrics && rubrics.length > 0) {
-      parts.push('[[ ## rubrics ## ]]');
-      for (const rubric of rubrics) {
-        const requiredLabel = rubric.required ? ' (REQUIRED)' : '';
-        const weightLabel = rubric.weight !== 1.0 ? ` (weight: ${rubric.weight})` : '';
-        parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.outcome}`);
-      }
-      parts.push('');
-      parts.push(buildRubricOutputSchema());
-    } else {
-      parts.push(buildOutputSchema());
-    }
-
-    return parts.join('\n');
-  }
-}
-
-// ---------------------------------------------------------------------------
-// Sandboxed filesystem tools for built-in mode
-// ---------------------------------------------------------------------------
-
-/**
- * Resolve a relative path within the sandbox, preventing path traversal.
- * Returns the absolute path if valid, or throws if the path escapes the sandbox.
- */
-function resolveSandboxed(basePath: string, relativePath: string): string {
-  const resolved = path.resolve(basePath, relativePath);
-  if (!resolved.startsWith(basePath + path.sep) && resolved !== basePath) {
-    throw new Error(`Path '${relativePath}' is outside the workspace`);
-  }
-  return resolved;
-}
-
-/**
- * Create sandboxed filesystem tools for the AI SDK agent loop.
- */
-function createFilesystemTools(workspacePath: string) {
-  return {
-    list_files: tool({
-      description:
-        'List files and directories at a relative path within the workspace. Returns names only (single level, no recursion).',
-      inputSchema: z.object({
-        path: z.string().describe('Relative path within workspace (use "." for root)').default('.'),
-      }),
-      execute: async (input: { path: string }) => {
-        try {
-          const resolved = resolveSandboxed(workspacePath, input.path);
-          const entries = await fs.readdir(resolved, { withFileTypes: true });
-          return entries
-            .map((e) => ({
-              name: e.name,
-              type: e.isDirectory() ? 'directory' : 'file',
-            }))
-            .slice(0, 100);
-        } catch (error) {
-          return { error: error instanceof Error ? error.message : String(error) };
-        }
-      },
-    }),
-
-    read_file: tool({
-      description:
-        'Read the content of a file at a relative path within the workspace. Large files are truncated at 50KB.',
-      inputSchema: z.object({
-        path: z.string().describe('Relative path to file within workspace'),
-      }),
-      execute: async (input: { path: string }) => {
-        try {
-          const resolved = resolveSandboxed(workspacePath, input.path);
-          const stat = await fs.stat(resolved);
-          if (stat.isDirectory()) {
-            return { error: `'${input.path}' is a directory, not a file` };
-          }
-          const buffer = Buffer.alloc(Math.min(stat.size, MAX_FILE_SIZE));
-          const fd = await fs.open(resolved, 'r');
-          try {
-            await fd.read(buffer, 0, buffer.length, 0);
-          } finally {
-            await fd.close();
-          }
-          const content = buffer.toString('utf-8');
-          const truncated = stat.size > MAX_FILE_SIZE;
-          return { content, truncated, size: stat.size };
-        } catch (error) {
-          return { error: error instanceof Error ? error.message : String(error) };
-        }
-      },
-    }),
-
-    search_files: tool({
-      description:
-        'Search for a regex pattern across files in the workspace. Returns up to 20 matches. Skips binary files and node_modules/.git.',
-      inputSchema: z.object({
-        pattern: z.string().describe('Regex pattern to search for'),
-        path: z.string().describe('Relative path to search within (use "." for root)').default('.'),
-      }),
-      execute: async (input: { pattern: string; path: string }) => {
-        try {
-          const resolved = resolveSandboxed(workspacePath, input.path);
-          const regex = new RegExp(input.pattern, 'gi');
-          const matches: Array<{ file: string; line: number; text: string }> = [];
-
-          await searchDirectory(resolved, workspacePath, regex, matches);
-
-          return { matches, total: matches.length };
-        } catch (error) {
-          return { error: error instanceof Error ? error.message : String(error) };
-        }
-      },
-    }),
-  };
-}
-
-/**
- * Recursively search a directory for regex matches.
- */
-async function searchDirectory(
-  dirPath: string,
-  workspacePath: string,
-  regex: RegExp,
-  matches: Array<{ file: string; line: number; text: string }>,
-): Promise<void> {
-  if (matches.length >= MAX_SEARCH_MATCHES) return;
-
-  let entries: import('node:fs').Dirent[];
-  try {
-    entries = await fs.readdir(dirPath, { withFileTypes: true });
-  } catch {
-    return;
-  }
-
-  for (const entry of entries) {
-    if (matches.length >= MAX_SEARCH_MATCHES) return;
-
-    if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
-
-    const fullPath = path.join(dirPath, entry.name);
-
-    if (entry.isDirectory()) {
-      await searchDirectory(fullPath, workspacePath, regex, matches);
-    } else if (entry.isFile()) {
-      const ext = path.extname(entry.name).toLowerCase();
-      if (BINARY_EXTENSIONS.has(ext)) continue;
-
-      try {
-        const stat = await fs.stat(fullPath);
-        if (stat.size > MAX_FILE_SIZE) continue;
-
-        const content = await fs.readFile(fullPath, 'utf-8');
-        const lines = content.split('\n');
-
-        for (let i = 0; i < lines.length; i++) {
-          if (matches.length >= MAX_SEARCH_MATCHES) return;
-          regex.lastIndex = 0;
-          if (regex.test(lines[i])) {
-            matches.push({
-              file: path.relative(workspacePath, fullPath),
-              line: i + 1,
-              text: lines[i].substring(0, 200),
-            });
-          }
-        }
-      } catch {
-        // Skip unreadable files
-      }
-    }
-  }
-}
diff --git a/packages/core/src/evaluation/evaluators/index.ts b/packages/core/src/evaluation/evaluators/index.ts
index 157ff7c99..a64705fbe 100644
--- a/packages/core/src/evaluation/evaluators/index.ts
+++ b/packages/core/src/evaluation/evaluators/index.ts
@@ -52,9 +52,6 @@ export {
 } from './llm-judge.js';
 export type { LlmJudgeEvaluatorOptions } from './llm-judge.js';
 
-export { AgentJudgeEvaluator } from './agent-judge.js';
-export type { AgentJudgeEvaluatorOptions } from './agent-judge.js';
-
 export { SkillTriggerEvaluator } from './skill-trigger.js';
 
 export { assembleLlmJudgePrompt } from './llm-judge-prompt.js';
diff --git a/packages/core/src/evaluation/evaluators/llm-judge.ts b/packages/core/src/evaluation/evaluators/llm-judge.ts
index 46125f3e7..91e6578bc 100644
--- a/packages/core/src/evaluation/evaluators/llm-judge.ts
+++ b/packages/core/src/evaluation/evaluators/llm-judge.ts
@@ -1,14 +1,65 @@
-import { generateText } from 'ai';
+import fs from 'node:fs/promises';
+import path from 'node:path';
+
+import { generateText, stepCountIs, tool } from 'ai';
 import { z } from 'zod';
 
 import type { Provider, ProviderResponse } from '../providers/types.js';
-import { extractLastAssistantContent } from '../providers/types.js';
+import { extractLastAssistantContent, isAgentProvider } from '../providers/types.js';
 import { TEMPLATE_VARIABLES } from '../template-variables.js';
 import type { TokenUsage } from '../trace.js';
 import type { JsonObject, RubricItem } from '../types.js';
 import { clampScore, isNonEmptyString, parseJsonFromText, scoreToVerdict } from './scoring.js';
 import type { EvaluationContext, EvaluationScore, Evaluator } from './types.js';
 
+// ---------------------------------------------------------------------------
+// Constants for built-in agent mode (filesystem tools)
+// ---------------------------------------------------------------------------
+
+const DEFAULT_MAX_STEPS = 10;
+const MAX_STEPS_LIMIT = 50;
+const MAX_FILE_SIZE = 50 * 1024; // 50KB
+const MAX_SEARCH_MATCHES = 20;
+
+/**
+ * Directories/patterns to skip during file search.
+ */
+const SEARCH_SKIP_DIRS = new Set([
+  'node_modules',
+  '.git',
+  '.next',
+  'dist',
+  '__pycache__',
+  '.cache',
+]);
+
+/**
+ * Binary file extensions to skip during search.
+ */
+const BINARY_EXTENSIONS = new Set([
+  '.png',
+  '.jpg',
+  '.jpeg',
+  '.gif',
+  '.ico',
+  '.svg',
+  '.woff',
+  '.woff2',
+  '.ttf',
+  '.eot',
+  '.mp3',
+  '.mp4',
+  '.wav',
+  '.zip',
+  '.tar',
+  '.gz',
+  '.pdf',
+  '.exe',
+  '.dll',
+  '.so',
+  '.dylib',
+]);
+
 /**
  * Default evaluator template for the user prompt (variables will be substituted).
  * Custom evaluators can override this via evaluatorTemplate option.
@@ -38,6 +89,8 @@ export interface LlmJudgeEvaluatorOptions {
   readonly maxOutputTokens?: number;
   readonly temperature?: number;
   readonly evaluatorTemplate?: string;
+  readonly maxSteps?: number;
+  readonly judgeTargetProvider?: Provider;
 }
 
 const freeformEvaluationSchema = z.object({
@@ -82,20 +135,40 @@ export class LlmJudgeEvaluator implements Evaluator {
   private readonly maxOutputTokens?: number;
   private readonly temperature?: number;
   private readonly evaluatorTemplate?: string;
+  private readonly maxSteps: number;
+  private readonly judgeTargetProvider?: Provider;
 
   constructor(options: LlmJudgeEvaluatorOptions) {
     this.resolveJudgeProvider = options.resolveJudgeProvider;
     this.maxOutputTokens = options.maxOutputTokens;
     this.temperature = options.temperature;
     this.evaluatorTemplate = options.evaluatorTemplate;
+    this.maxSteps = Math.min(options.maxSteps ?? DEFAULT_MAX_STEPS, MAX_STEPS_LIMIT);
+    this.judgeTargetProvider = options.judgeTargetProvider;
   }
 
   async evaluate(context: EvaluationContext): Promise<EvaluationScore> {
+    // Delegate mode: judge target provider is an agent provider — send prompt via invoke()
+    if (this.judgeTargetProvider) {
+      return this.evaluateWithJudgeTarget(context);
+    }
+
     const judgeProvider = await this.resolveJudgeProvider(context);
     if (!judgeProvider) {
       throw new Error('No judge provider available for LLM grading');
     }
 
+    // Built-in agent mode: agentv provider → AI SDK generateText with filesystem tools
+    if (judgeProvider.kind === 'agentv') {
+      return this.evaluateBuiltIn(context, judgeProvider);
+    }
+
+    // Delegate mode: resolved provider is an agent provider → send prompt via invoke()
+    if (isAgentProvider(judgeProvider)) {
+      return this.evaluateWithDelegatedAgent(context, judgeProvider);
+    }
+
+    // LLM mode: structured JSON evaluation
     const config = context.evaluator;
     if (config?.type === 'llm-judge' && config.rubrics && config.rubrics.length > 0) {
       return this.evaluateWithRubrics(context, judgeProvider, config.rubrics);
@@ -104,6 +177,10 @@ export class LlmJudgeEvaluator implements Evaluator {
     return this.evaluateFreeform(context, judgeProvider);
   }
 
+  // ---------------------------------------------------------------------------
+  // LLM mode (existing)
+  // ---------------------------------------------------------------------------
+
   private async evaluateFreeform(
     context: EvaluationContext,
     judgeProvider: Provider,
@@ -177,7 +254,7 @@ export class LlmJudgeEvaluator implements Evaluator {
         tokenUsage,
       };
     } catch (e: unknown) {
-      // Judge parse failure → skip (not silent zero).
+      // Judge parse failure -> skip (not silent zero).
       // Signals infrastructure error to downstream consumers, excluded from score averages.
       const message = e instanceof Error ? e.message : String(e);
       const evalName = context.evaluator?.name ?? 'llm-judge';
@@ -314,6 +391,437 @@ export class LlmJudgeEvaluator implements Evaluator {
     }
   }
 
+  // ---------------------------------------------------------------------------
+  // Built-in agent mode (agentv provider — AI SDK generateText with filesystem tools)
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Built-in mode: Uses Vercel AI SDK generateText() with sandboxed filesystem tools.
+   */
+  private async evaluateBuiltIn(
+    context: EvaluationContext,
+    judgeProvider: Provider,
+  ): Promise<EvaluationScore> {
+    const model = judgeProvider.asLanguageModel?.();
+    if (!model) {
+      throw new Error(
+        `Judge provider '${judgeProvider.targetName}' does not support asLanguageModel() — required for built-in agent mode`,
+      );
+    }
+
+    const workspacePath = context.workspacePath;
+    if (!workspacePath) {
+      throw new Error(
+        'llm-judge built-in agent mode requires a workspace_template target (workspacePath is not set)',
+      );
+    }
+
+    const systemPrompt = this.buildAgentSystemPrompt(context);
+    const userPrompt = this.buildAgentUserPrompt(context);
+
+    const config = context.evaluator;
+    const rubrics = config?.type === 'llm-judge' ? config.rubrics : undefined;
+
+    const fsTools = createFilesystemTools(workspacePath);
+
+    const evaluatorRawRequest: JsonObject = {
+      mode: 'built-in',
+      systemPrompt,
+      userPrompt,
+      target: judgeProvider.targetName,
+      maxSteps: this.maxSteps,
+    };
+
+    try {
+      const { text, steps } = await generateText({
+        model,
+        system: systemPrompt,
+        prompt: userPrompt,
+        tools: fsTools,
+        stopWhen: stepCountIs(this.maxSteps),
+        temperature: this.temperature ?? 0,
+      });
+
+      const toolCallCount = steps.reduce((count, step) => count + (step.toolCalls?.length ?? 0), 0);
+
+      const details: JsonObject = {
+        mode: 'built-in',
+        steps: steps.length,
+        tool_calls: toolCallCount,
+      };
+
+      return this.parseAgentResult(text, rubrics, evaluatorRawRequest, details);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return {
+        score: 0,
+        verdict: 'fail',
+        hits: [],
+        misses: [`llm-judge built-in evaluation failed: ${message}`],
+        expectedAspectCount: 1,
+        evaluatorRawRequest,
+        details: { mode: 'built-in', error: message },
+      };
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Delegate mode (agent provider — send prompt via Provider.invoke())
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Judge target mode: Delegates to an explicit judgeTargetProvider via Provider.invoke().
+   */
+  private async evaluateWithJudgeTarget(context: EvaluationContext): Promise<EvaluationScore> {
+    const provider = this.judgeTargetProvider as Provider;
+
+    const workspacePath = context.workspacePath;
+    const prompt = this.buildDelegatedPrompt(context);
+
+    const evaluatorRawRequest: JsonObject = {
+      mode: 'judge_target',
+      judge_target: provider.targetName,
+      prompt,
+    };
+
+    try {
+      const response = await provider.invoke({
+        question: prompt,
+        cwd: workspacePath,
+        evalCaseId: context.evalCase.id,
+        attempt: context.attempt,
+      });
+
+      const assistantContent = extractLastAssistantContent(response.output);
+      if (!assistantContent) {
+        return {
+          score: 0,
+          verdict: 'fail',
+          hits: [],
+          misses: ['llm-judge judge_target returned no assistant response'],
+          expectedAspectCount: 1,
+          evaluatorRawRequest,
+          details: { mode: 'judge_target', judge_target: provider.targetName },
+        };
+      }
+
+      const config = context.evaluator;
+      const rubrics = config?.type === 'llm-judge' ? config.rubrics : undefined;
+
+      const details: JsonObject = {
+        mode: 'judge_target',
+        judge_target: provider.targetName,
+      };
+
+      return this.parseAgentResult(assistantContent, rubrics, evaluatorRawRequest, details);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return {
+        score: 0,
+        verdict: 'fail',
+        hits: [],
+        misses: [`llm-judge judge_target evaluation failed: ${message}`],
+        expectedAspectCount: 1,
+        evaluatorRawRequest,
+        details: {
+          mode: 'judge_target',
+          judge_target: provider.targetName,
+          error: message,
+        },
+      };
+    }
+  }
+
+  /**
+   * Delegate mode: the resolved judge is an agent provider, so the complete
+   * evaluation prompt is sent through invoke() and the last assistant message
+   * is parsed as the verdict. A missing assistant reply, or an error thrown
+   * while invoking or parsing, produces a zero-score 'fail' result.
+   */
+  private async evaluateWithDelegatedAgent(
+    context: EvaluationContext,
+    judgeProvider: Provider,
+  ): Promise<EvaluationScore> {
+    const delegatedPrompt = this.buildDelegatedPrompt(context);
+    const targetName = judgeProvider.targetName;
+    const evaluatorRawRequest: JsonObject = {
+      mode: 'judge_target',
+      judge_target: targetName,
+      prompt: delegatedPrompt,
+    };
+
+    // Every failure path of this mode reports the same zero-score shape.
+    const failWith = (miss: string, details: JsonObject): EvaluationScore => ({
+      score: 0,
+      verdict: 'fail',
+      hits: [],
+      misses: [miss],
+      expectedAspectCount: 1,
+      evaluatorRawRequest,
+      details,
+    });
+
+    try {
+      // The workspace path is handed to the agent as its cwd.
+      const { output } = await judgeProvider.invoke({
+        question: delegatedPrompt,
+        cwd: context.workspacePath,
+        evalCaseId: context.evalCase.id,
+        attempt: context.attempt,
+      });
+
+      const assistantContent = extractLastAssistantContent(output);
+      if (!assistantContent) {
+        return failWith('llm-judge delegate returned no assistant response', {
+          mode: 'judge_target',
+          judge_target: targetName,
+        });
+      }
+
+      // Rubrics are only read when the evaluator config is an llm-judge config.
+      const judgeConfig = context.evaluator;
+      return this.parseAgentResult(
+        assistantContent,
+        judgeConfig?.type === 'llm-judge' ? judgeConfig.rubrics : undefined,
+        evaluatorRawRequest,
+        { mode: 'judge_target', judge_target: targetName },
+      );
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return failWith(`llm-judge delegate evaluation failed: ${message}`, {
+        mode: 'judge_target',
+        judge_target: targetName,
+        error: message,
+      });
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Prompt builders for agent modes
+  // ---------------------------------------------------------------------------
+
+  /**
+   * System prompt for the built-in agent mode: a fixed evaluator preamble
+   * followed by the output-format instructions.
+   *
+   * The rubric output schema is used when the llm-judge config carries at
+   * least one rubric; otherwise the freeform output schema is used.
+   */
+  private buildAgentSystemPrompt(context: EvaluationContext): string {
+    const judgeConfig = context.evaluator;
+    const rubricItems = judgeConfig?.type === 'llm-judge' ? judgeConfig.rubrics : undefined;
+
+    // Pick the schema first so the prompt can be assembled in one literal.
+    const outputSchema =
+      (rubricItems?.length ?? 0) > 0 ? buildRubricOutputSchema() : buildOutputSchema();
+
+    return [
+      'You are an expert evaluator with access to the workspace filesystem.',
+      'Use the provided tools to investigate the workspace and verify the criteria are met.',
+      'Thoroughly examine relevant files before making your assessment.',
+      '',
+      outputSchema,
+    ].join('\n');
+  }
+
+  /**
+   * User prompt for the built-in agent mode.
+   * A custom evaluator template, when configured, is filled with the template
+   * variables and returned as-is; otherwise a sectioned default prompt is built.
+   */
+  private buildAgentUserPrompt(context: EvaluationContext): string {
+    const { evalCase, candidate, fileChanges } = context;
+
+    // Prefer the formatted question from promptInputs unless it is blank.
+    const promptQuestion = context.promptInputs.question;
+    const question = promptQuestion?.trim() ? promptQuestion : evalCase.question;
+
+    const templateValues: Record<string, string> = {
+      [TEMPLATE_VARIABLES.ANSWER]: candidate.trim(),
+      [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (evalCase.reference_answer ?? '').trim(),
+      [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
+      [TEMPLATE_VARIABLES.QUESTION]: question.trim(),
+      [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? '',
+    };
+    // A custom template fully replaces the default prompt below.
+    if (this.evaluatorTemplate) {
+      return substituteVariables(this.evaluatorTemplate, templateValues);
+    }
+
+    const judgeConfig = context.evaluator;
+    const rubricItems = judgeConfig?.type === 'llm-judge' ? judgeConfig.rubrics : undefined;
+    const referenceAnswer = evalCase.reference_answer;
+
+    // Optional sections collapse to nothing when their source value is empty.
+    const sections: string[] = [
+      'Evaluate the candidate answer by investigating the workspace.',
+      '',
+      '[[ ## question ## ]]',
+      question,
+      '',
+      '[[ ## criteria ## ]]',
+      evalCase.criteria,
+      '',
+      ...(referenceAnswer?.trim() ? ['[[ ## reference_answer ## ]]', referenceAnswer, ''] : []),
+      '[[ ## answer ## ]]',
+      candidate,
+      '',
+      ...(fileChanges ? ['[[ ## file_changes ## ]]', fileChanges, ''] : []),
+    ];
+
+    if (rubricItems && rubricItems.length > 0) {
+      const rubricLines = rubricItems.map((item) => {
+        const requiredLabel = item.required ? ' (REQUIRED)' : '';
+        const weightLabel = item.weight !== 1.0 ? ` (weight: ${item.weight})` : '';
+        return `- [${item.id}]${requiredLabel}${weightLabel}: ${item.outcome}`;
+      });
+      sections.push(
+        '[[ ## rubrics ## ]]',
+        ...rubricLines,
+        '',
+        'For each rubric, investigate the workspace to determine if it is satisfied. Provide brief reasoning.',
+      );
+    } else {
+      sections.push(
+        'Investigate the workspace to verify the criteria. Provide a score between 0.0 and 1.0.',
+      );
+    }
+
+    return sections.join('\n');
+  }
+
+  /**
+   * Full evaluation prompt for delegate mode (agent providers).
+   * Carries the question, criteria, candidate answer and optional sections;
+   * the output-format instructions are appended on both the template and
+   * the default path.
+   */
+  private buildDelegatedPrompt(context: EvaluationContext): string {
+    const { evalCase, candidate, fileChanges } = context;
+
+    // Prefer the formatted question from promptInputs unless it is blank.
+    const promptQuestion = context.promptInputs.question;
+    const question = promptQuestion?.trim() ? promptQuestion : evalCase.question;
+
+    const judgeConfig = context.evaluator;
+    const rubricItems = judgeConfig?.type === 'llm-judge' ? judgeConfig.rubrics : undefined;
+
+    if (this.evaluatorTemplate) {
+      const templateValues: Record<string, string> = {
+        [TEMPLATE_VARIABLES.ANSWER]: candidate.trim(),
+        [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (evalCase.reference_answer ?? '').trim(),
+        [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
+        [TEMPLATE_VARIABLES.QUESTION]: question.trim(),
+        [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? '',
+      };
+      const customPrompt = substituteVariables(this.evaluatorTemplate, templateValues);
+      const outputSchema =
+        rubricItems && rubricItems.length > 0 ? buildRubricOutputSchema() : buildOutputSchema();
+      return `${customPrompt}\n\n${outputSchema}`;
+    }
+
+    const referenceAnswer = evalCase.reference_answer;
+    const sections: string[] = [
+      'You are an expert evaluator. Investigate the workspace to verify the criteria are met.',
+      '',
+      '[[ ## question ## ]]',
+      question,
+      '',
+      '[[ ## criteria ## ]]',
+      evalCase.criteria,
+      '',
+      ...(referenceAnswer?.trim() ? ['[[ ## reference_answer ## ]]', referenceAnswer, ''] : []),
+      '[[ ## answer ## ]]',
+      candidate,
+      '',
+      ...(fileChanges ? ['[[ ## file_changes ## ]]', fileChanges, ''] : []),
+    ];
+
+    if (rubricItems && rubricItems.length > 0) {
+      const rubricLines = rubricItems.map((item) => {
+        const requiredLabel = item.required ? ' (REQUIRED)' : '';
+        const weightLabel = item.weight !== 1.0 ? ` (weight: ${item.weight})` : '';
+        return `- [${item.id}]${requiredLabel}${weightLabel}: ${item.outcome}`;
+      });
+      sections.push(
+        '[[ ## rubrics ## ]]',
+        ...rubricLines,
+        '',
+        buildRubricOutputSchema(),
+      );
+    } else {
+      sections.push(buildOutputSchema());
+    }
+
+    return sections.join('\n');
+  }
+
+  // ---------------------------------------------------------------------------
+  // Agent result parser (shared by built-in and delegate modes)
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Parse the agent's response text into an EvaluationScore (rubric mode when
+   * rubrics are given, freeform otherwise); any error in the try block scores 0.
+   */
+  private parseAgentResult(
+    text: string,
+    rubrics: readonly RubricItem[] | undefined,
+    evaluatorRawRequest: JsonObject,
+    details: JsonObject,
+  ): EvaluationScore {
+    try {
+      const payload = parseJsonFromText(text);
+      if (!rubrics || rubrics.length === 0) {
+        // Freeform mode: validate against the freeform schema and clamp the score.
+        const freeform = freeformEvaluationSchema.parse(payload);
+        const score = clampScore(freeform.score);
+        const firstFour = (items: unknown): string[] =>
+          Array.isArray(items) ? items.filter(isNonEmptyString).slice(0, 4) : [];
+        const hits = firstFour(freeform.hits);
+        const misses = firstFour(freeform.misses);
+        return {
+          score,
+          verdict: scoreToVerdict(score),
+          hits,
+          misses,
+          expectedAspectCount: Math.max(hits.length + misses.length, 1),
+          reasoning: freeform.reasoning,
+          evaluatorRawRequest,
+          details,
+        };
+      }
+
+      // Rubric mode: validate against the rubric schema, then score via calculateRubricScore.
+      const rubricData = rubricEvaluationSchema.parse(payload);
+      const rubricScore = calculateRubricScore(rubricData, rubrics);
+      return {
+        score: rubricScore.score,
+        verdict: rubricScore.verdict,
+        hits: rubricScore.hits,
+        misses: rubricScore.misses,
+        expectedAspectCount: rubrics.length,
+        reasoning: rubricData.overall_reasoning,
+        evaluatorRawRequest,
+        details,
+      };
+    } catch {
+      return {
+        score: 0,
+        verdict: 'fail',
+        hits: [],
+        misses: ['Failed to parse llm-judge agent response as valid evaluation JSON'],
+        expectedAspectCount: 1,
+        evaluatorRawRequest,
+        details,
+      };
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // LLM mode prompt builders
+  // ---------------------------------------------------------------------------
+
   /**
    * Build prompt for score-range rubric evaluation.
    */
@@ -421,6 +929,10 @@ export class LlmJudgeEvaluator implements Evaluator {
     return parts.join('\n');
   }
 
+  // ---------------------------------------------------------------------------
+  // LLM mode retry logic
+  // ---------------------------------------------------------------------------
+
   private async runWithRetry<T>(options: {
     readonly context: EvaluationContext;
     readonly judgeProvider: Provider;
@@ -474,6 +986,10 @@ export class LlmJudgeEvaluator implements Evaluator {
   }
 }
 
+// ---------------------------------------------------------------------------
+// Output schema builders (exported for reuse)
+// ---------------------------------------------------------------------------
+
 /**
  * Build the mandatory output schema that all evaluators must follow.
  * This schema is always appended to the evaluator template.
@@ -656,3 +1172,155 @@ function calculateScoreRangeResult(
     },
   };
 }
+
+// ---------------------------------------------------------------------------
+// Sandboxed filesystem tools for built-in agent mode
+// ---------------------------------------------------------------------------
+
+/**
+ * Resolve `relativePath` against the sandbox root `basePath` and return the absolute path.
+ * Throws if the result lies outside the root; the check is lexical (symlinks are not followed).
+ */
+function resolveSandboxed(basePath: string, relativePath: string): string {
+  const resolved = path.resolve(basePath, relativePath);
+  const rel = path.relative(basePath, resolved); // '' at the root; '..'-led or absolute when outside
+  const escapes = rel === '..' || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel);
+  if (escapes) throw new Error(`Path '${relativePath}' is outside the workspace`);
+  return resolved;
+}
+
+/**
+ * Create the sandboxed filesystem tools (list_files, read_file, search_files)
+ * for the AI SDK agent loop. Each tool resolves its path argument against
+ * `workspacePath` via resolveSandboxed and reports failures as `{ error }`
+ * results instead of throwing.
+ */
+function createFilesystemTools(workspacePath: string) {
+  return {
+    list_files: tool({
+      description:
+        'List files and directories at a relative path within the workspace. Returns names only (single level, no recursion).',
+      inputSchema: z.object({
+        path: z.string().describe('Relative path within workspace (use "." for root)').default('.'),
+      }),
+      execute: async (input: { path: string }) => {
+        try {
+          const resolved = resolveSandboxed(workspacePath, input.path);
+          const entries = await fs.readdir(resolved, { withFileTypes: true });
+          return entries
+            .map((e) => ({ name: e.name, type: e.isDirectory() ? 'directory' : 'file' }))
+            .slice(0, 100);
+        } catch (error) {
+          return { error: error instanceof Error ? error.message : String(error) };
+        }
+      },
+    }),
+
+    read_file: tool({
+      description:
+        'Read the content of a file at a relative path within the workspace. Large files are truncated at 50KB.',
+      inputSchema: z.object({
+        path: z.string().describe('Relative path to file within workspace'),
+      }),
+      execute: async (input: { path: string }) => {
+        try {
+          const resolved = resolveSandboxed(workspacePath, input.path);
+          const stat = await fs.stat(resolved);
+          if (stat.isDirectory()) {
+            return { error: `'${input.path}' is a directory, not a file` };
+          }
+          const buffer = Buffer.alloc(Math.min(stat.size, MAX_FILE_SIZE));
+          const fd = await fs.open(resolved, 'r');
+          let bytesRead = 0;
+          try {
+            ({ bytesRead } = await fd.read(buffer, 0, buffer.length, 0));
+          } finally {
+            await fd.close();
+          }
+          const content = buffer.toString('utf-8', 0, bytesRead); // short reads must not yield NUL padding
+          return { content, truncated: stat.size > MAX_FILE_SIZE, size: stat.size };
+        } catch (error) {
+          return { error: error instanceof Error ? error.message : String(error) };
+        }
+      },
+    }),
+
+    search_files: tool({
+      description:
+        'Search for a regex pattern across files in the workspace. Returns up to 20 matches. Skips binary files and node_modules/.git.',
+      inputSchema: z.object({
+        pattern: z.string().describe('Regex pattern to search for'),
+        path: z.string().describe('Relative path to search within (use "." for root)').default('.'),
+      }),
+      execute: async (input: { pattern: string; path: string }) => {
+        try {
+          const resolved = resolveSandboxed(workspacePath, input.path);
+          const regex = new RegExp(input.pattern, 'gi');
+          const matches: Array<{ file: string; line: number; text: string }> = [];
+
+          await searchDirectory(resolved, workspacePath, regex, matches);
+
+          return { matches, total: matches.length };
+        } catch (error) {
+          return { error: error instanceof Error ? error.message : String(error) };
+        }
+      },
+    }),
+  };
+}
+
+/**
+ * Walk `dirPath` depth-first and record regex hits, one entry per matching line,
+ * with file paths reported relative to `workspacePath`. Stops once `matches`
+ * holds MAX_SEARCH_MATCHES entries; unreadable directories and files are skipped.
+ */
+async function searchDirectory(
+  dirPath: string,
+  workspacePath: string,
+  regex: RegExp,
+  matches: Array<{ file: string; line: number; text: string }>,
+): Promise<void> {
+  const limitReached = (): boolean => matches.length >= MAX_SEARCH_MATCHES;
+  if (limitReached()) return;
+
+  let children: import('node:fs').Dirent[];
+  try {
+    children = await fs.readdir(dirPath, { withFileTypes: true });
+  } catch {
+    return; // unreadable directory: nothing to search
+  }
+
+  for (const child of children) {
+    if (limitReached()) return;
+    if (SEARCH_SKIP_DIRS.has(child.name)) continue;
+
+    const childPath = path.join(dirPath, child.name);
+    if (child.isDirectory()) {
+      await searchDirectory(childPath, workspacePath, regex, matches);
+      continue;
+    }
+
+    // Only regular files whose extension is not in BINARY_EXTENSIONS are scanned.
+    if (!child.isFile()) continue;
+    if (BINARY_EXTENSIONS.has(path.extname(child.name).toLowerCase())) continue;
+
+    try {
+      const { size } = await fs.stat(childPath);
+      if (size > MAX_FILE_SIZE) continue;
+
+      const fileLines = (await fs.readFile(childPath, 'utf-8')).split('\n');
+      for (const [index, lineText] of fileLines.entries()) {
+        if (limitReached()) return;
+        regex.lastIndex = 0; // a global/sticky regex keeps state between test() calls
+        if (!regex.test(lineText)) continue;
+        matches.push({
+          file: path.relative(workspacePath, childPath),
+          line: index + 1,
+          text: lineText.substring(0, 200),
+        });
+      }
+    } catch {
+      // Skip unreadable files
+    }
+  }
+}
diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts
index 2eb72cb92..e931db1c8 100644
--- a/packages/core/src/evaluation/loaders/evaluator-parser.ts
+++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts
@@ -134,7 +134,9 @@ async function parseEvaluatorList(
     const typeValue = typeof rawType === 'string' ? normalizeEvaluatorType(rawType) : rawType;
 
     // Unknown types are treated as custom assertion types (resolved via registry discovery)
-    const isCustomType = typeof typeValue === 'string' && !isEvaluatorKind(typeValue);
+    // 'agent-judge' is a known alias (maps to 'llm-judge'), not a custom type
+    const isCustomType =
+      typeof typeValue === 'string' && !isEvaluatorKind(typeValue) && typeValue !== 'agent-judge';
     if (typeof typeValue !== 'string') {
       logWarning(`Skipping evaluator with invalid type in '${evalId}'`);
       continue;
@@ -852,7 +854,8 @@ async function parseEvaluatorList(
       continue;
     }
 
-    if (typeValue === 'agent-judge') {
+    // Backward compat: agent-judge / agent_judge → llm-judge with agent-specific fields
+    if ((typeValue as string) === 'agent-judge') {
       // Validate max_steps (1-50)
       const rawMaxSteps = rawEvaluator.max_steps ?? rawEvaluator.maxSteps;
       let maxSteps: number | undefined;
@@ -864,7 +867,7 @@ async function parseEvaluatorList(
           rawMaxSteps > 50
         ) {
           logWarning(
-            `Skipping agent-judge evaluator '${name}' in '${evalId}': max_steps must be an integer 1-50`,
+            `Skipping llm-judge evaluator '${name}' in '${evalId}': max_steps must be an integer 1-50`,
           );
           continue;
         }
@@ -877,7 +880,7 @@ async function parseEvaluatorList(
       if (rawTemperature !== undefined) {
         if (typeof rawTemperature !== 'number' || rawTemperature < 0 || rawTemperature > 2) {
           logWarning(
-            `Skipping agent-judge evaluator '${name}' in '${evalId}': temperature must be a number 0-2`,
+            `Skipping llm-judge evaluator '${name}' in '${evalId}': temperature must be a number 0-2`,
           );
           continue;
         }
@@ -910,7 +913,7 @@ async function parseEvaluatorList(
 
       evaluators.push({
         name,
-        type: 'agent-judge',
+        type: 'llm-judge',
         ...(agentPrompt ? { prompt: agentPrompt } : {}),
         ...(agentPromptPath
           ? { promptPath: agentPromptPath, resolvedPromptPath: agentPromptPath }
@@ -1266,6 +1269,9 @@ async function parseEvaluatorList(
       'config',
       'required',
       'negate',
+      'max_steps',
+      'maxSteps',
+      'temperature',
     ]);
     const config: Record<string, JsonValue> = {};
     for (const [key, value] of Object.entries(rawEvaluator)) {
@@ -1284,6 +1290,21 @@ async function parseEvaluatorList(
     const finalConfig =
       promptScriptConfig ?? (Object.keys(mergedConfig).length > 0 ? mergedConfig : undefined);
 
+    // Parse optional max_steps and temperature (used in agent mode)
+    const rawMaxStepsLlm = rawEvaluator.max_steps ?? rawEvaluator.maxSteps;
+    const llmMaxSteps =
+      typeof rawMaxStepsLlm === 'number' &&
+      Number.isInteger(rawMaxStepsLlm) &&
+      rawMaxStepsLlm >= 1 &&
+      rawMaxStepsLlm <= 50
+        ? rawMaxStepsLlm
+        : undefined;
+    const rawTempLlm = rawEvaluator.temperature;
+    const llmTemperature =
+      typeof rawTempLlm === 'number' && rawTempLlm >= 0 && rawTempLlm <= 2
+        ? rawTempLlm
+        : undefined;
+
     evaluators.push({
       name,
       type: 'llm-judge',
@@ -1297,6 +1318,8 @@ async function parseEvaluatorList(
       ...(required !== undefined ? { required } : {}),
       ...(negate !== undefined ? { negate } : {}),
       ...(finalConfig ? { config: finalConfig } : {}),
+      ...(llmMaxSteps !== undefined ? { max_steps: llmMaxSteps } : {}),
+      ...(llmTemperature !== undefined ? { temperature: llmTemperature } : {}),
     });
   }
 
diff --git a/packages/core/src/evaluation/registry/builtin-evaluators.ts b/packages/core/src/evaluation/registry/builtin-evaluators.ts
index dee6b0237..a370c08b4 100644
--- a/packages/core/src/evaluation/registry/builtin-evaluators.ts
+++ b/packages/core/src/evaluation/registry/builtin-evaluators.ts
@@ -6,9 +6,7 @@
  * the EvaluatorRegistry at startup.
  */
 
-import { readFileSync } from 'node:fs';
 import {
-  AgentJudgeEvaluator,
   CodeEvaluator,
   CompositeEvaluator,
   CostEvaluator,
@@ -34,10 +32,10 @@ import {
 } from '../evaluators.js';
 import { InlineAssertEvaluator } from '../evaluators/inline-assert.js';
 import { resolveCustomPrompt } from '../evaluators/prompt-resolution.js';
+import { isAgentProvider } from '../providers/types.js';
 import type { Provider } from '../providers/types.js';
 import type { ToolTrajectoryEvaluatorConfig } from '../trace.js';
 import type {
-  AgentJudgeEvaluatorConfig,
   CodeEvaluatorConfig,
   CompositeEvaluatorConfig,
   ContainsAllEvaluatorConfig,
@@ -74,6 +72,11 @@ export const INLINE_ASSERT_FN = Symbol.for('agentv.inline-assert-fn');
  * Factory for `llm-judge` evaluators.
  * Creates a wrapper that resolves custom prompts at evaluation time and
  * optionally overrides the judge target per evaluator.
+ *
+ * Auto-detects mode based on the resolved judge provider:
+ * - LLM providers (azure, anthropic, gemini): structured JSON mode
+ * - Agent providers (claude-cli, copilot, etc.): delegate mode
+ * - agentv provider: built-in AI SDK agent mode with filesystem tools
  */
 export const llmJudgeFactory: EvaluatorFactoryFn = (config, context) => {
   const c = config as LlmJudgeEvaluatorConfig;
@@ -88,12 +91,18 @@ export const llmJudgeFactory: EvaluatorFactoryFn = (config, context) => {
     if (!judgeTargetProvider) {
       throw new Error(`llm-judge evaluator '${c.name}': target '${c.target}' not found in targets`);
     }
+    // Only pass judgeTargetProvider for agent providers (delegate mode).
+    // LLM providers use the normal resolveJudgeProvider path for structured JSON mode.
+    const isAgent = isAgentProvider(judgeTargetProvider) || judgeTargetProvider.kind === 'agentv';
     evaluator = new LlmJudgeEvaluator({
       resolveJudgeProvider: async (evalContext) => {
         if (judgeTargetProvider) return judgeTargetProvider;
         if (evalContext.judgeProvider) return evalContext.judgeProvider;
         return judgeProvider;
       },
+      maxSteps: c.max_steps,
+      temperature: c.temperature,
+      ...(isAgent ? { judgeTargetProvider } : {}),
     });
   }
 
@@ -198,45 +207,6 @@ export const executionMetricsFactory: EvaluatorFactoryFn = (config) => {
   });
 };
 
-/** Factory for `agent-judge` evaluators. */
-export const agentJudgeFactory: EvaluatorFactoryFn = (config, context) => {
-  const c = config as AgentJudgeEvaluatorConfig;
-  const { judgeProvider, targetResolver } = context;
-
-  let customPrompt: string | undefined;
-  if (c.resolvedPromptPath) {
-    try {
-      customPrompt = readFileSync(c.resolvedPromptPath, 'utf-8');
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      console.warn(`Could not read agent-judge prompt at ${c.resolvedPromptPath}: ${message}`);
-    }
-  } else if (c.prompt) {
-    customPrompt = c.prompt;
-  }
-
-  let judgeTargetProvider: Provider | undefined;
-  if (c.target && targetResolver) {
-    judgeTargetProvider = targetResolver(c.target);
-    if (!judgeTargetProvider) {
-      throw new Error(
-        `agent-judge evaluator '${c.name}': target '${c.target}' not found in targets`,
-      );
-    }
-  }
-
-  return new AgentJudgeEvaluator({
-    resolveJudgeProvider: async (ctx) => {
-      if (ctx.judgeProvider) return ctx.judgeProvider;
-      return judgeProvider;
-    },
-    maxSteps: c.max_steps,
-    temperature: c.temperature,
-    evaluatorTemplate: customPrompt,
-    judgeTargetProvider,
-  });
-};
-
 /** Factory for `skill-trigger` evaluator. */
 export const skillTriggerFactory: EvaluatorFactoryFn = (config) => {
   return new SkillTriggerEvaluator(config as SkillTriggerEvaluatorConfig);
@@ -440,7 +410,6 @@ export function createBuiltinRegistry(): EvaluatorRegistry {
     .register('cost', costFactory)
     .register('token-usage', tokenUsageFactory)
     .register('execution-metrics', executionMetricsFactory)
-    .register('agent-judge', agentJudgeFactory)
     .register('skill-trigger', skillTriggerFactory)
     .register('contains', containsFactory)
     .register('contains-any', containsAnyFactory)
diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts
index b69c272ab..b174af42f 100644
--- a/packages/core/src/evaluation/types.ts
+++ b/packages/core/src/evaluation/types.ts
@@ -158,7 +158,6 @@ const EVALUATOR_KIND_VALUES = [
   'cost',
   'token-usage',
   'execution-metrics',
-  'agent-judge',
   'skill-trigger',
   'contains',
   'contains-any',
@@ -337,6 +336,10 @@ export type LlmJudgeEvaluatorConfig = {
   readonly target?: string;
   /** Pass-through configuration for custom evaluator prompts (legacy, prefer prompt.config) */
   readonly config?: Record<string, unknown>;
+  /** Maximum agent steps for agentv built-in mode (default 10, max 50). Ignored in LLM mode. */
+  readonly max_steps?: number;
+  /** Temperature override for judge calls */
+  readonly temperature?: number;
 };
 
 /**
@@ -529,35 +532,6 @@ export type ExecutionMetricsEvaluatorConfig = {
   readonly negate?: boolean;
 };
 
-/**
- * Configuration for the agent-judge evaluator.
- * Runs an agentic investigation loop to audit workspaces and verify criteria.
- * Two modes:
- * - Built-in: Uses AI SDK generateText() with sandboxed filesystem tools
- * - Judge target: Delegates to an external agent provider via Provider.invoke()
- */
-export type AgentJudgeEvaluatorConfig = {
-  readonly name: string;
-  readonly type: 'agent-judge';
-  /** Custom evaluation prompt (inline text or file path) */
-  readonly prompt?: string;
-  readonly promptPath?: string;
-  /** Resolved absolute path for prompt file */
-  readonly resolvedPromptPath?: string;
-  /** Rubric items for structured evaluation (reuses llm-judge rubric infra) */
-  readonly rubrics?: readonly RubricItem[];
-  /** Maximum agent steps for built-in mode (default 10, max 50) */
-  readonly max_steps?: number;
-  /** Temperature for built-in mode (default 0) */
-  readonly temperature?: number;
-  /** Target name — delegates agent loop to this provider instead of built-in mode */
-  readonly target?: string;
-  readonly weight?: number;
-  readonly required?: boolean | number;
-  /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */
-  readonly negate?: boolean;
-};
-
 /**
  * Configuration for the contains assertion evaluator.
  * Checks whether the candidate output contains a specified substring.
@@ -766,7 +740,6 @@ export type EvaluatorConfig =
   | CostEvaluatorConfig
   | TokenUsageEvaluatorConfig
   | ExecutionMetricsEvaluatorConfig
-  | AgentJudgeEvaluatorConfig
   | SkillTriggerEvaluatorConfig
   | ContainsEvaluatorConfig
   | ContainsAnyEvaluatorConfig
diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts
index 690373b43..977b68daa 100644
--- a/packages/core/src/evaluation/validation/eval-file.schema.ts
+++ b/packages/core/src/evaluation/validation/eval-file.schema.ts
@@ -87,6 +87,8 @@ const LlmJudgeSchema = EvaluatorCommonSchema.extend({
   model: z.string().optional(),
   target: z.string().optional(),
   config: z.record(z.unknown()).optional(),
+  max_steps: z.number().int().min(1).max(50).optional(),
+  temperature: z.number().min(0).max(2).optional(),
 });
 
 /** Aggregator configs for composite evaluator */
diff --git a/packages/eval/src/assertion.ts b/packages/eval/src/assertion.ts
index dd28ea304..bb77b4710 100644
--- a/packages/eval/src/assertion.ts
+++ b/packages/eval/src/assertion.ts
@@ -47,7 +47,6 @@ export type AssertionType =
   | 'cost'
   | 'token-usage'
   | 'execution-metrics'
-  | 'agent-judge'
   | 'skill-trigger'
   | 'contains'
   | 'contains-any'
@@ -67,7 +66,6 @@ export type AssertionType =
   | 'field_accuracy'
   | 'token_usage'
   | 'execution_metrics'
-  | 'agent_judge'
   | 'contains_any'
   | 'contains_all'
   | 'icontains_any'
diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json
index 9093c7e48..b55528f3c 100644
--- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json
+++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json
@@ -53,7 +53,12 @@
                 "properties": {
                   "role": {
                     "type": "string",
-                    "enum": ["system", "user", "assistant", "tool"]
+                    "enum": [
+                      "system",
+                      "user",
+                      "assistant",
+                      "tool"
+                    ]
                   },
                   "content": {
                     "anyOf": [
@@ -67,20 +72,29 @@
                           "properties": {
                             "type": {
                               "type": "string",
-                              "enum": ["text", "file"]
+                              "enum": [
+                                "text",
+                                "file"
+                              ]
                             },
                             "value": {
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         }
                       }
                     ]
                   }
                 },
-                "required": ["role", "content"],
+                "required": [
+                  "role",
+                  "content"
+                ],
                 "additionalProperties": false
               }
             }
@@ -115,7 +129,12 @@
                           "properties": {
                             "role": {
                               "type": "string",
-                              "enum": ["system", "user", "assistant", "tool"]
+                              "enum": [
+                                "system",
+                                "user",
+                                "assistant",
+                                "tool"
+                              ]
                             },
                             "content": {
                               "anyOf": [
@@ -129,20 +148,29 @@
                                     "properties": {
                                       "type": {
                                         "type": "string",
-                                        "enum": ["text", "file"]
+                                        "enum": [
+                                          "text",
+                                          "file"
+                                        ]
                                       },
                                       "value": {
                                         "type": "string"
                                       }
                                     },
-                                    "required": ["type", "value"],
+                                    "required": [
+                                      "type",
+                                      "value"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 }
                               ]
                             }
                           },
-                          "required": ["role", "content"],
+                          "required": [
+                            "role",
+                            "content"
+                          ],
                           "additionalProperties": false
                         }
                       }
@@ -164,7 +192,12 @@
                           "properties": {
                             "role": {
                               "type": "string",
-                              "enum": ["system", "user", "assistant", "tool"]
+                              "enum": [
+                                "system",
+                                "user",
+                                "assistant",
+                                "tool"
+                              ]
                             },
                             "content": {
                               "anyOf": [
@@ -178,20 +211,29 @@
                                     "properties": {
                                       "type": {
                                         "type": "string",
-                                        "enum": ["text", "file"]
+                                        "enum": [
+                                          "text",
+                                          "file"
+                                        ]
                                       },
                                       "value": {
                                         "type": "string"
                                       }
                                     },
-                                    "required": ["type", "value"],
+                                    "required": [
+                                      "type",
+                                      "value"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 }
                               ]
                             }
                           },
-                          "required": ["role", "content"],
+                          "required": [
+                            "role",
+                            "content"
+                          ],
                           "additionalProperties": false
                         }
                       }
@@ -228,7 +270,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["code-judge", "code_judge"]
+                              "enum": [
+                                "code-judge",
+                                "code_judge"
+                              ]
                             },
                             "command": {
                               "anyOf": [
@@ -280,7 +325,10 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": ["type", "command"],
+                          "required": [
+                            "type",
+                            "command"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -310,7 +358,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["llm-judge", "llm_judge"]
+                              "enum": [
+                                "llm-judge",
+                                "llm_judge"
+                              ]
                             },
                             "prompt": {
                               "anyOf": [
@@ -404,7 +455,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -421,9 +475,21 @@
                             "config": {
                               "type": "object",
                               "additionalProperties": {}
+                            },
+                            "max_steps": {
+                              "type": "integer",
+                              "minimum": 1,
+                              "maximum": 50
+                            },
+                            "temperature": {
+                              "type": "number",
+                              "minimum": 0,
+                              "maximum": 2
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -483,7 +549,9 @@
                                       }
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -499,7 +567,10 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": ["type", "threshold"],
+                                  "required": [
+                                    "type",
+                                    "threshold"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -516,7 +587,10 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type", "path"],
+                                  "required": [
+                                    "type",
+                                    "path"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -533,13 +607,18 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": ["type", "aggregator"],
+                          "required": [
+                            "type",
+                            "aggregator"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -569,11 +648,20 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["tool-trajectory", "tool_trajectory"]
+                              "enum": [
+                                "tool-trajectory",
+                                "tool_trajectory"
+                              ]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                              "enum": [
+                                "any_order",
+                                "in_order",
+                                "exact",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             "minimums": {
                               "type": "object",
@@ -614,7 +702,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -628,7 +721,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -639,7 +737,9 @@
                                     ]
                                   }
                                 },
-                                "required": ["tool"],
+                                "required": [
+                                  "tool"
+                                ],
                                 "additionalProperties": false
                               }
                             },
@@ -647,7 +747,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -661,7 +766,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -672,7 +782,10 @@
                               ]
                             }
                           },
-                          "required": ["type", "mode"],
+                          "required": [
+                            "type",
+                            "mode"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -702,7 +815,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["field-accuracy", "field_accuracy"]
+                              "enum": [
+                                "field-accuracy",
+                                "field_accuracy"
+                              ]
                             },
                             "fields": {
                               "type": "array",
@@ -714,7 +830,11 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": ["exact", "numeric_tolerance", "date"]
+                                    "enum": [
+                                      "exact",
+                                      "numeric_tolerance",
+                                      "date"
+                                    ]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -736,17 +856,26 @@
                                     }
                                   }
                                 },
-                                "required": ["path", "match"],
+                                "required": [
+                                  "path",
+                                  "match"
+                                ],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": ["weighted_average", "all_or_nothing"]
+                              "enum": [
+                                "weighted_average",
+                                "all_or_nothing"
+                              ]
                             }
                           },
-                          "required": ["type", "fields"],
+                          "required": [
+                            "type",
+                            "fields"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -783,7 +912,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "threshold"],
+                          "required": [
+                            "type",
+                            "threshold"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -820,7 +952,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "budget"],
+                          "required": [
+                            "type",
+                            "budget"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -850,7 +985,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["token-usage", "token_usage"]
+                              "enum": [
+                                "token-usage",
+                                "token_usage"
+                              ]
                             },
                             "max_total": {
                               "type": "number",
@@ -865,7 +1003,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -895,7 +1035,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["execution-metrics", "execution_metrics"]
+                              "enum": [
+                                "execution-metrics",
+                                "execution_metrics"
+                              ]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -927,7 +1070,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -957,7 +1102,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["agent-judge", "agent_judge"]
+                              "enum": [
+                                "agent-judge",
+                                "agent_judge"
+                              ]
                             },
                             "prompt": {
                               "type": "string"
@@ -1011,7 +1159,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -1033,7 +1184,9 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1069,7 +1222,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1105,7 +1261,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1135,10 +1294,15 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["is-json", "is_json"]
+                              "enum": [
+                                "is-json",
+                                "is_json"
+                              ]
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1174,7 +1338,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1255,7 +1422,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -1265,7 +1435,10 @@
                               "minItems": 1
                             }
                           },
-                          "required": ["type", "criteria"],
+                          "required": [
+                            "type",
+                            "criteria"
+                          ],
                           "additionalProperties": false
                         }
                       ]
@@ -1302,7 +1475,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["code-judge", "code_judge"]
+                              "enum": [
+                                "code-judge",
+                                "code_judge"
+                              ]
                             },
                             "command": {
                               "anyOf": [
@@ -1354,7 +1530,10 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": ["type", "command"],
+                          "required": [
+                            "type",
+                            "command"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1384,7 +1563,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["llm-judge", "llm_judge"]
+                              "enum": [
+                                "llm-judge",
+                                "llm_judge"
+                              ]
                             },
                             "prompt": {
                               "anyOf": [
@@ -1478,7 +1660,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -1495,9 +1680,21 @@
                             "config": {
                               "type": "object",
                               "additionalProperties": {}
+                            },
+                            "max_steps": {
+                              "type": "integer",
+                              "minimum": 1,
+                              "maximum": 50
+                            },
+                            "temperature": {
+                              "type": "number",
+                              "minimum": 0,
+                              "maximum": 2
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1557,7 +1754,9 @@
                                       }
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -1573,7 +1772,10 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": ["type", "threshold"],
+                                  "required": [
+                                    "type",
+                                    "threshold"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -1590,7 +1792,10 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type", "path"],
+                                  "required": [
+                                    "type",
+                                    "path"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -1607,13 +1812,18 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": ["type", "aggregator"],
+                          "required": [
+                            "type",
+                            "aggregator"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1643,11 +1853,20 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["tool-trajectory", "tool_trajectory"]
+                              "enum": [
+                                "tool-trajectory",
+                                "tool_trajectory"
+                              ]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                              "enum": [
+                                "any_order",
+                                "in_order",
+                                "exact",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             "minimums": {
                               "type": "object",
@@ -1688,7 +1907,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -1702,7 +1926,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -1713,7 +1942,9 @@
                                     ]
                                   }
                                 },
-                                "required": ["tool"],
+                                "required": [
+                                  "tool"
+                                ],
                                 "additionalProperties": false
                               }
                             },
@@ -1721,7 +1952,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -1735,7 +1971,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -1746,7 +1987,10 @@
                               ]
                             }
                           },
-                          "required": ["type", "mode"],
+                          "required": [
+                            "type",
+                            "mode"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1776,7 +2020,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["field-accuracy", "field_accuracy"]
+                              "enum": [
+                                "field-accuracy",
+                                "field_accuracy"
+                              ]
                             },
                             "fields": {
                               "type": "array",
@@ -1788,7 +2035,11 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": ["exact", "numeric_tolerance", "date"]
+                                    "enum": [
+                                      "exact",
+                                      "numeric_tolerance",
+                                      "date"
+                                    ]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -1810,17 +2061,26 @@
                                     }
                                   }
                                 },
-                                "required": ["path", "match"],
+                                "required": [
+                                  "path",
+                                  "match"
+                                ],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": ["weighted_average", "all_or_nothing"]
+                              "enum": [
+                                "weighted_average",
+                                "all_or_nothing"
+                              ]
                             }
                           },
-                          "required": ["type", "fields"],
+                          "required": [
+                            "type",
+                            "fields"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1857,7 +2117,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "threshold"],
+                          "required": [
+                            "type",
+                            "threshold"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1894,7 +2157,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "budget"],
+                          "required": [
+                            "type",
+                            "budget"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1924,7 +2190,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["token-usage", "token_usage"]
+                              "enum": [
+                                "token-usage",
+                                "token_usage"
+                              ]
                             },
                             "max_total": {
                               "type": "number",
@@ -1939,7 +2208,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -1969,7 +2240,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["execution-metrics", "execution_metrics"]
+                              "enum": [
+                                "execution-metrics",
+                                "execution_metrics"
+                              ]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -2001,7 +2275,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2031,7 +2307,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["agent-judge", "agent_judge"]
+                              "enum": [
+                                "agent-judge",
+                                "agent_judge"
+                              ]
                             },
                             "prompt": {
                               "type": "string"
@@ -2085,7 +2364,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -2107,7 +2389,9 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2143,7 +2427,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2179,7 +2466,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2209,10 +2499,15 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["is-json", "is_json"]
+                              "enum": [
+                                "is-json",
+                                "is_json"
+                              ]
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2248,7 +2543,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2329,7 +2627,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -2339,7 +2640,10 @@
                               "minItems": 1
                             }
                           },
-                          "required": ["type", "criteria"],
+                          "required": [
+                            "type",
+                            "criteria"
+                          ],
                           "additionalProperties": false
                         }
                       ]
@@ -2376,7 +2680,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["code-judge", "code_judge"]
+                              "enum": [
+                                "code-judge",
+                                "code_judge"
+                              ]
                             },
                             "command": {
                               "anyOf": [
@@ -2428,7 +2735,10 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": ["type", "command"],
+                          "required": [
+                            "type",
+                            "command"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2458,7 +2768,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["llm-judge", "llm_judge"]
+                              "enum": [
+                                "llm-judge",
+                                "llm_judge"
+                              ]
                             },
                             "prompt": {
                               "anyOf": [
@@ -2552,7 +2865,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -2569,9 +2885,21 @@
                             "config": {
                               "type": "object",
                               "additionalProperties": {}
+                            },
+                            "max_steps": {
+                              "type": "integer",
+                              "minimum": 1,
+                              "maximum": 50
+                            },
+                            "temperature": {
+                              "type": "number",
+                              "minimum": 0,
+                              "maximum": 2
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2631,7 +2959,9 @@
                                       }
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -2647,7 +2977,10 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": ["type", "threshold"],
+                                  "required": [
+                                    "type",
+                                    "threshold"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -2664,7 +2997,10 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type", "path"],
+                                  "required": [
+                                    "type",
+                                    "path"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -2681,13 +3017,18 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": ["type", "aggregator"],
+                          "required": [
+                            "type",
+                            "aggregator"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2717,11 +3058,20 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["tool-trajectory", "tool_trajectory"]
+                              "enum": [
+                                "tool-trajectory",
+                                "tool_trajectory"
+                              ]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                              "enum": [
+                                "any_order",
+                                "in_order",
+                                "exact",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             "minimums": {
                               "type": "object",
@@ -2762,7 +3112,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -2776,7 +3131,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -2787,7 +3147,9 @@
                                     ]
                                   }
                                 },
-                                "required": ["tool"],
+                                "required": [
+                                  "tool"
+                                ],
                                 "additionalProperties": false
                               }
                             },
@@ -2795,7 +3157,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -2809,7 +3176,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -2820,7 +3192,10 @@
                               ]
                             }
                           },
-                          "required": ["type", "mode"],
+                          "required": [
+                            "type",
+                            "mode"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2850,7 +3225,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["field-accuracy", "field_accuracy"]
+                              "enum": [
+                                "field-accuracy",
+                                "field_accuracy"
+                              ]
                             },
                             "fields": {
                               "type": "array",
@@ -2862,7 +3240,11 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": ["exact", "numeric_tolerance", "date"]
+                                    "enum": [
+                                      "exact",
+                                      "numeric_tolerance",
+                                      "date"
+                                    ]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -2884,17 +3266,26 @@
                                     }
                                   }
                                 },
-                                "required": ["path", "match"],
+                                "required": [
+                                  "path",
+                                  "match"
+                                ],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": ["weighted_average", "all_or_nothing"]
+                              "enum": [
+                                "weighted_average",
+                                "all_or_nothing"
+                              ]
                             }
                           },
-                          "required": ["type", "fields"],
+                          "required": [
+                            "type",
+                            "fields"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2931,7 +3322,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "threshold"],
+                          "required": [
+                            "type",
+                            "threshold"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2968,7 +3362,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "budget"],
+                          "required": [
+                            "type",
+                            "budget"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -2998,7 +3395,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["token-usage", "token_usage"]
+                              "enum": [
+                                "token-usage",
+                                "token_usage"
+                              ]
                             },
                             "max_total": {
                               "type": "number",
@@ -3013,7 +3413,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -3043,7 +3445,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["execution-metrics", "execution_metrics"]
+                              "enum": [
+                                "execution-metrics",
+                                "execution_metrics"
+                              ]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -3075,7 +3480,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -3105,7 +3512,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["agent-judge", "agent_judge"]
+                              "enum": [
+                                "agent-judge",
+                                "agent_judge"
+                              ]
                             },
                             "prompt": {
                               "type": "string"
@@ -3159,7 +3569,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -3181,7 +3594,9 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -3217,7 +3632,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -3253,7 +3671,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -3283,10 +3704,15 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["is-json", "is_json"]
+                              "enum": [
+                                "is-json",
+                                "is_json"
+                              ]
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -3322,7 +3748,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -3403,7 +3832,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -3413,7 +3845,10 @@
                               "minItems": 1
                             }
                           },
-                          "required": ["type", "criteria"],
+                          "required": [
+                            "type",
+                            "criteria"
+                          ],
                           "additionalProperties": false
                         }
                       ]
@@ -3462,7 +3897,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["code-judge", "code_judge"]
+                                  "enum": [
+                                    "code-judge",
+                                    "code_judge"
+                                  ]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -3514,7 +3952,10 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": ["type", "command"],
+                              "required": [
+                                "type",
+                                "command"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -3544,7 +3985,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["llm-judge", "llm_judge"]
+                                  "enum": [
+                                    "llm-judge",
+                                    "llm_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -3638,7 +4082,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -3655,9 +4102,21 @@
                                 "config": {
                                   "type": "object",
                                   "additionalProperties": {}
+                                },
+                                "max_steps": {
+                                  "type": "integer",
+                                  "minimum": 1,
+                                  "maximum": 50
+                                },
+                                "temperature": {
+                                  "type": "number",
+                                  "minimum": 0,
+                                  "maximum": 2
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -3717,7 +4176,9 @@
                                           }
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -3733,7 +4194,10 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": ["type", "threshold"],
+                                      "required": [
+                                        "type",
+                                        "threshold"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -3750,7 +4214,10 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type", "path"],
+                                      "required": [
+                                        "type",
+                                        "path"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -3767,13 +4234,18 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": ["type", "aggregator"],
+                              "required": [
+                                "type",
+                                "aggregator"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -3803,11 +4275,20 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["tool-trajectory", "tool_trajectory"]
+                                  "enum": [
+                                    "tool-trajectory",
+                                    "tool_trajectory"
+                                  ]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                                  "enum": [
+                                    "any_order",
+                                    "in_order",
+                                    "exact",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -3848,7 +4329,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -3862,7 +4348,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -3873,7 +4364,9 @@
                                         ]
                                       }
                                     },
-                                    "required": ["tool"],
+                                    "required": [
+                                      "tool"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 },
@@ -3881,7 +4374,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -3895,7 +4393,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -3906,7 +4409,10 @@
                                   ]
                                 }
                               },
-                              "required": ["type", "mode"],
+                              "required": [
+                                "type",
+                                "mode"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -3936,7 +4442,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["field-accuracy", "field_accuracy"]
+                                  "enum": [
+                                    "field-accuracy",
+                                    "field_accuracy"
+                                  ]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -3948,7 +4457,11 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": ["exact", "numeric_tolerance", "date"]
+                                        "enum": [
+                                          "exact",
+                                          "numeric_tolerance",
+                                          "date"
+                                        ]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -3970,17 +4483,26 @@
                                         }
                                       }
                                     },
-                                    "required": ["path", "match"],
+                                    "required": [
+                                      "path",
+                                      "match"
+                                    ],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": ["weighted_average", "all_or_nothing"]
+                                  "enum": [
+                                    "weighted_average",
+                                    "all_or_nothing"
+                                  ]
                                 }
                               },
-                              "required": ["type", "fields"],
+                              "required": [
+                                "type",
+                                "fields"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4017,7 +4539,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "threshold"],
+                              "required": [
+                                "type",
+                                "threshold"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4054,7 +4579,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "budget"],
+                              "required": [
+                                "type",
+                                "budget"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4084,7 +4612,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["token-usage", "token_usage"]
+                                  "enum": [
+                                    "token-usage",
+                                    "token_usage"
+                                  ]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -4099,7 +4630,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4129,7 +4662,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["execution-metrics", "execution_metrics"]
+                                  "enum": [
+                                    "execution-metrics",
+                                    "execution_metrics"
+                                  ]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -4161,7 +4697,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4191,7 +4729,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["agent-judge", "agent_judge"]
+                                  "enum": [
+                                    "agent-judge",
+                                    "agent_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "type": "string"
@@ -4245,7 +4786,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -4267,7 +4811,9 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4303,7 +4849,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4339,7 +4888,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4369,10 +4921,15 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["is-json", "is_json"]
+                                  "enum": [
+                                    "is-json",
+                                    "is_json"
+                                  ]
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4408,7 +4965,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4489,7 +5049,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -4499,7 +5062,10 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": ["type", "criteria"],
+                              "required": [
+                                "type",
+                                "criteria"
+                              ],
                               "additionalProperties": false
                             }
                           ]
@@ -4536,7 +5102,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["code-judge", "code_judge"]
+                                  "enum": [
+                                    "code-judge",
+                                    "code_judge"
+                                  ]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -4588,7 +5157,10 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": ["type", "command"],
+                              "required": [
+                                "type",
+                                "command"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4618,7 +5190,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["llm-judge", "llm_judge"]
+                                  "enum": [
+                                    "llm-judge",
+                                    "llm_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -4712,7 +5287,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -4729,9 +5307,21 @@
                                 "config": {
                                   "type": "object",
                                   "additionalProperties": {}
+                                },
+                                "max_steps": {
+                                  "type": "integer",
+                                  "minimum": 1,
+                                  "maximum": 50
+                                },
+                                "temperature": {
+                                  "type": "number",
+                                  "minimum": 0,
+                                  "maximum": 2
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4791,7 +5381,9 @@
                                           }
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -4807,7 +5399,10 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": ["type", "threshold"],
+                                      "required": [
+                                        "type",
+                                        "threshold"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -4824,7 +5419,10 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type", "path"],
+                                      "required": [
+                                        "type",
+                                        "path"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -4841,13 +5439,18 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": ["type", "aggregator"],
+                              "required": [
+                                "type",
+                                "aggregator"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -4877,11 +5480,20 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["tool-trajectory", "tool_trajectory"]
+                                  "enum": [
+                                    "tool-trajectory",
+                                    "tool_trajectory"
+                                  ]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                                  "enum": [
+                                    "any_order",
+                                    "in_order",
+                                    "exact",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -4922,7 +5534,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -4936,7 +5553,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -4947,7 +5569,9 @@
                                         ]
                                       }
                                     },
-                                    "required": ["tool"],
+                                    "required": [
+                                      "tool"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 },
@@ -4955,7 +5579,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -4969,7 +5598,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -4980,7 +5614,10 @@
                                   ]
                                 }
                               },
-                              "required": ["type", "mode"],
+                              "required": [
+                                "type",
+                                "mode"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5010,7 +5647,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["field-accuracy", "field_accuracy"]
+                                  "enum": [
+                                    "field-accuracy",
+                                    "field_accuracy"
+                                  ]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -5022,7 +5662,11 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": ["exact", "numeric_tolerance", "date"]
+                                        "enum": [
+                                          "exact",
+                                          "numeric_tolerance",
+                                          "date"
+                                        ]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -5044,17 +5688,26 @@
                                         }
                                       }
                                     },
-                                    "required": ["path", "match"],
+                                    "required": [
+                                      "path",
+                                      "match"
+                                    ],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": ["weighted_average", "all_or_nothing"]
+                                  "enum": [
+                                    "weighted_average",
+                                    "all_or_nothing"
+                                  ]
                                 }
                               },
-                              "required": ["type", "fields"],
+                              "required": [
+                                "type",
+                                "fields"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5091,7 +5744,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "threshold"],
+                              "required": [
+                                "type",
+                                "threshold"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5128,7 +5784,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "budget"],
+                              "required": [
+                                "type",
+                                "budget"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5158,7 +5817,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["token-usage", "token_usage"]
+                                  "enum": [
+                                    "token-usage",
+                                    "token_usage"
+                                  ]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -5173,7 +5835,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5203,7 +5867,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["execution-metrics", "execution_metrics"]
+                                  "enum": [
+                                    "execution-metrics",
+                                    "execution_metrics"
+                                  ]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -5235,7 +5902,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5265,7 +5934,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["agent-judge", "agent_judge"]
+                                  "enum": [
+                                    "agent-judge",
+                                    "agent_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "type": "string"
@@ -5319,7 +5991,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -5341,7 +6016,9 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5377,7 +6054,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5413,7 +6093,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5443,10 +6126,15 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["is-json", "is_json"]
+                                  "enum": [
+                                    "is-json",
+                                    "is_json"
+                                  ]
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5482,7 +6170,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5563,7 +6254,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -5573,7 +6267,10 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": ["type", "criteria"],
+                              "required": [
+                                "type",
+                                "criteria"
+                              ],
                               "additionalProperties": false
                             }
                           ]
@@ -5610,7 +6307,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["code-judge", "code_judge"]
+                                  "enum": [
+                                    "code-judge",
+                                    "code_judge"
+                                  ]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -5662,7 +6362,10 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": ["type", "command"],
+                              "required": [
+                                "type",
+                                "command"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5692,7 +6395,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["llm-judge", "llm_judge"]
+                                  "enum": [
+                                    "llm-judge",
+                                    "llm_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -5786,7 +6492,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -5803,9 +6512,21 @@
                                 "config": {
                                   "type": "object",
                                   "additionalProperties": {}
+                                },
+                                "max_steps": {
+                                  "type": "integer",
+                                  "minimum": 1,
+                                  "maximum": 50
+                                },
+                                "temperature": {
+                                  "type": "number",
+                                  "minimum": 0,
+                                  "maximum": 2
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5865,7 +6586,9 @@
                                           }
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -5881,7 +6604,10 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": ["type", "threshold"],
+                                      "required": [
+                                        "type",
+                                        "threshold"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -5898,7 +6624,10 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type", "path"],
+                                      "required": [
+                                        "type",
+                                        "path"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -5915,13 +6644,18 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": ["type", "aggregator"],
+                              "required": [
+                                "type",
+                                "aggregator"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -5951,11 +6685,20 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["tool-trajectory", "tool_trajectory"]
+                                  "enum": [
+                                    "tool-trajectory",
+                                    "tool_trajectory"
+                                  ]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                                  "enum": [
+                                    "any_order",
+                                    "in_order",
+                                    "exact",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -5996,7 +6739,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -6010,7 +6758,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -6021,7 +6774,9 @@
                                         ]
                                       }
                                     },
-                                    "required": ["tool"],
+                                    "required": [
+                                      "tool"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 },
@@ -6029,7 +6784,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -6043,7 +6803,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -6054,7 +6819,10 @@
                                   ]
                                 }
                               },
-                              "required": ["type", "mode"],
+                              "required": [
+                                "type",
+                                "mode"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6084,7 +6852,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["field-accuracy", "field_accuracy"]
+                                  "enum": [
+                                    "field-accuracy",
+                                    "field_accuracy"
+                                  ]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -6096,7 +6867,11 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": ["exact", "numeric_tolerance", "date"]
+                                        "enum": [
+                                          "exact",
+                                          "numeric_tolerance",
+                                          "date"
+                                        ]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -6118,17 +6893,26 @@
                                         }
                                       }
                                     },
-                                    "required": ["path", "match"],
+                                    "required": [
+                                      "path",
+                                      "match"
+                                    ],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": ["weighted_average", "all_or_nothing"]
+                                  "enum": [
+                                    "weighted_average",
+                                    "all_or_nothing"
+                                  ]
                                 }
                               },
-                              "required": ["type", "fields"],
+                              "required": [
+                                "type",
+                                "fields"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6165,7 +6949,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "threshold"],
+                              "required": [
+                                "type",
+                                "threshold"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6202,7 +6989,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "budget"],
+                              "required": [
+                                "type",
+                                "budget"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6232,7 +7022,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["token-usage", "token_usage"]
+                                  "enum": [
+                                    "token-usage",
+                                    "token_usage"
+                                  ]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -6247,7 +7040,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6277,7 +7072,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["execution-metrics", "execution_metrics"]
+                                  "enum": [
+                                    "execution-metrics",
+                                    "execution_metrics"
+                                  ]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -6309,7 +7107,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6339,7 +7139,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["agent-judge", "agent_judge"]
+                                  "enum": [
+                                    "agent-judge",
+                                    "agent_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "type": "string"
@@ -6393,7 +7196,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -6415,7 +7221,9 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6451,7 +7259,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6487,7 +7298,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6517,10 +7331,15 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["is-json", "is_json"]
+                                  "enum": [
+                                    "is-json",
+                                    "is_json"
+                                  ]
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6556,7 +7375,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -6637,7 +7459,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -6647,7 +7472,10 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": ["type", "criteria"],
+                              "required": [
+                                "type",
+                                "criteria"
+                              ],
                               "additionalProperties": false
                             }
                           ]
@@ -6668,7 +7496,11 @@
                           },
                           "strategy": {
                             "type": "string",
-                            "enum": ["pass_at_k", "mean", "confidence_interval"]
+                            "enum": [
+                              "pass_at_k",
+                              "mean",
+                              "confidence_interval"
+                            ]
                           },
                           "cost_limit_usd": {
                             "type": "number",
@@ -6679,7 +7511,9 @@
                             "minimum": 0
                           }
                         },
-                        "required": ["count"],
+                        "required": [
+                          "count"
+                        ],
                         "additionalProperties": false
                       },
                       "total_budget_usd": {
@@ -6707,7 +7541,10 @@
                       },
                       "isolation": {
                         "type": "string",
-                        "enum": ["shared", "per_test"]
+                        "enum": [
+                          "shared",
+                          "per_test"
+                        ]
                       },
                       "repos": {
                         "type": "array",
@@ -6731,7 +7568,10 @@
                                       "format": "uri"
                                     }
                                   },
-                                  "required": ["type", "url"],
+                                  "required": [
+                                    "type",
+                                    "url"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -6745,7 +7585,10 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type", "path"],
+                                  "required": [
+                                    "type",
+                                    "path"
+                                  ],
                                   "additionalProperties": false
                                 }
                               ]
@@ -6758,7 +7601,10 @@
                                 },
                                 "resolve": {
                                   "type": "string",
-                                  "enum": ["remote", "local"]
+                                  "enum": [
+                                    "remote",
+                                    "local"
+                                  ]
                                 },
                                 "ancestor": {
                                   "type": "integer",
@@ -6787,7 +7633,10 @@
                               "additionalProperties": false
                             }
                           },
-                          "required": ["path", "source"],
+                          "required": [
+                            "path",
+                            "source"
+                          ],
                           "additionalProperties": false
                         }
                       },
@@ -6823,7 +7672,11 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": ["none", "fast", "strict"]
+                                "enum": [
+                                  "none",
+                                  "fast",
+                                  "strict"
+                                ]
                               }
                             },
                             "additionalProperties": false
@@ -6854,7 +7707,11 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": ["none", "fast", "strict"]
+                                "enum": [
+                                  "none",
+                                  "fast",
+                                  "strict"
+                                ]
                               }
                             },
                             "additionalProperties": false
@@ -6885,7 +7742,11 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": ["none", "fast", "strict"]
+                                "enum": [
+                                  "none",
+                                  "fast",
+                                  "strict"
+                                ]
                               }
                             },
                             "additionalProperties": false
@@ -6916,7 +7777,11 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": ["none", "fast", "strict"]
+                                "enum": [
+                                  "none",
+                                  "fast",
+                                  "strict"
+                                ]
                               }
                             },
                             "additionalProperties": false
@@ -6926,7 +7791,11 @@
                       },
                       "mode": {
                         "type": "string",
-                        "enum": ["pooled", "temp", "static"]
+                        "enum": [
+                          "pooled",
+                          "temp",
+                          "static"
+                        ]
                       },
                       "path": {
                         "type": "string"
@@ -6948,7 +7817,9 @@
                     "type": "string"
                   }
                 },
-                "required": ["id"],
+                "required": [
+                  "id"
+                ],
                 "additionalProperties": false
               }
             },
@@ -6986,7 +7857,12 @@
                           "properties": {
                             "role": {
                               "type": "string",
-                              "enum": ["system", "user", "assistant", "tool"]
+                              "enum": [
+                                "system",
+                                "user",
+                                "assistant",
+                                "tool"
+                              ]
                             },
                             "content": {
                               "anyOf": [
@@ -7000,20 +7876,29 @@
                                     "properties": {
                                       "type": {
                                         "type": "string",
-                                        "enum": ["text", "file"]
+                                        "enum": [
+                                          "text",
+                                          "file"
+                                        ]
                                       },
                                       "value": {
                                         "type": "string"
                                       }
                                     },
-                                    "required": ["type", "value"],
+                                    "required": [
+                                      "type",
+                                      "value"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 }
                               ]
                             }
                           },
-                          "required": ["role", "content"],
+                          "required": [
+                            "role",
+                            "content"
+                          ],
                           "additionalProperties": false
                         }
                       }
@@ -7035,7 +7920,12 @@
                           "properties": {
                             "role": {
                               "type": "string",
-                              "enum": ["system", "user", "assistant", "tool"]
+                              "enum": [
+                                "system",
+                                "user",
+                                "assistant",
+                                "tool"
+                              ]
                             },
                             "content": {
                               "anyOf": [
@@ -7049,20 +7939,29 @@
                                     "properties": {
                                       "type": {
                                         "type": "string",
-                                        "enum": ["text", "file"]
+                                        "enum": [
+                                          "text",
+                                          "file"
+                                        ]
                                       },
                                       "value": {
                                         "type": "string"
                                       }
                                     },
-                                    "required": ["type", "value"],
+                                    "required": [
+                                      "type",
+                                      "value"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 }
                               ]
                             }
                           },
-                          "required": ["role", "content"],
+                          "required": [
+                            "role",
+                            "content"
+                          ],
                           "additionalProperties": false
                         }
                       }
@@ -7099,7 +7998,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["code-judge", "code_judge"]
+                              "enum": [
+                                "code-judge",
+                                "code_judge"
+                              ]
                             },
                             "command": {
                               "anyOf": [
@@ -7151,7 +8053,10 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": ["type", "command"],
+                          "required": [
+                            "type",
+                            "command"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7181,7 +8086,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["llm-judge", "llm_judge"]
+                              "enum": [
+                                "llm-judge",
+                                "llm_judge"
+                              ]
                             },
                             "prompt": {
                               "anyOf": [
@@ -7275,7 +8183,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -7292,9 +8203,21 @@
                             "config": {
                               "type": "object",
                               "additionalProperties": {}
+                            },
+                            "max_steps": {
+                              "type": "integer",
+                              "minimum": 1,
+                              "maximum": 50
+                            },
+                            "temperature": {
+                              "type": "number",
+                              "minimum": 0,
+                              "maximum": 2
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7354,7 +8277,9 @@
                                       }
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -7370,7 +8295,10 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": ["type", "threshold"],
+                                  "required": [
+                                    "type",
+                                    "threshold"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -7387,7 +8315,10 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type", "path"],
+                                  "required": [
+                                    "type",
+                                    "path"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -7404,13 +8335,18 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": ["type", "aggregator"],
+                          "required": [
+                            "type",
+                            "aggregator"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7440,11 +8376,20 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["tool-trajectory", "tool_trajectory"]
+                              "enum": [
+                                "tool-trajectory",
+                                "tool_trajectory"
+                              ]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                              "enum": [
+                                "any_order",
+                                "in_order",
+                                "exact",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             "minimums": {
                               "type": "object",
@@ -7485,7 +8430,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -7499,7 +8449,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -7510,7 +8465,9 @@
                                     ]
                                   }
                                 },
-                                "required": ["tool"],
+                                "required": [
+                                  "tool"
+                                ],
                                 "additionalProperties": false
                               }
                             },
@@ -7518,7 +8475,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -7532,7 +8494,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -7543,7 +8510,10 @@
                               ]
                             }
                           },
-                          "required": ["type", "mode"],
+                          "required": [
+                            "type",
+                            "mode"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7573,7 +8543,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["field-accuracy", "field_accuracy"]
+                              "enum": [
+                                "field-accuracy",
+                                "field_accuracy"
+                              ]
                             },
                             "fields": {
                               "type": "array",
@@ -7585,7 +8558,11 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": ["exact", "numeric_tolerance", "date"]
+                                    "enum": [
+                                      "exact",
+                                      "numeric_tolerance",
+                                      "date"
+                                    ]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -7607,17 +8584,26 @@
                                     }
                                   }
                                 },
-                                "required": ["path", "match"],
+                                "required": [
+                                  "path",
+                                  "match"
+                                ],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": ["weighted_average", "all_or_nothing"]
+                              "enum": [
+                                "weighted_average",
+                                "all_or_nothing"
+                              ]
                             }
                           },
-                          "required": ["type", "fields"],
+                          "required": [
+                            "type",
+                            "fields"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7654,7 +8640,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "threshold"],
+                          "required": [
+                            "type",
+                            "threshold"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7691,7 +8680,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "budget"],
+                          "required": [
+                            "type",
+                            "budget"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7721,7 +8713,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["token-usage", "token_usage"]
+                              "enum": [
+                                "token-usage",
+                                "token_usage"
+                              ]
                             },
                             "max_total": {
                               "type": "number",
@@ -7736,7 +8731,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7766,7 +8763,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["execution-metrics", "execution_metrics"]
+                              "enum": [
+                                "execution-metrics",
+                                "execution_metrics"
+                              ]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -7798,7 +8798,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7828,7 +8830,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["agent-judge", "agent_judge"]
+                              "enum": [
+                                "agent-judge",
+                                "agent_judge"
+                              ]
                             },
                             "prompt": {
                               "type": "string"
@@ -7882,7 +8887,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -7904,7 +8912,9 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7940,7 +8950,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -7976,7 +8989,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8006,10 +9022,15 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["is-json", "is_json"]
+                              "enum": [
+                                "is-json",
+                                "is_json"
+                              ]
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8045,7 +9066,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8126,7 +9150,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -8136,7 +9163,10 @@
                               "minItems": 1
                             }
                           },
-                          "required": ["type", "criteria"],
+                          "required": [
+                            "type",
+                            "criteria"
+                          ],
                           "additionalProperties": false
                         }
                       ]
@@ -8173,7 +9203,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["code-judge", "code_judge"]
+                              "enum": [
+                                "code-judge",
+                                "code_judge"
+                              ]
                             },
                             "command": {
                               "anyOf": [
@@ -8225,7 +9258,10 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": ["type", "command"],
+                          "required": [
+                            "type",
+                            "command"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8255,7 +9291,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["llm-judge", "llm_judge"]
+                              "enum": [
+                                "llm-judge",
+                                "llm_judge"
+                              ]
                             },
                             "prompt": {
                               "anyOf": [
@@ -8349,7 +9388,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -8366,9 +9408,21 @@
                             "config": {
                               "type": "object",
                               "additionalProperties": {}
+                            },
+                            "max_steps": {
+                              "type": "integer",
+                              "minimum": 1,
+                              "maximum": 50
+                            },
+                            "temperature": {
+                              "type": "number",
+                              "minimum": 0,
+                              "maximum": 2
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8428,7 +9482,9 @@
                                       }
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -8444,7 +9500,10 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": ["type", "threshold"],
+                                  "required": [
+                                    "type",
+                                    "threshold"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -8461,7 +9520,10 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type", "path"],
+                                  "required": [
+                                    "type",
+                                    "path"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -8478,13 +9540,18 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": ["type", "aggregator"],
+                          "required": [
+                            "type",
+                            "aggregator"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8514,11 +9581,20 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["tool-trajectory", "tool_trajectory"]
+                              "enum": [
+                                "tool-trajectory",
+                                "tool_trajectory"
+                              ]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                              "enum": [
+                                "any_order",
+                                "in_order",
+                                "exact",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             "minimums": {
                               "type": "object",
@@ -8559,7 +9635,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -8573,7 +9654,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -8584,7 +9670,9 @@
                                     ]
                                   }
                                 },
-                                "required": ["tool"],
+                                "required": [
+                                  "tool"
+                                ],
                                 "additionalProperties": false
                               }
                             },
@@ -8592,7 +9680,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -8606,7 +9699,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -8617,7 +9715,10 @@
                               ]
                             }
                           },
-                          "required": ["type", "mode"],
+                          "required": [
+                            "type",
+                            "mode"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8647,7 +9748,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["field-accuracy", "field_accuracy"]
+                              "enum": [
+                                "field-accuracy",
+                                "field_accuracy"
+                              ]
                             },
                             "fields": {
                               "type": "array",
@@ -8659,7 +9763,11 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": ["exact", "numeric_tolerance", "date"]
+                                    "enum": [
+                                      "exact",
+                                      "numeric_tolerance",
+                                      "date"
+                                    ]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -8681,17 +9789,26 @@
                                     }
                                   }
                                 },
-                                "required": ["path", "match"],
+                                "required": [
+                                  "path",
+                                  "match"
+                                ],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": ["weighted_average", "all_or_nothing"]
+                              "enum": [
+                                "weighted_average",
+                                "all_or_nothing"
+                              ]
                             }
                           },
-                          "required": ["type", "fields"],
+                          "required": [
+                            "type",
+                            "fields"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8728,7 +9845,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "threshold"],
+                          "required": [
+                            "type",
+                            "threshold"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8765,7 +9885,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "budget"],
+                          "required": [
+                            "type",
+                            "budget"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8795,7 +9918,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["token-usage", "token_usage"]
+                              "enum": [
+                                "token-usage",
+                                "token_usage"
+                              ]
                             },
                             "max_total": {
                               "type": "number",
@@ -8810,7 +9936,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8840,7 +9968,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["execution-metrics", "execution_metrics"]
+                              "enum": [
+                                "execution-metrics",
+                                "execution_metrics"
+                              ]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -8872,7 +10003,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -8902,7 +10035,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["agent-judge", "agent_judge"]
+                              "enum": [
+                                "agent-judge",
+                                "agent_judge"
+                              ]
                             },
                             "prompt": {
                               "type": "string"
@@ -8956,7 +10092,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -8978,7 +10117,9 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9014,7 +10155,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9050,7 +10194,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9080,10 +10227,15 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["is-json", "is_json"]
+                              "enum": [
+                                "is-json",
+                                "is_json"
+                              ]
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9119,7 +10271,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9200,7 +10355,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -9210,7 +10368,10 @@
                               "minItems": 1
                             }
                           },
-                          "required": ["type", "criteria"],
+                          "required": [
+                            "type",
+                            "criteria"
+                          ],
                           "additionalProperties": false
                         }
                       ]
@@ -9247,7 +10408,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["code-judge", "code_judge"]
+                              "enum": [
+                                "code-judge",
+                                "code_judge"
+                              ]
                             },
                             "command": {
                               "anyOf": [
@@ -9299,7 +10463,10 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": ["type", "command"],
+                          "required": [
+                            "type",
+                            "command"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9329,7 +10496,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["llm-judge", "llm_judge"]
+                              "enum": [
+                                "llm-judge",
+                                "llm_judge"
+                              ]
                             },
                             "prompt": {
                               "anyOf": [
@@ -9423,7 +10593,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -9440,9 +10613,21 @@
                             "config": {
                               "type": "object",
                               "additionalProperties": {}
+                            },
+                            "max_steps": {
+                              "type": "integer",
+                              "minimum": 1,
+                              "maximum": 50
+                            },
+                            "temperature": {
+                              "type": "number",
+                              "minimum": 0,
+                              "maximum": 2
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9502,7 +10687,9 @@
                                       }
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -9518,7 +10705,10 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": ["type", "threshold"],
+                                  "required": [
+                                    "type",
+                                    "threshold"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -9535,7 +10725,10 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type", "path"],
+                                  "required": [
+                                    "type",
+                                    "path"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -9552,13 +10745,18 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type"],
+                                  "required": [
+                                    "type"
+                                  ],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": ["type", "aggregator"],
+                          "required": [
+                            "type",
+                            "aggregator"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9588,11 +10786,20 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["tool-trajectory", "tool_trajectory"]
+                              "enum": [
+                                "tool-trajectory",
+                                "tool_trajectory"
+                              ]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                              "enum": [
+                                "any_order",
+                                "in_order",
+                                "exact",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             "minimums": {
                               "type": "object",
@@ -9633,7 +10840,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -9647,7 +10859,12 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": ["exact", "ignore", "subset", "superset"]
+                                        "enum": [
+                                          "exact",
+                                          "ignore",
+                                          "subset",
+                                          "superset"
+                                        ]
                                       },
                                       {
                                         "type": "array",
@@ -9658,7 +10875,9 @@
                                     ]
                                   }
                                 },
-                                "required": ["tool"],
+                                "required": [
+                                  "tool"
+                                ],
                                 "additionalProperties": false
                               }
                             },
@@ -9666,7 +10885,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -9680,7 +10904,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -9691,7 +10920,10 @@
                               ]
                             }
                           },
-                          "required": ["type", "mode"],
+                          "required": [
+                            "type",
+                            "mode"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9721,7 +10953,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["field-accuracy", "field_accuracy"]
+                              "enum": [
+                                "field-accuracy",
+                                "field_accuracy"
+                              ]
                             },
                             "fields": {
                               "type": "array",
@@ -9733,7 +10968,11 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": ["exact", "numeric_tolerance", "date"]
+                                    "enum": [
+                                      "exact",
+                                      "numeric_tolerance",
+                                      "date"
+                                    ]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -9755,17 +10994,26 @@
                                     }
                                   }
                                 },
-                                "required": ["path", "match"],
+                                "required": [
+                                  "path",
+                                  "match"
+                                ],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": ["weighted_average", "all_or_nothing"]
+                              "enum": [
+                                "weighted_average",
+                                "all_or_nothing"
+                              ]
                             }
                           },
-                          "required": ["type", "fields"],
+                          "required": [
+                            "type",
+                            "fields"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9802,7 +11050,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "threshold"],
+                          "required": [
+                            "type",
+                            "threshold"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9839,7 +11090,10 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type", "budget"],
+                          "required": [
+                            "type",
+                            "budget"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9869,7 +11123,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["token-usage", "token_usage"]
+                              "enum": [
+                                "token-usage",
+                                "token_usage"
+                              ]
                             },
                             "max_total": {
                               "type": "number",
@@ -9884,7 +11141,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9914,7 +11173,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["execution-metrics", "execution_metrics"]
+                              "enum": [
+                                "execution-metrics",
+                                "execution_metrics"
+                              ]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -9946,7 +11208,9 @@
                               "minimum": 0
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -9976,7 +11240,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["agent-judge", "agent_judge"]
+                              "enum": [
+                                "agent-judge",
+                                "agent_judge"
+                              ]
                             },
                             "prompt": {
                               "type": "string"
@@ -10030,7 +11297,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -10052,7 +11322,9 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -10088,7 +11360,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -10124,7 +11399,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -10154,10 +11432,15 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": ["is-json", "is_json"]
+                              "enum": [
+                                "is-json",
+                                "is_json"
+                              ]
                             }
                           },
-                          "required": ["type"],
+                          "required": [
+                            "type"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -10193,7 +11476,10 @@
                               "type": "string"
                             }
                           },
-                          "required": ["type", "value"],
+                          "required": [
+                            "type",
+                            "value"
+                          ],
                           "additionalProperties": false
                         },
                         {
@@ -10274,7 +11560,10 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": ["score_range", "outcome"],
+                                      "required": [
+                                        "score_range",
+                                        "outcome"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   }
@@ -10284,7 +11573,10 @@
                               "minItems": 1
                             }
                           },
-                          "required": ["type", "criteria"],
+                          "required": [
+                            "type",
+                            "criteria"
+                          ],
                           "additionalProperties": false
                         }
                       ]
@@ -10333,7 +11625,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["code-judge", "code_judge"]
+                                  "enum": [
+                                    "code-judge",
+                                    "code_judge"
+                                  ]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -10385,7 +11680,10 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": ["type", "command"],
+                              "required": [
+                                "type",
+                                "command"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -10415,7 +11713,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["llm-judge", "llm_judge"]
+                                  "enum": [
+                                    "llm-judge",
+                                    "llm_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -10509,7 +11810,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -10526,9 +11830,21 @@
                                 "config": {
                                   "type": "object",
                                   "additionalProperties": {}
+                                },
+                                "max_steps": {
+                                  "type": "integer",
+                                  "minimum": 1,
+                                  "maximum": 50
+                                },
+                                "temperature": {
+                                  "type": "number",
+                                  "minimum": 0,
+                                  "maximum": 2
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -10588,7 +11904,9 @@
                                           }
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -10604,7 +11922,10 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": ["type", "threshold"],
+                                      "required": [
+                                        "type",
+                                        "threshold"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -10621,7 +11942,10 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type", "path"],
+                                      "required": [
+                                        "type",
+                                        "path"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -10638,13 +11962,18 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": ["type", "aggregator"],
+                              "required": [
+                                "type",
+                                "aggregator"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -10674,11 +12003,20 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["tool-trajectory", "tool_trajectory"]
+                                  "enum": [
+                                    "tool-trajectory",
+                                    "tool_trajectory"
+                                  ]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                                  "enum": [
+                                    "any_order",
+                                    "in_order",
+                                    "exact",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -10719,7 +12057,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -10733,7 +12076,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -10744,7 +12092,9 @@
                                         ]
                                       }
                                     },
-                                    "required": ["tool"],
+                                    "required": [
+                                      "tool"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 },
@@ -10752,7 +12102,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -10766,7 +12121,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -10777,7 +12137,10 @@
                                   ]
                                 }
                               },
-                              "required": ["type", "mode"],
+                              "required": [
+                                "type",
+                                "mode"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -10807,7 +12170,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["field-accuracy", "field_accuracy"]
+                                  "enum": [
+                                    "field-accuracy",
+                                    "field_accuracy"
+                                  ]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -10819,7 +12185,11 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": ["exact", "numeric_tolerance", "date"]
+                                        "enum": [
+                                          "exact",
+                                          "numeric_tolerance",
+                                          "date"
+                                        ]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -10841,17 +12211,26 @@
                                         }
                                       }
                                     },
-                                    "required": ["path", "match"],
+                                    "required": [
+                                      "path",
+                                      "match"
+                                    ],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": ["weighted_average", "all_or_nothing"]
+                                  "enum": [
+                                    "weighted_average",
+                                    "all_or_nothing"
+                                  ]
                                 }
                               },
-                              "required": ["type", "fields"],
+                              "required": [
+                                "type",
+                                "fields"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -10888,7 +12267,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "threshold"],
+                              "required": [
+                                "type",
+                                "threshold"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -10925,7 +12307,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "budget"],
+                              "required": [
+                                "type",
+                                "budget"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -10955,7 +12340,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["token-usage", "token_usage"]
+                                  "enum": [
+                                    "token-usage",
+                                    "token_usage"
+                                  ]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -10970,7 +12358,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11000,7 +12390,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["execution-metrics", "execution_metrics"]
+                                  "enum": [
+                                    "execution-metrics",
+                                    "execution_metrics"
+                                  ]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -11032,7 +12425,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11062,7 +12457,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["agent-judge", "agent_judge"]
+                                  "enum": [
+                                    "agent-judge",
+                                    "agent_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "type": "string"
@@ -11116,7 +12514,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -11138,7 +12539,9 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11174,7 +12577,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11210,7 +12616,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11240,10 +12649,15 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["is-json", "is_json"]
+                                  "enum": [
+                                    "is-json",
+                                    "is_json"
+                                  ]
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11279,7 +12693,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11360,7 +12777,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -11370,7 +12790,10 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": ["type", "criteria"],
+                              "required": [
+                                "type",
+                                "criteria"
+                              ],
                               "additionalProperties": false
                             }
                           ]
@@ -11407,7 +12830,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["code-judge", "code_judge"]
+                                  "enum": [
+                                    "code-judge",
+                                    "code_judge"
+                                  ]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -11459,7 +12885,10 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": ["type", "command"],
+                              "required": [
+                                "type",
+                                "command"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11489,7 +12918,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["llm-judge", "llm_judge"]
+                                  "enum": [
+                                    "llm-judge",
+                                    "llm_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -11583,7 +13015,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -11600,9 +13035,21 @@
                                 "config": {
                                   "type": "object",
                                   "additionalProperties": {}
+                                },
+                                "max_steps": {
+                                  "type": "integer",
+                                  "minimum": 1,
+                                  "maximum": 50
+                                },
+                                "temperature": {
+                                  "type": "number",
+                                  "minimum": 0,
+                                  "maximum": 2
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11662,7 +13109,9 @@
                                           }
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -11678,7 +13127,10 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": ["type", "threshold"],
+                                      "required": [
+                                        "type",
+                                        "threshold"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -11695,7 +13147,10 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type", "path"],
+                                      "required": [
+                                        "type",
+                                        "path"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -11712,13 +13167,18 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": ["type", "aggregator"],
+                              "required": [
+                                "type",
+                                "aggregator"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11748,11 +13208,20 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["tool-trajectory", "tool_trajectory"]
+                                  "enum": [
+                                    "tool-trajectory",
+                                    "tool_trajectory"
+                                  ]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                                  "enum": [
+                                    "any_order",
+                                    "in_order",
+                                    "exact",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -11793,7 +13262,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -11807,7 +13281,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -11818,7 +13297,9 @@
                                         ]
                                       }
                                     },
-                                    "required": ["tool"],
+                                    "required": [
+                                      "tool"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 },
@@ -11826,7 +13307,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -11840,7 +13326,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -11851,7 +13342,10 @@
                                   ]
                                 }
                               },
-                              "required": ["type", "mode"],
+                              "required": [
+                                "type",
+                                "mode"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11881,7 +13375,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["field-accuracy", "field_accuracy"]
+                                  "enum": [
+                                    "field-accuracy",
+                                    "field_accuracy"
+                                  ]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -11893,7 +13390,11 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": ["exact", "numeric_tolerance", "date"]
+                                        "enum": [
+                                          "exact",
+                                          "numeric_tolerance",
+                                          "date"
+                                        ]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -11915,17 +13416,26 @@
                                         }
                                       }
                                     },
-                                    "required": ["path", "match"],
+                                    "required": [
+                                      "path",
+                                      "match"
+                                    ],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": ["weighted_average", "all_or_nothing"]
+                                  "enum": [
+                                    "weighted_average",
+                                    "all_or_nothing"
+                                  ]
                                 }
                               },
-                              "required": ["type", "fields"],
+                              "required": [
+                                "type",
+                                "fields"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11962,7 +13472,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "threshold"],
+                              "required": [
+                                "type",
+                                "threshold"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -11999,7 +13512,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "budget"],
+                              "required": [
+                                "type",
+                                "budget"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12029,7 +13545,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["token-usage", "token_usage"]
+                                  "enum": [
+                                    "token-usage",
+                                    "token_usage"
+                                  ]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -12044,7 +13563,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12074,7 +13595,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["execution-metrics", "execution_metrics"]
+                                  "enum": [
+                                    "execution-metrics",
+                                    "execution_metrics"
+                                  ]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -12106,7 +13630,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12136,7 +13662,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["agent-judge", "agent_judge"]
+                                  "enum": [
+                                    "agent-judge",
+                                    "agent_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "type": "string"
@@ -12190,7 +13719,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -12212,7 +13744,9 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12248,7 +13782,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12284,7 +13821,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12314,10 +13854,15 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["is-json", "is_json"]
+                                  "enum": [
+                                    "is-json",
+                                    "is_json"
+                                  ]
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12353,7 +13898,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12434,7 +13982,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -12444,7 +13995,10 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": ["type", "criteria"],
+                              "required": [
+                                "type",
+                                "criteria"
+                              ],
                               "additionalProperties": false
                             }
                           ]
@@ -12481,7 +14035,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["code-judge", "code_judge"]
+                                  "enum": [
+                                    "code-judge",
+                                    "code_judge"
+                                  ]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -12533,7 +14090,10 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": ["type", "command"],
+                              "required": [
+                                "type",
+                                "command"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12563,7 +14123,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["llm-judge", "llm_judge"]
+                                  "enum": [
+                                    "llm-judge",
+                                    "llm_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -12657,7 +14220,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -12674,9 +14240,21 @@
                                 "config": {
                                   "type": "object",
                                   "additionalProperties": {}
+                                },
+                                "max_steps": {
+                                  "type": "integer",
+                                  "minimum": 1,
+                                  "maximum": 50
+                                },
+                                "temperature": {
+                                  "type": "number",
+                                  "minimum": 0,
+                                  "maximum": 2
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12736,7 +14314,9 @@
                                           }
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -12752,7 +14332,10 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": ["type", "threshold"],
+                                      "required": [
+                                        "type",
+                                        "threshold"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -12769,7 +14352,10 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type", "path"],
+                                      "required": [
+                                        "type",
+                                        "path"
+                                      ],
                                       "additionalProperties": false
                                     },
                                     {
@@ -12786,13 +14372,18 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": ["type"],
+                                      "required": [
+                                        "type"
+                                      ],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": ["type", "aggregator"],
+                              "required": [
+                                "type",
+                                "aggregator"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12822,11 +14413,20 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["tool-trajectory", "tool_trajectory"]
+                                  "enum": [
+                                    "tool-trajectory",
+                                    "tool_trajectory"
+                                  ]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                                  "enum": [
+                                    "any_order",
+                                    "in_order",
+                                    "exact",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -12867,7 +14467,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -12881,7 +14486,12 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": ["exact", "ignore", "subset", "superset"]
+                                            "enum": [
+                                              "exact",
+                                              "ignore",
+                                              "subset",
+                                              "superset"
+                                            ]
                                           },
                                           {
                                             "type": "array",
@@ -12892,7 +14502,9 @@
                                         ]
                                       }
                                     },
-                                    "required": ["tool"],
+                                    "required": [
+                                      "tool"
+                                    ],
                                     "additionalProperties": false
                                   }
                                 },
@@ -12900,7 +14512,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -12914,7 +14531,12 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": ["exact", "ignore", "subset", "superset"]
+                                      "enum": [
+                                        "exact",
+                                        "ignore",
+                                        "subset",
+                                        "superset"
+                                      ]
                                     },
                                     {
                                       "type": "array",
@@ -12925,7 +14547,10 @@
                                   ]
                                 }
                               },
-                              "required": ["type", "mode"],
+                              "required": [
+                                "type",
+                                "mode"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -12955,7 +14580,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["field-accuracy", "field_accuracy"]
+                                  "enum": [
+                                    "field-accuracy",
+                                    "field_accuracy"
+                                  ]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -12967,7 +14595,11 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": ["exact", "numeric_tolerance", "date"]
+                                        "enum": [
+                                          "exact",
+                                          "numeric_tolerance",
+                                          "date"
+                                        ]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -12989,17 +14621,26 @@
                                         }
                                       }
                                     },
-                                    "required": ["path", "match"],
+                                    "required": [
+                                      "path",
+                                      "match"
+                                    ],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": ["weighted_average", "all_or_nothing"]
+                                  "enum": [
+                                    "weighted_average",
+                                    "all_or_nothing"
+                                  ]
                                 }
                               },
-                              "required": ["type", "fields"],
+                              "required": [
+                                "type",
+                                "fields"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13036,7 +14677,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "threshold"],
+                              "required": [
+                                "type",
+                                "threshold"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13073,7 +14717,10 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type", "budget"],
+                              "required": [
+                                "type",
+                                "budget"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13103,7 +14750,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["token-usage", "token_usage"]
+                                  "enum": [
+                                    "token-usage",
+                                    "token_usage"
+                                  ]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -13118,7 +14768,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13148,7 +14800,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["execution-metrics", "execution_metrics"]
+                                  "enum": [
+                                    "execution-metrics",
+                                    "execution_metrics"
+                                  ]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -13180,7 +14835,9 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13210,7 +14867,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["agent-judge", "agent_judge"]
+                                  "enum": [
+                                    "agent-judge",
+                                    "agent_judge"
+                                  ]
                                 },
                                 "prompt": {
                                   "type": "string"
@@ -13264,7 +14924,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -13286,7 +14949,9 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13322,7 +14987,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13358,7 +15026,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13388,10 +15059,15 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": ["is-json", "is_json"]
+                                  "enum": [
+                                    "is-json",
+                                    "is_json"
+                                  ]
                                 }
                               },
-                              "required": ["type"],
+                              "required": [
+                                "type"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13427,7 +15103,10 @@
                                   "type": "string"
                                 }
                               },
-                              "required": ["type", "value"],
+                              "required": [
+                                "type",
+                                "value"
+                              ],
                               "additionalProperties": false
                             },
                             {
@@ -13508,7 +15187,10 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": ["score_range", "outcome"],
+                                          "required": [
+                                            "score_range",
+                                            "outcome"
+                                          ],
                                           "additionalProperties": false
                                         }
                                       }
@@ -13518,7 +15200,10 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": ["type", "criteria"],
+                              "required": [
+                                "type",
+                                "criteria"
+                              ],
                               "additionalProperties": false
                             }
                           ]
@@ -13539,7 +15224,11 @@
                           },
                           "strategy": {
                             "type": "string",
-                            "enum": ["pass_at_k", "mean", "confidence_interval"]
+                            "enum": [
+                              "pass_at_k",
+                              "mean",
+                              "confidence_interval"
+                            ]
                           },
                           "cost_limit_usd": {
                             "type": "number",
@@ -13550,7 +15239,9 @@
                             "minimum": 0
                           }
                         },
-                        "required": ["count"],
+                        "required": [
+                          "count"
+                        ],
                         "additionalProperties": false
                       },
                       "total_budget_usd": {
@@ -13578,7 +15269,10 @@
                       },
                       "isolation": {
                         "type": "string",
-                        "enum": ["shared", "per_test"]
+                        "enum": [
+                          "shared",
+                          "per_test"
+                        ]
                       },
                       "repos": {
                         "type": "array",
@@ -13602,7 +15296,10 @@
                                       "format": "uri"
                                     }
                                   },
-                                  "required": ["type", "url"],
+                                  "required": [
+                                    "type",
+                                    "url"
+                                  ],
                                   "additionalProperties": false
                                 },
                                 {
@@ -13616,7 +15313,10 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": ["type", "path"],
+                                  "required": [
+                                    "type",
+                                    "path"
+                                  ],
                                   "additionalProperties": false
                                 }
                               ]
@@ -13629,7 +15329,10 @@
                                 },
                                 "resolve": {
                                   "type": "string",
-                                  "enum": ["remote", "local"]
+                                  "enum": [
+                                    "remote",
+                                    "local"
+                                  ]
                                 },
                                 "ancestor": {
                                   "type": "integer",
@@ -13658,7 +15361,10 @@
                               "additionalProperties": false
                             }
                           },
-                          "required": ["path", "source"],
+                          "required": [
+                            "path",
+                            "source"
+                          ],
                           "additionalProperties": false
                         }
                       },
@@ -13694,7 +15400,11 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": ["none", "fast", "strict"]
+                                "enum": [
+                                  "none",
+                                  "fast",
+                                  "strict"
+                                ]
                               }
                             },
                             "additionalProperties": false
@@ -13725,7 +15435,11 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": ["none", "fast", "strict"]
+                                "enum": [
+                                  "none",
+                                  "fast",
+                                  "strict"
+                                ]
                               }
                             },
                             "additionalProperties": false
@@ -13756,7 +15470,11 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": ["none", "fast", "strict"]
+                                "enum": [
+                                  "none",
+                                  "fast",
+                                  "strict"
+                                ]
                               }
                             },
                             "additionalProperties": false
@@ -13787,7 +15505,11 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": ["none", "fast", "strict"]
+                                "enum": [
+                                  "none",
+                                  "fast",
+                                  "strict"
+                                ]
                               }
                             },
                             "additionalProperties": false
@@ -13797,7 +15519,11 @@
                       },
                       "mode": {
                         "type": "string",
-                        "enum": ["pooled", "temp", "static"]
+                        "enum": [
+                          "pooled",
+                          "temp",
+                          "static"
+                        ]
                       },
                       "path": {
                         "type": "string"
@@ -13819,7 +15545,9 @@
                     "type": "string"
                   }
                 },
-                "required": ["id"],
+                "required": [
+                  "id"
+                ],
                 "additionalProperties": false
               }
             },
@@ -13874,7 +15602,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["code-judge", "code_judge"]
+                        "enum": [
+                          "code-judge",
+                          "code_judge"
+                        ]
                       },
                       "command": {
                         "anyOf": [
@@ -13926,7 +15657,10 @@
                         "additionalProperties": {}
                       }
                     },
-                    "required": ["type", "command"],
+                    "required": [
+                      "type",
+                      "command"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -13956,7 +15690,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["llm-judge", "llm_judge"]
+                        "enum": [
+                          "llm-judge",
+                          "llm_judge"
+                        ]
                       },
                       "prompt": {
                         "anyOf": [
@@ -14050,7 +15787,10 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": ["score_range", "outcome"],
+                                "required": [
+                                  "score_range",
+                                  "outcome"
+                                ],
                                 "additionalProperties": false
                               }
                             }
@@ -14067,9 +15807,21 @@
                       "config": {
                         "type": "object",
                         "additionalProperties": {}
+                      },
+                      "max_steps": {
+                        "type": "integer",
+                        "minimum": 1,
+                        "maximum": 50
+                      },
+                      "temperature": {
+                        "type": "number",
+                        "minimum": 0,
+                        "maximum": 2
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14129,7 +15881,9 @@
                                 }
                               }
                             },
-                            "required": ["type"],
+                            "required": [
+                              "type"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -14145,7 +15899,10 @@
                                 "maximum": 1
                               }
                             },
-                            "required": ["type", "threshold"],
+                            "required": [
+                              "type",
+                              "threshold"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -14162,7 +15919,10 @@
                                 "type": "string"
                               }
                             },
-                            "required": ["type", "path"],
+                            "required": [
+                              "type",
+                              "path"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -14179,13 +15939,18 @@
                                 "type": "string"
                               }
                             },
-                            "required": ["type"],
+                            "required": [
+                              "type"
+                            ],
                             "additionalProperties": false
                           }
                         ]
                       }
                     },
-                    "required": ["type", "aggregator"],
+                    "required": [
+                      "type",
+                      "aggregator"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14215,11 +15980,20 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["tool-trajectory", "tool_trajectory"]
+                        "enum": [
+                          "tool-trajectory",
+                          "tool_trajectory"
+                        ]
                       },
                       "mode": {
                         "type": "string",
-                        "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                        "enum": [
+                          "any_order",
+                          "in_order",
+                          "exact",
+                          "subset",
+                          "superset"
+                        ]
                       },
                       "minimums": {
                         "type": "object",
@@ -14260,7 +16034,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -14274,7 +16053,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -14285,7 +16069,9 @@
                               ]
                             }
                           },
-                          "required": ["tool"],
+                          "required": [
+                            "tool"
+                          ],
                           "additionalProperties": false
                         }
                       },
@@ -14293,7 +16079,12 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": ["exact", "ignore", "subset", "superset"]
+                            "enum": [
+                              "exact",
+                              "ignore",
+                              "subset",
+                              "superset"
+                            ]
                           },
                           {
                             "type": "array",
@@ -14307,7 +16098,12 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": ["exact", "ignore", "subset", "superset"]
+                            "enum": [
+                              "exact",
+                              "ignore",
+                              "subset",
+                              "superset"
+                            ]
                           },
                           {
                             "type": "array",
@@ -14318,7 +16114,10 @@
                         ]
                       }
                     },
-                    "required": ["type", "mode"],
+                    "required": [
+                      "type",
+                      "mode"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14348,7 +16147,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["field-accuracy", "field_accuracy"]
+                        "enum": [
+                          "field-accuracy",
+                          "field_accuracy"
+                        ]
                       },
                       "fields": {
                         "type": "array",
@@ -14360,7 +16162,11 @@
                             },
                             "match": {
                               "type": "string",
-                              "enum": ["exact", "numeric_tolerance", "date"]
+                              "enum": [
+                                "exact",
+                                "numeric_tolerance",
+                                "date"
+                              ]
                             },
                             "required": {
                               "type": "boolean"
@@ -14382,17 +16188,26 @@
                               }
                             }
                           },
-                          "required": ["path", "match"],
+                          "required": [
+                            "path",
+                            "match"
+                          ],
                           "additionalProperties": false
                         },
                         "minItems": 1
                       },
                       "aggregation": {
                         "type": "string",
-                        "enum": ["weighted_average", "all_or_nothing"]
+                        "enum": [
+                          "weighted_average",
+                          "all_or_nothing"
+                        ]
                       }
                     },
-                    "required": ["type", "fields"],
+                    "required": [
+                      "type",
+                      "fields"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14429,7 +16244,10 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type", "threshold"],
+                    "required": [
+                      "type",
+                      "threshold"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14466,7 +16284,10 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type", "budget"],
+                    "required": [
+                      "type",
+                      "budget"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14496,7 +16317,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["token-usage", "token_usage"]
+                        "enum": [
+                          "token-usage",
+                          "token_usage"
+                        ]
                       },
                       "max_total": {
                         "type": "number",
@@ -14511,7 +16335,9 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14541,7 +16367,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["execution-metrics", "execution_metrics"]
+                        "enum": [
+                          "execution-metrics",
+                          "execution_metrics"
+                        ]
                       },
                       "max_tool_calls": {
                         "type": "number",
@@ -14573,7 +16402,9 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14603,7 +16434,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["agent-judge", "agent_judge"]
+                        "enum": [
+                          "agent-judge",
+                          "agent_judge"
+                        ]
                       },
                       "prompt": {
                         "type": "string"
@@ -14657,7 +16491,10 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": ["score_range", "outcome"],
+                                "required": [
+                                  "score_range",
+                                  "outcome"
+                                ],
                                 "additionalProperties": false
                               }
                             }
@@ -14679,7 +16516,9 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14715,7 +16554,10 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type", "value"],
+                    "required": [
+                      "type",
+                      "value"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14751,7 +16593,10 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type", "value"],
+                    "required": [
+                      "type",
+                      "value"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14781,10 +16626,15 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["is-json", "is_json"]
+                        "enum": [
+                          "is-json",
+                          "is_json"
+                        ]
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14820,7 +16670,10 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type", "value"],
+                    "required": [
+                      "type",
+                      "value"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -14901,7 +16754,10 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": ["score_range", "outcome"],
+                                "required": [
+                                  "score_range",
+                                  "outcome"
+                                ],
                                 "additionalProperties": false
                               }
                             }
@@ -14911,7 +16767,10 @@
                         "minItems": 1
                       }
                     },
-                    "required": ["type", "criteria"],
+                    "required": [
+                      "type",
+                      "criteria"
+                    ],
                     "additionalProperties": false
                   }
                 ]
@@ -14948,7 +16807,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["code-judge", "code_judge"]
+                        "enum": [
+                          "code-judge",
+                          "code_judge"
+                        ]
                       },
                       "command": {
                         "anyOf": [
@@ -15000,7 +16862,10 @@
                         "additionalProperties": {}
                       }
                     },
-                    "required": ["type", "command"],
+                    "required": [
+                      "type",
+                      "command"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15030,7 +16895,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["llm-judge", "llm_judge"]
+                        "enum": [
+                          "llm-judge",
+                          "llm_judge"
+                        ]
                       },
                       "prompt": {
                         "anyOf": [
@@ -15124,7 +16992,10 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": ["score_range", "outcome"],
+                                "required": [
+                                  "score_range",
+                                  "outcome"
+                                ],
                                 "additionalProperties": false
                               }
                             }
@@ -15141,9 +17012,21 @@
                       "config": {
                         "type": "object",
                         "additionalProperties": {}
+                      },
+                      "max_steps": {
+                        "type": "integer",
+                        "minimum": 1,
+                        "maximum": 50
+                      },
+                      "temperature": {
+                        "type": "number",
+                        "minimum": 0,
+                        "maximum": 2
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15203,7 +17086,9 @@
                                 }
                               }
                             },
-                            "required": ["type"],
+                            "required": [
+                              "type"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -15219,7 +17104,10 @@
                                 "maximum": 1
                               }
                             },
-                            "required": ["type", "threshold"],
+                            "required": [
+                              "type",
+                              "threshold"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -15236,7 +17124,10 @@
                                 "type": "string"
                               }
                             },
-                            "required": ["type", "path"],
+                            "required": [
+                              "type",
+                              "path"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -15253,13 +17144,18 @@
                                 "type": "string"
                               }
                             },
-                            "required": ["type"],
+                            "required": [
+                              "type"
+                            ],
                             "additionalProperties": false
                           }
                         ]
                       }
                     },
-                    "required": ["type", "aggregator"],
+                    "required": [
+                      "type",
+                      "aggregator"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15289,11 +17185,20 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["tool-trajectory", "tool_trajectory"]
+                        "enum": [
+                          "tool-trajectory",
+                          "tool_trajectory"
+                        ]
                       },
                       "mode": {
                         "type": "string",
-                        "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                        "enum": [
+                          "any_order",
+                          "in_order",
+                          "exact",
+                          "subset",
+                          "superset"
+                        ]
                       },
                       "minimums": {
                         "type": "object",
@@ -15334,7 +17239,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -15348,7 +17258,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -15359,7 +17274,9 @@
                               ]
                             }
                           },
-                          "required": ["tool"],
+                          "required": [
+                            "tool"
+                          ],
                           "additionalProperties": false
                         }
                       },
@@ -15367,7 +17284,12 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": ["exact", "ignore", "subset", "superset"]
+                            "enum": [
+                              "exact",
+                              "ignore",
+                              "subset",
+                              "superset"
+                            ]
                           },
                           {
                             "type": "array",
@@ -15381,7 +17303,12 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": ["exact", "ignore", "subset", "superset"]
+                            "enum": [
+                              "exact",
+                              "ignore",
+                              "subset",
+                              "superset"
+                            ]
                           },
                           {
                             "type": "array",
@@ -15392,7 +17319,10 @@
                         ]
                       }
                     },
-                    "required": ["type", "mode"],
+                    "required": [
+                      "type",
+                      "mode"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15422,7 +17352,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["field-accuracy", "field_accuracy"]
+                        "enum": [
+                          "field-accuracy",
+                          "field_accuracy"
+                        ]
                       },
                       "fields": {
                         "type": "array",
@@ -15434,7 +17367,11 @@
                             },
                             "match": {
                               "type": "string",
-                              "enum": ["exact", "numeric_tolerance", "date"]
+                              "enum": [
+                                "exact",
+                                "numeric_tolerance",
+                                "date"
+                              ]
                             },
                             "required": {
                               "type": "boolean"
@@ -15456,17 +17393,26 @@
                               }
                             }
                           },
-                          "required": ["path", "match"],
+                          "required": [
+                            "path",
+                            "match"
+                          ],
                           "additionalProperties": false
                         },
                         "minItems": 1
                       },
                       "aggregation": {
                         "type": "string",
-                        "enum": ["weighted_average", "all_or_nothing"]
+                        "enum": [
+                          "weighted_average",
+                          "all_or_nothing"
+                        ]
                       }
                     },
-                    "required": ["type", "fields"],
+                    "required": [
+                      "type",
+                      "fields"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15503,7 +17449,10 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type", "threshold"],
+                    "required": [
+                      "type",
+                      "threshold"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15540,7 +17489,10 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type", "budget"],
+                    "required": [
+                      "type",
+                      "budget"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15570,7 +17522,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["token-usage", "token_usage"]
+                        "enum": [
+                          "token-usage",
+                          "token_usage"
+                        ]
                       },
                       "max_total": {
                         "type": "number",
@@ -15585,7 +17540,9 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15615,7 +17572,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["execution-metrics", "execution_metrics"]
+                        "enum": [
+                          "execution-metrics",
+                          "execution_metrics"
+                        ]
                       },
                       "max_tool_calls": {
                         "type": "number",
@@ -15647,7 +17607,9 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15677,7 +17639,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["agent-judge", "agent_judge"]
+                        "enum": [
+                          "agent-judge",
+                          "agent_judge"
+                        ]
                       },
                       "prompt": {
                         "type": "string"
@@ -15731,7 +17696,10 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": ["score_range", "outcome"],
+                                "required": [
+                                  "score_range",
+                                  "outcome"
+                                ],
                                 "additionalProperties": false
                               }
                             }
@@ -15753,7 +17721,9 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15789,7 +17759,10 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type", "value"],
+                    "required": [
+                      "type",
+                      "value"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15825,7 +17798,10 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type", "value"],
+                    "required": [
+                      "type",
+                      "value"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15855,10 +17831,15 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["is-json", "is_json"]
+                        "enum": [
+                          "is-json",
+                          "is_json"
+                        ]
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15894,7 +17875,10 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type", "value"],
+                    "required": [
+                      "type",
+                      "value"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -15975,7 +17959,10 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": ["score_range", "outcome"],
+                                "required": [
+                                  "score_range",
+                                  "outcome"
+                                ],
                                 "additionalProperties": false
                               }
                             }
@@ -15985,7 +17972,10 @@
                         "minItems": 1
                       }
                     },
-                    "required": ["type", "criteria"],
+                    "required": [
+                      "type",
+                      "criteria"
+                    ],
                     "additionalProperties": false
                   }
                 ]
@@ -16022,7 +18012,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["code-judge", "code_judge"]
+                        "enum": [
+                          "code-judge",
+                          "code_judge"
+                        ]
                       },
                       "command": {
                         "anyOf": [
@@ -16074,7 +18067,10 @@
                         "additionalProperties": {}
                       }
                     },
-                    "required": ["type", "command"],
+                    "required": [
+                      "type",
+                      "command"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16104,7 +18100,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["llm-judge", "llm_judge"]
+                        "enum": [
+                          "llm-judge",
+                          "llm_judge"
+                        ]
                       },
                       "prompt": {
                         "anyOf": [
@@ -16198,7 +18197,10 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": ["score_range", "outcome"],
+                                "required": [
+                                  "score_range",
+                                  "outcome"
+                                ],
                                 "additionalProperties": false
                               }
                             }
@@ -16215,9 +18217,21 @@
                       "config": {
                         "type": "object",
                         "additionalProperties": {}
+                      },
+                      "max_steps": {
+                        "type": "integer",
+                        "minimum": 1,
+                        "maximum": 50
+                      },
+                      "temperature": {
+                        "type": "number",
+                        "minimum": 0,
+                        "maximum": 2
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16277,7 +18291,9 @@
                                 }
                               }
                             },
-                            "required": ["type"],
+                            "required": [
+                              "type"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -16293,7 +18309,10 @@
                                 "maximum": 1
                               }
                             },
-                            "required": ["type", "threshold"],
+                            "required": [
+                              "type",
+                              "threshold"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -16310,7 +18329,10 @@
                                 "type": "string"
                               }
                             },
-                            "required": ["type", "path"],
+                            "required": [
+                              "type",
+                              "path"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -16327,13 +18349,18 @@
                                 "type": "string"
                               }
                             },
-                            "required": ["type"],
+                            "required": [
+                              "type"
+                            ],
                             "additionalProperties": false
                           }
                         ]
                       }
                     },
-                    "required": ["type", "aggregator"],
+                    "required": [
+                      "type",
+                      "aggregator"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16363,11 +18390,20 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["tool-trajectory", "tool_trajectory"]
+                        "enum": [
+                          "tool-trajectory",
+                          "tool_trajectory"
+                        ]
                       },
                       "mode": {
                         "type": "string",
-                        "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                        "enum": [
+                          "any_order",
+                          "in_order",
+                          "exact",
+                          "subset",
+                          "superset"
+                        ]
                       },
                       "minimums": {
                         "type": "object",
@@ -16408,7 +18444,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -16422,7 +18463,12 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": ["exact", "ignore", "subset", "superset"]
+                                  "enum": [
+                                    "exact",
+                                    "ignore",
+                                    "subset",
+                                    "superset"
+                                  ]
                                 },
                                 {
                                   "type": "array",
@@ -16433,7 +18479,9 @@
                               ]
                             }
                           },
-                          "required": ["tool"],
+                          "required": [
+                            "tool"
+                          ],
                           "additionalProperties": false
                         }
                       },
@@ -16441,7 +18489,12 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": ["exact", "ignore", "subset", "superset"]
+                            "enum": [
+                              "exact",
+                              "ignore",
+                              "subset",
+                              "superset"
+                            ]
                           },
                           {
                             "type": "array",
@@ -16455,7 +18508,12 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": ["exact", "ignore", "subset", "superset"]
+                            "enum": [
+                              "exact",
+                              "ignore",
+                              "subset",
+                              "superset"
+                            ]
                           },
                           {
                             "type": "array",
@@ -16466,7 +18524,10 @@
                         ]
                       }
                     },
-                    "required": ["type", "mode"],
+                    "required": [
+                      "type",
+                      "mode"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16496,7 +18557,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["field-accuracy", "field_accuracy"]
+                        "enum": [
+                          "field-accuracy",
+                          "field_accuracy"
+                        ]
                       },
                       "fields": {
                         "type": "array",
@@ -16508,7 +18572,11 @@
                             },
                             "match": {
                               "type": "string",
-                              "enum": ["exact", "numeric_tolerance", "date"]
+                              "enum": [
+                                "exact",
+                                "numeric_tolerance",
+                                "date"
+                              ]
                             },
                             "required": {
                               "type": "boolean"
@@ -16530,17 +18598,26 @@
                               }
                             }
                           },
-                          "required": ["path", "match"],
+                          "required": [
+                            "path",
+                            "match"
+                          ],
                           "additionalProperties": false
                         },
                         "minItems": 1
                       },
                       "aggregation": {
                         "type": "string",
-                        "enum": ["weighted_average", "all_or_nothing"]
+                        "enum": [
+                          "weighted_average",
+                          "all_or_nothing"
+                        ]
                       }
                     },
-                    "required": ["type", "fields"],
+                    "required": [
+                      "type",
+                      "fields"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16577,7 +18654,10 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type", "threshold"],
+                    "required": [
+                      "type",
+                      "threshold"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16614,7 +18694,10 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type", "budget"],
+                    "required": [
+                      "type",
+                      "budget"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16644,7 +18727,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["token-usage", "token_usage"]
+                        "enum": [
+                          "token-usage",
+                          "token_usage"
+                        ]
                       },
                       "max_total": {
                         "type": "number",
@@ -16659,7 +18745,9 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16689,7 +18777,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["execution-metrics", "execution_metrics"]
+                        "enum": [
+                          "execution-metrics",
+                          "execution_metrics"
+                        ]
                       },
                       "max_tool_calls": {
                         "type": "number",
@@ -16721,7 +18812,9 @@
                         "minimum": 0
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16751,7 +18844,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["agent-judge", "agent_judge"]
+                        "enum": [
+                          "agent-judge",
+                          "agent_judge"
+                        ]
                       },
                       "prompt": {
                         "type": "string"
@@ -16805,7 +18901,10 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": ["score_range", "outcome"],
+                                "required": [
+                                  "score_range",
+                                  "outcome"
+                                ],
                                 "additionalProperties": false
                               }
                             }
@@ -16827,7 +18926,9 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16863,7 +18964,10 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type", "value"],
+                    "required": [
+                      "type",
+                      "value"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16899,7 +19003,10 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type", "value"],
+                    "required": [
+                      "type",
+                      "value"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16929,10 +19036,15 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": ["is-json", "is_json"]
+                        "enum": [
+                          "is-json",
+                          "is_json"
+                        ]
                       }
                     },
-                    "required": ["type"],
+                    "required": [
+                      "type"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -16968,7 +19080,10 @@
                         "type": "string"
                       }
                     },
-                    "required": ["type", "value"],
+                    "required": [
+                      "type",
+                      "value"
+                    ],
                     "additionalProperties": false
                   },
                   {
@@ -17049,7 +19164,10 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": ["score_range", "outcome"],
+                                "required": [
+                                  "score_range",
+                                  "outcome"
+                                ],
                                 "additionalProperties": false
                               }
                             }
@@ -17059,7 +19177,10 @@
                         "minItems": 1
                       }
                     },
-                    "required": ["type", "criteria"],
+                    "required": [
+                      "type",
+                      "criteria"
+                    ],
                     "additionalProperties": false
                   }
                 ]
@@ -17080,7 +19201,11 @@
                 },
                 "strategy": {
                   "type": "string",
-                  "enum": ["pass_at_k", "mean", "confidence_interval"]
+                  "enum": [
+                    "pass_at_k",
+                    "mean",
+                    "confidence_interval"
+                  ]
                 },
                 "cost_limit_usd": {
                   "type": "number",
@@ -17091,7 +19216,9 @@
                   "minimum": 0
                 }
               },
-              "required": ["count"],
+              "required": [
+                "count"
+              ],
               "additionalProperties": false
             },
             "total_budget_usd": {
@@ -17142,7 +19269,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["code-judge", "code_judge"]
+                    "enum": [
+                      "code-judge",
+                      "code_judge"
+                    ]
                   },
                   "command": {
                     "anyOf": [
@@ -17194,7 +19324,10 @@
                     "additionalProperties": {}
                   }
                 },
-                "required": ["type", "command"],
+                "required": [
+                  "type",
+                  "command"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17224,7 +19357,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["llm-judge", "llm_judge"]
+                    "enum": [
+                      "llm-judge",
+                      "llm_judge"
+                    ]
                   },
                   "prompt": {
                     "anyOf": [
@@ -17318,7 +19454,10 @@
                                 "minLength": 1
                               }
                             },
-                            "required": ["score_range", "outcome"],
+                            "required": [
+                              "score_range",
+                              "outcome"
+                            ],
                             "additionalProperties": false
                           }
                         }
@@ -17335,9 +19474,21 @@
                   "config": {
                     "type": "object",
                     "additionalProperties": {}
+                  },
+                  "max_steps": {
+                    "type": "integer",
+                    "minimum": 1,
+                    "maximum": 50
+                  },
+                  "temperature": {
+                    "type": "number",
+                    "minimum": 0,
+                    "maximum": 2
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17397,7 +19548,9 @@
                             }
                           }
                         },
-                        "required": ["type"],
+                        "required": [
+                          "type"
+                        ],
                         "additionalProperties": false
                       },
                       {
@@ -17413,7 +19566,10 @@
                             "maximum": 1
                           }
                         },
-                        "required": ["type", "threshold"],
+                        "required": [
+                          "type",
+                          "threshold"
+                        ],
                         "additionalProperties": false
                       },
                       {
@@ -17430,7 +19586,10 @@
                             "type": "string"
                           }
                         },
-                        "required": ["type", "path"],
+                        "required": [
+                          "type",
+                          "path"
+                        ],
                         "additionalProperties": false
                       },
                       {
@@ -17447,13 +19606,18 @@
                             "type": "string"
                           }
                         },
-                        "required": ["type"],
+                        "required": [
+                          "type"
+                        ],
                         "additionalProperties": false
                       }
                     ]
                   }
                 },
-                "required": ["type", "aggregator"],
+                "required": [
+                  "type",
+                  "aggregator"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17483,11 +19647,20 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["tool-trajectory", "tool_trajectory"]
+                    "enum": [
+                      "tool-trajectory",
+                      "tool_trajectory"
+                    ]
                   },
                   "mode": {
                     "type": "string",
-                    "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                    "enum": [
+                      "any_order",
+                      "in_order",
+                      "exact",
+                      "subset",
+                      "superset"
+                    ]
                   },
                   "minimums": {
                     "type": "object",
@@ -17528,7 +19701,12 @@
                           "anyOf": [
                             {
                               "type": "string",
-                              "enum": ["exact", "ignore", "subset", "superset"]
+                              "enum": [
+                                "exact",
+                                "ignore",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             {
                               "type": "array",
@@ -17542,7 +19720,12 @@
                           "anyOf": [
                             {
                               "type": "string",
-                              "enum": ["exact", "ignore", "subset", "superset"]
+                              "enum": [
+                                "exact",
+                                "ignore",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             {
                               "type": "array",
@@ -17553,7 +19736,9 @@
                           ]
                         }
                       },
-                      "required": ["tool"],
+                      "required": [
+                        "tool"
+                      ],
                       "additionalProperties": false
                     }
                   },
@@ -17561,7 +19746,12 @@
                     "anyOf": [
                       {
                         "type": "string",
-                        "enum": ["exact", "ignore", "subset", "superset"]
+                        "enum": [
+                          "exact",
+                          "ignore",
+                          "subset",
+                          "superset"
+                        ]
                       },
                       {
                         "type": "array",
@@ -17575,7 +19765,12 @@
                     "anyOf": [
                       {
                         "type": "string",
-                        "enum": ["exact", "ignore", "subset", "superset"]
+                        "enum": [
+                          "exact",
+                          "ignore",
+                          "subset",
+                          "superset"
+                        ]
                       },
                       {
                         "type": "array",
@@ -17586,7 +19781,10 @@
                     ]
                   }
                 },
-                "required": ["type", "mode"],
+                "required": [
+                  "type",
+                  "mode"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17616,7 +19814,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["field-accuracy", "field_accuracy"]
+                    "enum": [
+                      "field-accuracy",
+                      "field_accuracy"
+                    ]
                   },
                   "fields": {
                     "type": "array",
@@ -17628,7 +19829,11 @@
                         },
                         "match": {
                           "type": "string",
-                          "enum": ["exact", "numeric_tolerance", "date"]
+                          "enum": [
+                            "exact",
+                            "numeric_tolerance",
+                            "date"
+                          ]
                         },
                         "required": {
                           "type": "boolean"
@@ -17650,17 +19855,26 @@
                           }
                         }
                       },
-                      "required": ["path", "match"],
+                      "required": [
+                        "path",
+                        "match"
+                      ],
                       "additionalProperties": false
                     },
                     "minItems": 1
                   },
                   "aggregation": {
                     "type": "string",
-                    "enum": ["weighted_average", "all_or_nothing"]
+                    "enum": [
+                      "weighted_average",
+                      "all_or_nothing"
+                    ]
                   }
                 },
-                "required": ["type", "fields"],
+                "required": [
+                  "type",
+                  "fields"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17697,7 +19911,10 @@
                     "minimum": 0
                   }
                 },
-                "required": ["type", "threshold"],
+                "required": [
+                  "type",
+                  "threshold"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17734,7 +19951,10 @@
                     "minimum": 0
                   }
                 },
-                "required": ["type", "budget"],
+                "required": [
+                  "type",
+                  "budget"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17764,7 +19984,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["token-usage", "token_usage"]
+                    "enum": [
+                      "token-usage",
+                      "token_usage"
+                    ]
                   },
                   "max_total": {
                     "type": "number",
@@ -17779,7 +20002,9 @@
                     "minimum": 0
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17809,7 +20034,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["execution-metrics", "execution_metrics"]
+                    "enum": [
+                      "execution-metrics",
+                      "execution_metrics"
+                    ]
                   },
                   "max_tool_calls": {
                     "type": "number",
@@ -17841,7 +20069,9 @@
                     "minimum": 0
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17871,7 +20101,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["agent-judge", "agent_judge"]
+                    "enum": [
+                      "agent-judge",
+                      "agent_judge"
+                    ]
                   },
                   "prompt": {
                     "type": "string"
@@ -17925,7 +20158,10 @@
                                 "minLength": 1
                               }
                             },
-                            "required": ["score_range", "outcome"],
+                            "required": [
+                              "score_range",
+                              "outcome"
+                            ],
                             "additionalProperties": false
                           }
                         }
@@ -17947,7 +20183,9 @@
                     "type": "string"
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -17983,7 +20221,10 @@
                     "type": "string"
                   }
                 },
-                "required": ["type", "value"],
+                "required": [
+                  "type",
+                  "value"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18019,7 +20260,10 @@
                     "type": "string"
                   }
                 },
-                "required": ["type", "value"],
+                "required": [
+                  "type",
+                  "value"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18049,10 +20293,15 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["is-json", "is_json"]
+                    "enum": [
+                      "is-json",
+                      "is_json"
+                    ]
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18088,7 +20337,10 @@
                     "type": "string"
                   }
                 },
-                "required": ["type", "value"],
+                "required": [
+                  "type",
+                  "value"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18169,7 +20421,10 @@
                                 "minLength": 1
                               }
                             },
-                            "required": ["score_range", "outcome"],
+                            "required": [
+                              "score_range",
+                              "outcome"
+                            ],
                             "additionalProperties": false
                           }
                         }
@@ -18179,7 +20434,10 @@
                     "minItems": 1
                   }
                 },
-                "required": ["type", "criteria"],
+                "required": [
+                  "type",
+                  "criteria"
+                ],
                 "additionalProperties": false
               }
             ]
@@ -18216,7 +20474,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["code-judge", "code_judge"]
+                    "enum": [
+                      "code-judge",
+                      "code_judge"
+                    ]
                   },
                   "command": {
                     "anyOf": [
@@ -18268,7 +20529,10 @@
                     "additionalProperties": {}
                   }
                 },
-                "required": ["type", "command"],
+                "required": [
+                  "type",
+                  "command"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18298,7 +20562,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["llm-judge", "llm_judge"]
+                    "enum": [
+                      "llm-judge",
+                      "llm_judge"
+                    ]
                   },
                   "prompt": {
                     "anyOf": [
@@ -18392,7 +20659,10 @@
                                 "minLength": 1
                               }
                             },
-                            "required": ["score_range", "outcome"],
+                            "required": [
+                              "score_range",
+                              "outcome"
+                            ],
                             "additionalProperties": false
                           }
                         }
@@ -18409,9 +20679,21 @@
                   "config": {
                     "type": "object",
                     "additionalProperties": {}
+                  },
+                  "max_steps": {
+                    "type": "integer",
+                    "minimum": 1,
+                    "maximum": 50
+                  },
+                  "temperature": {
+                    "type": "number",
+                    "minimum": 0,
+                    "maximum": 2
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18471,7 +20753,9 @@
                             }
                           }
                         },
-                        "required": ["type"],
+                        "required": [
+                          "type"
+                        ],
                         "additionalProperties": false
                       },
                       {
@@ -18487,7 +20771,10 @@
                             "maximum": 1
                           }
                         },
-                        "required": ["type", "threshold"],
+                        "required": [
+                          "type",
+                          "threshold"
+                        ],
                         "additionalProperties": false
                       },
                       {
@@ -18504,7 +20791,10 @@
                             "type": "string"
                           }
                         },
-                        "required": ["type", "path"],
+                        "required": [
+                          "type",
+                          "path"
+                        ],
                         "additionalProperties": false
                       },
                       {
@@ -18521,13 +20811,18 @@
                             "type": "string"
                           }
                         },
-                        "required": ["type"],
+                        "required": [
+                          "type"
+                        ],
                         "additionalProperties": false
                       }
                     ]
                   }
                 },
-                "required": ["type", "aggregator"],
+                "required": [
+                  "type",
+                  "aggregator"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18557,11 +20852,20 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["tool-trajectory", "tool_trajectory"]
+                    "enum": [
+                      "tool-trajectory",
+                      "tool_trajectory"
+                    ]
                   },
                   "mode": {
                     "type": "string",
-                    "enum": ["any_order", "in_order", "exact", "subset", "superset"]
+                    "enum": [
+                      "any_order",
+                      "in_order",
+                      "exact",
+                      "subset",
+                      "superset"
+                    ]
                   },
                   "minimums": {
                     "type": "object",
@@ -18602,7 +20906,12 @@
                           "anyOf": [
                             {
                               "type": "string",
-                              "enum": ["exact", "ignore", "subset", "superset"]
+                              "enum": [
+                                "exact",
+                                "ignore",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             {
                               "type": "array",
@@ -18616,7 +20925,12 @@
                           "anyOf": [
                             {
                               "type": "string",
-                              "enum": ["exact", "ignore", "subset", "superset"]
+                              "enum": [
+                                "exact",
+                                "ignore",
+                                "subset",
+                                "superset"
+                              ]
                             },
                             {
                               "type": "array",
@@ -18627,7 +20941,9 @@
                           ]
                         }
                       },
-                      "required": ["tool"],
+                      "required": [
+                        "tool"
+                      ],
                       "additionalProperties": false
                     }
                   },
@@ -18635,7 +20951,12 @@
                     "anyOf": [
                       {
                         "type": "string",
-                        "enum": ["exact", "ignore", "subset", "superset"]
+                        "enum": [
+                          "exact",
+                          "ignore",
+                          "subset",
+                          "superset"
+                        ]
                       },
                       {
                         "type": "array",
@@ -18649,7 +20970,12 @@
                     "anyOf": [
                       {
                         "type": "string",
-                        "enum": ["exact", "ignore", "subset", "superset"]
+                        "enum": [
+                          "exact",
+                          "ignore",
+                          "subset",
+                          "superset"
+                        ]
                       },
                       {
                         "type": "array",
@@ -18660,7 +20986,10 @@
                     ]
                   }
                 },
-                "required": ["type", "mode"],
+                "required": [
+                  "type",
+                  "mode"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18690,7 +21019,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["field-accuracy", "field_accuracy"]
+                    "enum": [
+                      "field-accuracy",
+                      "field_accuracy"
+                    ]
                   },
                   "fields": {
                     "type": "array",
@@ -18702,7 +21034,11 @@
                         },
                         "match": {
                           "type": "string",
-                          "enum": ["exact", "numeric_tolerance", "date"]
+                          "enum": [
+                            "exact",
+                            "numeric_tolerance",
+                            "date"
+                          ]
                         },
                         "required": {
                           "type": "boolean"
@@ -18724,17 +21060,26 @@
                           }
                         }
                       },
-                      "required": ["path", "match"],
+                      "required": [
+                        "path",
+                        "match"
+                      ],
                       "additionalProperties": false
                     },
                     "minItems": 1
                   },
                   "aggregation": {
                     "type": "string",
-                    "enum": ["weighted_average", "all_or_nothing"]
+                    "enum": [
+                      "weighted_average",
+                      "all_or_nothing"
+                    ]
                   }
                 },
-                "required": ["type", "fields"],
+                "required": [
+                  "type",
+                  "fields"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18771,7 +21116,10 @@
                     "minimum": 0
                   }
                 },
-                "required": ["type", "threshold"],
+                "required": [
+                  "type",
+                  "threshold"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18808,7 +21156,10 @@
                     "minimum": 0
                   }
                 },
-                "required": ["type", "budget"],
+                "required": [
+                  "type",
+                  "budget"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18838,7 +21189,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["token-usage", "token_usage"]
+                    "enum": [
+                      "token-usage",
+                      "token_usage"
+                    ]
                   },
                   "max_total": {
                     "type": "number",
@@ -18853,7 +21207,9 @@
                     "minimum": 0
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18883,7 +21239,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["execution-metrics", "execution_metrics"]
+                    "enum": [
+                      "execution-metrics",
+                      "execution_metrics"
+                    ]
                   },
                   "max_tool_calls": {
                     "type": "number",
@@ -18915,7 +21274,9 @@
                     "minimum": 0
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -18945,7 +21306,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["agent-judge", "agent_judge"]
+                    "enum": [
+                      "agent-judge",
+                      "agent_judge"
+                    ]
                   },
                   "prompt": {
                     "type": "string"
@@ -18999,7 +21363,10 @@
                                 "minLength": 1
                               }
                             },
-                            "required": ["score_range", "outcome"],
+                            "required": [
+                              "score_range",
+                              "outcome"
+                            ],
                             "additionalProperties": false
                           }
                         }
@@ -19021,7 +21388,9 @@
                     "type": "string"
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -19057,7 +21426,10 @@
                     "type": "string"
                   }
                 },
-                "required": ["type", "value"],
+                "required": [
+                  "type",
+                  "value"
+                ],
                 "additionalProperties": false
               },
               {
@@ -19093,7 +21465,10 @@
                     "type": "string"
                   }
                 },
-                "required": ["type", "value"],
+                "required": [
+                  "type",
+                  "value"
+                ],
                 "additionalProperties": false
               },
               {
@@ -19123,10 +21498,15 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": ["is-json", "is_json"]
+                    "enum": [
+                      "is-json",
+                      "is_json"
+                    ]
                   }
                 },
-                "required": ["type"],
+                "required": [
+                  "type"
+                ],
                 "additionalProperties": false
               },
               {
@@ -19162,7 +21542,10 @@
                     "type": "string"
                   }
                 },
-                "required": ["type", "value"],
+                "required": [
+                  "type",
+                  "value"
+                ],
                 "additionalProperties": false
               },
               {
@@ -19243,7 +21626,10 @@
                                 "minLength": 1
                               }
                             },
-                            "required": ["score_range", "outcome"],
+                            "required": [
+                              "score_range",
+                              "outcome"
+                            ],
                             "additionalProperties": false
                           }
                         }
@@ -19253,7 +21639,10 @@
                     "minItems": 1
                   }
                 },
-                "required": ["type", "criteria"],
+                "required": [
+                  "type",
+                  "criteria"
+                ],
                 "additionalProperties": false
               }
             ]
@@ -19269,7 +21658,10 @@
                 },
                 "isolation": {
                   "type": "string",
-                  "enum": ["shared", "per_test"]
+                  "enum": [
+                    "shared",
+                    "per_test"
+                  ]
                 },
                 "repos": {
                   "type": "array",
@@ -19293,7 +21685,10 @@
                                 "format": "uri"
                               }
                             },
-                            "required": ["type", "url"],
+                            "required": [
+                              "type",
+                              "url"
+                            ],
                             "additionalProperties": false
                           },
                           {
@@ -19307,7 +21702,10 @@
                                 "type": "string"
                               }
                             },
-                            "required": ["type", "path"],
+                            "required": [
+                              "type",
+                              "path"
+                            ],
                             "additionalProperties": false
                           }
                         ]
@@ -19320,7 +21718,10 @@
                           },
                           "resolve": {
                             "type": "string",
-                            "enum": ["remote", "local"]
+                            "enum": [
+                              "remote",
+                              "local"
+                            ]
                           },
                           "ancestor": {
                             "type": "integer",
@@ -19349,7 +21750,10 @@
                         "additionalProperties": false
                       }
                     },
-                    "required": ["path", "source"],
+                    "required": [
+                      "path",
+                      "source"
+                    ],
                     "additionalProperties": false
                   }
                 },
@@ -19385,7 +21789,11 @@
                         },
                         "reset": {
                           "type": "string",
-                          "enum": ["none", "fast", "strict"]
+                          "enum": [
+                            "none",
+                            "fast",
+                            "strict"
+                          ]
                         }
                       },
                       "additionalProperties": false
@@ -19416,7 +21824,11 @@
                         },
                         "reset": {
                           "type": "string",
-                          "enum": ["none", "fast", "strict"]
+                          "enum": [
+                            "none",
+                            "fast",
+                            "strict"
+                          ]
                         }
                       },
                       "additionalProperties": false
@@ -19447,7 +21859,11 @@
                         },
                         "reset": {
                           "type": "string",
-                          "enum": ["none", "fast", "strict"]
+                          "enum": [
+                            "none",
+                            "fast",
+                            "strict"
+                          ]
                         }
                       },
                       "additionalProperties": false
@@ -19478,7 +21894,11 @@
                         },
                         "reset": {
                           "type": "string",
-                          "enum": ["none", "fast", "strict"]
+                          "enum": [
+                            "none",
+                            "fast",
+                            "strict"
+                          ]
                         }
                       },
                       "additionalProperties": false
@@ -19488,7 +21908,11 @@
                 },
                 "mode": {
                   "type": "string",
-                  "enum": ["pooled", "temp", "static"]
+                  "enum": [
+                    "pooled",
+                    "temp",
+                    "static"
+                  ]
                 },
                 "path": {
                   "type": "string"
@@ -19502,7 +21926,9 @@
           ]
         }
       },
-      "required": ["tests"],
+      "required": [
+        "tests"
+      ],
       "additionalProperties": false
     }
   }

From bd576b91c95cf5e0b408856991fbcf3fb6f8d4b6 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 13:28:25 +0000
Subject: [PATCH 07/13] feat: add --judge-target and --model CLI flags with
 orchestrator wiring

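Note: --model is currently only honored together with --judge-target agentv
(which requires it). Applying --model to other judge targets is left as a
TODO in the orchestrator.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>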
---
 apps/cli/src/commands/eval/commands/run.ts   | 13 ++++++++++
 apps/cli/src/commands/eval/run-eval.ts       | 11 +++++++++
 packages/core/src/evaluation/orchestrator.ts | 26 +++++++++++++++++++-
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/apps/cli/src/commands/eval/commands/run.ts b/apps/cli/src/commands/eval/commands/run.ts
index 4a7ec1b50..7e2117107 100644
--- a/apps/cli/src/commands/eval/commands/run.ts
+++ b/apps/cli/src/commands/eval/commands/run.ts
@@ -163,6 +163,17 @@ export const evalRunCommand = command({
       description:
         'Write companion artifacts (grading/<test>.json, timing.json, benchmark.json) to the specified directory',
     }),
+    judgeTarget: option({
+      type: optional(string),
+      long: 'judge-target',
+      description:
+        'Override judge target for all evaluators (e.g., "agentv", or a target name from targets.yaml)',
+    }),
+    model: option({
+      type: optional(string),
+      long: 'model',
+      description: 'Override model for the judge target (e.g., "openai:gpt-5-mini")',
+    }),
   },
   handler: async (args) => {
     // Launch interactive wizard when no eval paths and stdin is a TTY
@@ -203,6 +214,8 @@ export const evalRunCommand = command({
       strict: args.strict,
       benchmarkJson: args.benchmarkJson,
       artifacts: args.artifacts,
+      judgeTarget: args.judgeTarget,
+      model: args.model,
     };
     await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
   },
diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index 43eec380b..1f3d77f46 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -82,6 +82,8 @@ interface NormalizedOptions {
   readonly workspacePath?: string;
   readonly benchmarkJson?: string;
   readonly artifacts?: string;
+  readonly judgeTarget?: string;
+  readonly model?: string;
 }
 
 function normalizeBoolean(value: unknown): boolean {
@@ -249,6 +251,8 @@ function normalizeOptions(
     workspacePath,
     benchmarkJson: normalizeString(rawOptions.benchmarkJson),
     artifacts: normalizeString(rawOptions.artifacts),
+    judgeTarget: normalizeString(rawOptions.judgeTarget),
+    model: normalizeString(rawOptions.model),
   } satisfies NormalizedOptions;
 }
 
@@ -593,6 +597,8 @@ async function runSingleEvalFile(params: {
     trials: trialsConfig,
     totalBudgetUsd,
     failOnError,
+    judgeTarget: options.judgeTarget,
+    model: options.model,
     streamCallbacks: streamingObserver?.getStreamCallbacks(),
     onResult: async (result: EvaluationResult) => {
       // Finalize streaming observer span with score
@@ -674,6 +680,11 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise<void>
 
   let options = normalizeOptions(input.rawOptions, config, yamlConfig?.execution);
 
+  // Validate --judge-target / --model combinations
+  if (options.judgeTarget === 'agentv' && !options.model) {
+    throw new Error('--judge-target agentv requires --model (e.g., --model openai:gpt-5-mini)');
+  }
+
   // --retry-errors: override filter to only re-run execution_error test cases.
   // IMPORTANT: JSONL must be fully loaded here, before the output writer is created below,
   // since the retry source and output destination may refer to the same file.
diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts
index 396bc15fe..58e42903b 100644
--- a/packages/core/src/evaluation/orchestrator.ts
+++ b/packages/core/src/evaluation/orchestrator.ts
@@ -235,6 +235,10 @@ export interface RunEvaluationOptions {
   readonly retainOnSuccess?: 'keep' | 'cleanup';
   /** Retention policy override for failed cases */
   readonly retainOnFailure?: 'keep' | 'cleanup';
+  /** CLI override: judge target name (e.g., "agentv" or a target from targets.yaml) */
+  readonly judgeTarget?: string;
+  /** CLI override: model for the "agentv" judge target (e.g., "openai:gpt-5-mini") */
+  readonly model?: string;
 }
 
 export async function runEvaluation(
@@ -271,6 +275,8 @@ export async function runEvaluation(
     workspaceClean,
     retainOnSuccess,
     retainOnFailure,
+    judgeTarget: cliJudgeTarget,
+    model: cliModel,
   } = options;
 
   // Disable cache when trials > 1 (cache makes trials deterministic = pointless)
@@ -335,6 +341,23 @@ export async function runEvaluation(
   const resolveJudgeProvider = async (
     targetContext: ResolvedTarget,
   ): Promise<Provider | undefined> => {
+    // CLI --judge-target takes highest priority
+    if (cliJudgeTarget) {
+      if (cliJudgeTarget === 'agentv') {
+        // Create an agentv provider on-the-fly with the CLI model
+        const { AgentvProvider } = await import('./providers/agentv-provider.js');
+        return new AgentvProvider('agentv', { model: cliModel!, temperature: 0 });
+      }
+      const overrideTarget = resolveTargetByName(cliJudgeTarget);
+      if (!overrideTarget) {
+        throw new Error(`--judge-target "${cliJudgeTarget}" not found in targets`);
+      }
+      return getOrCreateProvider(overrideTarget);
+    }
+
+    // TODO: When --model is provided without --judge-target, override the model of
+    // whichever judge target is resolved. For now, --model only works with --judge-target agentv.
+
     const judgeName = targetContext.judgeTarget ?? targetContext.name;
     const resolvedJudge = resolveTargetByName(judgeName);
     if (!resolvedJudge) {
@@ -346,7 +369,8 @@ export async function runEvaluation(
   // Validate judge_target: error if an agent provider would be used as judge.
   // Agent providers can't return structured JSON for judging — they respond with
   // tool calls and markdown, causing silent score-0 failures.
-  if (isAgentProvider(getOrCreateProvider(target)) && !target.judgeTarget) {
+  // CLI --judge-target override also satisfies this requirement.
+  if (isAgentProvider(getOrCreateProvider(target)) && !target.judgeTarget && !cliJudgeTarget) {
     throw new Error(
       `Target "${target.name}" is an agent provider ("${target.kind}") with no judge_target — agent providers cannot return structured JSON for judging. Set judge_target to an LLM provider (e.g., azure-llm).`,
     );

From a11b2abde4a7c0c9ce0624673bc59200e7354466 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 13:33:02 +0000
Subject: [PATCH 08/13] refactor: unify llm-judge/agent-judge in transpiler NL
 conversion

The transpiler now handles llm-judge with rubrics the same way as
agent-judge, expanding rubric items into individual NL assertion strings.

Part of #614
---
 .../loaders/eval-yaml-transpiler.ts           | 11 +++++---
 .../loaders/eval-yaml-transpiler.test.ts      | 25 +++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts b/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts
index 4d9560157..7e99fd01f 100644
--- a/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts
+++ b/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts
@@ -144,8 +144,6 @@ function assertionToNaturalLanguage(entry: RawAssertEntry): string | null {
 
     case 'llm-judge':
     case 'llm_judge':
-      return typeof entry.prompt === 'string' ? entry.prompt : null;
-
     case 'agent-judge':
     case 'agent_judge': {
       // Expand each rubric item to its own assertion string
@@ -217,10 +215,15 @@ function assertionToNaturalLanguage(entry: RawAssertEntry): string | null {
 
 /**
  * Expand a single assertion entry into zero or more NL strings.
- * Most assertions produce exactly one string; agent-judge with rubrics expands to many.
+ * Most assertions produce exactly one string; llm-judge/agent-judge with rubrics expands to many.
  */
 function assertionToNaturalLanguageList(entry: RawAssertEntry): string[] {
-  if (entry.type === 'agent-judge' || entry.type === 'agent_judge') {
+  if (
+    entry.type === 'llm-judge' ||
+    entry.type === 'llm_judge' ||
+    entry.type === 'agent-judge' ||
+    entry.type === 'agent_judge'
+  ) {
     if (Array.isArray(entry.rubrics) && entry.rubrics.length > 0) {
       return (entry.rubrics as Array<{ outcome?: string; criteria?: string; id?: string }>)
         .map((r) => r.outcome ?? r.criteria ?? r.id)
diff --git a/packages/core/test/evaluation/loaders/eval-yaml-transpiler.test.ts b/packages/core/test/evaluation/loaders/eval-yaml-transpiler.test.ts
index de224a1a2..fa8a7e497 100644
--- a/packages/core/test/evaluation/loaders/eval-yaml-transpiler.test.ts
+++ b/packages/core/test/evaluation/loaders/eval-yaml-transpiler.test.ts
@@ -269,6 +269,31 @@ describe('transpileEvalYaml — NL assertions', () => {
     expect(evals[0].assertions).toContain('No unnecessary steps');
   });
 
+  it('converts llm-judge with rubrics to multiple assertions', () => {
+    const suite = {
+      tests: [
+        {
+          id: 't1',
+          input: 'test',
+          assertions: [
+            { type: 'skill-trigger', skill: 's', should_trigger: true },
+            {
+              type: 'llm-judge',
+              rubrics: [
+                { id: 'r1', outcome: 'Response is accurate' },
+                { id: 'r2', outcome: 'Formatting is correct' },
+              ],
+            },
+          ],
+        },
+      ],
+    };
+    const { files } = transpileEvalYaml(suite);
+    const evals = files.get('s')?.evals;
+    expect(evals[0].assertions).toContain('Response is accurate');
+    expect(evals[0].assertions).toContain('Formatting is correct');
+  });
+
   it('converts tool-trajectory to NL', () => {
     const suite = {
       tests: [

From 65a0d7cd7ba0381066ef8de110fc87a400606bf8 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 13:43:43 +0000
Subject: [PATCH 09/13] fix: address code review findings

- Add explicit guard for --model when --judge-target is agentv (was non-null assertion)
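- Consolidate evaluateWithJudgeTarget/evaluateWithDelegatedAgent into shared evaluateWithDelegate
  (delegate mode now reports mode "delegate" instead of "judge_target" in the raw request and details)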
- Add try-catch for RegExp construction in search_files tool (prevents crash on invalid patterns)
- Add comments explaining agentv exclusion from AGENT_PROVIDER_KINDS and AgentJudgeSchema backward compat

Part of #614
---
 .../src/evaluation/evaluators/llm-judge.ts    | 99 ++++++-------------
 .../evaluation/loaders/evaluator-parser.ts    |  4 +-
 packages/core/src/evaluation/orchestrator.ts  |  7 +-
 .../evaluation/providers/agentv-provider.ts   |  3 +-
 .../evaluation/registry/builtin-evaluators.ts |  2 +
 .../evaluation/validation/eval-file.schema.ts |  1 +
 .../providers/agentv-provider.test.ts         |  8 +-
 7 files changed, 45 insertions(+), 79 deletions(-)

diff --git a/packages/core/src/evaluation/evaluators/llm-judge.ts b/packages/core/src/evaluation/evaluators/llm-judge.ts
index 91e6578bc..88e6a5268 100644
--- a/packages/core/src/evaluation/evaluators/llm-judge.ts
+++ b/packages/core/src/evaluation/evaluators/llm-judge.ts
@@ -473,63 +473,7 @@ export class LlmJudgeEvaluator implements Evaluator {
    * Judge target mode: Delegates to an explicit judgeTargetProvider via Provider.invoke().
    */
   private async evaluateWithJudgeTarget(context: EvaluationContext): Promise<EvaluationScore> {
-    const provider = this.judgeTargetProvider as Provider;
-
-    const workspacePath = context.workspacePath;
-    const prompt = this.buildDelegatedPrompt(context);
-
-    const evaluatorRawRequest: JsonObject = {
-      mode: 'judge_target',
-      judge_target: provider.targetName,
-      prompt,
-    };
-
-    try {
-      const response = await provider.invoke({
-        question: prompt,
-        cwd: workspacePath,
-        evalCaseId: context.evalCase.id,
-        attempt: context.attempt,
-      });
-
-      const assistantContent = extractLastAssistantContent(response.output);
-      if (!assistantContent) {
-        return {
-          score: 0,
-          verdict: 'fail',
-          hits: [],
-          misses: ['llm-judge judge_target returned no assistant response'],
-          expectedAspectCount: 1,
-          evaluatorRawRequest,
-          details: { mode: 'judge_target', judge_target: provider.targetName },
-        };
-      }
-
-      const config = context.evaluator;
-      const rubrics = config?.type === 'llm-judge' ? config.rubrics : undefined;
-
-      const details: JsonObject = {
-        mode: 'judge_target',
-        judge_target: provider.targetName,
-      };
-
-      return this.parseAgentResult(assistantContent, rubrics, evaluatorRawRequest, details);
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      return {
-        score: 0,
-        verdict: 'fail',
-        hits: [],
-        misses: [`llm-judge judge_target evaluation failed: ${message}`],
-        expectedAspectCount: 1,
-        evaluatorRawRequest,
-        details: {
-          mode: 'judge_target',
-          judge_target: provider.targetName,
-          error: message,
-        },
-      };
-    }
+    return this.evaluateWithDelegate(context, this.judgeTargetProvider as Provider, 'judge_target');
   }
 
   /**
@@ -538,18 +482,30 @@ export class LlmJudgeEvaluator implements Evaluator {
   private async evaluateWithDelegatedAgent(
     context: EvaluationContext,
     judgeProvider: Provider,
+  ): Promise<EvaluationScore> {
+    return this.evaluateWithDelegate(context, judgeProvider, 'delegate');
+  }
+
+  /**
+   * Shared implementation for judge_target and delegate modes.
+   * Both invoke a provider and parse the agent result from the response.
+   */
+  private async evaluateWithDelegate(
+    context: EvaluationContext,
+    provider: Provider,
+    modeLabel: string,
   ): Promise<EvaluationScore> {
     const workspacePath = context.workspacePath;
     const prompt = this.buildDelegatedPrompt(context);
 
     const evaluatorRawRequest: JsonObject = {
-      mode: 'judge_target',
-      judge_target: judgeProvider.targetName,
+      mode: modeLabel,
+      judge_target: provider.targetName,
       prompt,
     };
 
     try {
-      const response = await judgeProvider.invoke({
+      const response = await provider.invoke({
         question: prompt,
         cwd: workspacePath,
         evalCaseId: context.evalCase.id,
@@ -562,10 +518,10 @@ export class LlmJudgeEvaluator implements Evaluator {
           score: 0,
           verdict: 'fail',
           hits: [],
-          misses: ['llm-judge delegate returned no assistant response'],
+          misses: [`llm-judge ${modeLabel} returned no assistant response`],
           expectedAspectCount: 1,
           evaluatorRawRequest,
-          details: { mode: 'judge_target', judge_target: judgeProvider.targetName },
+          details: { mode: modeLabel, judge_target: provider.targetName },
         };
       }
 
@@ -573,8 +529,8 @@ export class LlmJudgeEvaluator implements Evaluator {
       const rubrics = config?.type === 'llm-judge' ? config.rubrics : undefined;
 
       const details: JsonObject = {
-        mode: 'judge_target',
-        judge_target: judgeProvider.targetName,
+        mode: modeLabel,
+        judge_target: provider.targetName,
       };
 
       return this.parseAgentResult(assistantContent, rubrics, evaluatorRawRequest, details);
@@ -584,12 +540,12 @@ export class LlmJudgeEvaluator implements Evaluator {
         score: 0,
         verdict: 'fail',
         hits: [],
-        misses: [`llm-judge delegate evaluation failed: ${message}`],
+        misses: [`llm-judge ${modeLabel} evaluation failed: ${message}`],
         expectedAspectCount: 1,
         evaluatorRawRequest,
         details: {
-          mode: 'judge_target',
-          judge_target: judgeProvider.targetName,
+          mode: modeLabel,
+          judge_target: provider.targetName,
           error: message,
         },
       };
@@ -1255,7 +1211,14 @@ function createFilesystemTools(workspacePath: string) {
       execute: async (input: { pattern: string; path: string }) => {
         try {
           const resolved = resolveSandboxed(workspacePath, input.path);
-          const regex = new RegExp(input.pattern, 'gi');
+          let regex: RegExp;
+          try {
+            regex = new RegExp(input.pattern, 'gi');
+          } catch (regexErr) {
+            return {
+              error: `Invalid regex pattern: ${regexErr instanceof Error ? regexErr.message : String(regexErr)}`,
+            };
+          }
           const matches: Array<{ file: string; line: number; text: string }> = [];
 
           await searchDirectory(resolved, workspacePath, regex, matches);
diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts
index e931db1c8..2b77e87e3 100644
--- a/packages/core/src/evaluation/loaders/evaluator-parser.ts
+++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts
@@ -1301,9 +1301,7 @@ async function parseEvaluatorList(
         : undefined;
     const rawTempLlm = rawEvaluator.temperature;
     const llmTemperature =
-      typeof rawTempLlm === 'number' && rawTempLlm >= 0 && rawTempLlm <= 2
-        ? rawTempLlm
-        : undefined;
+      typeof rawTempLlm === 'number' && rawTempLlm >= 0 && rawTempLlm <= 2 ? rawTempLlm : undefined;
 
     evaluators.push({
       name,
diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts
index 58e42903b..fb8649aa4 100644
--- a/packages/core/src/evaluation/orchestrator.ts
+++ b/packages/core/src/evaluation/orchestrator.ts
@@ -344,9 +344,12 @@ export async function runEvaluation(
     // CLI --judge-target takes highest priority
     if (cliJudgeTarget) {
       if (cliJudgeTarget === 'agentv') {
-        // Create an agentv provider on-the-fly with the CLI model
+        if (!cliModel) {
+          throw new Error('--judge-target "agentv" requires --model (e.g., "openai:gpt-5-mini")');
+
+        }
         const { AgentvProvider } = await import('./providers/agentv-provider.js');
-        return new AgentvProvider('agentv', { model: cliModel!, temperature: 0 });
+        return new AgentvProvider('agentv', { model: cliModel, temperature: 0 });
       }
       const overrideTarget = resolveTargetByName(cliJudgeTarget);
       if (!overrideTarget) {
diff --git a/packages/core/src/evaluation/providers/agentv-provider.ts b/packages/core/src/evaluation/providers/agentv-provider.ts
index 06abd9cfe..88084c8fa 100644
--- a/packages/core/src/evaluation/providers/agentv-provider.ts
+++ b/packages/core/src/evaluation/providers/agentv-provider.ts
@@ -42,8 +42,7 @@ function createLanguageModel(modelString: string): LanguageModel {
       return createGoogleGenerativeAI()(modelName);
     default:
       throw new Error(
-        `Unsupported AI SDK provider "${provider}" in model string "${modelString}". ` +
-          'Supported providers: openai, anthropic, azure, google',
+        `Unsupported AI SDK provider "${provider}" in model string "${modelString}". Supported providers: openai, anthropic, azure, google`,
       );
   }
 }
diff --git a/packages/core/src/evaluation/registry/builtin-evaluators.ts b/packages/core/src/evaluation/registry/builtin-evaluators.ts
index a370c08b4..7d8e6ff88 100644
--- a/packages/core/src/evaluation/registry/builtin-evaluators.ts
+++ b/packages/core/src/evaluation/registry/builtin-evaluators.ts
@@ -93,6 +93,8 @@ export const llmJudgeFactory: EvaluatorFactoryFn = (config, context) => {
     }
     // Only pass judgeTargetProvider for agent providers (delegate mode).
     // LLM providers use the normal resolveJudgeProvider path for structured JSON mode.
+    // Note: agentv uses asLanguageModel() not invoke(), so it's not in AGENT_PROVIDER_KINDS;
+    // check it explicitly here for built-in agent mode.
     const isAgent = isAgentProvider(judgeTargetProvider) || judgeTargetProvider.kind === 'agentv';
     evaluator = new LlmJudgeEvaluator({
       resolveJudgeProvider: async (evalContext) => {
diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts
index 977b68daa..88ab00041 100644
--- a/packages/core/src/evaluation/validation/eval-file.schema.ts
+++ b/packages/core/src/evaluation/validation/eval-file.schema.ts
@@ -191,6 +191,7 @@ const ExecutionMetricsSchema = EvaluatorCommonSchema.extend({
   exploration_tolerance: z.number().min(0).optional(),
 });
 
+/** Backward compat: agent-judge YAML type is accepted and remapped to llm-judge at parse time. */
 const AgentJudgeSchema = EvaluatorCommonSchema.extend({
   type: z.enum(['agent-judge', 'agent_judge']),
   prompt: z.string().optional(),
diff --git a/packages/core/test/evaluation/providers/agentv-provider.test.ts b/packages/core/test/evaluation/providers/agentv-provider.test.ts
index 8670f4ec3..2b0c0aadd 100644
--- a/packages/core/test/evaluation/providers/agentv-provider.test.ts
+++ b/packages/core/test/evaluation/providers/agentv-provider.test.ts
@@ -68,7 +68,7 @@ describe('AgentvProvider', () => {
     });
     const model = provider.asLanguageModel();
     expect(model).toBeDefined();
-    expect((model as any).modelId).toBe('gpt-5-mini');
+    expect((model as unknown as { modelId: string }).modelId).toBe('gpt-5-mini');
   });
 
   it('asLanguageModel() works with anthropic model strings', () => {
@@ -78,7 +78,7 @@ describe('AgentvProvider', () => {
     });
     const model = provider.asLanguageModel();
     expect(model).toBeDefined();
-    expect((model as any).modelId).toBe('claude-sonnet-4-20250514');
+    expect((model as unknown as { modelId: string }).modelId).toBe('claude-sonnet-4-20250514');
   });
 
   it('asLanguageModel() works with google model strings', () => {
@@ -88,7 +88,7 @@ describe('AgentvProvider', () => {
     });
     const model = provider.asLanguageModel();
     expect(model).toBeDefined();
-    expect((model as any).modelId).toBe('gemini-2.5-flash');
+    expect((model as unknown as { modelId: string }).modelId).toBe('gemini-2.5-flash');
   });
 
   it('asLanguageModel() works with azure model strings', () => {
@@ -98,7 +98,7 @@ describe('AgentvProvider', () => {
     });
     const model = provider.asLanguageModel();
     expect(model).toBeDefined();
-    expect((model as any).modelId).toBe('gpt-4o-deployment');
+    expect((model as unknown as { modelId: string }).modelId).toBe('gpt-4o-deployment');
   });
 
   it('throws for unsupported provider prefix', () => {

From 0f3c6afa5d8a27f7ec3dc6a3a12d593efde2fbee Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 14:06:56 +0000
Subject: [PATCH 10/13] refactor: remove all agent-judge references from
 codebase

---
 .../content/docs/evaluation/eval-cases.mdx    |    2 +-
 .../content/docs/guides/agent-eval-layers.mdx |    6 +-
 ...026-02-26-eval-schema-generation-design.md |   12 +-
 .../features/agent-judge/.agentv/targets.yaml |   22 -
 .../evals/dataset.eval.baseline.jsonl         |    2 -
 .../agent-judge/evals/dataset.eval.yaml       |   64 -
 .../workspace-template/package.json           |    5 -
 .../workspace-template/src/main.ts            |   11 -
 .../file-changes-judges/.agentv/targets.yaml  |    4 +-
 .../evals/dataset.eval.yaml                   |   18 +-
 .../loaders/eval-yaml-transpiler.ts           |   10 +-
 .../evaluation/loaders/evaluator-parser.ts    |   79 +-
 .../evaluation/validation/eval-file.schema.ts |   11 -
 .../loaders/eval-yaml-transpiler.test.ts      |    4 +-
 plugins/agentv-dev/agents/eval-analyzer.md    |    4 +-
 .../skills/agentv-eval-writer/SKILL.md        |    2 +-
 .../references/eval-schema.json               | 2166 +----------------
 17 files changed, 141 insertions(+), 2281 deletions(-)
 delete mode 100644 examples/features/agent-judge/.agentv/targets.yaml
 delete mode 100644 examples/features/agent-judge/evals/dataset.eval.baseline.jsonl
 delete mode 100644 examples/features/agent-judge/evals/dataset.eval.yaml
 delete mode 100644 examples/features/agent-judge/workspace-template/package.json
 delete mode 100644 examples/features/agent-judge/workspace-template/src/main.ts

diff --git a/apps/web/src/content/docs/evaluation/eval-cases.mdx b/apps/web/src/content/docs/evaluation/eval-cases.mdx
index cc1545b64..4881674a9 100644
--- a/apps/web/src/content/docs/evaluation/eval-cases.mdx
+++ b/apps/web/src/content/docs/evaluation/eval-cases.mdx
@@ -265,7 +265,7 @@ tests:
 
 ### `assert` present — explicit evaluators only
 
-When `assert` is defined, only the declared evaluators run. No implicit judge is added. Judges that are declared (such as `llm-judge`, `code-judge`, `agent-judge`, or `rubrics`) receive `criteria` as input automatically.
+When `assert` is defined, only the declared evaluators run. No implicit judge is added. Judges that are declared (such as `llm-judge`, `code-judge`, or `rubrics`) receive `criteria` as input automatically.
 
 If `assert` contains only deterministic evaluators (like `contains` or `regex`), the `criteria` field is not evaluated and a warning is emitted:
 
diff --git a/apps/web/src/content/docs/guides/agent-eval-layers.mdx b/apps/web/src/content/docs/guides/agent-eval-layers.mdx
index 783a2ca55..6d0f542cf 100644
--- a/apps/web/src/content/docs/guides/agent-eval-layers.mdx
+++ b/apps/web/src/content/docs/guides/agent-eval-layers.mdx
@@ -15,8 +15,8 @@ Covers plan quality, plan adherence, and tool selection rationale. Use LLM-based
 
 | Concern | AgentV evaluator |
 |---------|-----------------|
-| Plan quality & coherence | `llm_judge` with reasoning-focused prompt |
-| Workspace-aware auditing | `agent_judge` with rubrics |
+| Plan quality & coherence | `llm-judge` with reasoning-focused prompt |
+| Workspace-aware auditing | `llm-judge` with rubrics |
 
 ```yaml
 # Layer 1: Reasoning — verify the agent's plan makes sense
@@ -29,7 +29,7 @@ assertions:
       Did it select appropriate tools for the task?
       Score 1.0 if reasoning is sound, 0.0 if not.
   - name: workspace-audit
-    type: agent-judge
+    type: llm-judge
     max_steps: 5
     temperature: 0
     rubrics:
diff --git a/docs/plans/2026-02-26-eval-schema-generation-design.md b/docs/plans/2026-02-26-eval-schema-generation-design.md
index a20a7909f..9d6047886 100644
--- a/docs/plans/2026-02-26-eval-schema-generation-design.md
+++ b/docs/plans/2026-02-26-eval-schema-generation-design.md
@@ -248,14 +248,9 @@ const ExecutionMetricsSchema = EvaluatorCommonSchema.extend({
   exploration_tolerance: z.number().min(0).optional(),
 });
 
-const AgentJudgeSchema = EvaluatorCommonSchema.extend({
-  type: z.literal('agent_judge'),
-  prompt: z.string().optional(),
-  rubrics: z.array(RubricItemSchema).optional(),
-  max_steps: z.number().int().min(1).max(50).optional(),
-  temperature: z.number().min(0).max(2).optional(),
-  target: z.string().optional(),
-});
+// Note: agent_judge was removed — llm-judge now covers all judge use cases
+// including agentic behavior (auto-detected based on judge provider kind).
+// See LlmJudgeSchema above for the unified schema.
 
 const ContainsSchema = EvaluatorCommonSchema.extend({
   type: z.literal('contains'),
@@ -292,7 +287,6 @@ const EvaluatorSchema = z.union([
   CostSchema,
   TokenUsageSchema,
   ExecutionMetricsSchema,
-  AgentJudgeSchema,
   ContainsSchema,
   RegexSchema,
   IsJsonSchema,
diff --git a/examples/features/agent-judge/.agentv/targets.yaml b/examples/features/agent-judge/.agentv/targets.yaml
deleted file mode 100644
index 6d5c82918..000000000
--- a/examples/features/agent-judge/.agentv/targets.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-targets:
-  # Mock agent that "creates tests" in the workspace.
-  # Each test gets a fresh copy of workspace-template/ as its CWD.
-  - name: mock_agent
-    provider: cli
-    command: >-
-      bash -c '
-      mkdir -p tests &&
-      printf "import { add, multiply } from \"../src/main\";\n\ndescribe(\"math functions\", () => {\n  test(\"add returns sum\", () => {\n    expect(add(2, 3)).toBe(5);\n  });\n\n  test(\"multiply returns product\", () => {\n    expect(multiply(4, 5)).toBe(20);\n  });\n});\n" > tests/math.test.ts &&
-      printf "import { greet } from \"../src/main\";\n\ndescribe(\"greet\", () => {\n  test(\"returns greeting\", () => {\n    expect(greet(\"World\")).toBe(\"Hello, World!\");\n  });\n});\n" > tests/greet.test.ts &&
-      echo "Created test files: tests/math.test.ts and tests/greet.test.ts" > {OUTPUT_FILE}
-      '
-    workspace_template: ../workspace-template
-    judge_target: azure_judge
-
-  # Azure OpenAI target used as judge provider for built-in agent_judge mode.
-  - name: azure_judge
-    provider: azure
-    endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
-    api_key: ${{ AZURE_OPENAI_API_KEY }}
-    model: ${{ AZURE_DEPLOYMENT_NAME }}
-    version: ${{ AZURE_OPENAI_API_VERSION }}
diff --git a/examples/features/agent-judge/evals/dataset.eval.baseline.jsonl b/examples/features/agent-judge/evals/dataset.eval.baseline.jsonl
deleted file mode 100644
index bc2d5b6ee..000000000
--- a/examples/features/agent-judge/evals/dataset.eval.baseline.jsonl
+++ /dev/null
@@ -1,2 +0,0 @@
-{"timestamp":"2026-02-20T21:37:58.641Z","test_id":"verify-test-creation-freeform","dataset":"dataset","score":1,"hits":["Created tests/math.test.ts and tests/greet.test.ts in tests/ directory","Test files import functions from src/main.ts","add, multiply, and greet functions are tested","Assertions are meaningful and verify correct outputs"],"misses":[],"target":"mock_agent","reasoning":"workspace-audit: All criteria are fully met: each function is tested with meaningful assertions, test files are correctly placed and import from the source file.","scores":[{"name":"workspace-audit","type":"agent-judge","score":1,"weight":1,"verdict":"pass","hits":["Created tests/math.test.ts and tests/greet.test.ts in tests/ directory","Test files import functions from src/main.ts","add, multiply, and greet functions are tested","Assertions are meaningful and verify correct outputs"],"misses":[],"reasoning":"All criteria are fully met: each function is tested with meaningful assertions, test files are correctly placed and import from the source file.","details":{"mode":"built-in","steps":3,"tool_calls":5}}]}
-{"timestamp":"2026-02-20T21:37:59.540Z","test_id":"verify-test-creation-rubric","dataset":"dataset","score":1,"hits":["[tests-dir-exists] A tests/ directory exists in the workspace: A 'tests/' directory exists in the workspace, containing test files.","[math-tests] Test file exists that tests the add and multiply functions: 'tests/math.test.ts' exists and contains tests for both 'add' and 'multiply' functions.","[greet-tests] Test file exists that tests the greet function: 'tests/greet.test.ts' exists and contains a test for the 'greet' function.","[assertions-present] Tests contain proper assertions (expect/assert calls): All test files contain proper assertions using 'expect' calls."],"misses":[],"target":"mock_agent","reasoning":"workspace-audit-rubric: All required test files exist in the 'tests/' directory, and each function from 'src/main.ts' is covered by appropriate unit tests with proper assertions. The candidate answer meets all rubric criteria.","scores":[{"name":"workspace-audit-rubric","type":"agent-judge","score":1,"weight":1,"verdict":"pass","hits":["[tests-dir-exists] A tests/ directory exists in the workspace: A 'tests/' directory exists in the workspace, containing test files.","[math-tests] Test file exists that tests the add and multiply functions: 'tests/math.test.ts' exists and contains tests for both 'add' and 'multiply' functions.","[greet-tests] Test file exists that tests the greet function: 'tests/greet.test.ts' exists and contains a test for the 'greet' function.","[assertions-present] Tests contain proper assertions (expect/assert calls): All test files contain proper assertions using 'expect' calls."],"misses":[],"reasoning":"All required test files exist in the 'tests/' directory, and each function from 'src/main.ts' is covered by appropriate unit tests with proper assertions. The candidate answer meets all rubric criteria.","details":{"mode":"built-in","steps":2,"tool_calls":3}}]}
diff --git a/examples/features/agent-judge/evals/dataset.eval.yaml b/examples/features/agent-judge/evals/dataset.eval.yaml
deleted file mode 100644
index a9bf21048..000000000
--- a/examples/features/agent-judge/evals/dataset.eval.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-# Agent Judge feature demonstration
-# Tests that the agent_judge evaluator can investigate the workspace
-# to verify that an agent created the expected files and content.
-#
-# The mock_agent creates test files in the workspace-template.
-# The agent_judge evaluator uses an AI SDK agent loop with filesystem tools
-# to verify the test files exist and contain proper test cases.
-
-description: Verify agent_judge evaluator can audit workspace file creation
-
-execution:
-  target: mock_agent
-
-tests:
-  # Case 1: freeform agent_judge (no rubrics) — scores 0-1
-  - id: verify-test-creation-freeform
-    criteria: >-
-      The agent should create unit test files in a tests/ directory.
-      Test files should import from src/main.ts and test the add, multiply,
-      and greet functions with meaningful assertions.
-
-    input:
-      - role: user
-        content:
-          - type: text
-            value: Create unit tests for all functions in src/main.ts
-
-    assertions:
-      - name: workspace-audit
-        type: agent-judge
-        max_steps: 5
-        temperature: 0
-
-  # Case 2: rubric-based agent_judge — structured evaluation
-  - id: verify-test-creation-rubric
-    criteria: >-
-      The agent should create comprehensive unit tests for the project.
-
-    input:
-      - role: user
-        content:
-          - type: text
-            value: Create unit tests for all functions in src/main.ts
-
-    assertions:
-      - name: workspace-audit-rubric
-        type: agent-judge
-        max_steps: 5
-        temperature: 0
-        rubrics:
-          - id: tests-dir-exists
-            outcome: "A tests/ directory exists in the workspace"
-            weight: 1.0
-            required: true
-          - id: math-tests
-            outcome: "Test file exists that tests the add and multiply functions"
-            weight: 1.0
-            required: true
-          - id: greet-tests
-            outcome: "Test file exists that tests the greet function"
-            weight: 1.0
-          - id: assertions-present
-            outcome: "Tests contain proper assertions (expect/assert calls)"
-            weight: 0.5
diff --git a/examples/features/agent-judge/workspace-template/package.json b/examples/features/agent-judge/workspace-template/package.json
deleted file mode 100644
index 24d635536..000000000
--- a/examples/features/agent-judge/workspace-template/package.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-  "name": "sample-project",
-  "version": "1.0.0",
-  "type": "module"
-}
diff --git a/examples/features/agent-judge/workspace-template/src/main.ts b/examples/features/agent-judge/workspace-template/src/main.ts
deleted file mode 100644
index cfda22527..000000000
--- a/examples/features/agent-judge/workspace-template/src/main.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-export function add(a: number, b: number): number {
-  return a + b;
-}
-
-export function multiply(a: number, b: number): number {
-  return a * b;
-}
-
-export function greet(name: string): string {
-  return `Hello, ${name}!`;
-}
diff --git a/examples/features/file-changes-judges/.agentv/targets.yaml b/examples/features/file-changes-judges/.agentv/targets.yaml
index d9645bc03..10c067b31 100644
--- a/examples/features/file-changes-judges/.agentv/targets.yaml
+++ b/examples/features/file-changes-judges/.agentv/targets.yaml
@@ -11,7 +11,7 @@ targets:
     workspace_template: ../workspace-template
     judge_target: azure_judge
 
-  # Azure OpenAI — used as LLM judge (rubrics) and built-in agent_judge provider
+  # Azure OpenAI — used as LLM judge (rubrics) and built-in llm-judge provider
   - name: azure_judge
     provider: azure
     endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
@@ -19,7 +19,7 @@ targets:
     model: ${{ AZURE_DEPLOYMENT_NAME }}
     version: ${{ AZURE_OPENAI_API_VERSION }}
 
-  # Copilot CLI — used as delegated agent_judge target
+  # Copilot CLI — used as delegated llm-judge target
   - name: copilot_judge
     provider: copilot-cli
     model: claude-haiku-4.5
diff --git a/examples/features/file-changes-judges/evals/dataset.eval.yaml b/examples/features/file-changes-judges/evals/dataset.eval.yaml
index 65ebd68df..2fb796537 100644
--- a/examples/features/file-changes-judges/evals/dataset.eval.yaml
+++ b/examples/features/file-changes-judges/evals/dataset.eval.yaml
@@ -2,13 +2,13 @@
 #
 # Proves that file_changes diffs are correctly passed to all judge types:
 #   1. rubrics      — LLM judge (Azure) evaluates the diff
-#   2. agent_judge  — built-in mode (Azure via AI SDK) sees file_changes in prompt
-#   3. agent_judge  — delegated mode (Copilot CLI with haiku) sees file_changes in prompt
+#   2. llm-judge    — built-in mode (Azure via AI SDK) sees file_changes in prompt
+#   3. llm-judge    — delegated mode (Copilot CLI with haiku) sees file_changes in prompt
 #
 # The mock agent adds a `subtract` function to calculator.ts, producing a small
 # diff (~10 lines) that fits comfortably in any LLM context window.
 
-description: Verify file_changes diffs are accessible to LLM judge, built-in agent judge, and copilot-cli agent judge
+description: Verify file_changes diffs are accessible to LLM judge (rubrics, built-in, and copilot-cli)
 
 execution:
   target: mock_agent
@@ -43,14 +43,14 @@ tests:
             outcome: "The file_changes contains a valid unified diff format"
             weight: 0.5
 
-      # 2. Built-in agent judge — Azure via AI SDK with filesystem tools
-      - name: agent-judge-builtin
-        type: agent-judge
+      # 2. Built-in LLM judge — Azure via AI SDK with filesystem tools
+      - name: llm-judge-builtin
+        type: llm-judge
         max_steps: 3
         temperature: 0
 
-      # 3. Copilot CLI agent judge — delegated via target
-      - name: agent-judge-copilot
-        type: agent-judge
+      # 3. Copilot CLI LLM judge — delegated via target
+      - name: llm-judge-copilot
+        type: llm-judge
         target: copilot_judge
         temperature: 0
diff --git a/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts b/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts
index 7e99fd01f..db646013b 100644
--- a/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts
+++ b/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts
@@ -143,9 +143,7 @@ function assertionToNaturalLanguage(entry: RawAssertEntry): string | null {
       return `Output ends with '${entry.value}'`;
 
     case 'llm-judge':
-    case 'llm_judge':
-    case 'agent-judge':
-    case 'agent_judge': {
+    case 'llm_judge': {
       // Expand each rubric item to its own assertion string
       // Return the first one — callers handle arrays via assertionToNaturalLanguageList
       if (Array.isArray(entry.rubrics) && entry.rubrics.length > 0) {
@@ -215,14 +213,12 @@ function assertionToNaturalLanguage(entry: RawAssertEntry): string | null {
 
 /**
  * Expand a single assertion entry into zero or more NL strings.
- * Most assertions produce exactly one string; llm-judge/agent-judge with rubrics expands to many.
+ * Most assertions produce exactly one string; llm-judge with rubrics expands to many.
  */
 function assertionToNaturalLanguageList(entry: RawAssertEntry): string[] {
   if (
     entry.type === 'llm-judge' ||
-    entry.type === 'llm_judge' ||
-    entry.type === 'agent-judge' ||
-    entry.type === 'agent_judge'
+    entry.type === 'llm_judge'
   ) {
     if (Array.isArray(entry.rubrics) && entry.rubrics.length > 0) {
       return (entry.rubrics as Array<{ outcome?: string; criteria?: string; id?: string }>)
diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts
index 2b77e87e3..8f994a8e4 100644
--- a/packages/core/src/evaluation/loaders/evaluator-parser.ts
+++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts
@@ -134,9 +134,8 @@ async function parseEvaluatorList(
     const typeValue = typeof rawType === 'string' ? normalizeEvaluatorType(rawType) : rawType;
 
     // Unknown types are treated as custom assertion types (resolved via registry discovery)
-    // 'agent-judge' is a known alias (maps to 'llm-judge'), not a custom type
     const isCustomType =
-      typeof typeValue === 'string' && !isEvaluatorKind(typeValue) && typeValue !== 'agent-judge';
+      typeof typeValue === 'string' && !isEvaluatorKind(typeValue);
     if (typeof typeValue !== 'string') {
       logWarning(`Skipping evaluator with invalid type in '${evalId}'`);
       continue;
@@ -854,82 +853,6 @@ async function parseEvaluatorList(
       continue;
     }
 
-    // Backward compat: agent-judge / agent_judge → llm-judge with agent-specific fields
-    if ((typeValue as string) === 'agent-judge') {
-      // Validate max_steps (1-50)
-      const rawMaxSteps = rawEvaluator.max_steps ?? rawEvaluator.maxSteps;
-      let maxSteps: number | undefined;
-      if (rawMaxSteps !== undefined) {
-        if (
-          typeof rawMaxSteps !== 'number' ||
-          !Number.isInteger(rawMaxSteps) ||
-          rawMaxSteps < 1 ||
-          rawMaxSteps > 50
-        ) {
-          logWarning(
-            `Skipping llm-judge evaluator '${name}' in '${evalId}': max_steps must be an integer 1-50`,
-          );
-          continue;
-        }
-        maxSteps = rawMaxSteps;
-      }
-
-      // Validate temperature (0-2)
-      const rawTemperature = rawEvaluator.temperature;
-      let temperature: number | undefined;
-      if (rawTemperature !== undefined) {
-        if (typeof rawTemperature !== 'number' || rawTemperature < 0 || rawTemperature > 2) {
-          logWarning(
-            `Skipping llm-judge evaluator '${name}' in '${evalId}': temperature must be a number 0-2`,
-          );
-          continue;
-        }
-        temperature = rawTemperature;
-      }
-
-      // Validate target (string)
-      const judgeTarget = asString(rawEvaluator.target);
-
-      // Parse prompt (file path or inline text)
-      let agentPrompt: string | undefined;
-      let agentPromptPath: string | undefined;
-      const rawAgentPrompt = rawEvaluator.prompt;
-      if (typeof rawAgentPrompt === 'string') {
-        agentPrompt = rawAgentPrompt;
-        const resolved = await resolveFileReference(rawAgentPrompt, searchRoots);
-        if (resolved.resolvedPath) {
-          agentPromptPath = path.resolve(resolved.resolvedPath);
-        }
-      }
-
-      // Parse rubrics via existing infrastructure
-      const rawAgentRubrics = rawEvaluator.rubrics;
-      const agentParsedRubrics = Array.isArray(rawAgentRubrics)
-        ? parseRubricItems(rawAgentRubrics, name, evalId)
-        : undefined;
-
-      const weight = validateWeight(rawEvaluator.weight, name, evalId);
-      const required = parseRequired(rawEvaluator.required);
-
-      evaluators.push({
-        name,
-        type: 'llm-judge',
-        ...(agentPrompt ? { prompt: agentPrompt } : {}),
-        ...(agentPromptPath
-          ? { promptPath: agentPromptPath, resolvedPromptPath: agentPromptPath }
-          : {}),
-        ...(agentParsedRubrics && agentParsedRubrics.length > 0
-          ? { rubrics: agentParsedRubrics }
-          : {}),
-        ...(maxSteps !== undefined ? { max_steps: maxSteps } : {}),
-        ...(temperature !== undefined ? { temperature } : {}),
-        ...(judgeTarget ? { target: judgeTarget } : {}),
-        ...(weight !== undefined ? { weight } : {}),
-        ...(required !== undefined ? { required } : {}),
-        ...(negate !== undefined ? { negate } : {}),
-      });
-      continue;
-    }
 
     if (typeValue === 'skill-trigger') {
       const skillName = asString(rawEvaluator.skill);
diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts
index 88ab00041..e3bad5fed 100644
--- a/packages/core/src/evaluation/validation/eval-file.schema.ts
+++ b/packages/core/src/evaluation/validation/eval-file.schema.ts
@@ -191,16 +191,6 @@ const ExecutionMetricsSchema = EvaluatorCommonSchema.extend({
   exploration_tolerance: z.number().min(0).optional(),
 });
 
-/** Backward compat: agent-judge YAML type is accepted and remapped to llm-judge at parse time. */
-const AgentJudgeSchema = EvaluatorCommonSchema.extend({
-  type: z.enum(['agent-judge', 'agent_judge']),
-  prompt: z.string().optional(),
-  rubrics: z.array(RubricItemSchema).optional(),
-  max_steps: z.number().int().min(1).max(50).optional(),
-  temperature: z.number().min(0).max(2).optional(),
-  target: z.string().optional(),
-});
-
 const ContainsSchema = EvaluatorCommonSchema.extend({
   type: z.literal('contains'),
   value: z.string(),
@@ -236,7 +226,6 @@ const EvaluatorSchema = z.union([
   CostSchema,
   TokenUsageSchema,
   ExecutionMetricsSchema,
-  AgentJudgeSchema,
   ContainsSchema,
   RegexSchema,
   IsJsonSchema,
diff --git a/packages/core/test/evaluation/loaders/eval-yaml-transpiler.test.ts b/packages/core/test/evaluation/loaders/eval-yaml-transpiler.test.ts
index fa8a7e497..0647ce387 100644
--- a/packages/core/test/evaluation/loaders/eval-yaml-transpiler.test.ts
+++ b/packages/core/test/evaluation/loaders/eval-yaml-transpiler.test.ts
@@ -244,7 +244,7 @@ describe('transpileEvalYaml — NL assertions', () => {
     expect(evals[0].assertions).toContain('The answer is clear and concise');
   });
 
-  it('converts agent-judge with rubrics to multiple assertions', () => {
+  it('converts llm-judge with rubrics to multiple assertions (rubrics variant)', () => {
     const suite = {
       tests: [
         {
@@ -253,7 +253,7 @@ describe('transpileEvalYaml — NL assertions', () => {
           assertions: [
             { type: 'skill-trigger', skill: 's', should_trigger: true },
             {
-              type: 'agent-judge',
+              type: 'llm-judge',
               rubrics: [
                 { id: 'r1', outcome: 'Correct result returned' },
                 { id: 'r2', outcome: 'No unnecessary steps' },
diff --git a/plugins/agentv-dev/agents/eval-analyzer.md b/plugins/agentv-dev/agents/eval-analyzer.md
index 547c86267..31660128e 100644
--- a/plugins/agentv-dev/agents/eval-analyzer.md
+++ b/plugins/agentv-dev/agents/eval-analyzer.md
@@ -28,7 +28,7 @@ If `eval-path` is provided, also read the EVAL.yaml to understand evaluator conf
 
 ### Step 2: Deterministic-Upgrade Analysis
 
-For each evaluator entry in `scores` where `type` is `"llm-judge"`, `"rubrics"`, or `"agent-judge"`, inspect the `reasoning`, `hits`, and `misses` fields for patterns that indicate a deterministic assertion would suffice:
+For each evaluator entry in `scores` where `type` is `"llm-judge"` or `"rubrics"`, inspect the `reasoning`, `hits`, and `misses` fields for patterns that indicate a deterministic assertion would suffice:
 
 | Signal | Detection | Suggested Upgrade |
 |--------|-----------|-------------------|
@@ -123,7 +123,7 @@ If a section has no findings, include the header with "None found." underneath.
 - **Be specific:** Every suggestion must include the test case ID, evaluator name, evidence from the results, and a concrete replacement config.
 - **Be conservative:** Only suggest deterministic upgrades when the pattern is clear and consistent. Partial or ambiguous evidence should be noted but not acted on.
 - **Prioritize by impact:** Order suggestions by estimated cost savings (LLM-judge → deterministic saves the most).
-- **Handle all evaluator types:** Process `code-judge`, `tool-trajectory`, `llm-judge`, `agent-judge`, `rubrics`, `composite`, and all deterministic types. Only LLM-based types are candidates for deterministic upgrades.
+- **Handle all evaluator types:** Process `code-judge`, `tool-trajectory`, `llm-judge`, `rubrics`, `composite`, and all deterministic types. Only LLM-based types are candidates for deterministic upgrades.
 - **Multi-provider awareness:** When results span multiple targets, note if a suggestion applies to all targets or is target-specific.
 - **No false positives:** It is better to miss a suggestion than to recommend an incorrect upgrade. If unsure, add the finding to a "Needs Review" subsection with your reasoning.
 
diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md b/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md
index 95d7bf796..5ae6275a3 100644
--- a/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md
+++ b/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md
@@ -222,7 +222,7 @@ tests:
 |----------|-------------|----------|
 | `criteria` + **no `assertions`** | Implicit `llm-judge` runs automatically against `criteria` | No |
 | `criteria` + **`assertions` with only deterministic evaluators** (contains, regex, etc.) | Only declared evaluators run. `criteria` is **not evaluated**. | Yes — warns that no evaluator will consume criteria |
-| `criteria` + **`assertions` with a judge** (llm-judge, code-judge, agent-judge, rubrics) | Declared evaluators run. Judges receive `criteria` as input. | No |
+| `criteria` + **`assertions` with a judge** (llm-judge, code-judge, rubrics) | Declared evaluators run. Judges receive `criteria` as input. | No |
 
 ### No assertions → implicit llm-judge
 
diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json
index b55528f3c..77f5be6d9 100644
--- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json
+++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json
@@ -1075,120 +1075,6 @@
                           ],
                           "additionalProperties": false
                         },
-                        {
-                          "type": "object",
-                          "properties": {
-                            "name": {
-                              "type": "string"
-                            },
-                            "weight": {
-                              "type": "number",
-                              "minimum": 0
-                            },
-                            "required": {
-                              "anyOf": [
-                                {
-                                  "type": "boolean"
-                                },
-                                {
-                                  "type": "number",
-                                  "exclusiveMinimum": 0,
-                                  "maximum": 1
-                                }
-                              ]
-                            },
-                            "negate": {
-                              "type": "boolean"
-                            },
-                            "type": {
-                              "type": "string",
-                              "enum": [
-                                "agent-judge",
-                                "agent_judge"
-                              ]
-                            },
-                            "prompt": {
-                              "type": "string"
-                            },
-                            "rubrics": {
-                              "type": "array",
-                              "items": {
-                                "type": "object",
-                                "properties": {
-                                  "id": {
-                                    "type": "string"
-                                  },
-                                  "outcome": {
-                                    "type": "string"
-                                  },
-                                  "weight": {
-                                    "type": "number"
-                                  },
-                                  "required": {
-                                    "type": "boolean"
-                                  },
-                                  "required_min_score": {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  },
-                                  "score_ranges": {
-                                    "type": "array",
-                                    "items": {
-                                      "type": "object",
-                                      "properties": {
-                                        "score_range": {
-                                          "type": "array",
-                                          "minItems": 2,
-                                          "maxItems": 2,
-                                          "items": [
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            },
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            }
-                                          ]
-                                        },
-                                        "outcome": {
-                                          "type": "string",
-                                          "minLength": 1
-                                        }
-                                      },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
-                                      "additionalProperties": false
-                                    }
-                                  }
-                                },
-                                "additionalProperties": false
-                              }
-                            },
-                            "max_steps": {
-                              "type": "integer",
-                              "minimum": 1,
-                              "maximum": 50
-                            },
-                            "temperature": {
-                              "type": "number",
-                              "minimum": 0,
-                              "maximum": 2
-                            },
-                            "target": {
-                              "type": "string"
-                            }
-                          },
-                          "required": [
-                            "type"
-                          ],
-                          "additionalProperties": false
-                        },
                         {
                           "type": "object",
                           "properties": {
@@ -2280,120 +2166,6 @@
                           ],
                           "additionalProperties": false
                         },
-                        {
-                          "type": "object",
-                          "properties": {
-                            "name": {
-                              "type": "string"
-                            },
-                            "weight": {
-                              "type": "number",
-                              "minimum": 0
-                            },
-                            "required": {
-                              "anyOf": [
-                                {
-                                  "type": "boolean"
-                                },
-                                {
-                                  "type": "number",
-                                  "exclusiveMinimum": 0,
-                                  "maximum": 1
-                                }
-                              ]
-                            },
-                            "negate": {
-                              "type": "boolean"
-                            },
-                            "type": {
-                              "type": "string",
-                              "enum": [
-                                "agent-judge",
-                                "agent_judge"
-                              ]
-                            },
-                            "prompt": {
-                              "type": "string"
-                            },
-                            "rubrics": {
-                              "type": "array",
-                              "items": {
-                                "type": "object",
-                                "properties": {
-                                  "id": {
-                                    "type": "string"
-                                  },
-                                  "outcome": {
-                                    "type": "string"
-                                  },
-                                  "weight": {
-                                    "type": "number"
-                                  },
-                                  "required": {
-                                    "type": "boolean"
-                                  },
-                                  "required_min_score": {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  },
-                                  "score_ranges": {
-                                    "type": "array",
-                                    "items": {
-                                      "type": "object",
-                                      "properties": {
-                                        "score_range": {
-                                          "type": "array",
-                                          "minItems": 2,
-                                          "maxItems": 2,
-                                          "items": [
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            },
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            }
-                                          ]
-                                        },
-                                        "outcome": {
-                                          "type": "string",
-                                          "minLength": 1
-                                        }
-                                      },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
-                                      "additionalProperties": false
-                                    }
-                                  }
-                                },
-                                "additionalProperties": false
-                              }
-                            },
-                            "max_steps": {
-                              "type": "integer",
-                              "minimum": 1,
-                              "maximum": 50
-                            },
-                            "temperature": {
-                              "type": "number",
-                              "minimum": 0,
-                              "maximum": 2
-                            },
-                            "target": {
-                              "type": "string"
-                            }
-                          },
-                          "required": [
-                            "type"
-                          ],
-                          "additionalProperties": false
-                        },
                         {
                           "type": "object",
                           "properties": {
@@ -3512,136 +3284,22 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "agent-judge",
-                                "agent_judge"
-                              ]
+                              "const": "contains"
                             },
-                            "prompt": {
-                              "type": "string"
-                            },
-                            "rubrics": {
-                              "type": "array",
-                              "items": {
-                                "type": "object",
-                                "properties": {
-                                  "id": {
-                                    "type": "string"
-                                  },
-                                  "outcome": {
-                                    "type": "string"
-                                  },
-                                  "weight": {
-                                    "type": "number"
-                                  },
-                                  "required": {
-                                    "type": "boolean"
-                                  },
-                                  "required_min_score": {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  },
-                                  "score_ranges": {
-                                    "type": "array",
-                                    "items": {
-                                      "type": "object",
-                                      "properties": {
-                                        "score_range": {
-                                          "type": "array",
-                                          "minItems": 2,
-                                          "maxItems": 2,
-                                          "items": [
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            },
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            }
-                                          ]
-                                        },
-                                        "outcome": {
-                                          "type": "string",
-                                          "minLength": 1
-                                        }
-                                      },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
-                                      "additionalProperties": false
-                                    }
-                                  }
-                                },
-                                "additionalProperties": false
-                              }
-                            },
-                            "max_steps": {
-                              "type": "integer",
-                              "minimum": 1,
-                              "maximum": 50
-                            },
-                            "temperature": {
-                              "type": "number",
-                              "minimum": 0,
-                              "maximum": 2
-                            },
-                            "target": {
-                              "type": "string"
-                            }
-                          },
-                          "required": [
-                            "type"
-                          ],
-                          "additionalProperties": false
-                        },
-                        {
-                          "type": "object",
-                          "properties": {
-                            "name": {
-                              "type": "string"
-                            },
-                            "weight": {
-                              "type": "number",
-                              "minimum": 0
-                            },
-                            "required": {
-                              "anyOf": [
-                                {
-                                  "type": "boolean"
-                                },
-                                {
-                                  "type": "number",
-                                  "exclusiveMinimum": 0,
-                                  "maximum": 1
-                                }
-                              ]
-                            },
-                            "negate": {
-                              "type": "boolean"
-                            },
-                            "type": {
-                              "type": "string",
-                              "const": "contains"
-                            },
-                            "value": {
-                              "type": "string"
-                            }
-                          },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
-                          "additionalProperties": false
-                        },
-                        {
-                          "type": "object",
-                          "properties": {
-                            "name": {
+                            "value": {
+                              "type": "string"
+                            }
+                          },
+                          "required": [
+                            "type",
+                            "value"
+                          ],
+                          "additionalProperties": false
+                        },
+                        {
+                          "type": "object",
+                          "properties": {
+                            "name": {
                               "type": "string"
                             },
                             "weight": {
@@ -4702,120 +4360,6 @@
                               ],
                               "additionalProperties": false
                             },
-                            {
-                              "type": "object",
-                              "properties": {
-                                "name": {
-                                  "type": "string"
-                                },
-                                "weight": {
-                                  "type": "number",
-                                  "minimum": 0
-                                },
-                                "required": {
-                                  "anyOf": [
-                                    {
-                                      "type": "boolean"
-                                    },
-                                    {
-                                      "type": "number",
-                                      "exclusiveMinimum": 0,
-                                      "maximum": 1
-                                    }
-                                  ]
-                                },
-                                "negate": {
-                                  "type": "boolean"
-                                },
-                                "type": {
-                                  "type": "string",
-                                  "enum": [
-                                    "agent-judge",
-                                    "agent_judge"
-                                  ]
-                                },
-                                "prompt": {
-                                  "type": "string"
-                                },
-                                "rubrics": {
-                                  "type": "array",
-                                  "items": {
-                                    "type": "object",
-                                    "properties": {
-                                      "id": {
-                                        "type": "string"
-                                      },
-                                      "outcome": {
-                                        "type": "string"
-                                      },
-                                      "weight": {
-                                        "type": "number"
-                                      },
-                                      "required": {
-                                        "type": "boolean"
-                                      },
-                                      "required_min_score": {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      },
-                                      "score_ranges": {
-                                        "type": "array",
-                                        "items": {
-                                          "type": "object",
-                                          "properties": {
-                                            "score_range": {
-                                              "type": "array",
-                                              "minItems": 2,
-                                              "maxItems": 2,
-                                              "items": [
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                },
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                }
-                                              ]
-                                            },
-                                            "outcome": {
-                                              "type": "string",
-                                              "minLength": 1
-                                            }
-                                          },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
-                                          "additionalProperties": false
-                                        }
-                                      }
-                                    },
-                                    "additionalProperties": false
-                                  }
-                                },
-                                "max_steps": {
-                                  "type": "integer",
-                                  "minimum": 1,
-                                  "maximum": 50
-                                },
-                                "temperature": {
-                                  "type": "number",
-                                  "minimum": 0,
-                                  "maximum": 2
-                                },
-                                "target": {
-                                  "type": "string"
-                                }
-                              },
-                              "required": [
-                                "type"
-                              ],
-                              "additionalProperties": false
-                            },
                             {
                               "type": "object",
                               "properties": {
@@ -5934,136 +5478,22 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "agent-judge",
-                                    "agent_judge"
-                                  ]
+                                  "const": "contains"
                                 },
-                                "prompt": {
-                                  "type": "string"
-                                },
-                                "rubrics": {
-                                  "type": "array",
-                                  "items": {
-                                    "type": "object",
-                                    "properties": {
-                                      "id": {
-                                        "type": "string"
-                                      },
-                                      "outcome": {
-                                        "type": "string"
-                                      },
-                                      "weight": {
-                                        "type": "number"
-                                      },
-                                      "required": {
-                                        "type": "boolean"
-                                      },
-                                      "required_min_score": {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      },
-                                      "score_ranges": {
-                                        "type": "array",
-                                        "items": {
-                                          "type": "object",
-                                          "properties": {
-                                            "score_range": {
-                                              "type": "array",
-                                              "minItems": 2,
-                                              "maxItems": 2,
-                                              "items": [
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                },
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                }
-                                              ]
-                                            },
-                                            "outcome": {
-                                              "type": "string",
-                                              "minLength": 1
-                                            }
-                                          },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
-                                          "additionalProperties": false
-                                        }
-                                      }
-                                    },
-                                    "additionalProperties": false
-                                  }
-                                },
-                                "max_steps": {
-                                  "type": "integer",
-                                  "minimum": 1,
-                                  "maximum": 50
-                                },
-                                "temperature": {
-                                  "type": "number",
-                                  "minimum": 0,
-                                  "maximum": 2
-                                },
-                                "target": {
-                                  "type": "string"
-                                }
-                              },
-                              "required": [
-                                "type"
-                              ],
-                              "additionalProperties": false
-                            },
-                            {
-                              "type": "object",
-                              "properties": {
-                                "name": {
-                                  "type": "string"
-                                },
-                                "weight": {
-                                  "type": "number",
-                                  "minimum": 0
-                                },
-                                "required": {
-                                  "anyOf": [
-                                    {
-                                      "type": "boolean"
-                                    },
-                                    {
-                                      "type": "number",
-                                      "exclusiveMinimum": 0,
-                                      "maximum": 1
-                                    }
-                                  ]
-                                },
-                                "negate": {
-                                  "type": "boolean"
-                                },
-                                "type": {
-                                  "type": "string",
-                                  "const": "contains"
-                                },
-                                "value": {
-                                  "type": "string"
-                                }
-                              },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
-                              "additionalProperties": false
-                            },
-                            {
-                              "type": "object",
-                              "properties": {
-                                "name": {
+                                "value": {
+                                  "type": "string"
+                                }
+                              },
+                              "required": [
+                                "type",
+                                "value"
+                              ],
+                              "additionalProperties": false
+                            },
+                            {
+                              "type": "object",
+                              "properties": {
+                                "name": {
                                   "type": "string"
                                 },
                                 "weight": {
@@ -7112,120 +6542,6 @@
                               ],
                               "additionalProperties": false
                             },
-                            {
-                              "type": "object",
-                              "properties": {
-                                "name": {
-                                  "type": "string"
-                                },
-                                "weight": {
-                                  "type": "number",
-                                  "minimum": 0
-                                },
-                                "required": {
-                                  "anyOf": [
-                                    {
-                                      "type": "boolean"
-                                    },
-                                    {
-                                      "type": "number",
-                                      "exclusiveMinimum": 0,
-                                      "maximum": 1
-                                    }
-                                  ]
-                                },
-                                "negate": {
-                                  "type": "boolean"
-                                },
-                                "type": {
-                                  "type": "string",
-                                  "enum": [
-                                    "agent-judge",
-                                    "agent_judge"
-                                  ]
-                                },
-                                "prompt": {
-                                  "type": "string"
-                                },
-                                "rubrics": {
-                                  "type": "array",
-                                  "items": {
-                                    "type": "object",
-                                    "properties": {
-                                      "id": {
-                                        "type": "string"
-                                      },
-                                      "outcome": {
-                                        "type": "string"
-                                      },
-                                      "weight": {
-                                        "type": "number"
-                                      },
-                                      "required": {
-                                        "type": "boolean"
-                                      },
-                                      "required_min_score": {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      },
-                                      "score_ranges": {
-                                        "type": "array",
-                                        "items": {
-                                          "type": "object",
-                                          "properties": {
-                                            "score_range": {
-                                              "type": "array",
-                                              "minItems": 2,
-                                              "maxItems": 2,
-                                              "items": [
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                },
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                }
-                                              ]
-                                            },
-                                            "outcome": {
-                                              "type": "string",
-                                              "minLength": 1
-                                            }
-                                          },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
-                                          "additionalProperties": false
-                                        }
-                                      }
-                                    },
-                                    "additionalProperties": false
-                                  }
-                                },
-                                "max_steps": {
-                                  "type": "integer",
-                                  "minimum": 1,
-                                  "maximum": 50
-                                },
-                                "temperature": {
-                                  "type": "number",
-                                  "minimum": 0,
-                                  "maximum": 2
-                                },
-                                "target": {
-                                  "type": "string"
-                                }
-                              },
-                              "required": [
-                                "type"
-                              ],
-                              "additionalProperties": false
-                            },
                             {
                               "type": "object",
                               "properties": {
@@ -8830,136 +8146,22 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "agent-judge",
-                                "agent_judge"
-                              ]
+                              "const": "contains"
                             },
-                            "prompt": {
-                              "type": "string"
-                            },
-                            "rubrics": {
-                              "type": "array",
-                              "items": {
-                                "type": "object",
-                                "properties": {
-                                  "id": {
-                                    "type": "string"
-                                  },
-                                  "outcome": {
-                                    "type": "string"
-                                  },
-                                  "weight": {
-                                    "type": "number"
-                                  },
-                                  "required": {
-                                    "type": "boolean"
-                                  },
-                                  "required_min_score": {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  },
-                                  "score_ranges": {
-                                    "type": "array",
-                                    "items": {
-                                      "type": "object",
-                                      "properties": {
-                                        "score_range": {
-                                          "type": "array",
-                                          "minItems": 2,
-                                          "maxItems": 2,
-                                          "items": [
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            },
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            }
-                                          ]
-                                        },
-                                        "outcome": {
-                                          "type": "string",
-                                          "minLength": 1
-                                        }
-                                      },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
-                                      "additionalProperties": false
-                                    }
-                                  }
-                                },
-                                "additionalProperties": false
-                              }
-                            },
-                            "max_steps": {
-                              "type": "integer",
-                              "minimum": 1,
-                              "maximum": 50
-                            },
-                            "temperature": {
-                              "type": "number",
-                              "minimum": 0,
-                              "maximum": 2
-                            },
-                            "target": {
-                              "type": "string"
-                            }
-                          },
-                          "required": [
-                            "type"
-                          ],
-                          "additionalProperties": false
-                        },
-                        {
-                          "type": "object",
-                          "properties": {
-                            "name": {
-                              "type": "string"
-                            },
-                            "weight": {
-                              "type": "number",
-                              "minimum": 0
-                            },
-                            "required": {
-                              "anyOf": [
-                                {
-                                  "type": "boolean"
-                                },
-                                {
-                                  "type": "number",
-                                  "exclusiveMinimum": 0,
-                                  "maximum": 1
-                                }
-                              ]
-                            },
-                            "negate": {
-                              "type": "boolean"
-                            },
-                            "type": {
-                              "type": "string",
-                              "const": "contains"
-                            },
-                            "value": {
-                              "type": "string"
-                            }
-                          },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
-                          "additionalProperties": false
-                        },
-                        {
-                          "type": "object",
-                          "properties": {
-                            "name": {
+                            "value": {
+                              "type": "string"
+                            }
+                          },
+                          "required": [
+                            "type",
+                            "value"
+                          ],
+                          "additionalProperties": false
+                        },
+                        {
+                          "type": "object",
+                          "properties": {
+                            "name": {
                               "type": "string"
                             },
                             "weight": {
@@ -10008,120 +9210,6 @@
                           ],
                           "additionalProperties": false
                         },
-                        {
-                          "type": "object",
-                          "properties": {
-                            "name": {
-                              "type": "string"
-                            },
-                            "weight": {
-                              "type": "number",
-                              "minimum": 0
-                            },
-                            "required": {
-                              "anyOf": [
-                                {
-                                  "type": "boolean"
-                                },
-                                {
-                                  "type": "number",
-                                  "exclusiveMinimum": 0,
-                                  "maximum": 1
-                                }
-                              ]
-                            },
-                            "negate": {
-                              "type": "boolean"
-                            },
-                            "type": {
-                              "type": "string",
-                              "enum": [
-                                "agent-judge",
-                                "agent_judge"
-                              ]
-                            },
-                            "prompt": {
-                              "type": "string"
-                            },
-                            "rubrics": {
-                              "type": "array",
-                              "items": {
-                                "type": "object",
-                                "properties": {
-                                  "id": {
-                                    "type": "string"
-                                  },
-                                  "outcome": {
-                                    "type": "string"
-                                  },
-                                  "weight": {
-                                    "type": "number"
-                                  },
-                                  "required": {
-                                    "type": "boolean"
-                                  },
-                                  "required_min_score": {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  },
-                                  "score_ranges": {
-                                    "type": "array",
-                                    "items": {
-                                      "type": "object",
-                                      "properties": {
-                                        "score_range": {
-                                          "type": "array",
-                                          "minItems": 2,
-                                          "maxItems": 2,
-                                          "items": [
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            },
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            }
-                                          ]
-                                        },
-                                        "outcome": {
-                                          "type": "string",
-                                          "minLength": 1
-                                        }
-                                      },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
-                                      "additionalProperties": false
-                                    }
-                                  }
-                                },
-                                "additionalProperties": false
-                              }
-                            },
-                            "max_steps": {
-                              "type": "integer",
-                              "minimum": 1,
-                              "maximum": 50
-                            },
-                            "temperature": {
-                              "type": "number",
-                              "minimum": 0,
-                              "maximum": 2
-                            },
-                            "target": {
-                              "type": "string"
-                            }
-                          },
-                          "required": [
-                            "type"
-                          ],
-                          "additionalProperties": false
-                        },
                         {
                           "type": "object",
                           "properties": {
@@ -11240,136 +10328,22 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "agent-judge",
-                                "agent_judge"
-                              ]
+                              "const": "contains"
                             },
-                            "prompt": {
-                              "type": "string"
-                            },
-                            "rubrics": {
-                              "type": "array",
-                              "items": {
-                                "type": "object",
-                                "properties": {
-                                  "id": {
-                                    "type": "string"
-                                  },
-                                  "outcome": {
-                                    "type": "string"
-                                  },
-                                  "weight": {
-                                    "type": "number"
-                                  },
-                                  "required": {
-                                    "type": "boolean"
-                                  },
-                                  "required_min_score": {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  },
-                                  "score_ranges": {
-                                    "type": "array",
-                                    "items": {
-                                      "type": "object",
-                                      "properties": {
-                                        "score_range": {
-                                          "type": "array",
-                                          "minItems": 2,
-                                          "maxItems": 2,
-                                          "items": [
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            },
-                                            {
-                                              "type": "integer",
-                                              "minimum": 0,
-                                              "maximum": 10
-                                            }
-                                          ]
-                                        },
-                                        "outcome": {
-                                          "type": "string",
-                                          "minLength": 1
-                                        }
-                                      },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
-                                      "additionalProperties": false
-                                    }
-                                  }
-                                },
-                                "additionalProperties": false
-                              }
-                            },
-                            "max_steps": {
-                              "type": "integer",
-                              "minimum": 1,
-                              "maximum": 50
-                            },
-                            "temperature": {
-                              "type": "number",
-                              "minimum": 0,
-                              "maximum": 2
-                            },
-                            "target": {
-                              "type": "string"
-                            }
-                          },
-                          "required": [
-                            "type"
-                          ],
-                          "additionalProperties": false
-                        },
-                        {
-                          "type": "object",
-                          "properties": {
-                            "name": {
-                              "type": "string"
-                            },
-                            "weight": {
-                              "type": "number",
-                              "minimum": 0
-                            },
-                            "required": {
-                              "anyOf": [
-                                {
-                                  "type": "boolean"
-                                },
-                                {
-                                  "type": "number",
-                                  "exclusiveMinimum": 0,
-                                  "maximum": 1
-                                }
-                              ]
-                            },
-                            "negate": {
-                              "type": "boolean"
-                            },
-                            "type": {
-                              "type": "string",
-                              "const": "contains"
-                            },
-                            "value": {
-                              "type": "string"
-                            }
-                          },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
-                          "additionalProperties": false
-                        },
-                        {
-                          "type": "object",
-                          "properties": {
-                            "name": {
+                            "value": {
+                              "type": "string"
+                            }
+                          },
+                          "required": [
+                            "type",
+                            "value"
+                          ],
+                          "additionalProperties": false
+                        },
+                        {
+                          "type": "object",
+                          "properties": {
+                            "name": {
                               "type": "string"
                             },
                             "weight": {
@@ -12430,120 +11404,6 @@
                               ],
                               "additionalProperties": false
                             },
-                            {
-                              "type": "object",
-                              "properties": {
-                                "name": {
-                                  "type": "string"
-                                },
-                                "weight": {
-                                  "type": "number",
-                                  "minimum": 0
-                                },
-                                "required": {
-                                  "anyOf": [
-                                    {
-                                      "type": "boolean"
-                                    },
-                                    {
-                                      "type": "number",
-                                      "exclusiveMinimum": 0,
-                                      "maximum": 1
-                                    }
-                                  ]
-                                },
-                                "negate": {
-                                  "type": "boolean"
-                                },
-                                "type": {
-                                  "type": "string",
-                                  "enum": [
-                                    "agent-judge",
-                                    "agent_judge"
-                                  ]
-                                },
-                                "prompt": {
-                                  "type": "string"
-                                },
-                                "rubrics": {
-                                  "type": "array",
-                                  "items": {
-                                    "type": "object",
-                                    "properties": {
-                                      "id": {
-                                        "type": "string"
-                                      },
-                                      "outcome": {
-                                        "type": "string"
-                                      },
-                                      "weight": {
-                                        "type": "number"
-                                      },
-                                      "required": {
-                                        "type": "boolean"
-                                      },
-                                      "required_min_score": {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      },
-                                      "score_ranges": {
-                                        "type": "array",
-                                        "items": {
-                                          "type": "object",
-                                          "properties": {
-                                            "score_range": {
-                                              "type": "array",
-                                              "minItems": 2,
-                                              "maxItems": 2,
-                                              "items": [
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                },
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                }
-                                              ]
-                                            },
-                                            "outcome": {
-                                              "type": "string",
-                                              "minLength": 1
-                                            }
-                                          },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
-                                          "additionalProperties": false
-                                        }
-                                      }
-                                    },
-                                    "additionalProperties": false
-                                  }
-                                },
-                                "max_steps": {
-                                  "type": "integer",
-                                  "minimum": 1,
-                                  "maximum": 50
-                                },
-                                "temperature": {
-                                  "type": "number",
-                                  "minimum": 0,
-                                  "maximum": 2
-                                },
-                                "target": {
-                                  "type": "string"
-                                }
-                              },
-                              "required": [
-                                "type"
-                              ],
-                              "additionalProperties": false
-                            },
                             {
                               "type": "object",
                               "properties": {
@@ -13662,136 +12522,22 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "agent-judge",
-                                    "agent_judge"
-                                  ]
+                                  "const": "contains"
                                 },
-                                "prompt": {
-                                  "type": "string"
-                                },
-                                "rubrics": {
-                                  "type": "array",
-                                  "items": {
-                                    "type": "object",
-                                    "properties": {
-                                      "id": {
-                                        "type": "string"
-                                      },
-                                      "outcome": {
-                                        "type": "string"
-                                      },
-                                      "weight": {
-                                        "type": "number"
-                                      },
-                                      "required": {
-                                        "type": "boolean"
-                                      },
-                                      "required_min_score": {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      },
-                                      "score_ranges": {
-                                        "type": "array",
-                                        "items": {
-                                          "type": "object",
-                                          "properties": {
-                                            "score_range": {
-                                              "type": "array",
-                                              "minItems": 2,
-                                              "maxItems": 2,
-                                              "items": [
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                },
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                }
-                                              ]
-                                            },
-                                            "outcome": {
-                                              "type": "string",
-                                              "minLength": 1
-                                            }
-                                          },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
-                                          "additionalProperties": false
-                                        }
-                                      }
-                                    },
-                                    "additionalProperties": false
-                                  }
-                                },
-                                "max_steps": {
-                                  "type": "integer",
-                                  "minimum": 1,
-                                  "maximum": 50
-                                },
-                                "temperature": {
-                                  "type": "number",
-                                  "minimum": 0,
-                                  "maximum": 2
-                                },
-                                "target": {
-                                  "type": "string"
-                                }
-                              },
-                              "required": [
-                                "type"
-                              ],
-                              "additionalProperties": false
-                            },
-                            {
-                              "type": "object",
-                              "properties": {
-                                "name": {
-                                  "type": "string"
-                                },
-                                "weight": {
-                                  "type": "number",
-                                  "minimum": 0
-                                },
-                                "required": {
-                                  "anyOf": [
-                                    {
-                                      "type": "boolean"
-                                    },
-                                    {
-                                      "type": "number",
-                                      "exclusiveMinimum": 0,
-                                      "maximum": 1
-                                    }
-                                  ]
-                                },
-                                "negate": {
-                                  "type": "boolean"
-                                },
-                                "type": {
-                                  "type": "string",
-                                  "const": "contains"
-                                },
-                                "value": {
-                                  "type": "string"
-                                }
-                              },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
-                              "additionalProperties": false
-                            },
-                            {
-                              "type": "object",
-                              "properties": {
-                                "name": {
+                                "value": {
+                                  "type": "string"
+                                }
+                              },
+                              "required": [
+                                "type",
+                                "value"
+                              ],
+                              "additionalProperties": false
+                            },
+                            {
+                              "type": "object",
+                              "properties": {
+                                "name": {
                                   "type": "string"
                                 },
                                 "weight": {
@@ -14840,120 +13586,6 @@
                               ],
                               "additionalProperties": false
                             },
-                            {
-                              "type": "object",
-                              "properties": {
-                                "name": {
-                                  "type": "string"
-                                },
-                                "weight": {
-                                  "type": "number",
-                                  "minimum": 0
-                                },
-                                "required": {
-                                  "anyOf": [
-                                    {
-                                      "type": "boolean"
-                                    },
-                                    {
-                                      "type": "number",
-                                      "exclusiveMinimum": 0,
-                                      "maximum": 1
-                                    }
-                                  ]
-                                },
-                                "negate": {
-                                  "type": "boolean"
-                                },
-                                "type": {
-                                  "type": "string",
-                                  "enum": [
-                                    "agent-judge",
-                                    "agent_judge"
-                                  ]
-                                },
-                                "prompt": {
-                                  "type": "string"
-                                },
-                                "rubrics": {
-                                  "type": "array",
-                                  "items": {
-                                    "type": "object",
-                                    "properties": {
-                                      "id": {
-                                        "type": "string"
-                                      },
-                                      "outcome": {
-                                        "type": "string"
-                                      },
-                                      "weight": {
-                                        "type": "number"
-                                      },
-                                      "required": {
-                                        "type": "boolean"
-                                      },
-                                      "required_min_score": {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      },
-                                      "score_ranges": {
-                                        "type": "array",
-                                        "items": {
-                                          "type": "object",
-                                          "properties": {
-                                            "score_range": {
-                                              "type": "array",
-                                              "minItems": 2,
-                                              "maxItems": 2,
-                                              "items": [
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                },
-                                                {
-                                                  "type": "integer",
-                                                  "minimum": 0,
-                                                  "maximum": 10
-                                                }
-                                              ]
-                                            },
-                                            "outcome": {
-                                              "type": "string",
-                                              "minLength": 1
-                                            }
-                                          },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
-                                          "additionalProperties": false
-                                        }
-                                      }
-                                    },
-                                    "additionalProperties": false
-                                  }
-                                },
-                                "max_steps": {
-                                  "type": "integer",
-                                  "minimum": 1,
-                                  "maximum": 50
-                                },
-                                "temperature": {
-                                  "type": "number",
-                                  "minimum": 0,
-                                  "maximum": 2
-                                },
-                                "target": {
-                                  "type": "string"
-                                }
-                              },
-                              "required": [
-                                "type"
-                              ],
-                              "additionalProperties": false
-                            },
                             {
                               "type": "object",
                               "properties": {
@@ -16434,136 +15066,22 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "agent-judge",
-                          "agent_judge"
-                        ]
+                        "const": "contains"
                       },
-                      "prompt": {
-                        "type": "string"
-                      },
-                      "rubrics": {
-                        "type": "array",
-                        "items": {
-                          "type": "object",
-                          "properties": {
-                            "id": {
-                              "type": "string"
-                            },
-                            "outcome": {
-                              "type": "string"
-                            },
-                            "weight": {
-                              "type": "number"
-                            },
-                            "required": {
-                              "type": "boolean"
-                            },
-                            "required_min_score": {
-                              "type": "integer",
-                              "minimum": 0,
-                              "maximum": 10
-                            },
-                            "score_ranges": {
-                              "type": "array",
-                              "items": {
-                                "type": "object",
-                                "properties": {
-                                  "score_range": {
-                                    "type": "array",
-                                    "minItems": 2,
-                                    "maxItems": 2,
-                                    "items": [
-                                      {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      },
-                                      {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      }
-                                    ]
-                                  },
-                                  "outcome": {
-                                    "type": "string",
-                                    "minLength": 1
-                                  }
-                                },
-                                "required": [
-                                  "score_range",
-                                  "outcome"
-                                ],
-                                "additionalProperties": false
-                              }
-                            }
-                          },
-                          "additionalProperties": false
-                        }
-                      },
-                      "max_steps": {
-                        "type": "integer",
-                        "minimum": 1,
-                        "maximum": 50
-                      },
-                      "temperature": {
-                        "type": "number",
-                        "minimum": 0,
-                        "maximum": 2
-                      },
-                      "target": {
-                        "type": "string"
-                      }
-                    },
-                    "required": [
-                      "type"
-                    ],
-                    "additionalProperties": false
-                  },
-                  {
-                    "type": "object",
-                    "properties": {
-                      "name": {
-                        "type": "string"
-                      },
-                      "weight": {
-                        "type": "number",
-                        "minimum": 0
-                      },
-                      "required": {
-                        "anyOf": [
-                          {
-                            "type": "boolean"
-                          },
-                          {
-                            "type": "number",
-                            "exclusiveMinimum": 0,
-                            "maximum": 1
-                          }
-                        ]
-                      },
-                      "negate": {
-                        "type": "boolean"
-                      },
-                      "type": {
-                        "type": "string",
-                        "const": "contains"
-                      },
-                      "value": {
-                        "type": "string"
-                      }
-                    },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
-                    "additionalProperties": false
-                  },
-                  {
-                    "type": "object",
-                    "properties": {
-                      "name": {
+                      "value": {
+                        "type": "string"
+                      }
+                    },
+                    "required": [
+                      "type",
+                      "value"
+                    ],
+                    "additionalProperties": false
+                  },
+                  {
+                    "type": "object",
+                    "properties": {
+                      "name": {
                         "type": "string"
                       },
                       "weight": {
@@ -17612,120 +16130,6 @@
                     ],
                     "additionalProperties": false
                   },
-                  {
-                    "type": "object",
-                    "properties": {
-                      "name": {
-                        "type": "string"
-                      },
-                      "weight": {
-                        "type": "number",
-                        "minimum": 0
-                      },
-                      "required": {
-                        "anyOf": [
-                          {
-                            "type": "boolean"
-                          },
-                          {
-                            "type": "number",
-                            "exclusiveMinimum": 0,
-                            "maximum": 1
-                          }
-                        ]
-                      },
-                      "negate": {
-                        "type": "boolean"
-                      },
-                      "type": {
-                        "type": "string",
-                        "enum": [
-                          "agent-judge",
-                          "agent_judge"
-                        ]
-                      },
-                      "prompt": {
-                        "type": "string"
-                      },
-                      "rubrics": {
-                        "type": "array",
-                        "items": {
-                          "type": "object",
-                          "properties": {
-                            "id": {
-                              "type": "string"
-                            },
-                            "outcome": {
-                              "type": "string"
-                            },
-                            "weight": {
-                              "type": "number"
-                            },
-                            "required": {
-                              "type": "boolean"
-                            },
-                            "required_min_score": {
-                              "type": "integer",
-                              "minimum": 0,
-                              "maximum": 10
-                            },
-                            "score_ranges": {
-                              "type": "array",
-                              "items": {
-                                "type": "object",
-                                "properties": {
-                                  "score_range": {
-                                    "type": "array",
-                                    "minItems": 2,
-                                    "maxItems": 2,
-                                    "items": [
-                                      {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      },
-                                      {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      }
-                                    ]
-                                  },
-                                  "outcome": {
-                                    "type": "string",
-                                    "minLength": 1
-                                  }
-                                },
-                                "required": [
-                                  "score_range",
-                                  "outcome"
-                                ],
-                                "additionalProperties": false
-                              }
-                            }
-                          },
-                          "additionalProperties": false
-                        }
-                      },
-                      "max_steps": {
-                        "type": "integer",
-                        "minimum": 1,
-                        "maximum": 50
-                      },
-                      "temperature": {
-                        "type": "number",
-                        "minimum": 0,
-                        "maximum": 2
-                      },
-                      "target": {
-                        "type": "string"
-                      }
-                    },
-                    "required": [
-                      "type"
-                    ],
-                    "additionalProperties": false
-                  },
                   {
                     "type": "object",
                     "properties": {
@@ -18782,148 +17186,34 @@
                           "execution_metrics"
                         ]
                       },
-                      "max_tool_calls": {
-                        "type": "number",
-                        "minimum": 0
-                      },
-                      "max_llm_calls": {
-                        "type": "number",
-                        "minimum": 0
-                      },
-                      "max_tokens": {
-                        "type": "number",
-                        "minimum": 0
-                      },
-                      "max_cost_usd": {
-                        "type": "number",
-                        "minimum": 0
-                      },
-                      "max_duration_ms": {
-                        "type": "number",
-                        "minimum": 0
-                      },
-                      "target_exploration_ratio": {
-                        "type": "number",
-                        "minimum": 0,
-                        "maximum": 1
-                      },
-                      "exploration_tolerance": {
-                        "type": "number",
-                        "minimum": 0
-                      }
-                    },
-                    "required": [
-                      "type"
-                    ],
-                    "additionalProperties": false
-                  },
-                  {
-                    "type": "object",
-                    "properties": {
-                      "name": {
-                        "type": "string"
-                      },
-                      "weight": {
-                        "type": "number",
-                        "minimum": 0
-                      },
-                      "required": {
-                        "anyOf": [
-                          {
-                            "type": "boolean"
-                          },
-                          {
-                            "type": "number",
-                            "exclusiveMinimum": 0,
-                            "maximum": 1
-                          }
-                        ]
-                      },
-                      "negate": {
-                        "type": "boolean"
-                      },
-                      "type": {
-                        "type": "string",
-                        "enum": [
-                          "agent-judge",
-                          "agent_judge"
-                        ]
-                      },
-                      "prompt": {
-                        "type": "string"
-                      },
-                      "rubrics": {
-                        "type": "array",
-                        "items": {
-                          "type": "object",
-                          "properties": {
-                            "id": {
-                              "type": "string"
-                            },
-                            "outcome": {
-                              "type": "string"
-                            },
-                            "weight": {
-                              "type": "number"
-                            },
-                            "required": {
-                              "type": "boolean"
-                            },
-                            "required_min_score": {
-                              "type": "integer",
-                              "minimum": 0,
-                              "maximum": 10
-                            },
-                            "score_ranges": {
-                              "type": "array",
-                              "items": {
-                                "type": "object",
-                                "properties": {
-                                  "score_range": {
-                                    "type": "array",
-                                    "minItems": 2,
-                                    "maxItems": 2,
-                                    "items": [
-                                      {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      },
-                                      {
-                                        "type": "integer",
-                                        "minimum": 0,
-                                        "maximum": 10
-                                      }
-                                    ]
-                                  },
-                                  "outcome": {
-                                    "type": "string",
-                                    "minLength": 1
-                                  }
-                                },
-                                "required": [
-                                  "score_range",
-                                  "outcome"
-                                ],
-                                "additionalProperties": false
-                              }
-                            }
-                          },
-                          "additionalProperties": false
-                        }
-                      },
-                      "max_steps": {
-                        "type": "integer",
-                        "minimum": 1,
-                        "maximum": 50
+                      "max_tool_calls": {
+                        "type": "number",
+                        "minimum": 0
                       },
-                      "temperature": {
+                      "max_llm_calls": {
+                        "type": "number",
+                        "minimum": 0
+                      },
+                      "max_tokens": {
+                        "type": "number",
+                        "minimum": 0
+                      },
+                      "max_cost_usd": {
+                        "type": "number",
+                        "minimum": 0
+                      },
+                      "max_duration_ms": {
+                        "type": "number",
+                        "minimum": 0
+                      },
+                      "target_exploration_ratio": {
                         "type": "number",
                         "minimum": 0,
-                        "maximum": 2
+                        "maximum": 1
                       },
-                      "target": {
-                        "type": "string"
+                      "exploration_tolerance": {
+                        "type": "number",
+                        "minimum": 0
                       }
                     },
                     "required": [
@@ -20074,120 +18364,6 @@
                 ],
                 "additionalProperties": false
               },
-              {
-                "type": "object",
-                "properties": {
-                  "name": {
-                    "type": "string"
-                  },
-                  "weight": {
-                    "type": "number",
-                    "minimum": 0
-                  },
-                  "required": {
-                    "anyOf": [
-                      {
-                        "type": "boolean"
-                      },
-                      {
-                        "type": "number",
-                        "exclusiveMinimum": 0,
-                        "maximum": 1
-                      }
-                    ]
-                  },
-                  "negate": {
-                    "type": "boolean"
-                  },
-                  "type": {
-                    "type": "string",
-                    "enum": [
-                      "agent-judge",
-                      "agent_judge"
-                    ]
-                  },
-                  "prompt": {
-                    "type": "string"
-                  },
-                  "rubrics": {
-                    "type": "array",
-                    "items": {
-                      "type": "object",
-                      "properties": {
-                        "id": {
-                          "type": "string"
-                        },
-                        "outcome": {
-                          "type": "string"
-                        },
-                        "weight": {
-                          "type": "number"
-                        },
-                        "required": {
-                          "type": "boolean"
-                        },
-                        "required_min_score": {
-                          "type": "integer",
-                          "minimum": 0,
-                          "maximum": 10
-                        },
-                        "score_ranges": {
-                          "type": "array",
-                          "items": {
-                            "type": "object",
-                            "properties": {
-                              "score_range": {
-                                "type": "array",
-                                "minItems": 2,
-                                "maxItems": 2,
-                                "items": [
-                                  {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  },
-                                  {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  }
-                                ]
-                              },
-                              "outcome": {
-                                "type": "string",
-                                "minLength": 1
-                              }
-                            },
-                            "required": [
-                              "score_range",
-                              "outcome"
-                            ],
-                            "additionalProperties": false
-                          }
-                        }
-                      },
-                      "additionalProperties": false
-                    }
-                  },
-                  "max_steps": {
-                    "type": "integer",
-                    "minimum": 1,
-                    "maximum": 50
-                  },
-                  "temperature": {
-                    "type": "number",
-                    "minimum": 0,
-                    "maximum": 2
-                  },
-                  "target": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "type"
-                ],
-                "additionalProperties": false
-              },
               {
                 "type": "object",
                 "properties": {
@@ -21279,120 +19455,6 @@
                 ],
                 "additionalProperties": false
               },
-              {
-                "type": "object",
-                "properties": {
-                  "name": {
-                    "type": "string"
-                  },
-                  "weight": {
-                    "type": "number",
-                    "minimum": 0
-                  },
-                  "required": {
-                    "anyOf": [
-                      {
-                        "type": "boolean"
-                      },
-                      {
-                        "type": "number",
-                        "exclusiveMinimum": 0,
-                        "maximum": 1
-                      }
-                    ]
-                  },
-                  "negate": {
-                    "type": "boolean"
-                  },
-                  "type": {
-                    "type": "string",
-                    "enum": [
-                      "agent-judge",
-                      "agent_judge"
-                    ]
-                  },
-                  "prompt": {
-                    "type": "string"
-                  },
-                  "rubrics": {
-                    "type": "array",
-                    "items": {
-                      "type": "object",
-                      "properties": {
-                        "id": {
-                          "type": "string"
-                        },
-                        "outcome": {
-                          "type": "string"
-                        },
-                        "weight": {
-                          "type": "number"
-                        },
-                        "required": {
-                          "type": "boolean"
-                        },
-                        "required_min_score": {
-                          "type": "integer",
-                          "minimum": 0,
-                          "maximum": 10
-                        },
-                        "score_ranges": {
-                          "type": "array",
-                          "items": {
-                            "type": "object",
-                            "properties": {
-                              "score_range": {
-                                "type": "array",
-                                "minItems": 2,
-                                "maxItems": 2,
-                                "items": [
-                                  {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  },
-                                  {
-                                    "type": "integer",
-                                    "minimum": 0,
-                                    "maximum": 10
-                                  }
-                                ]
-                              },
-                              "outcome": {
-                                "type": "string",
-                                "minLength": 1
-                              }
-                            },
-                            "required": [
-                              "score_range",
-                              "outcome"
-                            ],
-                            "additionalProperties": false
-                          }
-                        }
-                      },
-                      "additionalProperties": false
-                    }
-                  },
-                  "max_steps": {
-                    "type": "integer",
-                    "minimum": 1,
-                    "maximum": 50
-                  },
-                  "temperature": {
-                    "type": "number",
-                    "minimum": 0,
-                    "maximum": 2
-                  },
-                  "target": {
-                    "type": "string"
-                  }
-                },
-                "required": [
-                  "type"
-                ],
-                "additionalProperties": false
-              },
               {
                 "type": "object",
                 "properties": {

From 3171eb94b26b145418b498c06cfc1a577a2108f7 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 19:15:51 +0000
Subject: [PATCH 11/13] docs: add E2E checklist to CLAUDE.md for all work
 before finishing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CLAUDE.md | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/CLAUDE.md b/CLAUDE.md
index ea7608f0f..492e4c944 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -159,6 +159,28 @@ Unit tests alone are insufficient for evaluator changes. After implementing or m
 
 5. **Note:** `--dry-run` returns mock responses that don't match evaluator output schemas. Use it only for testing harness flow, not evaluator logic.
 
+## Completing Work — E2E Checklist
+
+Before marking any branch as ready for review, complete this checklist:
+
+1. **Copy `.env` to worktree** (if working in a git worktree):
+   ```bash
+   cp /home/christso/projects/agentv/.env .env
+   ```
+   Adjust the source path to point at your main checkout. Without this, any eval run or LLM-dependent test will fail with missing API key errors.
+
+2. **Run unit tests**: `bun run test` — all must pass.
+
+3. **Run at least one real eval** against an example file to verify end-to-end behavior:
+   ```bash
+   bun apps/cli/src/cli.ts eval examples/features/rubric/evals/dataset.eval.yaml --test-id <test-id>
+   ```
+   Inspect the output JSONL to confirm correct evaluator type, scores, and hits/misses.
+
+4. **Verify no regressions** in areas adjacent to your changes (e.g., if you changed evaluator parsing, run an eval that exercises different evaluator types).
+
+5. **Mark PR as ready** only after all above steps pass.
+
 ## Evaluator Type System
 
 Evaluator types use **kebab-case** everywhere (matching promptfoo convention):
@@ -248,6 +270,7 @@ When working on a GitHub issue, **ALWAYS** follow this workflow:
    ```
 
 4. **Before merging**, ensure:
+   - **E2E verification completed** (see "Completing Work — E2E Checklist" above)
    - CI pipeline passes (all checks green)
    - Code has been reviewed if required
    - No merge conflicts with `main`

From effb331dbdef6ba3cd0e6667c467c60167a440c1 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 19:16:58 +0000
Subject: [PATCH 12/13] style: fix biome formatting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 apps/cli/node_modules                         |    1 +
 node_modules                                  |    1 +
 packages/core/node_modules                    |    1 +
 .../loaders/eval-yaml-transpiler.ts           |    5 +-
 .../evaluation/loaders/evaluator-parser.ts    |    4 +-
 packages/core/src/evaluation/orchestrator.ts  |    1 -
 packages/eval/node_modules                    |    1 +
 .../references/eval-schema.json               | 3462 ++++-------------
 8 files changed, 677 insertions(+), 2799 deletions(-)
 create mode 120000 apps/cli/node_modules
 create mode 120000 node_modules
 create mode 120000 packages/core/node_modules
 create mode 120000 packages/eval/node_modules

diff --git a/apps/cli/node_modules b/apps/cli/node_modules
new file mode 120000
index 000000000..c99229581
--- /dev/null
+++ b/apps/cli/node_modules
@@ -0,0 +1 @@
+/home/christso/projects/agentv/apps/cli/node_modules
\ No newline at end of file
diff --git a/node_modules b/node_modules
new file mode 120000
index 000000000..8cba0ae08
--- /dev/null
+++ b/node_modules
@@ -0,0 +1 @@
+/home/christso/projects/agentv/node_modules
\ No newline at end of file
diff --git a/packages/core/node_modules b/packages/core/node_modules
new file mode 120000
index 000000000..a07840188
--- /dev/null
+++ b/packages/core/node_modules
@@ -0,0 +1 @@
+/home/christso/projects/agentv/packages/core/node_modules
\ No newline at end of file
diff --git a/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts b/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts
index db646013b..9c79366a0 100644
--- a/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts
+++ b/packages/core/src/evaluation/loaders/eval-yaml-transpiler.ts
@@ -216,10 +216,7 @@ function assertionToNaturalLanguage(entry: RawAssertEntry): string | null {
  * Most assertions produce exactly one string; llm-judge with rubrics expands to many.
  */
 function assertionToNaturalLanguageList(entry: RawAssertEntry): string[] {
-  if (
-    entry.type === 'llm-judge' ||
-    entry.type === 'llm_judge'
-  ) {
+  if (entry.type === 'llm-judge' || entry.type === 'llm_judge') {
     if (Array.isArray(entry.rubrics) && entry.rubrics.length > 0) {
       return (entry.rubrics as Array<{ outcome?: string; criteria?: string; id?: string }>)
         .map((r) => r.outcome ?? r.criteria ?? r.id)
diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts
index 8f994a8e4..4ec619e22 100644
--- a/packages/core/src/evaluation/loaders/evaluator-parser.ts
+++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts
@@ -134,8 +134,7 @@ async function parseEvaluatorList(
     const typeValue = typeof rawType === 'string' ? normalizeEvaluatorType(rawType) : rawType;
 
     // Unknown types are treated as custom assertion types (resolved via registry discovery)
-    const isCustomType =
-      typeof typeValue === 'string' && !isEvaluatorKind(typeValue);
+    const isCustomType = typeof typeValue === 'string' && !isEvaluatorKind(typeValue);
     if (typeof typeValue !== 'string') {
       logWarning(`Skipping evaluator with invalid type in '${evalId}'`);
       continue;
@@ -853,7 +852,6 @@ async function parseEvaluatorList(
       continue;
     }
 
-
     if (typeValue === 'skill-trigger') {
       const skillName = asString(rawEvaluator.skill);
       if (!skillName) {
diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts
index fb8649aa4..95cbdab7f 100644
--- a/packages/core/src/evaluation/orchestrator.ts
+++ b/packages/core/src/evaluation/orchestrator.ts
@@ -346,7 +346,6 @@ export async function runEvaluation(
       if (cliJudgeTarget === 'agentv') {
         if (!cliModel) {
           throw new Error('--judge-target "agentv" requires --model (e.g., "openai:gpt-5-mini")');
-
         }
         const { AgentvProvider } = await import('./providers/agentv-provider.js');
         return new AgentvProvider('agentv', { model: cliModel, temperature: 0 });
diff --git a/packages/eval/node_modules b/packages/eval/node_modules
new file mode 120000
index 000000000..d1cf07368
--- /dev/null
+++ b/packages/eval/node_modules
@@ -0,0 +1 @@
+/home/christso/projects/agentv/packages/eval/node_modules
\ No newline at end of file
diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json
index 77f5be6d9..483031bf6 100644
--- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json
+++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json
@@ -53,12 +53,7 @@
                 "properties": {
                   "role": {
                     "type": "string",
-                    "enum": [
-                      "system",
-                      "user",
-                      "assistant",
-                      "tool"
-                    ]
+                    "enum": ["system", "user", "assistant", "tool"]
                   },
                   "content": {
                     "anyOf": [
@@ -72,29 +67,20 @@
                           "properties": {
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "text",
-                                "file"
-                              ]
+                              "enum": ["text", "file"]
                             },
                             "value": {
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         }
                       }
                     ]
                   }
                 },
-                "required": [
-                  "role",
-                  "content"
-                ],
+                "required": ["role", "content"],
                 "additionalProperties": false
               }
             }
@@ -129,12 +115,7 @@
                           "properties": {
                             "role": {
                               "type": "string",
-                              "enum": [
-                                "system",
-                                "user",
-                                "assistant",
-                                "tool"
-                              ]
+                              "enum": ["system", "user", "assistant", "tool"]
                             },
                             "content": {
                               "anyOf": [
@@ -148,29 +129,20 @@
                                     "properties": {
                                       "type": {
                                         "type": "string",
-                                        "enum": [
-                                          "text",
-                                          "file"
-                                        ]
+                                        "enum": ["text", "file"]
                                       },
                                       "value": {
                                         "type": "string"
                                       }
                                     },
-                                    "required": [
-                                      "type",
-                                      "value"
-                                    ],
+                                    "required": ["type", "value"],
                                     "additionalProperties": false
                                   }
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "role",
-                            "content"
-                          ],
+                          "required": ["role", "content"],
                           "additionalProperties": false
                         }
                       }
@@ -192,12 +164,7 @@
                           "properties": {
                             "role": {
                               "type": "string",
-                              "enum": [
-                                "system",
-                                "user",
-                                "assistant",
-                                "tool"
-                              ]
+                              "enum": ["system", "user", "assistant", "tool"]
                             },
                             "content": {
                               "anyOf": [
@@ -211,29 +178,20 @@
                                     "properties": {
                                       "type": {
                                         "type": "string",
-                                        "enum": [
-                                          "text",
-                                          "file"
-                                        ]
+                                        "enum": ["text", "file"]
                                       },
                                       "value": {
                                         "type": "string"
                                       }
                                     },
-                                    "required": [
-                                      "type",
-                                      "value"
-                                    ],
+                                    "required": ["type", "value"],
                                     "additionalProperties": false
                                   }
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "role",
-                            "content"
-                          ],
+                          "required": ["role", "content"],
                           "additionalProperties": false
                         }
                       }
@@ -270,10 +228,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "code-judge",
-                                "code_judge"
-                              ]
+                              "enum": ["code-judge", "code_judge"]
                             },
                             "command": {
                               "anyOf": [
@@ -325,10 +280,7 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": [
-                            "type",
-                            "command"
-                          ],
+                          "required": ["type", "command"],
                           "additionalProperties": false
                         },
                         {
@@ -358,10 +310,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "llm-judge",
-                                "llm_judge"
-                              ]
+                              "enum": ["llm-judge", "llm_judge"]
                             },
                             "prompt": {
                               "anyOf": [
@@ -455,10 +404,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -487,9 +433,7 @@
                               "maximum": 2
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -549,9 +493,7 @@
                                       }
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -567,10 +509,7 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "threshold"
-                                  ],
+                                  "required": ["type", "threshold"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -587,10 +526,7 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "path"
-                                  ],
+                                  "required": ["type", "path"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -607,18 +543,13 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "aggregator"
-                          ],
+                          "required": ["type", "aggregator"],
                           "additionalProperties": false
                         },
                         {
@@ -648,20 +579,11 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "tool-trajectory",
-                                "tool_trajectory"
-                              ]
+                              "enum": ["tool-trajectory", "tool_trajectory"]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": [
-                                "any_order",
-                                "in_order",
-                                "exact",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                             },
                             "minimums": {
                               "type": "object",
@@ -702,12 +624,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -721,12 +638,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -737,9 +649,7 @@
                                     ]
                                   }
                                 },
-                                "required": [
-                                  "tool"
-                                ],
+                                "required": ["tool"],
                                 "additionalProperties": false
                               }
                             },
@@ -747,12 +657,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -766,12 +671,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -782,10 +682,7 @@
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "mode"
-                          ],
+                          "required": ["type", "mode"],
                           "additionalProperties": false
                         },
                         {
@@ -815,10 +712,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "field-accuracy",
-                                "field_accuracy"
-                              ]
+                              "enum": ["field-accuracy", "field_accuracy"]
                             },
                             "fields": {
                               "type": "array",
@@ -830,11 +724,7 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": [
-                                      "exact",
-                                      "numeric_tolerance",
-                                      "date"
-                                    ]
+                                    "enum": ["exact", "numeric_tolerance", "date"]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -856,26 +746,17 @@
                                     }
                                   }
                                 },
-                                "required": [
-                                  "path",
-                                  "match"
-                                ],
+                                "required": ["path", "match"],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": [
-                                "weighted_average",
-                                "all_or_nothing"
-                              ]
+                              "enum": ["weighted_average", "all_or_nothing"]
                             }
                           },
-                          "required": [
-                            "type",
-                            "fields"
-                          ],
+                          "required": ["type", "fields"],
                           "additionalProperties": false
                         },
                         {
@@ -912,10 +793,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "threshold"
-                          ],
+                          "required": ["type", "threshold"],
                           "additionalProperties": false
                         },
                         {
@@ -952,10 +830,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "budget"
-                          ],
+                          "required": ["type", "budget"],
                           "additionalProperties": false
                         },
                         {
@@ -985,10 +860,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "token-usage",
-                                "token_usage"
-                              ]
+                              "enum": ["token-usage", "token_usage"]
                             },
                             "max_total": {
                               "type": "number",
@@ -1003,9 +875,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -1035,10 +905,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "execution-metrics",
-                                "execution_metrics"
-                              ]
+                              "enum": ["execution-metrics", "execution_metrics"]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -1070,9 +937,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -1108,10 +973,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -1147,10 +1009,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -1180,15 +1039,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "is-json",
-                                "is_json"
-                              ]
+                              "enum": ["is-json", "is_json"]
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -1224,10 +1078,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -1308,10 +1159,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -1321,10 +1169,7 @@
                               "minItems": 1
                             }
                           },
-                          "required": [
-                            "type",
-                            "criteria"
-                          ],
+                          "required": ["type", "criteria"],
                           "additionalProperties": false
                         }
                       ]
@@ -1361,10 +1206,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "code-judge",
-                                "code_judge"
-                              ]
+                              "enum": ["code-judge", "code_judge"]
                             },
                             "command": {
                               "anyOf": [
@@ -1416,10 +1258,7 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": [
-                            "type",
-                            "command"
-                          ],
+                          "required": ["type", "command"],
                           "additionalProperties": false
                         },
                         {
@@ -1449,10 +1288,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "llm-judge",
-                                "llm_judge"
-                              ]
+                              "enum": ["llm-judge", "llm_judge"]
                             },
                             "prompt": {
                               "anyOf": [
@@ -1546,10 +1382,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -1578,9 +1411,7 @@
                               "maximum": 2
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -1640,9 +1471,7 @@
                                       }
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -1658,10 +1487,7 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "threshold"
-                                  ],
+                                  "required": ["type", "threshold"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -1678,10 +1504,7 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "path"
-                                  ],
+                                  "required": ["type", "path"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -1698,18 +1521,13 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "aggregator"
-                          ],
+                          "required": ["type", "aggregator"],
                           "additionalProperties": false
                         },
                         {
@@ -1739,20 +1557,11 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "tool-trajectory",
-                                "tool_trajectory"
-                              ]
+                              "enum": ["tool-trajectory", "tool_trajectory"]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": [
-                                "any_order",
-                                "in_order",
-                                "exact",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                             },
                             "minimums": {
                               "type": "object",
@@ -1793,12 +1602,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -1812,12 +1616,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -1828,9 +1627,7 @@
                                     ]
                                   }
                                 },
-                                "required": [
-                                  "tool"
-                                ],
+                                "required": ["tool"],
                                 "additionalProperties": false
                               }
                             },
@@ -1838,12 +1635,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -1857,12 +1649,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -1873,10 +1660,7 @@
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "mode"
-                          ],
+                          "required": ["type", "mode"],
                           "additionalProperties": false
                         },
                         {
@@ -1906,10 +1690,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "field-accuracy",
-                                "field_accuracy"
-                              ]
+                              "enum": ["field-accuracy", "field_accuracy"]
                             },
                             "fields": {
                               "type": "array",
@@ -1921,11 +1702,7 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": [
-                                      "exact",
-                                      "numeric_tolerance",
-                                      "date"
-                                    ]
+                                    "enum": ["exact", "numeric_tolerance", "date"]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -1947,26 +1724,17 @@
                                     }
                                   }
                                 },
-                                "required": [
-                                  "path",
-                                  "match"
-                                ],
+                                "required": ["path", "match"],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": [
-                                "weighted_average",
-                                "all_or_nothing"
-                              ]
+                              "enum": ["weighted_average", "all_or_nothing"]
                             }
                           },
-                          "required": [
-                            "type",
-                            "fields"
-                          ],
+                          "required": ["type", "fields"],
                           "additionalProperties": false
                         },
                         {
@@ -2003,10 +1771,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "threshold"
-                          ],
+                          "required": ["type", "threshold"],
                           "additionalProperties": false
                         },
                         {
@@ -2043,10 +1808,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "budget"
-                          ],
+                          "required": ["type", "budget"],
                           "additionalProperties": false
                         },
                         {
@@ -2076,10 +1838,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "token-usage",
-                                "token_usage"
-                              ]
+                              "enum": ["token-usage", "token_usage"]
                             },
                             "max_total": {
                               "type": "number",
@@ -2094,9 +1853,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -2126,10 +1883,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "execution-metrics",
-                                "execution_metrics"
-                              ]
+                              "enum": ["execution-metrics", "execution_metrics"]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -2161,9 +1915,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -2199,10 +1951,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -2238,10 +1987,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -2271,15 +2017,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "is-json",
-                                "is_json"
-                              ]
+                              "enum": ["is-json", "is_json"]
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -2315,10 +2056,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -2399,10 +2137,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -2412,10 +2147,7 @@
                               "minItems": 1
                             }
                           },
-                          "required": [
-                            "type",
-                            "criteria"
-                          ],
+                          "required": ["type", "criteria"],
                           "additionalProperties": false
                         }
                       ]
@@ -2452,10 +2184,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "code-judge",
-                                "code_judge"
-                              ]
+                              "enum": ["code-judge", "code_judge"]
                             },
                             "command": {
                               "anyOf": [
@@ -2507,10 +2236,7 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": [
-                            "type",
-                            "command"
-                          ],
+                          "required": ["type", "command"],
                           "additionalProperties": false
                         },
                         {
@@ -2540,10 +2266,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "llm-judge",
-                                "llm_judge"
-                              ]
+                              "enum": ["llm-judge", "llm_judge"]
                             },
                             "prompt": {
                               "anyOf": [
@@ -2637,10 +2360,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -2669,9 +2389,7 @@
                               "maximum": 2
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -2731,9 +2449,7 @@
                                       }
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -2749,10 +2465,7 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "threshold"
-                                  ],
+                                  "required": ["type", "threshold"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -2769,10 +2482,7 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "path"
-                                  ],
+                                  "required": ["type", "path"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -2789,18 +2499,13 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "aggregator"
-                          ],
+                          "required": ["type", "aggregator"],
                           "additionalProperties": false
                         },
                         {
@@ -2830,20 +2535,11 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "tool-trajectory",
-                                "tool_trajectory"
-                              ]
+                              "enum": ["tool-trajectory", "tool_trajectory"]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": [
-                                "any_order",
-                                "in_order",
-                                "exact",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                             },
                             "minimums": {
                               "type": "object",
@@ -2884,12 +2580,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -2903,12 +2594,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -2919,9 +2605,7 @@
                                     ]
                                   }
                                 },
-                                "required": [
-                                  "tool"
-                                ],
+                                "required": ["tool"],
                                 "additionalProperties": false
                               }
                             },
@@ -2929,12 +2613,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -2948,12 +2627,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -2964,10 +2638,7 @@
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "mode"
-                          ],
+                          "required": ["type", "mode"],
                           "additionalProperties": false
                         },
                         {
@@ -2997,10 +2668,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "field-accuracy",
-                                "field_accuracy"
-                              ]
+                              "enum": ["field-accuracy", "field_accuracy"]
                             },
                             "fields": {
                               "type": "array",
@@ -3012,11 +2680,7 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": [
-                                      "exact",
-                                      "numeric_tolerance",
-                                      "date"
-                                    ]
+                                    "enum": ["exact", "numeric_tolerance", "date"]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -3038,26 +2702,17 @@
                                     }
                                   }
                                 },
-                                "required": [
-                                  "path",
-                                  "match"
-                                ],
+                                "required": ["path", "match"],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": [
-                                "weighted_average",
-                                "all_or_nothing"
-                              ]
+                              "enum": ["weighted_average", "all_or_nothing"]
                             }
                           },
-                          "required": [
-                            "type",
-                            "fields"
-                          ],
+                          "required": ["type", "fields"],
                           "additionalProperties": false
                         },
                         {
@@ -3094,10 +2749,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "threshold"
-                          ],
+                          "required": ["type", "threshold"],
                           "additionalProperties": false
                         },
                         {
@@ -3134,10 +2786,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "budget"
-                          ],
+                          "required": ["type", "budget"],
                           "additionalProperties": false
                         },
                         {
@@ -3167,10 +2816,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "token-usage",
-                                "token_usage"
-                              ]
+                              "enum": ["token-usage", "token_usage"]
                             },
                             "max_total": {
                               "type": "number",
@@ -3185,9 +2831,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -3217,10 +2861,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "execution-metrics",
-                                "execution_metrics"
-                              ]
+                              "enum": ["execution-metrics", "execution_metrics"]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -3252,9 +2893,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -3290,10 +2929,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -3329,10 +2965,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -3362,15 +2995,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "is-json",
-                                "is_json"
-                              ]
+                              "enum": ["is-json", "is_json"]
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -3406,10 +3034,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -3490,10 +3115,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -3503,10 +3125,7 @@
                               "minItems": 1
                             }
                           },
-                          "required": [
-                            "type",
-                            "criteria"
-                          ],
+                          "required": ["type", "criteria"],
                           "additionalProperties": false
                         }
                       ]
@@ -3555,10 +3174,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "code-judge",
-                                    "code_judge"
-                                  ]
+                                  "enum": ["code-judge", "code_judge"]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -3610,10 +3226,7 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": [
-                                "type",
-                                "command"
-                              ],
+                              "required": ["type", "command"],
                               "additionalProperties": false
                             },
                             {
@@ -3643,10 +3256,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "llm-judge",
-                                    "llm_judge"
-                                  ]
+                                  "enum": ["llm-judge", "llm_judge"]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -3740,10 +3350,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -3772,9 +3379,7 @@
                                   "maximum": 2
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -3834,9 +3439,7 @@
                                           }
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -3852,10 +3455,7 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "threshold"
-                                      ],
+                                      "required": ["type", "threshold"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -3872,10 +3472,7 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "path"
-                                      ],
+                                      "required": ["type", "path"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -3892,18 +3489,13 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "aggregator"
-                              ],
+                              "required": ["type", "aggregator"],
                               "additionalProperties": false
                             },
                             {
@@ -3933,20 +3525,11 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "tool-trajectory",
-                                    "tool_trajectory"
-                                  ]
+                                  "enum": ["tool-trajectory", "tool_trajectory"]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": [
-                                    "any_order",
-                                    "in_order",
-                                    "exact",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -3987,12 +3570,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -4006,12 +3584,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -4022,9 +3595,7 @@
                                         ]
                                       }
                                     },
-                                    "required": [
-                                      "tool"
-                                    ],
+                                    "required": ["tool"],
                                     "additionalProperties": false
                                   }
                                 },
@@ -4032,12 +3603,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -4051,12 +3617,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -4067,10 +3628,7 @@
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "mode"
-                              ],
+                              "required": ["type", "mode"],
                               "additionalProperties": false
                             },
                             {
@@ -4100,10 +3658,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "field-accuracy",
-                                    "field_accuracy"
-                                  ]
+                                  "enum": ["field-accuracy", "field_accuracy"]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -4115,11 +3670,7 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "numeric_tolerance",
-                                          "date"
-                                        ]
+                                        "enum": ["exact", "numeric_tolerance", "date"]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -4141,26 +3692,17 @@
                                         }
                                       }
                                     },
-                                    "required": [
-                                      "path",
-                                      "match"
-                                    ],
+                                    "required": ["path", "match"],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": [
-                                    "weighted_average",
-                                    "all_or_nothing"
-                                  ]
+                                  "enum": ["weighted_average", "all_or_nothing"]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "fields"
-                              ],
+                              "required": ["type", "fields"],
                               "additionalProperties": false
                             },
                             {
@@ -4197,10 +3739,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "threshold"
-                              ],
+                              "required": ["type", "threshold"],
                               "additionalProperties": false
                             },
                             {
@@ -4237,10 +3776,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "budget"
-                              ],
+                              "required": ["type", "budget"],
                               "additionalProperties": false
                             },
                             {
@@ -4270,10 +3806,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "token-usage",
-                                    "token_usage"
-                                  ]
+                                  "enum": ["token-usage", "token_usage"]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -4288,9 +3821,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -4320,10 +3851,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "execution-metrics",
-                                    "execution_metrics"
-                                  ]
+                                  "enum": ["execution-metrics", "execution_metrics"]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -4355,9 +3883,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -4393,10 +3919,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -4432,10 +3955,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -4465,15 +3985,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "is-json",
-                                    "is_json"
-                                  ]
+                                  "enum": ["is-json", "is_json"]
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -4509,10 +4024,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -4593,10 +4105,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -4606,10 +4115,7 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": [
-                                "type",
-                                "criteria"
-                              ],
+                              "required": ["type", "criteria"],
                               "additionalProperties": false
                             }
                           ]
@@ -4646,10 +4152,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "code-judge",
-                                    "code_judge"
-                                  ]
+                                  "enum": ["code-judge", "code_judge"]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -4701,10 +4204,7 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": [
-                                "type",
-                                "command"
-                              ],
+                              "required": ["type", "command"],
                               "additionalProperties": false
                             },
                             {
@@ -4734,10 +4234,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "llm-judge",
-                                    "llm_judge"
-                                  ]
+                                  "enum": ["llm-judge", "llm_judge"]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -4831,10 +4328,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -4863,9 +4357,7 @@
                                   "maximum": 2
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -4925,9 +4417,7 @@
                                           }
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -4943,10 +4433,7 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "threshold"
-                                      ],
+                                      "required": ["type", "threshold"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -4963,10 +4450,7 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "path"
-                                      ],
+                                      "required": ["type", "path"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -4983,18 +4467,13 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "aggregator"
-                              ],
+                              "required": ["type", "aggregator"],
                               "additionalProperties": false
                             },
                             {
@@ -5024,20 +4503,11 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "tool-trajectory",
-                                    "tool_trajectory"
-                                  ]
+                                  "enum": ["tool-trajectory", "tool_trajectory"]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": [
-                                    "any_order",
-                                    "in_order",
-                                    "exact",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -5078,12 +4548,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -5097,12 +4562,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -5113,9 +4573,7 @@
                                         ]
                                       }
                                     },
-                                    "required": [
-                                      "tool"
-                                    ],
+                                    "required": ["tool"],
                                     "additionalProperties": false
                                   }
                                 },
@@ -5123,12 +4581,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -5142,12 +4595,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -5158,10 +4606,7 @@
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "mode"
-                              ],
+                              "required": ["type", "mode"],
                               "additionalProperties": false
                             },
                             {
@@ -5191,10 +4636,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "field-accuracy",
-                                    "field_accuracy"
-                                  ]
+                                  "enum": ["field-accuracy", "field_accuracy"]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -5206,11 +4648,7 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "numeric_tolerance",
-                                          "date"
-                                        ]
+                                        "enum": ["exact", "numeric_tolerance", "date"]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -5232,26 +4670,17 @@
                                         }
                                       }
                                     },
-                                    "required": [
-                                      "path",
-                                      "match"
-                                    ],
+                                    "required": ["path", "match"],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": [
-                                    "weighted_average",
-                                    "all_or_nothing"
-                                  ]
+                                  "enum": ["weighted_average", "all_or_nothing"]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "fields"
-                              ],
+                              "required": ["type", "fields"],
                               "additionalProperties": false
                             },
                             {
@@ -5288,10 +4717,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "threshold"
-                              ],
+                              "required": ["type", "threshold"],
                               "additionalProperties": false
                             },
                             {
@@ -5328,10 +4754,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "budget"
-                              ],
+                              "required": ["type", "budget"],
                               "additionalProperties": false
                             },
                             {
@@ -5361,10 +4784,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "token-usage",
-                                    "token_usage"
-                                  ]
+                                  "enum": ["token-usage", "token_usage"]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -5379,9 +4799,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -5411,10 +4829,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "execution-metrics",
-                                    "execution_metrics"
-                                  ]
+                                  "enum": ["execution-metrics", "execution_metrics"]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -5446,9 +4861,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -5484,10 +4897,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -5523,10 +4933,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -5556,15 +4963,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "is-json",
-                                    "is_json"
-                                  ]
+                                  "enum": ["is-json", "is_json"]
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -5600,10 +5002,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -5684,10 +5083,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -5697,10 +5093,7 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": [
-                                "type",
-                                "criteria"
-                              ],
+                              "required": ["type", "criteria"],
                               "additionalProperties": false
                             }
                           ]
@@ -5737,10 +5130,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "code-judge",
-                                    "code_judge"
-                                  ]
+                                  "enum": ["code-judge", "code_judge"]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -5792,10 +5182,7 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": [
-                                "type",
-                                "command"
-                              ],
+                              "required": ["type", "command"],
                               "additionalProperties": false
                             },
                             {
@@ -5825,10 +5212,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "llm-judge",
-                                    "llm_judge"
-                                  ]
+                                  "enum": ["llm-judge", "llm_judge"]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -5922,10 +5306,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -5954,9 +5335,7 @@
                                   "maximum": 2
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -6016,9 +5395,7 @@
                                           }
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -6034,10 +5411,7 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "threshold"
-                                      ],
+                                      "required": ["type", "threshold"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -6054,10 +5428,7 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "path"
-                                      ],
+                                      "required": ["type", "path"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -6074,18 +5445,13 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "aggregator"
-                              ],
+                              "required": ["type", "aggregator"],
                               "additionalProperties": false
                             },
                             {
@@ -6115,20 +5481,11 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "tool-trajectory",
-                                    "tool_trajectory"
-                                  ]
+                                  "enum": ["tool-trajectory", "tool_trajectory"]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": [
-                                    "any_order",
-                                    "in_order",
-                                    "exact",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -6169,12 +5526,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -6188,12 +5540,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -6204,9 +5551,7 @@
                                         ]
                                       }
                                     },
-                                    "required": [
-                                      "tool"
-                                    ],
+                                    "required": ["tool"],
                                     "additionalProperties": false
                                   }
                                 },
@@ -6214,12 +5559,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -6233,12 +5573,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -6249,10 +5584,7 @@
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "mode"
-                              ],
+                              "required": ["type", "mode"],
                               "additionalProperties": false
                             },
                             {
@@ -6282,10 +5614,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "field-accuracy",
-                                    "field_accuracy"
-                                  ]
+                                  "enum": ["field-accuracy", "field_accuracy"]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -6297,11 +5626,7 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "numeric_tolerance",
-                                          "date"
-                                        ]
+                                        "enum": ["exact", "numeric_tolerance", "date"]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -6323,26 +5648,17 @@
                                         }
                                       }
                                     },
-                                    "required": [
-                                      "path",
-                                      "match"
-                                    ],
+                                    "required": ["path", "match"],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": [
-                                    "weighted_average",
-                                    "all_or_nothing"
-                                  ]
+                                  "enum": ["weighted_average", "all_or_nothing"]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "fields"
-                              ],
+                              "required": ["type", "fields"],
                               "additionalProperties": false
                             },
                             {
@@ -6379,10 +5695,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "threshold"
-                              ],
+                              "required": ["type", "threshold"],
                               "additionalProperties": false
                             },
                             {
@@ -6419,10 +5732,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "budget"
-                              ],
+                              "required": ["type", "budget"],
                               "additionalProperties": false
                             },
                             {
@@ -6452,10 +5762,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "token-usage",
-                                    "token_usage"
-                                  ]
+                                  "enum": ["token-usage", "token_usage"]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -6470,9 +5777,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -6502,10 +5807,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "execution-metrics",
-                                    "execution_metrics"
-                                  ]
+                                  "enum": ["execution-metrics", "execution_metrics"]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -6537,9 +5839,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -6575,10 +5875,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -6614,10 +5911,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -6647,15 +5941,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "is-json",
-                                    "is_json"
-                                  ]
+                                  "enum": ["is-json", "is_json"]
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -6691,10 +5980,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -6775,10 +6061,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -6788,10 +6071,7 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": [
-                                "type",
-                                "criteria"
-                              ],
+                              "required": ["type", "criteria"],
                               "additionalProperties": false
                             }
                           ]
@@ -6812,11 +6092,7 @@
                           },
                           "strategy": {
                             "type": "string",
-                            "enum": [
-                              "pass_at_k",
-                              "mean",
-                              "confidence_interval"
-                            ]
+                            "enum": ["pass_at_k", "mean", "confidence_interval"]
                           },
                           "cost_limit_usd": {
                             "type": "number",
@@ -6827,9 +6103,7 @@
                             "minimum": 0
                           }
                         },
-                        "required": [
-                          "count"
-                        ],
+                        "required": ["count"],
                         "additionalProperties": false
                       },
                       "total_budget_usd": {
@@ -6857,10 +6131,7 @@
                       },
                       "isolation": {
                         "type": "string",
-                        "enum": [
-                          "shared",
-                          "per_test"
-                        ]
+                        "enum": ["shared", "per_test"]
                       },
                       "repos": {
                         "type": "array",
@@ -6884,10 +6155,7 @@
                                       "format": "uri"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "url"
-                                  ],
+                                  "required": ["type", "url"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -6901,10 +6169,7 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "path"
-                                  ],
+                                  "required": ["type", "path"],
                                   "additionalProperties": false
                                 }
                               ]
@@ -6917,10 +6182,7 @@
                                 },
                                 "resolve": {
                                   "type": "string",
-                                  "enum": [
-                                    "remote",
-                                    "local"
-                                  ]
+                                  "enum": ["remote", "local"]
                                 },
                                 "ancestor": {
                                   "type": "integer",
@@ -6949,10 +6211,7 @@
                               "additionalProperties": false
                             }
                           },
-                          "required": [
-                            "path",
-                            "source"
-                          ],
+                          "required": ["path", "source"],
                           "additionalProperties": false
                         }
                       },
@@ -6988,11 +6247,7 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": [
-                                  "none",
-                                  "fast",
-                                  "strict"
-                                ]
+                                "enum": ["none", "fast", "strict"]
                               }
                             },
                             "additionalProperties": false
@@ -7023,11 +6278,7 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": [
-                                  "none",
-                                  "fast",
-                                  "strict"
-                                ]
+                                "enum": ["none", "fast", "strict"]
                               }
                             },
                             "additionalProperties": false
@@ -7058,11 +6309,7 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": [
-                                  "none",
-                                  "fast",
-                                  "strict"
-                                ]
+                                "enum": ["none", "fast", "strict"]
                               }
                             },
                             "additionalProperties": false
@@ -7093,11 +6340,7 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": [
-                                  "none",
-                                  "fast",
-                                  "strict"
-                                ]
+                                "enum": ["none", "fast", "strict"]
                               }
                             },
                             "additionalProperties": false
@@ -7107,11 +6350,7 @@
                       },
                       "mode": {
                         "type": "string",
-                        "enum": [
-                          "pooled",
-                          "temp",
-                          "static"
-                        ]
+                        "enum": ["pooled", "temp", "static"]
                       },
                       "path": {
                         "type": "string"
@@ -7133,9 +6372,7 @@
                     "type": "string"
                   }
                 },
-                "required": [
-                  "id"
-                ],
+                "required": ["id"],
                 "additionalProperties": false
               }
             },
@@ -7173,12 +6410,7 @@
                           "properties": {
                             "role": {
                               "type": "string",
-                              "enum": [
-                                "system",
-                                "user",
-                                "assistant",
-                                "tool"
-                              ]
+                              "enum": ["system", "user", "assistant", "tool"]
                             },
                             "content": {
                               "anyOf": [
@@ -7192,29 +6424,20 @@
                                     "properties": {
                                       "type": {
                                         "type": "string",
-                                        "enum": [
-                                          "text",
-                                          "file"
-                                        ]
+                                        "enum": ["text", "file"]
                                       },
                                       "value": {
                                         "type": "string"
                                       }
                                     },
-                                    "required": [
-                                      "type",
-                                      "value"
-                                    ],
+                                    "required": ["type", "value"],
                                     "additionalProperties": false
                                   }
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "role",
-                            "content"
-                          ],
+                          "required": ["role", "content"],
                           "additionalProperties": false
                         }
                       }
@@ -7236,12 +6459,7 @@
                           "properties": {
                             "role": {
                               "type": "string",
-                              "enum": [
-                                "system",
-                                "user",
-                                "assistant",
-                                "tool"
-                              ]
+                              "enum": ["system", "user", "assistant", "tool"]
                             },
                             "content": {
                               "anyOf": [
@@ -7255,29 +6473,20 @@
                                     "properties": {
                                       "type": {
                                         "type": "string",
-                                        "enum": [
-                                          "text",
-                                          "file"
-                                        ]
+                                        "enum": ["text", "file"]
                                       },
                                       "value": {
                                         "type": "string"
                                       }
                                     },
-                                    "required": [
-                                      "type",
-                                      "value"
-                                    ],
+                                    "required": ["type", "value"],
                                     "additionalProperties": false
                                   }
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "role",
-                            "content"
-                          ],
+                          "required": ["role", "content"],
                           "additionalProperties": false
                         }
                       }
@@ -7314,10 +6523,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "code-judge",
-                                "code_judge"
-                              ]
+                              "enum": ["code-judge", "code_judge"]
                             },
                             "command": {
                               "anyOf": [
@@ -7369,10 +6575,7 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": [
-                            "type",
-                            "command"
-                          ],
+                          "required": ["type", "command"],
                           "additionalProperties": false
                         },
                         {
@@ -7402,10 +6605,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "llm-judge",
-                                "llm_judge"
-                              ]
+                              "enum": ["llm-judge", "llm_judge"]
                             },
                             "prompt": {
                               "anyOf": [
@@ -7499,10 +6699,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -7531,9 +6728,7 @@
                               "maximum": 2
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -7593,9 +6788,7 @@
                                       }
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -7611,10 +6804,7 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "threshold"
-                                  ],
+                                  "required": ["type", "threshold"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -7631,10 +6821,7 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "path"
-                                  ],
+                                  "required": ["type", "path"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -7651,18 +6838,13 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "aggregator"
-                          ],
+                          "required": ["type", "aggregator"],
                           "additionalProperties": false
                         },
                         {
@@ -7692,20 +6874,11 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "tool-trajectory",
-                                "tool_trajectory"
-                              ]
+                              "enum": ["tool-trajectory", "tool_trajectory"]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": [
-                                "any_order",
-                                "in_order",
-                                "exact",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                             },
                             "minimums": {
                               "type": "object",
@@ -7746,12 +6919,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -7765,12 +6933,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -7781,9 +6944,7 @@
                                     ]
                                   }
                                 },
-                                "required": [
-                                  "tool"
-                                ],
+                                "required": ["tool"],
                                 "additionalProperties": false
                               }
                             },
@@ -7791,12 +6952,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -7810,12 +6966,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -7826,10 +6977,7 @@
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "mode"
-                          ],
+                          "required": ["type", "mode"],
                           "additionalProperties": false
                         },
                         {
@@ -7859,10 +7007,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "field-accuracy",
-                                "field_accuracy"
-                              ]
+                              "enum": ["field-accuracy", "field_accuracy"]
                             },
                             "fields": {
                               "type": "array",
@@ -7874,11 +7019,7 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": [
-                                      "exact",
-                                      "numeric_tolerance",
-                                      "date"
-                                    ]
+                                    "enum": ["exact", "numeric_tolerance", "date"]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -7900,26 +7041,17 @@
                                     }
                                   }
                                 },
-                                "required": [
-                                  "path",
-                                  "match"
-                                ],
+                                "required": ["path", "match"],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": [
-                                "weighted_average",
-                                "all_or_nothing"
-                              ]
+                              "enum": ["weighted_average", "all_or_nothing"]
                             }
                           },
-                          "required": [
-                            "type",
-                            "fields"
-                          ],
+                          "required": ["type", "fields"],
                           "additionalProperties": false
                         },
                         {
@@ -7956,10 +7088,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "threshold"
-                          ],
+                          "required": ["type", "threshold"],
                           "additionalProperties": false
                         },
                         {
@@ -7996,10 +7125,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "budget"
-                          ],
+                          "required": ["type", "budget"],
                           "additionalProperties": false
                         },
                         {
@@ -8029,10 +7155,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "token-usage",
-                                "token_usage"
-                              ]
+                              "enum": ["token-usage", "token_usage"]
                             },
                             "max_total": {
                               "type": "number",
@@ -8047,9 +7170,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -8079,10 +7200,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "execution-metrics",
-                                "execution_metrics"
-                              ]
+                              "enum": ["execution-metrics", "execution_metrics"]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -8114,9 +7232,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -8152,10 +7268,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -8191,10 +7304,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -8224,15 +7334,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "is-json",
-                                "is_json"
-                              ]
+                              "enum": ["is-json", "is_json"]
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -8268,10 +7373,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -8352,10 +7454,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -8365,10 +7464,7 @@
                               "minItems": 1
                             }
                           },
-                          "required": [
-                            "type",
-                            "criteria"
-                          ],
+                          "required": ["type", "criteria"],
                           "additionalProperties": false
                         }
                       ]
@@ -8405,10 +7501,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "code-judge",
-                                "code_judge"
-                              ]
+                              "enum": ["code-judge", "code_judge"]
                             },
                             "command": {
                               "anyOf": [
@@ -8460,10 +7553,7 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": [
-                            "type",
-                            "command"
-                          ],
+                          "required": ["type", "command"],
                           "additionalProperties": false
                         },
                         {
@@ -8493,10 +7583,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "llm-judge",
-                                "llm_judge"
-                              ]
+                              "enum": ["llm-judge", "llm_judge"]
                             },
                             "prompt": {
                               "anyOf": [
@@ -8590,10 +7677,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -8622,9 +7706,7 @@
                               "maximum": 2
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -8684,9 +7766,7 @@
                                       }
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -8702,10 +7782,7 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "threshold"
-                                  ],
+                                  "required": ["type", "threshold"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -8722,10 +7799,7 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "path"
-                                  ],
+                                  "required": ["type", "path"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -8742,18 +7816,13 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "aggregator"
-                          ],
+                          "required": ["type", "aggregator"],
                           "additionalProperties": false
                         },
                         {
@@ -8783,20 +7852,11 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "tool-trajectory",
-                                "tool_trajectory"
-                              ]
+                              "enum": ["tool-trajectory", "tool_trajectory"]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": [
-                                "any_order",
-                                "in_order",
-                                "exact",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                             },
                             "minimums": {
                               "type": "object",
@@ -8837,12 +7897,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -8856,12 +7911,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -8872,9 +7922,7 @@
                                     ]
                                   }
                                 },
-                                "required": [
-                                  "tool"
-                                ],
+                                "required": ["tool"],
                                 "additionalProperties": false
                               }
                             },
@@ -8882,12 +7930,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -8901,12 +7944,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -8917,10 +7955,7 @@
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "mode"
-                          ],
+                          "required": ["type", "mode"],
                           "additionalProperties": false
                         },
                         {
@@ -8950,10 +7985,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "field-accuracy",
-                                "field_accuracy"
-                              ]
+                              "enum": ["field-accuracy", "field_accuracy"]
                             },
                             "fields": {
                               "type": "array",
@@ -8965,11 +7997,7 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": [
-                                      "exact",
-                                      "numeric_tolerance",
-                                      "date"
-                                    ]
+                                    "enum": ["exact", "numeric_tolerance", "date"]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -8991,26 +8019,17 @@
                                     }
                                   }
                                 },
-                                "required": [
-                                  "path",
-                                  "match"
-                                ],
+                                "required": ["path", "match"],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": [
-                                "weighted_average",
-                                "all_or_nothing"
-                              ]
+                              "enum": ["weighted_average", "all_or_nothing"]
                             }
                           },
-                          "required": [
-                            "type",
-                            "fields"
-                          ],
+                          "required": ["type", "fields"],
                           "additionalProperties": false
                         },
                         {
@@ -9047,10 +8066,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "threshold"
-                          ],
+                          "required": ["type", "threshold"],
                           "additionalProperties": false
                         },
                         {
@@ -9087,10 +8103,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "budget"
-                          ],
+                          "required": ["type", "budget"],
                           "additionalProperties": false
                         },
                         {
@@ -9120,10 +8133,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "token-usage",
-                                "token_usage"
-                              ]
+                              "enum": ["token-usage", "token_usage"]
                             },
                             "max_total": {
                               "type": "number",
@@ -9138,9 +8148,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -9170,10 +8178,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "execution-metrics",
-                                "execution_metrics"
-                              ]
+                              "enum": ["execution-metrics", "execution_metrics"]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -9205,9 +8210,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -9243,10 +8246,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -9282,10 +8282,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -9315,15 +8312,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "is-json",
-                                "is_json"
-                              ]
+                              "enum": ["is-json", "is_json"]
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -9359,10 +8351,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -9443,10 +8432,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -9456,10 +8442,7 @@
                               "minItems": 1
                             }
                           },
-                          "required": [
-                            "type",
-                            "criteria"
-                          ],
+                          "required": ["type", "criteria"],
                           "additionalProperties": false
                         }
                       ]
@@ -9496,10 +8479,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "code-judge",
-                                "code_judge"
-                              ]
+                              "enum": ["code-judge", "code_judge"]
                             },
                             "command": {
                               "anyOf": [
@@ -9551,10 +8531,7 @@
                               "additionalProperties": {}
                             }
                           },
-                          "required": [
-                            "type",
-                            "command"
-                          ],
+                          "required": ["type", "command"],
                           "additionalProperties": false
                         },
                         {
@@ -9584,10 +8561,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "llm-judge",
-                                "llm_judge"
-                              ]
+                              "enum": ["llm-judge", "llm_judge"]
                             },
                             "prompt": {
                               "anyOf": [
@@ -9681,10 +8655,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -9713,9 +8684,7 @@
                               "maximum": 2
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -9775,9 +8744,7 @@
                                       }
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -9793,10 +8760,7 @@
                                       "maximum": 1
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "threshold"
-                                  ],
+                                  "required": ["type", "threshold"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -9813,10 +8777,7 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "path"
-                                  ],
+                                  "required": ["type", "path"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -9833,18 +8794,13 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type"
-                                  ],
+                                  "required": ["type"],
                                   "additionalProperties": false
                                 }
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "aggregator"
-                          ],
+                          "required": ["type", "aggregator"],
                           "additionalProperties": false
                         },
                         {
@@ -9874,20 +8830,11 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "tool-trajectory",
-                                "tool_trajectory"
-                              ]
+                              "enum": ["tool-trajectory", "tool_trajectory"]
                             },
                             "mode": {
                               "type": "string",
-                              "enum": [
-                                "any_order",
-                                "in_order",
-                                "exact",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                             },
                             "minimums": {
                               "type": "object",
@@ -9928,12 +8875,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -9947,12 +8889,7 @@
                                     "anyOf": [
                                       {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "ignore",
-                                          "subset",
-                                          "superset"
-                                        ]
+                                        "enum": ["exact", "ignore", "subset", "superset"]
                                       },
                                       {
                                         "type": "array",
@@ -9963,9 +8900,7 @@
                                     ]
                                   }
                                 },
-                                "required": [
-                                  "tool"
-                                ],
+                                "required": ["tool"],
                                 "additionalProperties": false
                               }
                             },
@@ -9973,12 +8908,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -9992,12 +8922,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -10008,10 +8933,7 @@
                               ]
                             }
                           },
-                          "required": [
-                            "type",
-                            "mode"
-                          ],
+                          "required": ["type", "mode"],
                           "additionalProperties": false
                         },
                         {
@@ -10041,10 +8963,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "field-accuracy",
-                                "field_accuracy"
-                              ]
+                              "enum": ["field-accuracy", "field_accuracy"]
                             },
                             "fields": {
                               "type": "array",
@@ -10056,11 +8975,7 @@
                                   },
                                   "match": {
                                     "type": "string",
-                                    "enum": [
-                                      "exact",
-                                      "numeric_tolerance",
-                                      "date"
-                                    ]
+                                    "enum": ["exact", "numeric_tolerance", "date"]
                                   },
                                   "required": {
                                     "type": "boolean"
@@ -10082,26 +8997,17 @@
                                     }
                                   }
                                 },
-                                "required": [
-                                  "path",
-                                  "match"
-                                ],
+                                "required": ["path", "match"],
                                 "additionalProperties": false
                               },
                               "minItems": 1
                             },
                             "aggregation": {
                               "type": "string",
-                              "enum": [
-                                "weighted_average",
-                                "all_or_nothing"
-                              ]
+                              "enum": ["weighted_average", "all_or_nothing"]
                             }
                           },
-                          "required": [
-                            "type",
-                            "fields"
-                          ],
+                          "required": ["type", "fields"],
                           "additionalProperties": false
                         },
                         {
@@ -10138,10 +9044,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "threshold"
-                          ],
+                          "required": ["type", "threshold"],
                           "additionalProperties": false
                         },
                         {
@@ -10178,10 +9081,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type",
-                            "budget"
-                          ],
+                          "required": ["type", "budget"],
                           "additionalProperties": false
                         },
                         {
@@ -10211,10 +9111,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "token-usage",
-                                "token_usage"
-                              ]
+                              "enum": ["token-usage", "token_usage"]
                             },
                             "max_total": {
                               "type": "number",
@@ -10229,9 +9126,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -10261,10 +9156,7 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "execution-metrics",
-                                "execution_metrics"
-                              ]
+                              "enum": ["execution-metrics", "execution_metrics"]
                             },
                             "max_tool_calls": {
                               "type": "number",
@@ -10296,9 +9188,7 @@
                               "minimum": 0
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -10334,10 +9224,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -10373,10 +9260,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -10406,15 +9290,10 @@
                             },
                             "type": {
                               "type": "string",
-                              "enum": [
-                                "is-json",
-                                "is_json"
-                              ]
+                              "enum": ["is-json", "is_json"]
                             }
                           },
-                          "required": [
-                            "type"
-                          ],
+                          "required": ["type"],
                           "additionalProperties": false
                         },
                         {
@@ -10450,10 +9329,7 @@
                               "type": "string"
                             }
                           },
-                          "required": [
-                            "type",
-                            "value"
-                          ],
+                          "required": ["type", "value"],
                           "additionalProperties": false
                         },
                         {
@@ -10534,10 +9410,7 @@
                                           "minLength": 1
                                         }
                                       },
-                                      "required": [
-                                        "score_range",
-                                        "outcome"
-                                      ],
+                                      "required": ["score_range", "outcome"],
                                       "additionalProperties": false
                                     }
                                   }
@@ -10547,10 +9420,7 @@
                               "minItems": 1
                             }
                           },
-                          "required": [
-                            "type",
-                            "criteria"
-                          ],
+                          "required": ["type", "criteria"],
                           "additionalProperties": false
                         }
                       ]
@@ -10599,10 +9469,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "code-judge",
-                                    "code_judge"
-                                  ]
+                                  "enum": ["code-judge", "code_judge"]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -10654,10 +9521,7 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": [
-                                "type",
-                                "command"
-                              ],
+                              "required": ["type", "command"],
                               "additionalProperties": false
                             },
                             {
@@ -10687,10 +9551,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "llm-judge",
-                                    "llm_judge"
-                                  ]
+                                  "enum": ["llm-judge", "llm_judge"]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -10784,10 +9645,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -10816,9 +9674,7 @@
                                   "maximum": 2
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -10878,9 +9734,7 @@
                                           }
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -10896,10 +9750,7 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "threshold"
-                                      ],
+                                      "required": ["type", "threshold"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -10916,10 +9767,7 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "path"
-                                      ],
+                                      "required": ["type", "path"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -10936,18 +9784,13 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "aggregator"
-                              ],
+                              "required": ["type", "aggregator"],
                               "additionalProperties": false
                             },
                             {
@@ -10977,20 +9820,11 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "tool-trajectory",
-                                    "tool_trajectory"
-                                  ]
+                                  "enum": ["tool-trajectory", "tool_trajectory"]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": [
-                                    "any_order",
-                                    "in_order",
-                                    "exact",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -11031,12 +9865,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -11050,12 +9879,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -11066,9 +9890,7 @@
                                         ]
                                       }
                                     },
-                                    "required": [
-                                      "tool"
-                                    ],
+                                    "required": ["tool"],
                                     "additionalProperties": false
                                   }
                                 },
@@ -11076,12 +9898,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -11095,12 +9912,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -11111,10 +9923,7 @@
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "mode"
-                              ],
+                              "required": ["type", "mode"],
                               "additionalProperties": false
                             },
                             {
@@ -11144,10 +9953,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "field-accuracy",
-                                    "field_accuracy"
-                                  ]
+                                  "enum": ["field-accuracy", "field_accuracy"]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -11159,11 +9965,7 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "numeric_tolerance",
-                                          "date"
-                                        ]
+                                        "enum": ["exact", "numeric_tolerance", "date"]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -11185,26 +9987,17 @@
                                         }
                                       }
                                     },
-                                    "required": [
-                                      "path",
-                                      "match"
-                                    ],
+                                    "required": ["path", "match"],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": [
-                                    "weighted_average",
-                                    "all_or_nothing"
-                                  ]
+                                  "enum": ["weighted_average", "all_or_nothing"]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "fields"
-                              ],
+                              "required": ["type", "fields"],
                               "additionalProperties": false
                             },
                             {
@@ -11241,10 +10034,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "threshold"
-                              ],
+                              "required": ["type", "threshold"],
                               "additionalProperties": false
                             },
                             {
@@ -11281,10 +10071,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "budget"
-                              ],
+                              "required": ["type", "budget"],
                               "additionalProperties": false
                             },
                             {
@@ -11314,10 +10101,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "token-usage",
-                                    "token_usage"
-                                  ]
+                                  "enum": ["token-usage", "token_usage"]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -11332,9 +10116,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -11364,10 +10146,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "execution-metrics",
-                                    "execution_metrics"
-                                  ]
+                                  "enum": ["execution-metrics", "execution_metrics"]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -11399,9 +10178,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -11437,10 +10214,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -11476,10 +10250,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -11509,15 +10280,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "is-json",
-                                    "is_json"
-                                  ]
+                                  "enum": ["is-json", "is_json"]
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -11553,10 +10319,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -11637,10 +10400,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -11650,10 +10410,7 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": [
-                                "type",
-                                "criteria"
-                              ],
+                              "required": ["type", "criteria"],
                               "additionalProperties": false
                             }
                           ]
@@ -11690,10 +10447,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "code-judge",
-                                    "code_judge"
-                                  ]
+                                  "enum": ["code-judge", "code_judge"]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -11745,10 +10499,7 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": [
-                                "type",
-                                "command"
-                              ],
+                              "required": ["type", "command"],
                               "additionalProperties": false
                             },
                             {
@@ -11778,10 +10529,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "llm-judge",
-                                    "llm_judge"
-                                  ]
+                                  "enum": ["llm-judge", "llm_judge"]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -11875,10 +10623,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -11907,9 +10652,7 @@
                                   "maximum": 2
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -11969,9 +10712,7 @@
                                           }
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -11987,10 +10728,7 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "threshold"
-                                      ],
+                                      "required": ["type", "threshold"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -12007,10 +10745,7 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "path"
-                                      ],
+                                      "required": ["type", "path"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -12027,18 +10762,13 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "aggregator"
-                              ],
+                              "required": ["type", "aggregator"],
                               "additionalProperties": false
                             },
                             {
@@ -12068,20 +10798,11 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "tool-trajectory",
-                                    "tool_trajectory"
-                                  ]
+                                  "enum": ["tool-trajectory", "tool_trajectory"]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": [
-                                    "any_order",
-                                    "in_order",
-                                    "exact",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -12122,12 +10843,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -12141,12 +10857,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -12157,9 +10868,7 @@
                                         ]
                                       }
                                     },
-                                    "required": [
-                                      "tool"
-                                    ],
+                                    "required": ["tool"],
                                     "additionalProperties": false
                                   }
                                 },
@@ -12167,12 +10876,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -12186,12 +10890,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -12202,10 +10901,7 @@
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "mode"
-                              ],
+                              "required": ["type", "mode"],
                               "additionalProperties": false
                             },
                             {
@@ -12235,10 +10931,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "field-accuracy",
-                                    "field_accuracy"
-                                  ]
+                                  "enum": ["field-accuracy", "field_accuracy"]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -12250,11 +10943,7 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "numeric_tolerance",
-                                          "date"
-                                        ]
+                                        "enum": ["exact", "numeric_tolerance", "date"]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -12276,26 +10965,17 @@
                                         }
                                       }
                                     },
-                                    "required": [
-                                      "path",
-                                      "match"
-                                    ],
+                                    "required": ["path", "match"],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": [
-                                    "weighted_average",
-                                    "all_or_nothing"
-                                  ]
+                                  "enum": ["weighted_average", "all_or_nothing"]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "fields"
-                              ],
+                              "required": ["type", "fields"],
                               "additionalProperties": false
                             },
                             {
@@ -12332,10 +11012,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "threshold"
-                              ],
+                              "required": ["type", "threshold"],
                               "additionalProperties": false
                             },
                             {
@@ -12372,10 +11049,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "budget"
-                              ],
+                              "required": ["type", "budget"],
                               "additionalProperties": false
                             },
                             {
@@ -12405,10 +11079,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "token-usage",
-                                    "token_usage"
-                                  ]
+                                  "enum": ["token-usage", "token_usage"]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -12423,9 +11094,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -12455,10 +11124,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "execution-metrics",
-                                    "execution_metrics"
-                                  ]
+                                  "enum": ["execution-metrics", "execution_metrics"]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -12490,9 +11156,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -12528,10 +11192,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -12567,10 +11228,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -12600,15 +11258,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "is-json",
-                                    "is_json"
-                                  ]
+                                  "enum": ["is-json", "is_json"]
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -12644,10 +11297,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -12728,10 +11378,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -12741,10 +11388,7 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": [
-                                "type",
-                                "criteria"
-                              ],
+                              "required": ["type", "criteria"],
                               "additionalProperties": false
                             }
                           ]
@@ -12781,10 +11425,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "code-judge",
-                                    "code_judge"
-                                  ]
+                                  "enum": ["code-judge", "code_judge"]
                                 },
                                 "command": {
                                   "anyOf": [
@@ -12836,10 +11477,7 @@
                                   "additionalProperties": {}
                                 }
                               },
-                              "required": [
-                                "type",
-                                "command"
-                              ],
+                              "required": ["type", "command"],
                               "additionalProperties": false
                             },
                             {
@@ -12869,10 +11507,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "llm-judge",
-                                    "llm_judge"
-                                  ]
+                                  "enum": ["llm-judge", "llm_judge"]
                                 },
                                 "prompt": {
                                   "anyOf": [
@@ -12966,10 +11601,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -12998,9 +11630,7 @@
                                   "maximum": 2
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -13060,9 +11690,7 @@
                                           }
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -13078,10 +11706,7 @@
                                           "maximum": 1
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "threshold"
-                                      ],
+                                      "required": ["type", "threshold"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -13098,10 +11723,7 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type",
-                                        "path"
-                                      ],
+                                      "required": ["type", "path"],
                                       "additionalProperties": false
                                     },
                                     {
@@ -13118,18 +11740,13 @@
                                           "type": "string"
                                         }
                                       },
-                                      "required": [
-                                        "type"
-                                      ],
+                                      "required": ["type"],
                                       "additionalProperties": false
                                     }
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "aggregator"
-                              ],
+                              "required": ["type", "aggregator"],
                               "additionalProperties": false
                             },
                             {
@@ -13159,20 +11776,11 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "tool-trajectory",
-                                    "tool_trajectory"
-                                  ]
+                                  "enum": ["tool-trajectory", "tool_trajectory"]
                                 },
                                 "mode": {
                                   "type": "string",
-                                  "enum": [
-                                    "any_order",
-                                    "in_order",
-                                    "exact",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                                 },
                                 "minimums": {
                                   "type": "object",
@@ -13213,12 +11821,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -13232,12 +11835,7 @@
                                         "anyOf": [
                                           {
                                             "type": "string",
-                                            "enum": [
-                                              "exact",
-                                              "ignore",
-                                              "subset",
-                                              "superset"
-                                            ]
+                                            "enum": ["exact", "ignore", "subset", "superset"]
                                           },
                                           {
                                             "type": "array",
@@ -13248,9 +11846,7 @@
                                         ]
                                       }
                                     },
-                                    "required": [
-                                      "tool"
-                                    ],
+                                    "required": ["tool"],
                                     "additionalProperties": false
                                   }
                                 },
@@ -13258,12 +11854,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -13277,12 +11868,7 @@
                                   "anyOf": [
                                     {
                                       "type": "string",
-                                      "enum": [
-                                        "exact",
-                                        "ignore",
-                                        "subset",
-                                        "superset"
-                                      ]
+                                      "enum": ["exact", "ignore", "subset", "superset"]
                                     },
                                     {
                                       "type": "array",
@@ -13293,10 +11879,7 @@
                                   ]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "mode"
-                              ],
+                              "required": ["type", "mode"],
                               "additionalProperties": false
                             },
                             {
@@ -13326,10 +11909,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "field-accuracy",
-                                    "field_accuracy"
-                                  ]
+                                  "enum": ["field-accuracy", "field_accuracy"]
                                 },
                                 "fields": {
                                   "type": "array",
@@ -13341,11 +11921,7 @@
                                       },
                                       "match": {
                                         "type": "string",
-                                        "enum": [
-                                          "exact",
-                                          "numeric_tolerance",
-                                          "date"
-                                        ]
+                                        "enum": ["exact", "numeric_tolerance", "date"]
                                       },
                                       "required": {
                                         "type": "boolean"
@@ -13367,26 +11943,17 @@
                                         }
                                       }
                                     },
-                                    "required": [
-                                      "path",
-                                      "match"
-                                    ],
+                                    "required": ["path", "match"],
                                     "additionalProperties": false
                                   },
                                   "minItems": 1
                                 },
                                 "aggregation": {
                                   "type": "string",
-                                  "enum": [
-                                    "weighted_average",
-                                    "all_or_nothing"
-                                  ]
+                                  "enum": ["weighted_average", "all_or_nothing"]
                                 }
                               },
-                              "required": [
-                                "type",
-                                "fields"
-                              ],
+                              "required": ["type", "fields"],
                               "additionalProperties": false
                             },
                             {
@@ -13423,10 +11990,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "threshold"
-                              ],
+                              "required": ["type", "threshold"],
                               "additionalProperties": false
                             },
                             {
@@ -13463,10 +12027,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type",
-                                "budget"
-                              ],
+                              "required": ["type", "budget"],
                               "additionalProperties": false
                             },
                             {
@@ -13496,10 +12057,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "token-usage",
-                                    "token_usage"
-                                  ]
+                                  "enum": ["token-usage", "token_usage"]
                                 },
                                 "max_total": {
                                   "type": "number",
@@ -13514,9 +12072,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -13546,10 +12102,7 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "execution-metrics",
-                                    "execution_metrics"
-                                  ]
+                                  "enum": ["execution-metrics", "execution_metrics"]
                                 },
                                 "max_tool_calls": {
                                   "type": "number",
@@ -13581,9 +12134,7 @@
                                   "minimum": 0
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -13619,10 +12170,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -13658,10 +12206,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -13691,15 +12236,10 @@
                                 },
                                 "type": {
                                   "type": "string",
-                                  "enum": [
-                                    "is-json",
-                                    "is_json"
-                                  ]
+                                  "enum": ["is-json", "is_json"]
                                 }
                               },
-                              "required": [
-                                "type"
-                              ],
+                              "required": ["type"],
                               "additionalProperties": false
                             },
                             {
@@ -13735,10 +12275,7 @@
                                   "type": "string"
                                 }
                               },
-                              "required": [
-                                "type",
-                                "value"
-                              ],
+                              "required": ["type", "value"],
                               "additionalProperties": false
                             },
                             {
@@ -13819,10 +12356,7 @@
                                               "minLength": 1
                                             }
                                           },
-                                          "required": [
-                                            "score_range",
-                                            "outcome"
-                                          ],
+                                          "required": ["score_range", "outcome"],
                                           "additionalProperties": false
                                         }
                                       }
@@ -13832,10 +12366,7 @@
                                   "minItems": 1
                                 }
                               },
-                              "required": [
-                                "type",
-                                "criteria"
-                              ],
+                              "required": ["type", "criteria"],
                               "additionalProperties": false
                             }
                           ]
@@ -13856,11 +12387,7 @@
                           },
                           "strategy": {
                             "type": "string",
-                            "enum": [
-                              "pass_at_k",
-                              "mean",
-                              "confidence_interval"
-                            ]
+                            "enum": ["pass_at_k", "mean", "confidence_interval"]
                           },
                           "cost_limit_usd": {
                             "type": "number",
@@ -13871,9 +12398,7 @@
                             "minimum": 0
                           }
                         },
-                        "required": [
-                          "count"
-                        ],
+                        "required": ["count"],
                         "additionalProperties": false
                       },
                       "total_budget_usd": {
@@ -13901,10 +12426,7 @@
                       },
                       "isolation": {
                         "type": "string",
-                        "enum": [
-                          "shared",
-                          "per_test"
-                        ]
+                        "enum": ["shared", "per_test"]
                       },
                       "repos": {
                         "type": "array",
@@ -13928,10 +12450,7 @@
                                       "format": "uri"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "url"
-                                  ],
+                                  "required": ["type", "url"],
                                   "additionalProperties": false
                                 },
                                 {
@@ -13945,10 +12464,7 @@
                                       "type": "string"
                                     }
                                   },
-                                  "required": [
-                                    "type",
-                                    "path"
-                                  ],
+                                  "required": ["type", "path"],
                                   "additionalProperties": false
                                 }
                               ]
@@ -13961,10 +12477,7 @@
                                 },
                                 "resolve": {
                                   "type": "string",
-                                  "enum": [
-                                    "remote",
-                                    "local"
-                                  ]
+                                  "enum": ["remote", "local"]
                                 },
                                 "ancestor": {
                                   "type": "integer",
@@ -13993,10 +12506,7 @@
                               "additionalProperties": false
                             }
                           },
-                          "required": [
-                            "path",
-                            "source"
-                          ],
+                          "required": ["path", "source"],
                           "additionalProperties": false
                         }
                       },
@@ -14032,11 +12542,7 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": [
-                                  "none",
-                                  "fast",
-                                  "strict"
-                                ]
+                                "enum": ["none", "fast", "strict"]
                               }
                             },
                             "additionalProperties": false
@@ -14067,11 +12573,7 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": [
-                                  "none",
-                                  "fast",
-                                  "strict"
-                                ]
+                                "enum": ["none", "fast", "strict"]
                               }
                             },
                             "additionalProperties": false
@@ -14102,11 +12604,7 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": [
-                                  "none",
-                                  "fast",
-                                  "strict"
-                                ]
+                                "enum": ["none", "fast", "strict"]
                               }
                             },
                             "additionalProperties": false
@@ -14137,11 +12635,7 @@
                               },
                               "reset": {
                                 "type": "string",
-                                "enum": [
-                                  "none",
-                                  "fast",
-                                  "strict"
-                                ]
+                                "enum": ["none", "fast", "strict"]
                               }
                             },
                             "additionalProperties": false
@@ -14151,11 +12645,7 @@
                       },
                       "mode": {
                         "type": "string",
-                        "enum": [
-                          "pooled",
-                          "temp",
-                          "static"
-                        ]
+                        "enum": ["pooled", "temp", "static"]
                       },
                       "path": {
                         "type": "string"
@@ -14177,9 +12667,7 @@
                     "type": "string"
                   }
                 },
-                "required": [
-                  "id"
-                ],
+                "required": ["id"],
                 "additionalProperties": false
               }
             },
@@ -14234,10 +12722,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "code-judge",
-                          "code_judge"
-                        ]
+                        "enum": ["code-judge", "code_judge"]
                       },
                       "command": {
                         "anyOf": [
@@ -14289,10 +12774,7 @@
                         "additionalProperties": {}
                       }
                     },
-                    "required": [
-                      "type",
-                      "command"
-                    ],
+                    "required": ["type", "command"],
                     "additionalProperties": false
                   },
                   {
@@ -14322,10 +12804,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "llm-judge",
-                          "llm_judge"
-                        ]
+                        "enum": ["llm-judge", "llm_judge"]
                       },
                       "prompt": {
                         "anyOf": [
@@ -14419,10 +12898,7 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": [
-                                  "score_range",
-                                  "outcome"
-                                ],
+                                "required": ["score_range", "outcome"],
                                 "additionalProperties": false
                               }
                             }
@@ -14451,9 +12927,7 @@
                         "maximum": 2
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -14513,9 +12987,7 @@
                                 }
                               }
                             },
-                            "required": [
-                              "type"
-                            ],
+                            "required": ["type"],
                             "additionalProperties": false
                           },
                           {
@@ -14531,10 +13003,7 @@
                                 "maximum": 1
                               }
                             },
-                            "required": [
-                              "type",
-                              "threshold"
-                            ],
+                            "required": ["type", "threshold"],
                             "additionalProperties": false
                           },
                           {
@@ -14551,10 +13020,7 @@
                                 "type": "string"
                               }
                             },
-                            "required": [
-                              "type",
-                              "path"
-                            ],
+                            "required": ["type", "path"],
                             "additionalProperties": false
                           },
                           {
@@ -14571,18 +13037,13 @@
                                 "type": "string"
                               }
                             },
-                            "required": [
-                              "type"
-                            ],
+                            "required": ["type"],
                             "additionalProperties": false
                           }
                         ]
                       }
                     },
-                    "required": [
-                      "type",
-                      "aggregator"
-                    ],
+                    "required": ["type", "aggregator"],
                     "additionalProperties": false
                   },
                   {
@@ -14612,20 +13073,11 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "tool-trajectory",
-                          "tool_trajectory"
-                        ]
+                        "enum": ["tool-trajectory", "tool_trajectory"]
                       },
                       "mode": {
                         "type": "string",
-                        "enum": [
-                          "any_order",
-                          "in_order",
-                          "exact",
-                          "subset",
-                          "superset"
-                        ]
+                        "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                       },
                       "minimums": {
                         "type": "object",
@@ -14666,12 +13118,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -14685,12 +13132,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -14701,9 +13143,7 @@
                               ]
                             }
                           },
-                          "required": [
-                            "tool"
-                          ],
+                          "required": ["tool"],
                           "additionalProperties": false
                         }
                       },
@@ -14711,12 +13151,7 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": [
-                              "exact",
-                              "ignore",
-                              "subset",
-                              "superset"
-                            ]
+                            "enum": ["exact", "ignore", "subset", "superset"]
                           },
                           {
                             "type": "array",
@@ -14730,12 +13165,7 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": [
-                              "exact",
-                              "ignore",
-                              "subset",
-                              "superset"
-                            ]
+                            "enum": ["exact", "ignore", "subset", "superset"]
                           },
                           {
                             "type": "array",
@@ -14746,10 +13176,7 @@
                         ]
                       }
                     },
-                    "required": [
-                      "type",
-                      "mode"
-                    ],
+                    "required": ["type", "mode"],
                     "additionalProperties": false
                   },
                   {
@@ -14779,10 +13206,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "field-accuracy",
-                          "field_accuracy"
-                        ]
+                        "enum": ["field-accuracy", "field_accuracy"]
                       },
                       "fields": {
                         "type": "array",
@@ -14794,11 +13218,7 @@
                             },
                             "match": {
                               "type": "string",
-                              "enum": [
-                                "exact",
-                                "numeric_tolerance",
-                                "date"
-                              ]
+                              "enum": ["exact", "numeric_tolerance", "date"]
                             },
                             "required": {
                               "type": "boolean"
@@ -14820,26 +13240,17 @@
                               }
                             }
                           },
-                          "required": [
-                            "path",
-                            "match"
-                          ],
+                          "required": ["path", "match"],
                           "additionalProperties": false
                         },
                         "minItems": 1
                       },
                       "aggregation": {
                         "type": "string",
-                        "enum": [
-                          "weighted_average",
-                          "all_or_nothing"
-                        ]
+                        "enum": ["weighted_average", "all_or_nothing"]
                       }
                     },
-                    "required": [
-                      "type",
-                      "fields"
-                    ],
+                    "required": ["type", "fields"],
                     "additionalProperties": false
                   },
                   {
@@ -14876,10 +13287,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type",
-                      "threshold"
-                    ],
+                    "required": ["type", "threshold"],
                     "additionalProperties": false
                   },
                   {
@@ -14916,10 +13324,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type",
-                      "budget"
-                    ],
+                    "required": ["type", "budget"],
                     "additionalProperties": false
                   },
                   {
@@ -14949,10 +13354,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "token-usage",
-                          "token_usage"
-                        ]
+                        "enum": ["token-usage", "token_usage"]
                       },
                       "max_total": {
                         "type": "number",
@@ -14967,9 +13369,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -14999,10 +13399,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "execution-metrics",
-                          "execution_metrics"
-                        ]
+                        "enum": ["execution-metrics", "execution_metrics"]
                       },
                       "max_tool_calls": {
                         "type": "number",
@@ -15034,9 +13431,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -15072,10 +13467,7 @@
                         "type": "string"
                       }
                     },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
+                    "required": ["type", "value"],
                     "additionalProperties": false
                   },
                   {
@@ -15111,10 +13503,7 @@
                         "type": "string"
                       }
                     },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
+                    "required": ["type", "value"],
                     "additionalProperties": false
                   },
                   {
@@ -15144,15 +13533,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "is-json",
-                          "is_json"
-                        ]
+                        "enum": ["is-json", "is_json"]
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -15188,10 +13572,7 @@
                         "type": "string"
                       }
                     },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
+                    "required": ["type", "value"],
                     "additionalProperties": false
                   },
                   {
@@ -15272,10 +13653,7 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": [
-                                  "score_range",
-                                  "outcome"
-                                ],
+                                "required": ["score_range", "outcome"],
                                 "additionalProperties": false
                               }
                             }
@@ -15285,10 +13663,7 @@
                         "minItems": 1
                       }
                     },
-                    "required": [
-                      "type",
-                      "criteria"
-                    ],
+                    "required": ["type", "criteria"],
                     "additionalProperties": false
                   }
                 ]
@@ -15325,10 +13700,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "code-judge",
-                          "code_judge"
-                        ]
+                        "enum": ["code-judge", "code_judge"]
                       },
                       "command": {
                         "anyOf": [
@@ -15380,10 +13752,7 @@
                         "additionalProperties": {}
                       }
                     },
-                    "required": [
-                      "type",
-                      "command"
-                    ],
+                    "required": ["type", "command"],
                     "additionalProperties": false
                   },
                   {
@@ -15413,10 +13782,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "llm-judge",
-                          "llm_judge"
-                        ]
+                        "enum": ["llm-judge", "llm_judge"]
                       },
                       "prompt": {
                         "anyOf": [
@@ -15510,10 +13876,7 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": [
-                                  "score_range",
-                                  "outcome"
-                                ],
+                                "required": ["score_range", "outcome"],
                                 "additionalProperties": false
                               }
                             }
@@ -15542,9 +13905,7 @@
                         "maximum": 2
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -15604,9 +13965,7 @@
                                 }
                               }
                             },
-                            "required": [
-                              "type"
-                            ],
+                            "required": ["type"],
                             "additionalProperties": false
                           },
                           {
@@ -15622,10 +13981,7 @@
                                 "maximum": 1
                               }
                             },
-                            "required": [
-                              "type",
-                              "threshold"
-                            ],
+                            "required": ["type", "threshold"],
                             "additionalProperties": false
                           },
                           {
@@ -15642,10 +13998,7 @@
                                 "type": "string"
                               }
                             },
-                            "required": [
-                              "type",
-                              "path"
-                            ],
+                            "required": ["type", "path"],
                             "additionalProperties": false
                           },
                           {
@@ -15662,18 +14015,13 @@
                                 "type": "string"
                               }
                             },
-                            "required": [
-                              "type"
-                            ],
+                            "required": ["type"],
                             "additionalProperties": false
                           }
                         ]
                       }
                     },
-                    "required": [
-                      "type",
-                      "aggregator"
-                    ],
+                    "required": ["type", "aggregator"],
                     "additionalProperties": false
                   },
                   {
@@ -15703,20 +14051,11 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "tool-trajectory",
-                          "tool_trajectory"
-                        ]
+                        "enum": ["tool-trajectory", "tool_trajectory"]
                       },
                       "mode": {
                         "type": "string",
-                        "enum": [
-                          "any_order",
-                          "in_order",
-                          "exact",
-                          "subset",
-                          "superset"
-                        ]
+                        "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                       },
                       "minimums": {
                         "type": "object",
@@ -15757,12 +14096,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -15776,12 +14110,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -15792,9 +14121,7 @@
                               ]
                             }
                           },
-                          "required": [
-                            "tool"
-                          ],
+                          "required": ["tool"],
                           "additionalProperties": false
                         }
                       },
@@ -15802,12 +14129,7 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": [
-                              "exact",
-                              "ignore",
-                              "subset",
-                              "superset"
-                            ]
+                            "enum": ["exact", "ignore", "subset", "superset"]
                           },
                           {
                             "type": "array",
@@ -15821,12 +14143,7 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": [
-                              "exact",
-                              "ignore",
-                              "subset",
-                              "superset"
-                            ]
+                            "enum": ["exact", "ignore", "subset", "superset"]
                           },
                           {
                             "type": "array",
@@ -15837,10 +14154,7 @@
                         ]
                       }
                     },
-                    "required": [
-                      "type",
-                      "mode"
-                    ],
+                    "required": ["type", "mode"],
                     "additionalProperties": false
                   },
                   {
@@ -15870,10 +14184,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "field-accuracy",
-                          "field_accuracy"
-                        ]
+                        "enum": ["field-accuracy", "field_accuracy"]
                       },
                       "fields": {
                         "type": "array",
@@ -15885,11 +14196,7 @@
                             },
                             "match": {
                               "type": "string",
-                              "enum": [
-                                "exact",
-                                "numeric_tolerance",
-                                "date"
-                              ]
+                              "enum": ["exact", "numeric_tolerance", "date"]
                             },
                             "required": {
                               "type": "boolean"
@@ -15911,26 +14218,17 @@
                               }
                             }
                           },
-                          "required": [
-                            "path",
-                            "match"
-                          ],
+                          "required": ["path", "match"],
                           "additionalProperties": false
                         },
                         "minItems": 1
                       },
                       "aggregation": {
                         "type": "string",
-                        "enum": [
-                          "weighted_average",
-                          "all_or_nothing"
-                        ]
+                        "enum": ["weighted_average", "all_or_nothing"]
                       }
                     },
-                    "required": [
-                      "type",
-                      "fields"
-                    ],
+                    "required": ["type", "fields"],
                     "additionalProperties": false
                   },
                   {
@@ -15967,10 +14265,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type",
-                      "threshold"
-                    ],
+                    "required": ["type", "threshold"],
                     "additionalProperties": false
                   },
                   {
@@ -16007,10 +14302,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type",
-                      "budget"
-                    ],
+                    "required": ["type", "budget"],
                     "additionalProperties": false
                   },
                   {
@@ -16040,10 +14332,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "token-usage",
-                          "token_usage"
-                        ]
+                        "enum": ["token-usage", "token_usage"]
                       },
                       "max_total": {
                         "type": "number",
@@ -16058,9 +14347,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -16090,10 +14377,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "execution-metrics",
-                          "execution_metrics"
-                        ]
+                        "enum": ["execution-metrics", "execution_metrics"]
                       },
                       "max_tool_calls": {
                         "type": "number",
@@ -16125,9 +14409,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -16163,10 +14445,7 @@
                         "type": "string"
                       }
                     },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
+                    "required": ["type", "value"],
                     "additionalProperties": false
                   },
                   {
@@ -16202,10 +14481,7 @@
                         "type": "string"
                       }
                     },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
+                    "required": ["type", "value"],
                     "additionalProperties": false
                   },
                   {
@@ -16235,15 +14511,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "is-json",
-                          "is_json"
-                        ]
+                        "enum": ["is-json", "is_json"]
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -16279,10 +14550,7 @@
                         "type": "string"
                       }
                     },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
+                    "required": ["type", "value"],
                     "additionalProperties": false
                   },
                   {
@@ -16363,10 +14631,7 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": [
-                                  "score_range",
-                                  "outcome"
-                                ],
+                                "required": ["score_range", "outcome"],
                                 "additionalProperties": false
                               }
                             }
@@ -16376,10 +14641,7 @@
                         "minItems": 1
                       }
                     },
-                    "required": [
-                      "type",
-                      "criteria"
-                    ],
+                    "required": ["type", "criteria"],
                     "additionalProperties": false
                   }
                 ]
@@ -16416,10 +14678,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "code-judge",
-                          "code_judge"
-                        ]
+                        "enum": ["code-judge", "code_judge"]
                       },
                       "command": {
                         "anyOf": [
@@ -16471,10 +14730,7 @@
                         "additionalProperties": {}
                       }
                     },
-                    "required": [
-                      "type",
-                      "command"
-                    ],
+                    "required": ["type", "command"],
                     "additionalProperties": false
                   },
                   {
@@ -16504,10 +14760,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "llm-judge",
-                          "llm_judge"
-                        ]
+                        "enum": ["llm-judge", "llm_judge"]
                       },
                       "prompt": {
                         "anyOf": [
@@ -16601,10 +14854,7 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": [
-                                  "score_range",
-                                  "outcome"
-                                ],
+                                "required": ["score_range", "outcome"],
                                 "additionalProperties": false
                               }
                             }
@@ -16633,9 +14883,7 @@
                         "maximum": 2
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -16695,9 +14943,7 @@
                                 }
                               }
                             },
-                            "required": [
-                              "type"
-                            ],
+                            "required": ["type"],
                             "additionalProperties": false
                           },
                           {
@@ -16713,10 +14959,7 @@
                                 "maximum": 1
                               }
                             },
-                            "required": [
-                              "type",
-                              "threshold"
-                            ],
+                            "required": ["type", "threshold"],
                             "additionalProperties": false
                           },
                           {
@@ -16733,10 +14976,7 @@
                                 "type": "string"
                               }
                             },
-                            "required": [
-                              "type",
-                              "path"
-                            ],
+                            "required": ["type", "path"],
                             "additionalProperties": false
                           },
                           {
@@ -16753,18 +14993,13 @@
                                 "type": "string"
                               }
                             },
-                            "required": [
-                              "type"
-                            ],
+                            "required": ["type"],
                             "additionalProperties": false
                           }
                         ]
                       }
                     },
-                    "required": [
-                      "type",
-                      "aggregator"
-                    ],
+                    "required": ["type", "aggregator"],
                     "additionalProperties": false
                   },
                   {
@@ -16794,20 +15029,11 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "tool-trajectory",
-                          "tool_trajectory"
-                        ]
+                        "enum": ["tool-trajectory", "tool_trajectory"]
                       },
                       "mode": {
                         "type": "string",
-                        "enum": [
-                          "any_order",
-                          "in_order",
-                          "exact",
-                          "subset",
-                          "superset"
-                        ]
+                        "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                       },
                       "minimums": {
                         "type": "object",
@@ -16848,12 +15074,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -16867,12 +15088,7 @@
                               "anyOf": [
                                 {
                                   "type": "string",
-                                  "enum": [
-                                    "exact",
-                                    "ignore",
-                                    "subset",
-                                    "superset"
-                                  ]
+                                  "enum": ["exact", "ignore", "subset", "superset"]
                                 },
                                 {
                                   "type": "array",
@@ -16883,9 +15099,7 @@
                               ]
                             }
                           },
-                          "required": [
-                            "tool"
-                          ],
+                          "required": ["tool"],
                           "additionalProperties": false
                         }
                       },
@@ -16893,12 +15107,7 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": [
-                              "exact",
-                              "ignore",
-                              "subset",
-                              "superset"
-                            ]
+                            "enum": ["exact", "ignore", "subset", "superset"]
                           },
                           {
                             "type": "array",
@@ -16912,12 +15121,7 @@
                         "anyOf": [
                           {
                             "type": "string",
-                            "enum": [
-                              "exact",
-                              "ignore",
-                              "subset",
-                              "superset"
-                            ]
+                            "enum": ["exact", "ignore", "subset", "superset"]
                           },
                           {
                             "type": "array",
@@ -16928,10 +15132,7 @@
                         ]
                       }
                     },
-                    "required": [
-                      "type",
-                      "mode"
-                    ],
+                    "required": ["type", "mode"],
                     "additionalProperties": false
                   },
                   {
@@ -16961,10 +15162,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "field-accuracy",
-                          "field_accuracy"
-                        ]
+                        "enum": ["field-accuracy", "field_accuracy"]
                       },
                       "fields": {
                         "type": "array",
@@ -16976,11 +15174,7 @@
                             },
                             "match": {
                               "type": "string",
-                              "enum": [
-                                "exact",
-                                "numeric_tolerance",
-                                "date"
-                              ]
+                              "enum": ["exact", "numeric_tolerance", "date"]
                             },
                             "required": {
                               "type": "boolean"
@@ -17002,26 +15196,17 @@
                               }
                             }
                           },
-                          "required": [
-                            "path",
-                            "match"
-                          ],
+                          "required": ["path", "match"],
                           "additionalProperties": false
                         },
                         "minItems": 1
                       },
                       "aggregation": {
                         "type": "string",
-                        "enum": [
-                          "weighted_average",
-                          "all_or_nothing"
-                        ]
+                        "enum": ["weighted_average", "all_or_nothing"]
                       }
                     },
-                    "required": [
-                      "type",
-                      "fields"
-                    ],
+                    "required": ["type", "fields"],
                     "additionalProperties": false
                   },
                   {
@@ -17058,10 +15243,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type",
-                      "threshold"
-                    ],
+                    "required": ["type", "threshold"],
                     "additionalProperties": false
                   },
                   {
@@ -17098,10 +15280,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type",
-                      "budget"
-                    ],
+                    "required": ["type", "budget"],
                     "additionalProperties": false
                   },
                   {
@@ -17131,10 +15310,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "token-usage",
-                          "token_usage"
-                        ]
+                        "enum": ["token-usage", "token_usage"]
                       },
                       "max_total": {
                         "type": "number",
@@ -17149,9 +15325,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -17181,10 +15355,7 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "execution-metrics",
-                          "execution_metrics"
-                        ]
+                        "enum": ["execution-metrics", "execution_metrics"]
                       },
                       "max_tool_calls": {
                         "type": "number",
@@ -17216,9 +15387,7 @@
                         "minimum": 0
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -17254,10 +15423,7 @@
                         "type": "string"
                       }
                     },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
+                    "required": ["type", "value"],
                     "additionalProperties": false
                   },
                   {
@@ -17293,10 +15459,7 @@
                         "type": "string"
                       }
                     },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
+                    "required": ["type", "value"],
                     "additionalProperties": false
                   },
                   {
@@ -17326,15 +15489,10 @@
                       },
                       "type": {
                         "type": "string",
-                        "enum": [
-                          "is-json",
-                          "is_json"
-                        ]
+                        "enum": ["is-json", "is_json"]
                       }
                     },
-                    "required": [
-                      "type"
-                    ],
+                    "required": ["type"],
                     "additionalProperties": false
                   },
                   {
@@ -17370,10 +15528,7 @@
                         "type": "string"
                       }
                     },
-                    "required": [
-                      "type",
-                      "value"
-                    ],
+                    "required": ["type", "value"],
                     "additionalProperties": false
                   },
                   {
@@ -17454,10 +15609,7 @@
                                     "minLength": 1
                                   }
                                 },
-                                "required": [
-                                  "score_range",
-                                  "outcome"
-                                ],
+                                "required": ["score_range", "outcome"],
                                 "additionalProperties": false
                               }
                             }
@@ -17467,10 +15619,7 @@
                         "minItems": 1
                       }
                     },
-                    "required": [
-                      "type",
-                      "criteria"
-                    ],
+                    "required": ["type", "criteria"],
                     "additionalProperties": false
                   }
                 ]
@@ -17491,11 +15640,7 @@
                 },
                 "strategy": {
                   "type": "string",
-                  "enum": [
-                    "pass_at_k",
-                    "mean",
-                    "confidence_interval"
-                  ]
+                  "enum": ["pass_at_k", "mean", "confidence_interval"]
                 },
                 "cost_limit_usd": {
                   "type": "number",
@@ -17506,9 +15651,7 @@
                   "minimum": 0
                 }
               },
-              "required": [
-                "count"
-              ],
+              "required": ["count"],
               "additionalProperties": false
             },
             "total_budget_usd": {
@@ -17559,10 +15702,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "code-judge",
-                      "code_judge"
-                    ]
+                    "enum": ["code-judge", "code_judge"]
                   },
                   "command": {
                     "anyOf": [
@@ -17614,10 +15754,7 @@
                     "additionalProperties": {}
                   }
                 },
-                "required": [
-                  "type",
-                  "command"
-                ],
+                "required": ["type", "command"],
                 "additionalProperties": false
               },
               {
@@ -17647,10 +15784,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "llm-judge",
-                      "llm_judge"
-                    ]
+                    "enum": ["llm-judge", "llm_judge"]
                   },
                   "prompt": {
                     "anyOf": [
@@ -17744,10 +15878,7 @@
                                 "minLength": 1
                               }
                             },
-                            "required": [
-                              "score_range",
-                              "outcome"
-                            ],
+                            "required": ["score_range", "outcome"],
                             "additionalProperties": false
                           }
                         }
@@ -17776,9 +15907,7 @@
                     "maximum": 2
                   }
                 },
-                "required": [
-                  "type"
-                ],
+                "required": ["type"],
                 "additionalProperties": false
               },
               {
@@ -17838,9 +15967,7 @@
                             }
                           }
                         },
-                        "required": [
-                          "type"
-                        ],
+                        "required": ["type"],
                         "additionalProperties": false
                       },
                       {
@@ -17856,10 +15983,7 @@
                             "maximum": 1
                           }
                         },
-                        "required": [
-                          "type",
-                          "threshold"
-                        ],
+                        "required": ["type", "threshold"],
                         "additionalProperties": false
                       },
                       {
@@ -17876,10 +16000,7 @@
                             "type": "string"
                           }
                         },
-                        "required": [
-                          "type",
-                          "path"
-                        ],
+                        "required": ["type", "path"],
                         "additionalProperties": false
                       },
                       {
@@ -17896,18 +16017,13 @@
                             "type": "string"
                           }
                         },
-                        "required": [
-                          "type"
-                        ],
+                        "required": ["type"],
                         "additionalProperties": false
                       }
                     ]
                   }
                 },
-                "required": [
-                  "type",
-                  "aggregator"
-                ],
+                "required": ["type", "aggregator"],
                 "additionalProperties": false
               },
               {
@@ -17937,20 +16053,11 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "tool-trajectory",
-                      "tool_trajectory"
-                    ]
+                    "enum": ["tool-trajectory", "tool_trajectory"]
                   },
                   "mode": {
                     "type": "string",
-                    "enum": [
-                      "any_order",
-                      "in_order",
-                      "exact",
-                      "subset",
-                      "superset"
-                    ]
+                    "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                   },
                   "minimums": {
                     "type": "object",
@@ -17991,12 +16098,7 @@
                           "anyOf": [
                             {
                               "type": "string",
-                              "enum": [
-                                "exact",
-                                "ignore",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["exact", "ignore", "subset", "superset"]
                             },
                             {
                               "type": "array",
@@ -18010,12 +16112,7 @@
                           "anyOf": [
                             {
                               "type": "string",
-                              "enum": [
-                                "exact",
-                                "ignore",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["exact", "ignore", "subset", "superset"]
                             },
                             {
                               "type": "array",
@@ -18026,9 +16123,7 @@
                           ]
                         }
                       },
-                      "required": [
-                        "tool"
-                      ],
+                      "required": ["tool"],
                       "additionalProperties": false
                     }
                   },
@@ -18036,12 +16131,7 @@
                     "anyOf": [
                       {
                         "type": "string",
-                        "enum": [
-                          "exact",
-                          "ignore",
-                          "subset",
-                          "superset"
-                        ]
+                        "enum": ["exact", "ignore", "subset", "superset"]
                       },
                       {
                         "type": "array",
@@ -18055,12 +16145,7 @@
                     "anyOf": [
                       {
                         "type": "string",
-                        "enum": [
-                          "exact",
-                          "ignore",
-                          "subset",
-                          "superset"
-                        ]
+                        "enum": ["exact", "ignore", "subset", "superset"]
                       },
                       {
                         "type": "array",
@@ -18071,10 +16156,7 @@
                     ]
                   }
                 },
-                "required": [
-                  "type",
-                  "mode"
-                ],
+                "required": ["type", "mode"],
                 "additionalProperties": false
               },
               {
@@ -18104,10 +16186,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "field-accuracy",
-                      "field_accuracy"
-                    ]
+                    "enum": ["field-accuracy", "field_accuracy"]
                   },
                   "fields": {
                     "type": "array",
@@ -18119,11 +16198,7 @@
                         },
                         "match": {
                           "type": "string",
-                          "enum": [
-                            "exact",
-                            "numeric_tolerance",
-                            "date"
-                          ]
+                          "enum": ["exact", "numeric_tolerance", "date"]
                         },
                         "required": {
                           "type": "boolean"
@@ -18145,26 +16220,17 @@
                           }
                         }
                       },
-                      "required": [
-                        "path",
-                        "match"
-                      ],
+                      "required": ["path", "match"],
                       "additionalProperties": false
                     },
                     "minItems": 1
                   },
                   "aggregation": {
                     "type": "string",
-                    "enum": [
-                      "weighted_average",
-                      "all_or_nothing"
-                    ]
+                    "enum": ["weighted_average", "all_or_nothing"]
                   }
                 },
-                "required": [
-                  "type",
-                  "fields"
-                ],
+                "required": ["type", "fields"],
                 "additionalProperties": false
               },
               {
@@ -18201,10 +16267,7 @@
                     "minimum": 0
                   }
                 },
-                "required": [
-                  "type",
-                  "threshold"
-                ],
+                "required": ["type", "threshold"],
                 "additionalProperties": false
               },
               {
@@ -18241,10 +16304,7 @@
                     "minimum": 0
                   }
                 },
-                "required": [
-                  "type",
-                  "budget"
-                ],
+                "required": ["type", "budget"],
                 "additionalProperties": false
               },
               {
@@ -18274,10 +16334,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "token-usage",
-                      "token_usage"
-                    ]
+                    "enum": ["token-usage", "token_usage"]
                   },
                   "max_total": {
                     "type": "number",
@@ -18292,9 +16349,7 @@
                     "minimum": 0
                   }
                 },
-                "required": [
-                  "type"
-                ],
+                "required": ["type"],
                 "additionalProperties": false
               },
               {
@@ -18324,10 +16379,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "execution-metrics",
-                      "execution_metrics"
-                    ]
+                    "enum": ["execution-metrics", "execution_metrics"]
                   },
                   "max_tool_calls": {
                     "type": "number",
@@ -18359,9 +16411,7 @@
                     "minimum": 0
                   }
                 },
-                "required": [
-                  "type"
-                ],
+                "required": ["type"],
                 "additionalProperties": false
               },
               {
@@ -18397,10 +16447,7 @@
                     "type": "string"
                   }
                 },
-                "required": [
-                  "type",
-                  "value"
-                ],
+                "required": ["type", "value"],
                 "additionalProperties": false
               },
               {
@@ -18436,10 +16483,7 @@
                     "type": "string"
                   }
                 },
-                "required": [
-                  "type",
-                  "value"
-                ],
+                "required": ["type", "value"],
                 "additionalProperties": false
               },
               {
@@ -18469,15 +16513,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "is-json",
-                      "is_json"
-                    ]
+                    "enum": ["is-json", "is_json"]
                   }
                 },
-                "required": [
-                  "type"
-                ],
+                "required": ["type"],
                 "additionalProperties": false
               },
               {
@@ -18513,10 +16552,7 @@
                     "type": "string"
                   }
                 },
-                "required": [
-                  "type",
-                  "value"
-                ],
+                "required": ["type", "value"],
                 "additionalProperties": false
               },
               {
@@ -18597,10 +16633,7 @@
                                 "minLength": 1
                               }
                             },
-                            "required": [
-                              "score_range",
-                              "outcome"
-                            ],
+                            "required": ["score_range", "outcome"],
                             "additionalProperties": false
                           }
                         }
@@ -18610,10 +16643,7 @@
                     "minItems": 1
                   }
                 },
-                "required": [
-                  "type",
-                  "criteria"
-                ],
+                "required": ["type", "criteria"],
                 "additionalProperties": false
               }
             ]
@@ -18650,10 +16680,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "code-judge",
-                      "code_judge"
-                    ]
+                    "enum": ["code-judge", "code_judge"]
                   },
                   "command": {
                     "anyOf": [
@@ -18705,10 +16732,7 @@
                     "additionalProperties": {}
                   }
                 },
-                "required": [
-                  "type",
-                  "command"
-                ],
+                "required": ["type", "command"],
                 "additionalProperties": false
               },
               {
@@ -18738,10 +16762,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "llm-judge",
-                      "llm_judge"
-                    ]
+                    "enum": ["llm-judge", "llm_judge"]
                   },
                   "prompt": {
                     "anyOf": [
@@ -18835,10 +16856,7 @@
                                 "minLength": 1
                               }
                             },
-                            "required": [
-                              "score_range",
-                              "outcome"
-                            ],
+                            "required": ["score_range", "outcome"],
                             "additionalProperties": false
                           }
                         }
@@ -18867,9 +16885,7 @@
                     "maximum": 2
                   }
                 },
-                "required": [
-                  "type"
-                ],
+                "required": ["type"],
                 "additionalProperties": false
               },
               {
@@ -18929,9 +16945,7 @@
                             }
                           }
                         },
-                        "required": [
-                          "type"
-                        ],
+                        "required": ["type"],
                         "additionalProperties": false
                       },
                       {
@@ -18947,10 +16961,7 @@
                             "maximum": 1
                           }
                         },
-                        "required": [
-                          "type",
-                          "threshold"
-                        ],
+                        "required": ["type", "threshold"],
                         "additionalProperties": false
                       },
                       {
@@ -18967,10 +16978,7 @@
                             "type": "string"
                           }
                         },
-                        "required": [
-                          "type",
-                          "path"
-                        ],
+                        "required": ["type", "path"],
                         "additionalProperties": false
                       },
                       {
@@ -18987,18 +16995,13 @@
                             "type": "string"
                           }
                         },
-                        "required": [
-                          "type"
-                        ],
+                        "required": ["type"],
                         "additionalProperties": false
                       }
                     ]
                   }
                 },
-                "required": [
-                  "type",
-                  "aggregator"
-                ],
+                "required": ["type", "aggregator"],
                 "additionalProperties": false
               },
               {
@@ -19028,20 +17031,11 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "tool-trajectory",
-                      "tool_trajectory"
-                    ]
+                    "enum": ["tool-trajectory", "tool_trajectory"]
                   },
                   "mode": {
                     "type": "string",
-                    "enum": [
-                      "any_order",
-                      "in_order",
-                      "exact",
-                      "subset",
-                      "superset"
-                    ]
+                    "enum": ["any_order", "in_order", "exact", "subset", "superset"]
                   },
                   "minimums": {
                     "type": "object",
@@ -19082,12 +17076,7 @@
                           "anyOf": [
                             {
                               "type": "string",
-                              "enum": [
-                                "exact",
-                                "ignore",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["exact", "ignore", "subset", "superset"]
                             },
                             {
                               "type": "array",
@@ -19101,12 +17090,7 @@
                           "anyOf": [
                             {
                               "type": "string",
-                              "enum": [
-                                "exact",
-                                "ignore",
-                                "subset",
-                                "superset"
-                              ]
+                              "enum": ["exact", "ignore", "subset", "superset"]
                             },
                             {
                               "type": "array",
@@ -19117,9 +17101,7 @@
                           ]
                         }
                       },
-                      "required": [
-                        "tool"
-                      ],
+                      "required": ["tool"],
                       "additionalProperties": false
                     }
                   },
@@ -19127,12 +17109,7 @@
                     "anyOf": [
                       {
                         "type": "string",
-                        "enum": [
-                          "exact",
-                          "ignore",
-                          "subset",
-                          "superset"
-                        ]
+                        "enum": ["exact", "ignore", "subset", "superset"]
                       },
                       {
                         "type": "array",
@@ -19146,12 +17123,7 @@
                     "anyOf": [
                       {
                         "type": "string",
-                        "enum": [
-                          "exact",
-                          "ignore",
-                          "subset",
-                          "superset"
-                        ]
+                        "enum": ["exact", "ignore", "subset", "superset"]
                       },
                       {
                         "type": "array",
@@ -19162,10 +17134,7 @@
                     ]
                   }
                 },
-                "required": [
-                  "type",
-                  "mode"
-                ],
+                "required": ["type", "mode"],
                 "additionalProperties": false
               },
               {
@@ -19195,10 +17164,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "field-accuracy",
-                      "field_accuracy"
-                    ]
+                    "enum": ["field-accuracy", "field_accuracy"]
                   },
                   "fields": {
                     "type": "array",
@@ -19210,11 +17176,7 @@
                         },
                         "match": {
                           "type": "string",
-                          "enum": [
-                            "exact",
-                            "numeric_tolerance",
-                            "date"
-                          ]
+                          "enum": ["exact", "numeric_tolerance", "date"]
                         },
                         "required": {
                           "type": "boolean"
@@ -19236,26 +17198,17 @@
                           }
                         }
                       },
-                      "required": [
-                        "path",
-                        "match"
-                      ],
+                      "required": ["path", "match"],
                       "additionalProperties": false
                     },
                     "minItems": 1
                   },
                   "aggregation": {
                     "type": "string",
-                    "enum": [
-                      "weighted_average",
-                      "all_or_nothing"
-                    ]
+                    "enum": ["weighted_average", "all_or_nothing"]
                   }
                 },
-                "required": [
-                  "type",
-                  "fields"
-                ],
+                "required": ["type", "fields"],
                 "additionalProperties": false
               },
               {
@@ -19292,10 +17245,7 @@
                     "minimum": 0
                   }
                 },
-                "required": [
-                  "type",
-                  "threshold"
-                ],
+                "required": ["type", "threshold"],
                 "additionalProperties": false
               },
               {
@@ -19332,10 +17282,7 @@
                     "minimum": 0
                   }
                 },
-                "required": [
-                  "type",
-                  "budget"
-                ],
+                "required": ["type", "budget"],
                 "additionalProperties": false
               },
               {
@@ -19365,10 +17312,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "token-usage",
-                      "token_usage"
-                    ]
+                    "enum": ["token-usage", "token_usage"]
                   },
                   "max_total": {
                     "type": "number",
@@ -19383,9 +17327,7 @@
                     "minimum": 0
                   }
                 },
-                "required": [
-                  "type"
-                ],
+                "required": ["type"],
                 "additionalProperties": false
               },
               {
@@ -19415,10 +17357,7 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "execution-metrics",
-                      "execution_metrics"
-                    ]
+                    "enum": ["execution-metrics", "execution_metrics"]
                   },
                   "max_tool_calls": {
                     "type": "number",
@@ -19450,9 +17389,7 @@
                     "minimum": 0
                   }
                 },
-                "required": [
-                  "type"
-                ],
+                "required": ["type"],
                 "additionalProperties": false
               },
               {
@@ -19488,10 +17425,7 @@
                     "type": "string"
                   }
                 },
-                "required": [
-                  "type",
-                  "value"
-                ],
+                "required": ["type", "value"],
                 "additionalProperties": false
               },
               {
@@ -19527,10 +17461,7 @@
                     "type": "string"
                   }
                 },
-                "required": [
-                  "type",
-                  "value"
-                ],
+                "required": ["type", "value"],
                 "additionalProperties": false
               },
               {
@@ -19560,15 +17491,10 @@
                   },
                   "type": {
                     "type": "string",
-                    "enum": [
-                      "is-json",
-                      "is_json"
-                    ]
+                    "enum": ["is-json", "is_json"]
                   }
                 },
-                "required": [
-                  "type"
-                ],
+                "required": ["type"],
                 "additionalProperties": false
               },
               {
@@ -19604,10 +17530,7 @@
                     "type": "string"
                   }
                 },
-                "required": [
-                  "type",
-                  "value"
-                ],
+                "required": ["type", "value"],
                 "additionalProperties": false
               },
               {
@@ -19688,10 +17611,7 @@
                                 "minLength": 1
                               }
                             },
-                            "required": [
-                              "score_range",
-                              "outcome"
-                            ],
+                            "required": ["score_range", "outcome"],
                             "additionalProperties": false
                           }
                         }
@@ -19701,10 +17621,7 @@
                     "minItems": 1
                   }
                 },
-                "required": [
-                  "type",
-                  "criteria"
-                ],
+                "required": ["type", "criteria"],
                 "additionalProperties": false
               }
             ]
@@ -19720,10 +17637,7 @@
                 },
                 "isolation": {
                   "type": "string",
-                  "enum": [
-                    "shared",
-                    "per_test"
-                  ]
+                  "enum": ["shared", "per_test"]
                 },
                 "repos": {
                   "type": "array",
@@ -19747,10 +17661,7 @@
                                 "format": "uri"
                               }
                             },
-                            "required": [
-                              "type",
-                              "url"
-                            ],
+                            "required": ["type", "url"],
                             "additionalProperties": false
                           },
                           {
@@ -19764,10 +17675,7 @@
                                 "type": "string"
                               }
                             },
-                            "required": [
-                              "type",
-                              "path"
-                            ],
+                            "required": ["type", "path"],
                             "additionalProperties": false
                           }
                         ]
@@ -19780,10 +17688,7 @@
                           },
                           "resolve": {
                             "type": "string",
-                            "enum": [
-                              "remote",
-                              "local"
-                            ]
+                            "enum": ["remote", "local"]
                           },
                           "ancestor": {
                             "type": "integer",
@@ -19812,10 +17717,7 @@
                         "additionalProperties": false
                       }
                     },
-                    "required": [
-                      "path",
-                      "source"
-                    ],
+                    "required": ["path", "source"],
                     "additionalProperties": false
                   }
                 },
@@ -19851,11 +17753,7 @@
                         },
                         "reset": {
                           "type": "string",
-                          "enum": [
-                            "none",
-                            "fast",
-                            "strict"
-                          ]
+                          "enum": ["none", "fast", "strict"]
                         }
                       },
                       "additionalProperties": false
@@ -19886,11 +17784,7 @@
                         },
                         "reset": {
                           "type": "string",
-                          "enum": [
-                            "none",
-                            "fast",
-                            "strict"
-                          ]
+                          "enum": ["none", "fast", "strict"]
                         }
                       },
                       "additionalProperties": false
@@ -19921,11 +17815,7 @@
                         },
                         "reset": {
                           "type": "string",
-                          "enum": [
-                            "none",
-                            "fast",
-                            "strict"
-                          ]
+                          "enum": ["none", "fast", "strict"]
                         }
                       },
                       "additionalProperties": false
@@ -19956,11 +17846,7 @@
                         },
                         "reset": {
                           "type": "string",
-                          "enum": [
-                            "none",
-                            "fast",
-                            "strict"
-                          ]
+                          "enum": ["none", "fast", "strict"]
                         }
                       },
                       "additionalProperties": false
@@ -19970,11 +17856,7 @@
                 },
                 "mode": {
                   "type": "string",
-                  "enum": [
-                    "pooled",
-                    "temp",
-                    "static"
-                  ]
+                  "enum": ["pooled", "temp", "static"]
                 },
                 "path": {
                   "type": "string"
@@ -19988,9 +17870,7 @@
           ]
         }
       },
-      "required": [
-        "tests"
-      ],
+      "required": ["tests"],
       "additionalProperties": false
     }
   }

From 3c9ed0019d69762a8ef79a7c5d61144f8ed5bec2 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 19:18:08 +0000
Subject: [PATCH 13/13] chore: remove accidentally committed node_modules
 symlinks

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 apps/cli/node_modules      | 1 -
 node_modules               | 1 -
 packages/core/node_modules | 1 -
 packages/eval/node_modules | 1 -
 4 files changed, 4 deletions(-)
 delete mode 120000 apps/cli/node_modules
 delete mode 120000 node_modules
 delete mode 120000 packages/core/node_modules
 delete mode 120000 packages/eval/node_modules

diff --git a/apps/cli/node_modules b/apps/cli/node_modules
deleted file mode 120000
index c99229581..000000000
--- a/apps/cli/node_modules
+++ /dev/null
@@ -1 +0,0 @@
-/home/christso/projects/agentv/apps/cli/node_modules
\ No newline at end of file
diff --git a/node_modules b/node_modules
deleted file mode 120000
index 8cba0ae08..000000000
--- a/node_modules
+++ /dev/null
@@ -1 +0,0 @@
-/home/christso/projects/agentv/node_modules
\ No newline at end of file
diff --git a/packages/core/node_modules b/packages/core/node_modules
deleted file mode 120000
index a07840188..000000000
--- a/packages/core/node_modules
+++ /dev/null
@@ -1 +0,0 @@
-/home/christso/projects/agentv/packages/core/node_modules
\ No newline at end of file
diff --git a/packages/eval/node_modules b/packages/eval/node_modules
deleted file mode 120000
index d1cf07368..000000000
--- a/packages/eval/node_modules
+++ /dev/null
@@ -1 +0,0 @@
-/home/christso/projects/agentv/packages/eval/node_modules
\ No newline at end of file