From 34fcaea9616343a5ea81159a0221616b01c3b241 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Sat, 14 Mar 2026 12:23:55 +0000 Subject: [PATCH 1/8] feat(providers,evaluators): add claude-cli provider and trigger-judge evaluator (#593) - Add ClaudeCliProvider that spawns `claude -p` as a subprocess, streams output via --output-format stream-json --include-partial-messages, and extracts tool calls, token usage, and cost from stream events - Rename existing SDK provider class to ClaudeSdkProvider (claude-sdk.ts) with kind 'claude-sdk' for explicit opt-in to the Agent SDK path - Register 'claude' and 'claude-cli' as aliases for ClaudeCliProvider; 'claude-sdk' maps to ClaudeSdkProvider - Add 'claude-cli' and 'claude-sdk' to ProviderKind, AGENT_PROVIDER_KINDS, KNOWN_PROVIDERS, and ResolvedTarget union - Add TriggerJudgeEvaluator that checks whether the agent invoked a named skill by scanning tool calls for Skill invocations (args.skill match) or skill file reads (.claude/commands/, .claude/skills/) - Register trigger-judge in evaluator parser, schema, builtin registry, and EvaluatorConfig union - Regenerate eval-schema.json to include trigger-judge schema - Add unit tests for trigger-judge evaluator and claude provider aliases --- .../core/src/evaluation/evaluators/index.ts | 3 + .../evaluation/evaluators/trigger-judge.ts | 147 +++++ .../evaluation/loaders/evaluator-parser.ts | 23 + .../src/evaluation/providers/claude-cli.ts | 586 +++++++++++++++++ .../src/evaluation/providers/claude-sdk.ts | 495 +++++++++++++++ .../core/src/evaluation/providers/index.ts | 8 +- .../core/src/evaluation/providers/targets.ts | 27 +- .../core/src/evaluation/providers/types.ts | 7 +- .../evaluation/registry/builtin-evaluators.ts | 8 + packages/core/src/evaluation/types.ts | 18 + .../evaluation/validation/eval-file.schema.ts | 6 + .../validation/targets-validator.ts | 1 + .../providers/claude-provider-aliases.test.ts | 63 ++ .../trigger-judge-evaluator.test.ts | 253 ++++++++ 
.../references/eval-schema.json | 592 +++++++++++++++++- 15 files changed, 2226 insertions(+), 11 deletions(-) create mode 100644 packages/core/src/evaluation/evaluators/trigger-judge.ts create mode 100644 packages/core/src/evaluation/providers/claude-cli.ts create mode 100644 packages/core/src/evaluation/providers/claude-sdk.ts create mode 100644 packages/core/test/evaluation/providers/claude-provider-aliases.test.ts create mode 100644 packages/core/test/evaluation/trigger-judge-evaluator.test.ts diff --git a/packages/core/src/evaluation/evaluators/index.ts b/packages/core/src/evaluation/evaluators/index.ts index 2a7ea58aa..59355a7e6 100644 --- a/packages/core/src/evaluation/evaluators/index.ts +++ b/packages/core/src/evaluation/evaluators/index.ts @@ -64,6 +64,9 @@ export type { TokenUsageEvaluatorOptions } from './token-usage.js'; export { ToolTrajectoryEvaluator } from './tool-trajectory.js'; export type { ToolTrajectoryEvaluatorOptions } from './tool-trajectory.js'; +export { TriggerJudgeEvaluator } from './trigger-judge.js'; +export type { TriggerJudgeEvaluatorConfig } from '../types.js'; + // Deterministic assertions export { runContainsAssertion, diff --git a/packages/core/src/evaluation/evaluators/trigger-judge.ts b/packages/core/src/evaluation/evaluators/trigger-judge.ts new file mode 100644 index 000000000..475e21ad0 --- /dev/null +++ b/packages/core/src/evaluation/evaluators/trigger-judge.ts @@ -0,0 +1,147 @@ +import type { ToolCall } from '../providers/types.js'; +import type { TriggerJudgeEvaluatorConfig } from '../types.js'; +import type { EvaluationContext, EvaluationScore, Evaluator } from './types.js'; + +export type { TriggerJudgeEvaluatorConfig }; + +/** + * TriggerJudgeEvaluator checks whether the agent invoked a named skill during + * its execution. It scans the response tool calls for: + * + * 1. A `Skill` tool call where args.skill contains the skill name + * 2. 
A `Read` tool call where the file_path contains the skill name and a + * skill-related directory (.claude/commands/ or .claude/skills/) + * + * This enables post-hoc verification that the agent used the correct skill + * rather than re-implementing the logic inline. + */ +export class TriggerJudgeEvaluator implements Evaluator { + readonly kind = 'trigger-judge'; + + private readonly config: TriggerJudgeEvaluatorConfig; + + constructor(config: TriggerJudgeEvaluatorConfig) { + this.config = config; + } + + evaluate(context: EvaluationContext): EvaluationScore { + const skillName = this.config.skill; + const allToolCalls = collectAllToolCalls(context.output); + + if (allToolCalls.length === 0) { + return { + score: 0, + verdict: 'fail', + hits: [], + misses: [`No tool calls found — skill '${skillName}' was not triggered`], + expectedAspectCount: 1, + reasoning: `No tool calls were made, so skill '${skillName}' was not invoked.`, + }; + } + + // Check for Skill tool call with matching skill name + const skillToolCall = findSkillToolCall(allToolCalls, skillName); + if (skillToolCall) { + const argsStr = JSON.stringify(skillToolCall.input ?? {}); + return { + score: 1, + verdict: 'pass', + hits: [`Skill tool called with skill='${skillName}' (args: ${argsStr})`], + misses: [], + expectedAspectCount: 1, + reasoning: `The agent invoked the '${skillName}' skill via the Skill tool.`, + }; + } + + // Check for Read tool call loading a skill file + const readToolCall = findSkillReadToolCall(allToolCalls, skillName); + if (readToolCall) { + const filePath = + typeof (readToolCall.input as Record | undefined)?.file_path === 'string' + ? (readToolCall.input as Record).file_path + : String(readToolCall.input ?? 
''); + return { + score: 1, + verdict: 'pass', + hits: [`Skill file read: ${filePath}`], + misses: [], + expectedAspectCount: 1, + reasoning: `The agent read the skill file for '${skillName}' at '${filePath}'.`, + }; + } + + return { + score: 0, + verdict: 'fail', + hits: [], + misses: [`Skill '${skillName}' was not triggered (${allToolCalls.length} tool calls made)`], + expectedAspectCount: 1, + reasoning: `The agent made ${allToolCalls.length} tool call(s) but did not invoke skill '${skillName}'.`, + }; + } +} + +/** + * Collect all tool calls from all output messages. + */ +function collectAllToolCalls( + output: readonly import('../providers/types.js').Message[] | undefined, +): readonly ToolCall[] { + if (!output || output.length === 0) { + return []; + } + const result: ToolCall[] = []; + for (const message of output) { + if (message.toolCalls && message.toolCalls.length > 0) { + result.push(...message.toolCalls); + } + } + return result; +} + +/** + * Find a Skill tool call where args.skill matches (exact or contains) the skill name. + */ +function findSkillToolCall( + toolCalls: readonly ToolCall[], + skillName: string, +): ToolCall | undefined { + const lowerSkill = skillName.toLowerCase(); + for (const tc of toolCalls) { + if (tc.tool !== 'Skill') continue; + const args = tc.input as Record | undefined; + if (!args) continue; + const argSkill = args.skill ?? args.name ?? args.args; + if (typeof argSkill === 'string') { + const lowerArgSkill = argSkill.toLowerCase(); + if (lowerArgSkill === lowerSkill || lowerArgSkill.includes(lowerSkill)) { + return tc; + } + } + } + return undefined; +} + +/** + * Find a Read tool call where the file_path contains the skill name and a + * known skill directory (.claude/commands/ or .claude/skills/). 
+ */ +function findSkillReadToolCall( + toolCalls: readonly ToolCall[], + skillName: string, +): ToolCall | undefined { + const lowerSkill = skillName.toLowerCase(); + const skillDirs = ['.claude/commands/', '.claude/skills/']; + for (const tc of toolCalls) { + if (tc.tool !== 'Read') continue; + const args = tc.input as Record | undefined; + if (!args) continue; + const filePath = typeof args.file_path === 'string' ? args.file_path.toLowerCase() : ''; + if (!filePath) continue; + const inSkillDir = skillDirs.some((dir) => filePath.includes(dir)); + if (inSkillDir && filePath.includes(lowerSkill)) { + return tc; + } + } + return undefined; +} diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts index 4e540f772..db6393dbb 100644 --- a/packages/core/src/evaluation/loaders/evaluator-parser.ts +++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts @@ -596,6 +596,29 @@ async function parseEvaluatorList( continue; } + if (typeValue === 'trigger-judge') { + const skill = asString(rawEvaluator.skill); + if (!skill) { + logWarning( + `Skipping trigger-judge evaluator '${name}' in '${evalId}': missing required 'skill' field`, + ); + continue; + } + + const weight = validateWeight(rawEvaluator.weight, name, evalId); + const required = parseRequired(rawEvaluator.required); + + evaluators.push({ + name, + type: 'trigger-judge', + skill, + ...(weight !== undefined ? { weight } : {}), + ...(required !== undefined ? { required } : {}), + ...(negate !== undefined ? 
{ negate } : {}), + } as import('../types.js').TriggerJudgeEvaluatorConfig); + continue; + } + if (typeValue === 'field-accuracy') { const rawFields = rawEvaluator.fields; if (!Array.isArray(rawFields)) { diff --git a/packages/core/src/evaluation/providers/claude-cli.ts b/packages/core/src/evaluation/providers/claude-cli.ts new file mode 100644 index 000000000..67cf96295 --- /dev/null +++ b/packages/core/src/evaluation/providers/claude-cli.ts @@ -0,0 +1,586 @@ +import { spawn } from 'node:child_process'; +import { randomUUID } from 'node:crypto'; +import { createWriteStream } from 'node:fs'; +import type { WriteStream } from 'node:fs'; +import { mkdir } from 'node:fs/promises'; +import path from 'node:path'; + +import { recordClaudeLogEntry } from './claude-log-tracker.js'; +import { buildPromptDocument, normalizeInputFiles } from './preread.js'; +import type { ClaudeResolvedConfig } from './targets.js'; +import type { + Message, + Provider, + ProviderRequest, + ProviderResponse, + ProviderTokenUsage, + ToolCall, +} from './types.js'; + +/** + * Claude CLI provider that spawns `claude -p` as a subprocess. + * Uses --output-format stream-json --include-partial-messages for structured output. + * This is the default `claude` provider. Use `claude-sdk` for SDK-based invocation. 
+ */ +export class ClaudeCliProvider implements Provider { + readonly id: string; + readonly kind = 'claude-cli' as const; + readonly targetName: string; + readonly supportsBatch = false; + + private readonly config: ClaudeResolvedConfig; + + constructor(targetName: string, config: ClaudeResolvedConfig) { + this.id = `claude-cli:${targetName}`; + this.targetName = targetName; + this.config = config; + } + + async invoke(request: ProviderRequest): Promise { + if (request.signal?.aborted) { + throw new Error('Claude CLI request was aborted before execution'); + } + + const startTime = new Date().toISOString(); + const startMs = Date.now(); + + const logger = await this.createStreamLogger(request).catch(() => undefined); + + // Build the prompt + const inputFiles = normalizeInputFiles(request.inputFiles); + const prompt = buildPromptDocument(request, inputFiles); + + const args = this.buildArgs(); + const cwd = this.resolveCwd(request.cwd); + const env = sanitizeEnvForClaude(request.braintrustSpanIds); + + // Track state from stream events + const completedToolCalls: ToolCall[] = []; + const output: Message[] = []; + let tokenUsage: ProviderTokenUsage | undefined; + let costUsd: number | undefined; + let durationMs: number | undefined; + + try { + const result = await this.runClaude({ + args, + cwd, + prompt, + env, + signal: request.signal, + onLine: (line) => { + logger?.handleLine(line); + const event = tryParseJson(line); + if (!event) return; + + if (event.type === 'assistant') { + const betaMessage = event.message; + if (betaMessage && typeof betaMessage === 'object') { + const msg = betaMessage as Record; + const content = msg.content; + const textContent = extractTextContent(content); + const toolCalls = extractToolCalls(content); + + const outputMsg: Message = { + role: 'assistant', + content: textContent, + toolCalls: toolCalls.length > 0 ? 
toolCalls : undefined, + }; + output.push(outputMsg); + completedToolCalls.push(...toolCalls); + + // Stream callbacks for real-time observability + if (request.streamCallbacks) { + for (const tc of toolCalls) { + request.streamCallbacks.onToolCallEnd?.( + tc.tool, + tc.input, + tc.output, + tc.durationMs ?? 0, + tc.id, + ); + } + } + } + } + + if (event.type === 'result') { + const resultEvent = event as Record; + if (typeof resultEvent.total_cost_usd === 'number') { + costUsd = resultEvent.total_cost_usd; + } + if (typeof resultEvent.duration_ms === 'number') { + durationMs = resultEvent.duration_ms; + } + const usage = resultEvent.usage as Record | undefined; + if (usage) { + const inputTokens = + ((usage.input_tokens as number) ?? 0) + + ((usage.cache_read_input_tokens as number) ?? 0) + + ((usage.cache_creation_input_tokens as number) ?? 0); + const outputTokens = (usage.output_tokens as number) ?? 0; + tokenUsage = { + input: inputTokens, + output: outputTokens, + cached: (usage.cache_read_input_tokens as number) ?? undefined, + }; + + // Stream callback for LLM usage + request.streamCallbacks?.onLlmCallEnd?.(this.config.model ?? 'claude', tokenUsage); + } + } + }, + }); + + if (result.timedOut) { + throw new Error( + `Claude CLI timed out${formatTimeoutSuffix(this.config.timeoutMs ?? undefined)}`, + ); + } + + if (result.exitCode !== 0) { + const detail = result.stderr.trim() || result.stdout.trim(); + const prefix = `Claude CLI exited with code ${result.exitCode}`; + throw new Error(detail ? `${prefix}: ${detail}` : prefix); + } + + const endTime = new Date().toISOString(); + const totalDurationMs = durationMs ?? 
Date.now() - startMs; + + return { + raw: { + model: this.config.model, + logFile: logger?.filePath, + args, + exitCode: result.exitCode, + }, + output, + tokenUsage, + costUsd, + durationMs: totalDurationMs, + startTime, + endTime, + }; + } finally { + await logger?.close(); + } + } + + private buildArgs(): string[] { + const args = ['-p', '--output-format', 'stream-json', '--include-partial-messages']; + + if (this.config.model) { + args.push('--model', this.config.model); + } + + if (this.config.maxTurns !== undefined) { + args.push('--max-turns', String(this.config.maxTurns)); + } + + return args; + } + + private resolveCwd(cwdOverride?: string): string | undefined { + if (cwdOverride) { + return path.resolve(cwdOverride); + } + if (this.config.cwd) { + return path.resolve(this.config.cwd); + } + return undefined; + } + + private resolveLogDirectory(): string | undefined { + const disabled = isClaudeCliLogStreamingDisabled(); + if (disabled) { + return undefined; + } + if (this.config.logDir) { + return path.resolve(this.config.logDir); + } + return path.join(process.cwd(), '.agentv', 'logs', 'claude-cli'); + } + + private async createStreamLogger( + request: ProviderRequest, + ): Promise { + const logDir = this.resolveLogDirectory(); + if (!logDir) { + return undefined; + } + try { + await mkdir(logDir, { recursive: true }); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`); + return undefined; + } + + const filePath = path.join(logDir, buildLogFilename(request, this.targetName)); + + try { + const logger = await ClaudeCliStreamLogger.create({ + filePath, + targetName: this.targetName, + evalCaseId: request.evalCaseId, + attempt: request.attempt, + format: this.config.logFormat ?? 
'summary', + }); + recordClaudeLogEntry({ + filePath, + targetName: this.targetName, + evalCaseId: request.evalCaseId, + attempt: request.attempt, + }); + return logger; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.warn(`Skipping Claude CLI stream logging for ${filePath}: ${message}`); + return undefined; + } + } + + private async runClaude(options: { + readonly args: string[]; + readonly cwd: string | undefined; + readonly prompt: string; + readonly env: Record; + readonly signal?: AbortSignal; + readonly onLine: (line: string) => void; + }): Promise<{ stdout: string; stderr: string; exitCode: number; timedOut: boolean }> { + return new Promise((resolve, reject) => { + const spawnOptions: Parameters[2] = { + stdio: ['pipe', 'pipe', 'pipe'], + env: options.env as NodeJS.ProcessEnv, + }; + if (options.cwd) { + spawnOptions.cwd = options.cwd; + } + + const child = spawn('claude', options.args, spawnOptions); + + let stdout = ''; + let stderr = ''; + let timedOut = false; + let stdoutBuffer = ''; + + const onAbort = (): void => { + child.kill('SIGTERM'); + }; + + if (options.signal) { + if (options.signal.aborted) { + onAbort(); + } else { + options.signal.addEventListener('abort', onAbort, { once: true }); + } + } + + let timeoutHandle: NodeJS.Timeout | undefined; + if (this.config.timeoutMs && this.config.timeoutMs > 0) { + timeoutHandle = setTimeout(() => { + timedOut = true; + child.kill('SIGTERM'); + }, this.config.timeoutMs); + timeoutHandle.unref?.(); + } + + child.stdout.setEncoding('utf8'); + child.stdout.on('data', (chunk: string) => { + stdout += chunk; + stdoutBuffer += chunk; + // Process complete lines + const lines = stdoutBuffer.split(/\r?\n/); + stdoutBuffer = lines.pop() ?? 
''; + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed.length > 0) { + options.onLine(trimmed); + } + } + }); + + child.stderr.setEncoding('utf8'); + child.stderr.on('data', (chunk: string) => { + stderr += chunk; + }); + + // Send prompt via stdin + child.stdin.end(options.prompt); + + const cleanup = (): void => { + if (timeoutHandle) { + clearTimeout(timeoutHandle); + } + if (options.signal) { + options.signal.removeEventListener('abort', onAbort); + } + }; + + child.on('error', (error) => { + cleanup(); + const err = error as NodeJS.ErrnoException; + if (err.code === 'ENOENT') { + reject( + new Error( + `Claude CLI executable 'claude' was not found on PATH. Install claude-code or ensure it is in PATH.`, + ), + ); + } else { + reject(error); + } + }); + + child.on('close', (code) => { + cleanup(); + // Flush remaining buffer + if (stdoutBuffer.trim().length > 0) { + options.onLine(stdoutBuffer.trim()); + } + resolve({ + stdout, + stderr, + exitCode: typeof code === 'number' ? code : -1, + timedOut, + }); + }); + }); + } +} + +class ClaudeCliStreamLogger { + readonly filePath: string; + private readonly stream: WriteStream; + private readonly startedAt = Date.now(); + private readonly format: 'summary' | 'json'; + + private constructor(filePath: string, format: 'summary' | 'json') { + this.filePath = filePath; + this.format = format; + this.stream = createWriteStream(filePath, { flags: 'a' }); + } + + static async create(options: { + readonly filePath: string; + readonly targetName: string; + readonly evalCaseId?: string; + readonly attempt?: number; + readonly format: 'summary' | 'json'; + }): Promise { + const logger = new ClaudeCliStreamLogger(options.filePath, options.format); + const header = [ + '# Claude CLI stream log', + `# target: ${options.targetName}`, + options.evalCaseId ? `# eval: ${options.evalCaseId}` : undefined, + options.attempt !== undefined ? 
`# attempt: ${options.attempt + 1}` : undefined, + `# started: ${new Date().toISOString()}`, + '', + ].filter((line): line is string => Boolean(line)); + for (const line of header) { + logger.stream.write(`${line}\n`); + } + return logger; + } + + handleLine(line: string): void { + const elapsed = formatElapsed(this.startedAt); + const event = tryParseJson(line); + + if (this.format === 'json') { + if (event) { + this.stream.write(`${JSON.stringify({ time: elapsed, data: event })}\n`); + } else { + this.stream.write(`${JSON.stringify({ time: elapsed, raw: line })}\n`); + } + } else { + if (event) { + const summary = summarizeEvent(event); + if (summary) { + const type = typeof event.type === 'string' ? event.type : 'unknown'; + this.stream.write(`[+${elapsed}] [${type}] ${summary}\n`); + } + } else { + this.stream.write(`[+${elapsed}] ${line}\n`); + } + } + } + + async close(): Promise { + await new Promise((resolve, reject) => { + this.stream.once('error', reject); + this.stream.end(() => resolve()); + }); + } +} + +function summarizeEvent(event: Record): string | undefined { + const type = event.type as string; + switch (type) { + case 'assistant': { + const message = event.message as Record | undefined; + if (message) { + const content = message.content; + if (Array.isArray(content) && content.length > 0) { + const first = content[0] as Record | undefined; + if (first?.type === 'tool_use') { + return `tool_use (${first.name})`; + } + if (first?.type === 'text') { + const text = first.text; + if (typeof text === 'string') { + const preview = text.length > 50 ? 
`${text.slice(0, 50)}...` : text; + return preview; + } + } + } + } + return 'message'; + } + case 'user': { + const message = event.message as Record | undefined; + if (message) { + const content = message.content; + if (Array.isArray(content) && content.length > 0) { + const first = content[0] as Record | undefined; + if (first?.type === 'tool_result') { + return `tool_result (${first.tool_use_id})`; + } + } + } + return 'user'; + } + case 'result': { + const cost = event.total_cost_usd; + const duration = event.duration_ms; + if (typeof cost === 'number' && typeof duration === 'number') { + return `$${cost.toFixed(4)}, ${Math.round(duration)}ms`; + } + return 'result'; + } + case 'system': + return 'init'; + default: + return undefined; + } +} + +/** + * Extract text content from Claude's content array format. + */ +function extractTextContent(content: unknown): string | undefined { + if (typeof content === 'string') { + return content; + } + if (!Array.isArray(content)) { + return undefined; + } + const textParts: string[] = []; + for (const part of content) { + if (!part || typeof part !== 'object') { + continue; + } + const p = part as Record; + if (p.type === 'text' && typeof p.text === 'string') { + textParts.push(p.text); + } + } + return textParts.length > 0 ? textParts.join('\n') : undefined; +} + +/** + * Extract tool calls from Claude's content array format. + */ +function extractToolCalls(content: unknown): readonly ToolCall[] { + if (!Array.isArray(content)) { + return []; + } + const toolCalls: ToolCall[] = []; + for (const part of content) { + if (!part || typeof part !== 'object') { + continue; + } + const p = part as Record; + if (p.type === 'tool_use' && typeof p.name === 'string') { + toolCalls.push({ + tool: p.name, + input: p.input, + id: typeof p.id === 'string' ? p.id : undefined, + }); + } + } + return toolCalls; +} + +/** + * Build a sanitized process.env without variables that block nested Claude sessions. 
+ * Removes CLAUDECODE so the spawned CLI doesn't refuse to run inside another session. + */ +function sanitizeEnvForClaude(braintrustSpanIds?: { + readonly parentSpanId: string; + readonly rootSpanId: string; +}): Record { + const env = { ...process.env }; + // Remove all Claude Code session markers to allow nested sessions + env.CLAUDECODE = undefined; + env.CLAUDE_CODE_ENTRYPOINT = undefined; + // Inject Braintrust trace IDs so the trace-claude-code plugin can attach + // Claude Code session traces to the AgentV eval span + if (braintrustSpanIds) { + env.CC_PARENT_SPAN_ID = braintrustSpanIds.parentSpanId; + env.CC_ROOT_SPAN_ID = braintrustSpanIds.rootSpanId; + } + return env; +} + +function isClaudeCliLogStreamingDisabled(): boolean { + const envValue = process.env.AGENTV_CLAUDE_STREAM_LOGS; + if (!envValue) { + return false; + } + const normalized = envValue.trim().toLowerCase(); + return normalized === 'false' || normalized === '0' || normalized === 'off'; +} + +function buildLogFilename(request: ProviderRequest, targetName: string): string { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const evalId = sanitizeForFilename(request.evalCaseId ?? 'claude-cli'); + const attemptSuffix = request.attempt !== undefined ? `_attempt-${request.attempt + 1}` : ''; + const target = sanitizeForFilename(targetName); + return `${timestamp}_${target}_${evalId}${attemptSuffix}_${randomUUID().slice(0, 8)}.log`; +} + +function sanitizeForFilename(value: string): string { + const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, '_'); + return sanitized.length > 0 ? 
sanitized : 'claude-cli'; +} + +function formatElapsed(startedAt: number): string { + const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1000); + const hours = Math.floor(elapsedSeconds / 3600); + const minutes = Math.floor((elapsedSeconds % 3600) / 60); + const seconds = elapsedSeconds % 60; + if (hours > 0) { + return `${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`; + } + return `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`; +} + +function formatTimeoutSuffix(timeoutMs: number | undefined): string { + if (!timeoutMs || timeoutMs <= 0) { + return ''; + } + const seconds = Math.ceil(timeoutMs / 1000); + return ` after ${seconds}s`; +} + +function tryParseJson(line: string): Record | undefined { + try { + const parsed = JSON.parse(line); + if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { + return parsed as Record; + } + return undefined; + } catch { + return undefined; + } +} diff --git a/packages/core/src/evaluation/providers/claude-sdk.ts b/packages/core/src/evaluation/providers/claude-sdk.ts new file mode 100644 index 000000000..d4a768b4e --- /dev/null +++ b/packages/core/src/evaluation/providers/claude-sdk.ts @@ -0,0 +1,495 @@ +import { randomUUID } from 'node:crypto'; +import { createWriteStream } from 'node:fs'; +import type { WriteStream } from 'node:fs'; +import { mkdir } from 'node:fs/promises'; +import path from 'node:path'; + +import { recordClaudeLogEntry } from './claude-log-tracker.js'; +import { buildPromptDocument, normalizeInputFiles } from './preread.js'; +import type { ClaudeResolvedConfig } from './targets.js'; +import type { + Message, + Provider, + ProviderRequest, + ProviderResponse, + ProviderTokenUsage, + ToolCall, +} from './types.js'; + +// Lazy-loaded module to avoid bundling issues with dynamic requires +// biome-ignore lint/suspicious/noExplicitAny: dynamic import type +let claudeSdkModule: any = null; + 
+async function loadClaudeSdk(): Promise { + if (!claudeSdkModule) { + try { + claudeSdkModule = await import('@anthropic-ai/claude-agent-sdk'); + } catch (error) { + throw new Error( + `Failed to load @anthropic-ai/claude-agent-sdk. Please install it:\n npm install @anthropic-ai/claude-agent-sdk\n\nOriginal error: ${error instanceof Error ? error.message : String(error)}`, + ); + } + } + return claudeSdkModule; +} + +/** + * Claude Agent SDK provider using the @anthropic-ai/claude-agent-sdk library directly. + * This provides typed SDK access for structured tool calls, token usage, and clean + * session lifecycle. Use `claude-cli` for subprocess-based invocation. + * + * Note: The SDK is loaded lazily on first use to avoid bundling issues. + * Users must install @anthropic-ai/claude-agent-sdk separately. + */ +export class ClaudeSdkProvider implements Provider { + readonly id: string; + readonly kind = 'claude-sdk' as const; + readonly targetName: string; + readonly supportsBatch = false; + + private readonly config: ClaudeResolvedConfig; + + constructor(targetName: string, config: ClaudeResolvedConfig) { + this.id = `claude-sdk:${targetName}`; + this.targetName = targetName; + this.config = config; + } + + async invoke(request: ProviderRequest): Promise { + if (request.signal?.aborted) { + throw new Error('Claude SDK request was aborted before execution'); + } + + const sdk = await loadClaudeSdk(); + + const startTime = new Date().toISOString(); + const startMs = Date.now(); + + const logger = await this.createStreamLogger(request).catch(() => undefined); + + // Build the prompt + const inputFiles = normalizeInputFiles(request.inputFiles); + const prompt = buildPromptDocument(request, inputFiles); + + // Skip forced diff prompt when AgentV captures file changes + const systemPrompt = this.config.systemPrompt; + + // Build query options + // biome-ignore lint/suspicious/noExplicitAny: SDK options type is dynamically loaded + const queryOptions: any = { + 
permissionMode: 'bypassPermissions', + allowDangerouslySkipPermissions: true, + // The SDK spawns a Claude Code subprocess. When AgentV itself runs inside + // a Claude Code session the CLAUDECODE env var is set, which causes the + // subprocess to refuse to start ("cannot be launched inside another Claude + // Code session"). Passing a sanitized env removes that guard. + env: sanitizeEnvForClaudeSdk(request.braintrustSpanIds), + }; + + if (this.config.model) { + queryOptions.model = this.config.model; + } + + const cwd = this.resolveCwd(request.cwd); + if (cwd) { + queryOptions.cwd = cwd; + } + + if (systemPrompt) { + queryOptions.systemPrompt = systemPrompt; + } + + if (this.config.maxTurns !== undefined) { + queryOptions.maxTurns = this.config.maxTurns; + } + + if (this.config.maxBudgetUsd !== undefined) { + queryOptions.maxBudgetUsd = this.config.maxBudgetUsd; + } + + if (request.signal) { + queryOptions.abortController = { signal: request.signal } as AbortController; + } + + // Track state from messages + const completedToolCalls: ToolCall[] = []; + const output: Message[] = []; + let tokenUsage: ProviderTokenUsage | undefined; + let costUsd: number | undefined; + let durationMs: number | undefined; + + try { + const q = sdk.query({ prompt, options: queryOptions }); + + // Set up timeout if configured + let timeoutTimer: ReturnType | undefined; + if (this.config.timeoutMs) { + timeoutTimer = setTimeout(() => { + q.return(undefined as never).catch(() => {}); + }, this.config.timeoutMs); + timeoutTimer.unref?.(); + } + + try { + for await (const message of q) { + logger?.handleMessage(message); + + if (message.type === 'assistant') { + const betaMessage = (message as { message?: unknown }).message; + if (betaMessage && typeof betaMessage === 'object') { + const msg = betaMessage as Record; + const content = msg.content; + const textContent = extractTextContent(content); + const toolCalls = extractToolCalls(content); + + const outputMsg: Message = { + role: 
'assistant', + content: textContent, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + }; + output.push(outputMsg); + completedToolCalls.push(...toolCalls); + + // Stream callbacks for real-time observability + if (request.streamCallbacks) { + for (const tc of toolCalls) { + request.streamCallbacks.onToolCallEnd?.( + tc.tool, + tc.input, + tc.output, + tc.durationMs ?? 0, + tc.id, + ); + } + } + } + } + + if (message.type === 'result') { + const result = message as Record; + if (typeof result.total_cost_usd === 'number') { + costUsd = result.total_cost_usd; + } + if (typeof result.duration_ms === 'number') { + durationMs = result.duration_ms; + } + const usage = result.usage as Record | undefined; + if (usage) { + const inputTokens = + ((usage.input_tokens as number) ?? 0) + + ((usage.cache_read_input_tokens as number) ?? 0) + + ((usage.cache_creation_input_tokens as number) ?? 0); + const outputTokens = (usage.output_tokens as number) ?? 0; + tokenUsage = { + input: inputTokens, + output: outputTokens, + cached: (usage.cache_read_input_tokens as number) ?? undefined, + }; + + // Stream callback for LLM usage + request.streamCallbacks?.onLlmCallEnd?.(this.config.model ?? 'claude', tokenUsage); + } + } + } + } finally { + if (timeoutTimer) { + clearTimeout(timeoutTimer); + } + } + + const endTime = new Date().toISOString(); + const totalDurationMs = durationMs ?? 
Date.now() - startMs; + + return { + raw: { + model: this.config.model, + logFile: logger?.filePath, + }, + output, + tokenUsage, + costUsd, + durationMs: totalDurationMs, + startTime, + endTime, + }; + } finally { + await logger?.close(); + } + } + + private resolveCwd(cwdOverride?: string): string | undefined { + if (cwdOverride) { + return path.resolve(cwdOverride); + } + if (this.config.cwd) { + return path.resolve(this.config.cwd); + } + return undefined; + } + + private resolveLogDirectory(): string | undefined { + const disabled = isClaudeLogStreamingDisabled(); + if (disabled) { + return undefined; + } + if (this.config.logDir) { + return path.resolve(this.config.logDir); + } + return path.join(process.cwd(), '.agentv', 'logs', 'claude'); + } + + private async createStreamLogger( + request: ProviderRequest, + ): Promise { + const logDir = this.resolveLogDirectory(); + if (!logDir) { + return undefined; + } + try { + await mkdir(logDir, { recursive: true }); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`); + return undefined; + } + + const filePath = path.join(logDir, buildLogFilename(request, this.targetName)); + + try { + const logger = await ClaudeStreamLogger.create({ + filePath, + targetName: this.targetName, + evalCaseId: request.evalCaseId, + attempt: request.attempt, + format: this.config.logFormat ?? 'summary', + }); + recordClaudeLogEntry({ + filePath, + targetName: this.targetName, + evalCaseId: request.evalCaseId, + attempt: request.attempt, + }); + return logger; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.warn(`Skipping Claude stream logging for ${filePath}: ${message}`); + return undefined; + } + } +} + +/** + * Extract text content from Claude's content array format. + * Claude uses: content: [{ type: "text", text: "..." }, ...] 
+ */ +function extractTextContent(content: unknown): string | undefined { + if (typeof content === 'string') { + return content; + } + if (!Array.isArray(content)) { + return undefined; + } + const textParts: string[] = []; + for (const part of content) { + if (!part || typeof part !== 'object') { + continue; + } + const p = part as Record; + if (p.type === 'text' && typeof p.text === 'string') { + textParts.push(p.text); + } + } + return textParts.length > 0 ? textParts.join('\n') : undefined; +} + +/** + * Extract tool calls from Claude's content array format. + * Claude uses: content: [{ type: "tool_use", name: "...", input: {...}, id: "..." }, ...] + */ +function extractToolCalls(content: unknown): readonly ToolCall[] { + if (!Array.isArray(content)) { + return []; + } + const toolCalls: ToolCall[] = []; + for (const part of content) { + if (!part || typeof part !== 'object') { + continue; + } + const p = part as Record; + if (p.type === 'tool_use' && typeof p.name === 'string') { + toolCalls.push({ + tool: p.name, + input: p.input, + id: typeof p.id === 'string' ? p.id : undefined, + }); + } + } + return toolCalls; +} + +class ClaudeStreamLogger { + readonly filePath: string; + private readonly stream: WriteStream; + private readonly startedAt = Date.now(); + private readonly format: 'summary' | 'json'; + + private constructor(filePath: string, format: 'summary' | 'json') { + this.filePath = filePath; + this.format = format; + this.stream = createWriteStream(filePath, { flags: 'a' }); + } + + static async create(options: { + readonly filePath: string; + readonly targetName: string; + readonly evalCaseId?: string; + readonly attempt?: number; + readonly format: 'summary' | 'json'; + }): Promise { + const logger = new ClaudeStreamLogger(options.filePath, options.format); + const header = [ + '# Claude Agent SDK stream log', + `# target: ${options.targetName}`, + options.evalCaseId ? `# eval: ${options.evalCaseId}` : undefined, + options.attempt !== undefined ? 
`# attempt: ${options.attempt + 1}` : undefined, + `# started: ${new Date().toISOString()}`, + '', + ].filter((line): line is string => Boolean(line)); + for (const line of header) { + logger.stream.write(`${line}\n`); + } + return logger; + } + + handleMessage(message: unknown): void { + if (!message || typeof message !== 'object') { + return; + } + const elapsed = formatElapsed(this.startedAt); + const msg = message as Record; + const type = typeof msg.type === 'string' ? msg.type : 'unknown'; + + if (this.format === 'json') { + this.stream.write(`${JSON.stringify({ time: elapsed, type, data: message })}\n`); + } else { + const summary = summarizeMessage(msg); + if (summary) { + this.stream.write(`[+${elapsed}] [${type}] ${summary}\n`); + } + } + } + + async close(): Promise { + await new Promise((resolve, reject) => { + this.stream.once('error', reject); + this.stream.end(() => resolve()); + }); + } +} + +function summarizeMessage(msg: Record): string | undefined { + const type = msg.type as string; + switch (type) { + case 'assistant': { + const message = msg.message as Record | undefined; + if (message) { + const content = message.content; + if (Array.isArray(content) && content.length > 0) { + const first = content[0] as Record | undefined; + if (first?.type === 'tool_use') { + return `tool_use (${first.name})`; + } + if (first?.type === 'text') { + const text = first.text; + if (typeof text === 'string') { + const preview = text.length > 50 ? 
`${text.slice(0, 50)}...` : text; + return preview; + } + } + } + } + return 'message'; + } + case 'user': { + const message = msg.message as Record | undefined; + if (message) { + const content = message.content; + if (Array.isArray(content) && content.length > 0) { + const first = content[0] as Record | undefined; + if (first?.type === 'tool_result') { + return `tool_result (${first.tool_use_id})`; + } + } + } + return 'user'; + } + case 'result': { + const cost = msg.total_cost_usd; + const duration = msg.duration_ms; + if (typeof cost === 'number' && typeof duration === 'number') { + return `$${cost.toFixed(4)}, ${Math.round(duration)}ms`; + } + return 'result'; + } + case 'system': + return 'init'; + default: + return undefined; + } +} + +/** + * Build a process.env copy without variables that block nested Claude sessions. + * The Claude Agent SDK spawns Claude Code as a child process; if CLAUDECODE is + * present the child immediately exits with "cannot be launched inside another + * Claude Code session". 
+ */ +function sanitizeEnvForClaudeSdk(braintrustSpanIds?: { + readonly parentSpanId: string; + readonly rootSpanId: string; +}): Record { + const env = { ...process.env }; + // Remove all Claude Code session markers to allow nested sessions + env.CLAUDECODE = undefined; + env.CLAUDE_CODE_ENTRYPOINT = undefined; + // Inject Braintrust trace IDs so the trace-claude-code plugin can attach + // Claude Code session traces to the AgentV eval span + if (braintrustSpanIds) { + env.CC_PARENT_SPAN_ID = braintrustSpanIds.parentSpanId; + env.CC_ROOT_SPAN_ID = braintrustSpanIds.rootSpanId; + } + return env; +} + +function isClaudeLogStreamingDisabled(): boolean { + const envValue = process.env.AGENTV_CLAUDE_STREAM_LOGS; + if (!envValue) { + return false; + } + const normalized = envValue.trim().toLowerCase(); + return normalized === 'false' || normalized === '0' || normalized === 'off'; +} + +function buildLogFilename(request: ProviderRequest, targetName: string): string { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const evalId = sanitizeForFilename(request.evalCaseId ?? 'claude'); + const attemptSuffix = request.attempt !== undefined ? `_attempt-${request.attempt + 1}` : ''; + const target = sanitizeForFilename(targetName); + return `${timestamp}_${target}_${evalId}${attemptSuffix}_${randomUUID().slice(0, 8)}.log`; +} + +function sanitizeForFilename(value: string): string { + const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, '_'); + return sanitized.length > 0 ? 
sanitized : 'claude'; +} + +function formatElapsed(startedAt: number): string { + const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1000); + const hours = Math.floor(elapsedSeconds / 3600); + const minutes = Math.floor((elapsedSeconds % 3600) / 60); + const seconds = elapsedSeconds % 60; + if (hours > 0) { + return `${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`; + } + return `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`; +} diff --git a/packages/core/src/evaluation/providers/index.ts b/packages/core/src/evaluation/providers/index.ts index 547183776..62cd8eef8 100644 --- a/packages/core/src/evaluation/providers/index.ts +++ b/packages/core/src/evaluation/providers/index.ts @@ -1,4 +1,6 @@ import { AnthropicProvider, AzureProvider, GeminiProvider } from './ai-sdk.js'; +import { ClaudeCliProvider } from './claude-cli.js'; +import { ClaudeSdkProvider } from './claude-sdk.js'; import { ClaudeProvider } from './claude.js'; import { CliProvider } from './cli.js'; import { CodexProvider } from './codex.js'; @@ -87,7 +89,11 @@ export function createBuiltinProviderRegistry(): ProviderRegistry { .register('copilot-cli', (t) => new CopilotCliProvider(t.name, t.config as never)) .register('pi-coding-agent', (t) => new PiCodingAgentProvider(t.name, t.config as never)) .register('pi-agent-sdk', (t) => new PiAgentSdkProvider(t.name, t.config as never)) - .register('claude', (t) => new ClaudeProvider(t.name, t.config as never)) + // claude-cli is the new default subprocess provider; claude is an alias + .register('claude-cli', (t) => new ClaudeCliProvider(t.name, t.config as never)) + .register('claude', (t) => new ClaudeCliProvider(t.name, t.config as never)) + // claude-sdk is the explicit SDK provider (requires @anthropic-ai/claude-agent-sdk) + .register('claude-sdk', (t) => new ClaudeSdkProvider(t.name, t.config as never)) .register('mock', (t) => new 
MockProvider(t.name, t.config as never)) .register('vscode', (t) => new VSCodeProvider(t.name, t.config as never, 'vscode')) .register( diff --git a/packages/core/src/evaluation/providers/targets.ts b/packages/core/src/evaluation/providers/targets.ts index 1fb331d6c..aa30b06b6 100644 --- a/packages/core/src/evaluation/providers/targets.ts +++ b/packages/core/src/evaluation/providers/targets.ts @@ -596,6 +596,22 @@ export type ResolvedTarget = readonly providerBatching?: boolean; readonly config: ClaudeResolvedConfig; } + | { + readonly kind: 'claude-cli'; + readonly name: string; + readonly judgeTarget?: string; + readonly workers?: number; + readonly providerBatching?: boolean; + readonly config: ClaudeResolvedConfig; + } + | { + readonly kind: 'claude-sdk'; + readonly name: string; + readonly judgeTarget?: string; + readonly workers?: number; + readonly providerBatching?: boolean; + readonly config: ClaudeResolvedConfig; + } | { readonly kind: 'mock'; readonly name: string; @@ -788,9 +804,18 @@ export function resolveTargetDefinition( }; case 'claude': case 'claude-code': + case 'claude-cli': + return { + kind: 'claude-cli', + name: parsed.name, + judgeTarget: parsed.judge_target, + workers: parsed.workers, + providerBatching, + config: resolveClaudeConfig(parsed, env, evalFilePath), + }; case 'claude-sdk': return { - kind: 'claude', + kind: 'claude-sdk', name: parsed.name, judgeTarget: parsed.judge_target, workers: parsed.workers, diff --git a/packages/core/src/evaluation/providers/types.ts b/packages/core/src/evaluation/providers/types.ts index a30108d5b..af5e3b6a1 100644 --- a/packages/core/src/evaluation/providers/types.ts +++ b/packages/core/src/evaluation/providers/types.ts @@ -20,6 +20,8 @@ export type ProviderKind = | 'pi-coding-agent' | 'pi-agent-sdk' | 'claude' + | 'claude-cli' + | 'claude-sdk' | 'cli' | 'mock' | 'vscode' @@ -35,6 +37,8 @@ export const AGENT_PROVIDER_KINDS: readonly ProviderKind[] = [ 'copilot-cli', 'pi-coding-agent', 'claude', + 
'claude-cli', + 'claude-sdk', 'vscode', 'vscode-insiders', ] as const; @@ -53,6 +57,8 @@ export const KNOWN_PROVIDERS: readonly ProviderKind[] = [ 'pi-coding-agent', 'pi-agent-sdk', 'claude', + 'claude-cli', + 'claude-sdk', 'cli', 'mock', 'vscode', @@ -73,7 +79,6 @@ export const PROVIDER_ALIASES: readonly string[] = [ 'pi', // alias for "pi-coding-agent" 'claude-code', // alias for "claude" (legacy) - 'claude-sdk', // alias for "claude" 'openai', // legacy/future support 'bedrock', // legacy/future support 'vertex', // legacy/future support diff --git a/packages/core/src/evaluation/registry/builtin-evaluators.ts b/packages/core/src/evaluation/registry/builtin-evaluators.ts index 60d190f60..f0930e092 100644 --- a/packages/core/src/evaluation/registry/builtin-evaluators.ts +++ b/packages/core/src/evaluation/registry/builtin-evaluators.ts @@ -19,6 +19,7 @@ import { LlmJudgeEvaluator, TokenUsageEvaluator, ToolTrajectoryEvaluator, + TriggerJudgeEvaluator, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, @@ -57,6 +58,7 @@ import type { RegexEvaluatorConfig, StartsWithEvaluatorConfig, TokenUsageEvaluatorConfig, + TriggerJudgeEvaluatorConfig, } from '../types.js'; import { DeterministicAssertionEvaluator, @@ -167,6 +169,11 @@ export const toolTrajectoryFactory: EvaluatorFactoryFn = (config) => { }); }; +/** Factory for `trigger-judge` evaluators. */ +export const triggerJudgeFactory: EvaluatorFactoryFn = (config) => { + return new TriggerJudgeEvaluator(config as TriggerJudgeEvaluatorConfig); +}; + /** Factory for `field-accuracy` evaluators. 
*/ export const fieldAccuracyFactory: EvaluatorFactoryFn = (config) => { return new FieldAccuracyEvaluator({ @@ -428,6 +435,7 @@ export function createBuiltinRegistry(): EvaluatorRegistry { .register('code-judge', codeFactory) .register('composite', compositeFactory) .register('tool-trajectory', toolTrajectoryFactory) + .register('trigger-judge', triggerJudgeFactory) .register('field-accuracy', fieldAccuracyFactory) .register('latency', latencyFactory) .register('cost', costFactory) diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts index ed09d670b..97ba267e1 100644 --- a/packages/core/src/evaluation/types.ts +++ b/packages/core/src/evaluation/types.ts @@ -153,6 +153,7 @@ const EVALUATOR_KIND_VALUES = [ 'rubric', 'composite', 'tool-trajectory', + 'trigger-judge', 'field-accuracy', 'latency', 'cost', @@ -738,11 +739,28 @@ export type InlineAssertEvaluatorConfig = { readonly negate?: boolean; }; +/** + * Configuration for the trigger-judge evaluator. + * Checks whether the agent invoked a named skill during execution by + * scanning tool calls for Skill invocations or skill file reads. 
+ */ +export type TriggerJudgeEvaluatorConfig = { + readonly name: string; + readonly type: 'trigger-judge'; + /** The skill name to check for (matched against Skill tool args and skill file paths) */ + readonly skill: string; + readonly weight?: number; + readonly required?: boolean | number; + /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */ + readonly negate?: boolean; +}; + export type EvaluatorConfig = | CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | CompositeEvaluatorConfig | ToolTrajectoryEvaluatorConfig + | TriggerJudgeEvaluatorConfig | FieldAccuracyEvaluatorConfig | LatencyEvaluatorConfig | CostEvaluatorConfig diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts index 690373b43..f1a5ca5c3 100644 --- a/packages/core/src/evaluation/validation/eval-file.schema.ts +++ b/packages/core/src/evaluation/validation/eval-file.schema.ts @@ -222,12 +222,18 @@ const RubricsSchema = EvaluatorCommonSchema.extend({ criteria: z.array(RubricItemSchema).min(1), }); +const TriggerJudgeSchema = EvaluatorCommonSchema.extend({ + type: z.enum(['trigger-judge', 'trigger_judge']), + skill: z.string(), +}); + /** Union of all evaluator types */ const EvaluatorSchema = z.union([ CodeJudgeSchema, LlmJudgeSchema, CompositeSchema, ToolTrajectorySchema, + TriggerJudgeSchema, FieldAccuracySchema, LatencySchema, CostSchema, diff --git a/packages/core/src/evaluation/validation/targets-validator.ts b/packages/core/src/evaluation/validation/targets-validator.ts index c507308e9..068848e00 100644 --- a/packages/core/src/evaluation/validation/targets-validator.ts +++ b/packages/core/src/evaluation/validation/targets-validator.ts @@ -217,6 +217,7 @@ function getKnownSettings(provider: string): Set | null { return COPILOT_CLI_SETTINGS; case 'claude': case 'claude-code': + case 'claude-cli': case 'claude-sdk': return CLAUDE_SETTINGS; case 'vscode': diff --git 
a/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts b/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts new file mode 100644 index 000000000..4834d02c5 --- /dev/null +++ b/packages/core/test/evaluation/providers/claude-provider-aliases.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from 'bun:test'; + +import { ClaudeCliProvider } from '../../../src/evaluation/providers/claude-cli.js'; +import { ClaudeSdkProvider } from '../../../src/evaluation/providers/claude-sdk.js'; +import { ClaudeProvider } from '../../../src/evaluation/providers/claude.js'; +import { createBuiltinProviderRegistry } from '../../../src/evaluation/providers/index.js'; + +const mockClaudeConfig = { + model: undefined, + cwd: undefined, + timeoutMs: undefined, + logDir: undefined, + logFormat: 'summary' as const, + systemPrompt: undefined, + maxTurns: undefined, + maxBudgetUsd: undefined, +}; + +describe('Claude provider alias resolution', () => { + const registry = createBuiltinProviderRegistry(); + + it('creates a ClaudeCliProvider for claude-cli kind', () => { + const provider = registry.create({ + name: 'test-target', + kind: 'claude-cli', + config: mockClaudeConfig, + }); + expect(provider).toBeInstanceOf(ClaudeCliProvider); + expect(provider.kind).toBe('claude-cli'); + expect(provider.id).toBe('claude-cli:test-target'); + }); + + it('creates a ClaudeCliProvider for claude kind (alias for claude-cli)', () => { + const provider = registry.create({ + name: 'test-target', + kind: 'claude', + config: mockClaudeConfig, + }); + expect(provider).toBeInstanceOf(ClaudeCliProvider); + expect(provider.kind).toBe('claude-cli'); + }); + + it('creates a ClaudeSdkProvider for claude-sdk kind', () => { + const provider = registry.create({ + name: 'test-target', + kind: 'claude-sdk', + config: mockClaudeConfig, + }); + expect(provider).toBeInstanceOf(ClaudeSdkProvider); + expect(provider.kind).toBe('claude-sdk'); + 
expect(provider.id).toBe('claude-sdk:test-target'); + }); + + it('ClaudeCliProvider and ClaudeProvider are different classes', () => { + // ClaudeProvider is the legacy SDK provider kept for reference + const cliProvider = new ClaudeCliProvider('target', mockClaudeConfig); + const sdkProvider = new ClaudeProvider('target', mockClaudeConfig as never); + expect(cliProvider).toBeInstanceOf(ClaudeCliProvider); + expect(sdkProvider).toBeInstanceOf(ClaudeProvider); + expect(cliProvider.kind).toBe('claude-cli'); + expect(sdkProvider.kind).toBe('claude'); + }); +}); diff --git a/packages/core/test/evaluation/trigger-judge-evaluator.test.ts b/packages/core/test/evaluation/trigger-judge-evaluator.test.ts new file mode 100644 index 000000000..2773000f2 --- /dev/null +++ b/packages/core/test/evaluation/trigger-judge-evaluator.test.ts @@ -0,0 +1,253 @@ +import { describe, expect, it } from 'bun:test'; + +import { TriggerJudgeEvaluator } from '../../src/evaluation/evaluators/trigger-judge.js'; +import type { TriggerJudgeEvaluatorConfig } from '../../src/evaluation/evaluators/trigger-judge.js'; +import type { EvaluationContext } from '../../src/evaluation/evaluators/types.js'; +import type { ResolvedTarget } from '../../src/evaluation/providers/targets.js'; +import type { Message, Provider } from '../../src/evaluation/providers/types.js'; +import type { EvalTest } from '../../src/evaluation/types.js'; + +// Minimal mock objects +const mockTarget: ResolvedTarget = { + name: 'mock', + kind: 'mock', + config: {}, +}; + +const mockProvider: Provider = { + id: 'mock', + kind: 'mock', + targetName: 'mock', + async invoke() { + return { output: [] }; + }, +}; + +const mockEvalCase: EvalTest = { + id: 'test-case', + question: 'Test question', + input: [], + input_segments: [], + expected_output: [], + guideline_paths: [], + file_paths: [], + criteria: 'Expected outcome', +}; + +function createContext(output?: readonly Message[]): EvaluationContext { + return { + evalCase: mockEvalCase, + 
candidate: '', + target: mockTarget, + provider: mockProvider, + attempt: 0, + promptInputs: { question: '', guidelines: '' }, + now: new Date(), + output, + }; +} + +function makeConfig(skill: string): TriggerJudgeEvaluatorConfig { + return { name: 'trigger-judge-test', type: 'trigger-judge', skill }; +} + +describe('TriggerJudgeEvaluator', () => { + describe('no output / no tool calls', () => { + it('fails when no output is provided', () => { + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(undefined)); + expect(result.score).toBe(0); + expect(result.verdict).toBe('fail'); + expect(result.misses.length).toBeGreaterThan(0); + }); + + it('fails when output is empty array', () => { + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext([])); + expect(result.score).toBe(0); + expect(result.verdict).toBe('fail'); + }); + + it('fails when messages have no tool calls', () => { + const output: Message[] = [{ role: 'assistant', content: 'Hello world' }]; + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(0); + expect(result.verdict).toBe('fail'); + }); + }); + + describe('Skill tool call detection', () => { + it('passes when Skill tool is called with exact skill name in args.skill', () => { + const output: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { + tool: 'Skill', + input: { skill: 'ship' }, + }, + ], + }, + ]; + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(1); + expect(result.verdict).toBe('pass'); + expect(result.hits.length).toBeGreaterThan(0); + }); + + it('passes when Skill tool args.skill contains the skill name (case-insensitive)', () => { + const output: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { + 
tool: 'Skill', + input: { skill: 'agentv-ship' }, + }, + ], + }, + ]; + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(1); + expect(result.verdict).toBe('pass'); + }); + + it('passes when Skill tool args.skill matches case-insensitively', () => { + const output: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { + tool: 'Skill', + input: { skill: 'SHIP' }, + }, + ], + }, + ]; + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(1); + expect(result.verdict).toBe('pass'); + }); + + it('fails when Skill tool is called with a different skill name', () => { + const output: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { + tool: 'Skill', + input: { skill: 'create-eval' }, + }, + ], + }, + ]; + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(0); + expect(result.verdict).toBe('fail'); + }); + + it('passes when Skill tool is called in a later message', () => { + const output: Message[] = [ + { + role: 'assistant', + content: 'Thinking...', + toolCalls: [{ tool: 'Read', input: { file_path: '/some/file.ts' } }], + }, + { + role: 'assistant', + toolCalls: [{ tool: 'Skill', input: { skill: 'ship' } }], + }, + ]; + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(1); + expect(result.verdict).toBe('pass'); + }); + }); + + describe('Read tool call detection (skill file)', () => { + it('passes when a Read tool loads a file in .claude/commands/ containing skill name', () => { + const output: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { + tool: 'Read', + input: { file_path: '/home/user/project/.claude/commands/ship.md' }, + }, + ], + }, + ]; + 
const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(1); + expect(result.verdict).toBe('pass'); + }); + + it('passes when a Read tool loads a file in .claude/skills/ containing skill name', () => { + const output: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { + tool: 'Read', + input: { file_path: '/home/user/project/.claude/skills/ship/README.md' }, + }, + ], + }, + ]; + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(1); + expect(result.verdict).toBe('pass'); + }); + + it('fails when Read tool reads a non-skill file', () => { + const output: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { + tool: 'Read', + input: { file_path: '/home/user/project/src/main.ts' }, + }, + ], + }, + ]; + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(0); + expect(result.verdict).toBe('fail'); + }); + + it('fails when Read tool reads from .claude/commands/ but skill name does not match', () => { + const output: Message[] = [ + { + role: 'assistant', + toolCalls: [ + { + tool: 'Read', + input: { file_path: '/home/user/project/.claude/commands/create-eval.md' }, + }, + ], + }, + ]; + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + const result = evaluator.evaluate(createContext(output)); + expect(result.score).toBe(0); + expect(result.verdict).toBe('fail'); + }); + }); + + describe('provider alias resolution metadata (integration)', () => { + it('has kind === trigger-judge', () => { + const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); + expect(evaluator.kind).toBe('trigger-judge'); + }); + }); +}); diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json 
b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json index 9093c7e48..58c5081d7 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json +++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json @@ -675,6 +675,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -1749,6 +1785,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -2823,6 +2895,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + 
"type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -3909,6 +4017,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -4983,6 +5127,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -6057,6 +6237,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ 
-7546,6 +7762,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -8620,6 +8872,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -9676,22 +9964,58 @@ } ] }, - "argsMatch": { + "argsMatch": { + "anyOf": [ + { + "type": "string", + "enum": ["exact", "ignore", "subset", "superset"] + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": ["type", "mode"], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { "anyOf": [ { - "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "type": "boolean" }, { - "type": "array", - "items": { - "type": "string" - } + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 } ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + 
"enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" } }, - "required": ["type", "mode"], + "required": ["type", "skill"], "additionalProperties": false }, { @@ -10780,6 +11104,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -11854,6 +12214,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -12928,6 +13324,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + 
"additionalProperties": false + }, { "type": "object", "properties": { @@ -14321,6 +14753,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -15395,6 +15863,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -16469,6 +16973,42 @@ "required": ["type", "mode"], "additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { @@ -18663,6 +19203,42 @@ "required": ["type", "mode"], 
"additionalProperties": false }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "weight": { + "type": "number", + "minimum": 0 + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number", + "exclusiveMinimum": 0, + "maximum": 1 + } + ] + }, + "negate": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": ["trigger-judge", "trigger_judge"] + }, + "skill": { + "type": "string" + } + }, + "required": ["type", "skill"], + "additionalProperties": false + }, { "type": "object", "properties": { From d028fde8959a88653e5114612ef230795edcb376 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Sat, 14 Mar 2026 12:25:05 +0000 Subject: [PATCH 2/8] fix(providers): guard stdio access for null safety in claude-cli provider --- .../src/evaluation/providers/claude-cli.ts | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/packages/core/src/evaluation/providers/claude-cli.ts b/packages/core/src/evaluation/providers/claude-cli.ts index 67cf96295..cbc3d5fc2 100644 --- a/packages/core/src/evaluation/providers/claude-cli.ts +++ b/packages/core/src/evaluation/providers/claude-cli.ts @@ -285,28 +285,32 @@ export class ClaudeCliProvider implements Provider { timeoutHandle.unref?.(); } - child.stdout.setEncoding('utf8'); - child.stdout.on('data', (chunk: string) => { - stdout += chunk; - stdoutBuffer += chunk; - // Process complete lines - const lines = stdoutBuffer.split(/\r?\n/); - stdoutBuffer = lines.pop() ?? ''; - for (const line of lines) { - const trimmed = line.trim(); - if (trimmed.length > 0) { - options.onLine(trimmed); + if (child.stdout) { + child.stdout.setEncoding('utf8'); + child.stdout.on('data', (chunk: string) => { + stdout += chunk; + stdoutBuffer += chunk; + // Process complete lines + const lines = stdoutBuffer.split(/\r?\n/); + stdoutBuffer = lines.pop() ?? 
''; + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed.length > 0) { + options.onLine(trimmed); + } } - } - }); + }); + } - child.stderr.setEncoding('utf8'); - child.stderr.on('data', (chunk: string) => { - stderr += chunk; - }); + if (child.stderr) { + child.stderr.setEncoding('utf8'); + child.stderr.on('data', (chunk: string) => { + stderr += chunk; + }); + } // Send prompt via stdin - child.stdin.end(options.prompt); + child.stdin?.end(options.prompt); const cleanup = (): void => { if (timeoutHandle) { From 13e33046529d1ab953f0967d3a130e8f71b3b3cd Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Sat, 14 Mar 2026 12:51:05 +0000 Subject: [PATCH 3/8] fix(providers): add --verbose flag to claude-cli subprocess invocation --output-format stream-json requires --verbose when using -p (--print) mode. Without it the CLI exits with code 1 immediately. Also adds E2E tests validating output, tokenUsage, durationMs, and log file emission parity between claude-cli and claude-sdk providers. 
--- packages/core/src/evaluation/providers/claude-cli.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/core/src/evaluation/providers/claude-cli.ts b/packages/core/src/evaluation/providers/claude-cli.ts index cbc3d5fc2..659d82097 100644 --- a/packages/core/src/evaluation/providers/claude-cli.ts +++ b/packages/core/src/evaluation/providers/claude-cli.ts @@ -167,7 +167,14 @@ export class ClaudeCliProvider implements Provider { } private buildArgs(): string[] { - const args = ['-p', '--output-format', 'stream-json', '--include-partial-messages']; + // --verbose is required when combining -p with --output-format stream-json + const args = [ + '-p', + '--output-format', + 'stream-json', + '--include-partial-messages', + '--verbose', + ]; if (this.config.model) { args.push('--model', this.config.model); From a10f11898308d2a41a99c69c6bd622b13b8effe7 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Sat, 14 Mar 2026 13:11:55 +0000 Subject: [PATCH 4/8] refactor(evaluators): move trigger-judge from built-in to .agentv/judges/ example Removes TriggerJudgeEvaluator from core built-ins (violates Principles 1 & 2: Claude-Code-specific, expressible as a code-judge script) and adds: - packages/core/src/evaluation/registry/judge-discovery.ts: new discoverJudges() function, mirroring discoverAssertions() but scans .agentv/judges/ - Wired discoverJudges into orchestrator alongside discoverAssertions - Exported discoverJudges from core public API and registry/index.ts - examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts: reference implementation as a code-judge script using defineCodeJudge - Regenerated eval-schema.json (trigger-judge removed from EvaluatorSchema union) --- .../.agentv/judges/trigger-judge.ts | 69 + .../core/src/evaluation/evaluators/index.ts | 3 - .../evaluation/evaluators/trigger-judge.ts | 147 - .../evaluation/loaders/evaluator-parser.ts | 23 - packages/core/src/evaluation/orchestrator.ts | 3 +- 
.../evaluation/registry/builtin-evaluators.ts | 8 - .../core/src/evaluation/registry/index.ts | 1 + .../evaluation/registry/judge-discovery.ts | 78 + packages/core/src/evaluation/types.ts | 18 - .../evaluation/validation/eval-file.schema.ts | 6 - packages/core/src/index.ts | 1 + .../trigger-judge-evaluator.test.ts | 253 - .../references/eval-schema.json | 4059 ++++++++++++----- 13 files changed, 2960 insertions(+), 1709 deletions(-) create mode 100644 examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts delete mode 100644 packages/core/src/evaluation/evaluators/trigger-judge.ts create mode 100644 packages/core/src/evaluation/registry/judge-discovery.ts delete mode 100644 packages/core/test/evaluation/trigger-judge-evaluator.test.ts diff --git a/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts new file mode 100644 index 000000000..dc41e0d10 --- /dev/null +++ b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts @@ -0,0 +1,69 @@ +#!/usr/bin/env bun +/** + * trigger-judge: detects whether the agent invoked a named Claude Code skill. + * + * Usage in eval YAML: + * evaluators: + * - type: trigger-judge # discovered from .agentv/judges/ + * skill: my-skill-name # passed via config + * + * Checks: + * - Skill tool call where args.skill matches the configured skill name + * - Read tool call loading a file from .claude/commands/ or .claude/skills/ + * whose path contains the skill name + */ +import { defineCodeJudge } from '@agentv/eval'; + +export default defineCodeJudge(({ output, config }) => { + const skillName = config?.skill as string | undefined; + if (!skillName) { + return { score: 0, misses: ['config.skill is required'], reasoning: 'No skill name configured' }; + } + + const allToolCalls = (output ?? []).flatMap((msg) => msg.toolCalls ?? 
[]); + + // Check for Skill tool invocation + const skillTrigger = allToolCalls.find( + (tc) => + tc.tool === 'Skill' && + typeof tc.input === 'object' && + tc.input !== null && + String((tc.input as Record).skill ?? '').toLowerCase().includes(skillName.toLowerCase()), + ); + + if (skillTrigger) { + return { + score: 1, + hits: [`Skill tool invoked with skill="${(skillTrigger.input as Record).skill}"`], + reasoning: `Agent triggered skill "${skillName}"`, + }; + } + + // Check for Read tool loading a skill file + const readTrigger = allToolCalls.find((tc) => { + if (tc.tool !== 'Read') return false; + const filePath = String( + (tc.input as Record | null)?.file_path ?? + (tc.input as Record | null)?.path ?? + '', + ).toLowerCase(); + return ( + (filePath.includes('.claude/commands/') || filePath.includes('.claude/skills/')) && + filePath.includes(skillName.toLowerCase()) + ); + }); + + if (readTrigger) { + return { + score: 1, + hits: [`Read tool loaded skill file: ${(readTrigger.input as Record)?.file_path ?? 
(readTrigger.input as Record)?.path}`], + reasoning: `Agent read skill "${skillName}" definition`, + }; + } + + return { + score: 0, + misses: [`Skill "${skillName}" was not triggered`], + reasoning: `No Skill or Read tool call matched "${skillName}"`, + }; +}); diff --git a/packages/core/src/evaluation/evaluators/index.ts b/packages/core/src/evaluation/evaluators/index.ts index 59355a7e6..2a7ea58aa 100644 --- a/packages/core/src/evaluation/evaluators/index.ts +++ b/packages/core/src/evaluation/evaluators/index.ts @@ -64,9 +64,6 @@ export type { TokenUsageEvaluatorOptions } from './token-usage.js'; export { ToolTrajectoryEvaluator } from './tool-trajectory.js'; export type { ToolTrajectoryEvaluatorOptions } from './tool-trajectory.js'; -export { TriggerJudgeEvaluator } from './trigger-judge.js'; -export type { TriggerJudgeEvaluatorConfig } from '../types.js'; - // Deterministic assertions export { runContainsAssertion, diff --git a/packages/core/src/evaluation/evaluators/trigger-judge.ts b/packages/core/src/evaluation/evaluators/trigger-judge.ts deleted file mode 100644 index 475e21ad0..000000000 --- a/packages/core/src/evaluation/evaluators/trigger-judge.ts +++ /dev/null @@ -1,147 +0,0 @@ -import type { ToolCall } from '../providers/types.js'; -import type { TriggerJudgeEvaluatorConfig } from '../types.js'; -import type { EvaluationContext, EvaluationScore, Evaluator } from './types.js'; - -export type { TriggerJudgeEvaluatorConfig }; - -/** - * TriggerJudgeEvaluator checks whether the agent invoked a named skill during - * its execution. It scans the response tool calls for: - * - * 1. A `Skill` tool call where args.skill contains the skill name - * 2. A `Read` tool call where the file_path contains the skill name and a - * skill-related directory (.claude/commands/ or .claude/skills/) - * - * This enables post-hoc verification that the agent used the correct skill - * rather than re-implementing the logic inline. 
- */ -export class TriggerJudgeEvaluator implements Evaluator { - readonly kind = 'trigger-judge'; - - private readonly config: TriggerJudgeEvaluatorConfig; - - constructor(config: TriggerJudgeEvaluatorConfig) { - this.config = config; - } - - evaluate(context: EvaluationContext): EvaluationScore { - const skillName = this.config.skill; - const allToolCalls = collectAllToolCalls(context.output); - - if (allToolCalls.length === 0) { - return { - score: 0, - verdict: 'fail', - hits: [], - misses: [`No tool calls found — skill '${skillName}' was not triggered`], - expectedAspectCount: 1, - reasoning: `No tool calls were made, so skill '${skillName}' was not invoked.`, - }; - } - - // Check for Skill tool call with matching skill name - const skillToolCall = findSkillToolCall(allToolCalls, skillName); - if (skillToolCall) { - const argsStr = JSON.stringify(skillToolCall.input ?? {}); - return { - score: 1, - verdict: 'pass', - hits: [`Skill tool called with skill='${skillName}' (args: ${argsStr})`], - misses: [], - expectedAspectCount: 1, - reasoning: `The agent invoked the '${skillName}' skill via the Skill tool.`, - }; - } - - // Check for Read tool call loading a skill file - const readToolCall = findSkillReadToolCall(allToolCalls, skillName); - if (readToolCall) { - const filePath = - typeof (readToolCall.input as Record | undefined)?.file_path === 'string' - ? (readToolCall.input as Record).file_path - : String(readToolCall.input ?? 
''); - return { - score: 1, - verdict: 'pass', - hits: [`Skill file read: ${filePath}`], - misses: [], - expectedAspectCount: 1, - reasoning: `The agent read the skill file for '${skillName}' at '${filePath}'.`, - }; - } - - return { - score: 0, - verdict: 'fail', - hits: [], - misses: [`Skill '${skillName}' was not triggered (${allToolCalls.length} tool calls made)`], - expectedAspectCount: 1, - reasoning: `The agent made ${allToolCalls.length} tool call(s) but did not invoke skill '${skillName}'.`, - }; - } -} - -/** - * Collect all tool calls from all output messages. - */ -function collectAllToolCalls( - output: readonly import('../providers/types.js').Message[] | undefined, -): readonly ToolCall[] { - if (!output || output.length === 0) { - return []; - } - const result: ToolCall[] = []; - for (const message of output) { - if (message.toolCalls && message.toolCalls.length > 0) { - result.push(...message.toolCalls); - } - } - return result; -} - -/** - * Find a Skill tool call where args.skill matches (exact or contains) the skill name. - */ -function findSkillToolCall( - toolCalls: readonly ToolCall[], - skillName: string, -): ToolCall | undefined { - const lowerSkill = skillName.toLowerCase(); - for (const tc of toolCalls) { - if (tc.tool !== 'Skill') continue; - const args = tc.input as Record | undefined; - if (!args) continue; - const argSkill = args.skill ?? args.name ?? args.args; - if (typeof argSkill === 'string') { - const lowerArgSkill = argSkill.toLowerCase(); - if (lowerArgSkill === lowerSkill || lowerArgSkill.includes(lowerSkill)) { - return tc; - } - } - } - return undefined; -} - -/** - * Find a Read tool call where the file_path contains the skill name and a - * known skill directory (.claude/commands/ or .claude/skills/). 
- */ -function findSkillReadToolCall( - toolCalls: readonly ToolCall[], - skillName: string, -): ToolCall | undefined { - const lowerSkill = skillName.toLowerCase(); - const skillDirs = ['.claude/commands/', '.claude/skills/']; - for (const tc of toolCalls) { - if (tc.tool !== 'Read') continue; - const args = tc.input as Record | undefined; - if (!args) continue; - const filePath = typeof args.file_path === 'string' ? args.file_path.toLowerCase() : ''; - if (!filePath) continue; - const inSkillDir = skillDirs.some((dir) => filePath.includes(dir)); - if (inSkillDir && filePath.includes(lowerSkill)) { - return tc; - } - } - return undefined; -} diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts index db6393dbb..4e540f772 100644 --- a/packages/core/src/evaluation/loaders/evaluator-parser.ts +++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts @@ -596,29 +596,6 @@ async function parseEvaluatorList( continue; } - if (typeValue === 'trigger-judge') { - const skill = asString(rawEvaluator.skill); - if (!skill) { - logWarning( - `Skipping trigger-judge evaluator '${name}' in '${evalId}': missing required 'skill' field`, - ); - continue; - } - - const weight = validateWeight(rawEvaluator.weight, name, evalId); - const required = parseRequired(rawEvaluator.required); - - evaluators.push({ - name, - type: 'trigger-judge', - skill, - ...(weight !== undefined ? { weight } : {}), - ...(required !== undefined ? { required } : {}), - ...(negate !== undefined ? 
{ negate } : {}), - } as import('../types.js').TriggerJudgeEvaluatorConfig); - continue; - } - if (typeValue === 'field-accuracy') { const rawFields = rawEvaluator.fields; if (!Array.isArray(rawFields)) { diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index 17d0f2abb..12047bb8c 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -28,7 +28,7 @@ import type { TargetDefinition, } from './providers/types.js'; import { extractLastAssistantContent, isAgentProvider } from './providers/types.js'; -import { createBuiltinRegistry, discoverAssertions } from './registry/index.js'; +import { createBuiltinRegistry, discoverAssertions, discoverJudges } from './registry/index.js'; import { type TokenUsage, type TraceSummary, @@ -375,6 +375,7 @@ export async function runEvaluation( // Directory containing the eval YAML file, used as default cwd for workspace scripts const evalDir = discoveryBaseDir; await discoverAssertions(typeRegistry, discoveryBaseDir); + await discoverJudges(typeRegistry, discoveryBaseDir); // Discover custom providers from .agentv/providers/ directory const providerRegistry = createBuiltinProviderRegistry(); diff --git a/packages/core/src/evaluation/registry/builtin-evaluators.ts b/packages/core/src/evaluation/registry/builtin-evaluators.ts index f0930e092..60d190f60 100644 --- a/packages/core/src/evaluation/registry/builtin-evaluators.ts +++ b/packages/core/src/evaluation/registry/builtin-evaluators.ts @@ -19,7 +19,6 @@ import { LlmJudgeEvaluator, TokenUsageEvaluator, ToolTrajectoryEvaluator, - TriggerJudgeEvaluator, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, @@ -58,7 +57,6 @@ import type { RegexEvaluatorConfig, StartsWithEvaluatorConfig, TokenUsageEvaluatorConfig, - TriggerJudgeEvaluatorConfig, } from '../types.js'; import { DeterministicAssertionEvaluator, @@ -169,11 +167,6 @@ export const toolTrajectoryFactory: 
EvaluatorFactoryFn = (config) => { }); }; -/** Factory for `trigger-judge` evaluators. */ -export const triggerJudgeFactory: EvaluatorFactoryFn = (config) => { - return new TriggerJudgeEvaluator(config as TriggerJudgeEvaluatorConfig); -}; - /** Factory for `field-accuracy` evaluators. */ export const fieldAccuracyFactory: EvaluatorFactoryFn = (config) => { return new FieldAccuracyEvaluator({ @@ -435,7 +428,6 @@ export function createBuiltinRegistry(): EvaluatorRegistry { .register('code-judge', codeFactory) .register('composite', compositeFactory) .register('tool-trajectory', toolTrajectoryFactory) - .register('trigger-judge', triggerJudgeFactory) .register('field-accuracy', fieldAccuracyFactory) .register('latency', latencyFactory) .register('cost', costFactory) diff --git a/packages/core/src/evaluation/registry/index.ts b/packages/core/src/evaluation/registry/index.ts index fc60bb177..75c8332dc 100644 --- a/packages/core/src/evaluation/registry/index.ts +++ b/packages/core/src/evaluation/registry/index.ts @@ -7,3 +7,4 @@ export { EvaluatorRegistry, DeterministicAssertionEvaluator } from './evaluator- export type { EvaluatorDispatchContext, EvaluatorFactoryFn } from './evaluator-registry.js'; export { createBuiltinRegistry } from './builtin-evaluators.js'; export { discoverAssertions } from './assertion-discovery.js'; +export { discoverJudges } from './judge-discovery.js'; diff --git a/packages/core/src/evaluation/registry/judge-discovery.ts b/packages/core/src/evaluation/registry/judge-discovery.ts new file mode 100644 index 000000000..c4a843565 --- /dev/null +++ b/packages/core/src/evaluation/registry/judge-discovery.ts @@ -0,0 +1,78 @@ +/** + * Convention-based discovery of custom judge scripts. + * + * Scans `.agentv/judges/` for TypeScript/JavaScript files and registers + * them as code-judge evaluators in the registry. The file name (without + * extension) becomes the evaluator type name. 
+ * + * Example: `.agentv/judges/trigger-judge.ts` → type "trigger-judge" in EVAL.yaml + */ + +import path from 'node:path'; +import fg from 'fast-glob'; + +import { CodeEvaluator } from '../evaluators/code-evaluator.js'; +import type { EvaluatorFactoryFn } from './evaluator-registry.js'; +import type { EvaluatorRegistry } from './evaluator-registry.js'; + +/** + * Discover custom judge scripts from `.agentv/judges/` and register + * them as evaluator types in the registry. + * + * @param registry - The evaluator registry to register discovered judges into + * @param baseDir - The base directory to search from (typically project root or eval file dir) + * @returns Names of discovered judge types + */ +export async function discoverJudges( + registry: EvaluatorRegistry, + baseDir: string, +): Promise { + const patterns = ['*.ts', '*.js', '*.mts', '*.mjs']; + + // Search baseDir and its ancestors for .agentv/judges/ + const candidateDirs: string[] = []; + let dir = path.resolve(baseDir); + const root = path.parse(dir).root; + while (dir !== root) { + candidateDirs.push(path.join(dir, '.agentv', 'judges')); + dir = path.dirname(dir); + } + + let files: string[] = []; + for (const judgesDir of candidateDirs) { + try { + const found = await fg(patterns, { + cwd: judgesDir, + absolute: true, + onlyFiles: true, + }); + files = files.concat(found); + } catch { + // Directory doesn't exist — skip + } + } + + const discoveredTypes: string[] = []; + + for (const filePath of files) { + const basename = path.basename(filePath); + const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, ''); + + // Don't override built-in types + if (registry.has(typeName)) { + continue; + } + + const factory: EvaluatorFactoryFn = (_config, context) => { + return new CodeEvaluator({ + command: ['bun', 'run', filePath], + agentTimeoutMs: context.agentTimeoutMs, + }); + }; + + registry.register(typeName, factory); + discoveredTypes.push(typeName); + } + + return discoveredTypes; +} diff --git 
a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts index 97ba267e1..ed09d670b 100644 --- a/packages/core/src/evaluation/types.ts +++ b/packages/core/src/evaluation/types.ts @@ -153,7 +153,6 @@ const EVALUATOR_KIND_VALUES = [ 'rubric', 'composite', 'tool-trajectory', - 'trigger-judge', 'field-accuracy', 'latency', 'cost', @@ -739,28 +738,11 @@ export type InlineAssertEvaluatorConfig = { readonly negate?: boolean; }; -/** - * Configuration for the trigger-judge evaluator. - * Checks whether the agent invoked a named skill during execution by - * scanning tool calls for Skill invocations or skill file reads. - */ -export type TriggerJudgeEvaluatorConfig = { - readonly name: string; - readonly type: 'trigger-judge'; - /** The skill name to check for (matched against Skill tool args and skill file paths) */ - readonly skill: string; - readonly weight?: number; - readonly required?: boolean | number; - /** When true, inverts the evaluator score (1 - score) and swaps pass/fail verdict */ - readonly negate?: boolean; -}; - export type EvaluatorConfig = | CodeEvaluatorConfig | LlmJudgeEvaluatorConfig | CompositeEvaluatorConfig | ToolTrajectoryEvaluatorConfig - | TriggerJudgeEvaluatorConfig | FieldAccuracyEvaluatorConfig | LatencyEvaluatorConfig | CostEvaluatorConfig diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts index f1a5ca5c3..690373b43 100644 --- a/packages/core/src/evaluation/validation/eval-file.schema.ts +++ b/packages/core/src/evaluation/validation/eval-file.schema.ts @@ -222,18 +222,12 @@ const RubricsSchema = EvaluatorCommonSchema.extend({ criteria: z.array(RubricItemSchema).min(1), }); -const TriggerJudgeSchema = EvaluatorCommonSchema.extend({ - type: z.enum(['trigger-judge', 'trigger_judge']), - skill: z.string(), -}); - /** Union of all evaluator types */ const EvaluatorSchema = z.union([ CodeJudgeSchema, LlmJudgeSchema, CompositeSchema, 
ToolTrajectorySchema, - TriggerJudgeSchema, FieldAccuracySchema, LatencySchema, CostSchema, diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 7df57f3f2..514f7acae 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -57,6 +57,7 @@ export type { } from './evaluation/registry/evaluator-registry.js'; export { createBuiltinRegistry } from './evaluation/registry/builtin-evaluators.js'; export { discoverAssertions } from './evaluation/registry/assertion-discovery.js'; +export { discoverJudges } from './evaluation/registry/judge-discovery.js'; export type AgentKernel = { status: string; diff --git a/packages/core/test/evaluation/trigger-judge-evaluator.test.ts b/packages/core/test/evaluation/trigger-judge-evaluator.test.ts deleted file mode 100644 index 2773000f2..000000000 --- a/packages/core/test/evaluation/trigger-judge-evaluator.test.ts +++ /dev/null @@ -1,253 +0,0 @@ -import { describe, expect, it } from 'bun:test'; - -import { TriggerJudgeEvaluator } from '../../src/evaluation/evaluators/trigger-judge.js'; -import type { TriggerJudgeEvaluatorConfig } from '../../src/evaluation/evaluators/trigger-judge.js'; -import type { EvaluationContext } from '../../src/evaluation/evaluators/types.js'; -import type { ResolvedTarget } from '../../src/evaluation/providers/targets.js'; -import type { Message, Provider } from '../../src/evaluation/providers/types.js'; -import type { EvalTest } from '../../src/evaluation/types.js'; - -// Minimal mock objects -const mockTarget: ResolvedTarget = { - name: 'mock', - kind: 'mock', - config: {}, -}; - -const mockProvider: Provider = { - id: 'mock', - kind: 'mock', - targetName: 'mock', - async invoke() { - return { output: [] }; - }, -}; - -const mockEvalCase: EvalTest = { - id: 'test-case', - question: 'Test question', - input: [], - input_segments: [], - expected_output: [], - guideline_paths: [], - file_paths: [], - criteria: 'Expected outcome', -}; - -function createContext(output?: 
readonly Message[]): EvaluationContext { - return { - evalCase: mockEvalCase, - candidate: '', - target: mockTarget, - provider: mockProvider, - attempt: 0, - promptInputs: { question: '', guidelines: '' }, - now: new Date(), - output, - }; -} - -function makeConfig(skill: string): TriggerJudgeEvaluatorConfig { - return { name: 'trigger-judge-test', type: 'trigger-judge', skill }; -} - -describe('TriggerJudgeEvaluator', () => { - describe('no output / no tool calls', () => { - it('fails when no output is provided', () => { - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(undefined)); - expect(result.score).toBe(0); - expect(result.verdict).toBe('fail'); - expect(result.misses.length).toBeGreaterThan(0); - }); - - it('fails when output is empty array', () => { - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext([])); - expect(result.score).toBe(0); - expect(result.verdict).toBe('fail'); - }); - - it('fails when messages have no tool calls', () => { - const output: Message[] = [{ role: 'assistant', content: 'Hello world' }]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(0); - expect(result.verdict).toBe('fail'); - }); - }); - - describe('Skill tool call detection', () => { - it('passes when Skill tool is called with exact skill name in args.skill', () => { - const output: Message[] = [ - { - role: 'assistant', - toolCalls: [ - { - tool: 'Skill', - input: { skill: 'ship' }, - }, - ], - }, - ]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(1); - expect(result.verdict).toBe('pass'); - expect(result.hits.length).toBeGreaterThan(0); - }); - - it('passes when Skill tool args.skill contains the skill name (case-insensitive)', () 
=> { - const output: Message[] = [ - { - role: 'assistant', - toolCalls: [ - { - tool: 'Skill', - input: { skill: 'agentv-ship' }, - }, - ], - }, - ]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(1); - expect(result.verdict).toBe('pass'); - }); - - it('passes when Skill tool args.skill matches case-insensitively', () => { - const output: Message[] = [ - { - role: 'assistant', - toolCalls: [ - { - tool: 'Skill', - input: { skill: 'SHIP' }, - }, - ], - }, - ]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(1); - expect(result.verdict).toBe('pass'); - }); - - it('fails when Skill tool is called with a different skill name', () => { - const output: Message[] = [ - { - role: 'assistant', - toolCalls: [ - { - tool: 'Skill', - input: { skill: 'create-eval' }, - }, - ], - }, - ]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(0); - expect(result.verdict).toBe('fail'); - }); - - it('passes when Skill tool is called in a later message', () => { - const output: Message[] = [ - { - role: 'assistant', - content: 'Thinking...', - toolCalls: [{ tool: 'Read', input: { file_path: '/some/file.ts' } }], - }, - { - role: 'assistant', - toolCalls: [{ tool: 'Skill', input: { skill: 'ship' } }], - }, - ]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(1); - expect(result.verdict).toBe('pass'); - }); - }); - - describe('Read tool call detection (skill file)', () => { - it('passes when a Read tool loads a file in .claude/commands/ containing skill name', () => { - const output: Message[] = [ - { - role: 'assistant', - toolCalls: [ - { - tool: 'Read', - input: { 
file_path: '/home/user/project/.claude/commands/ship.md' }, - }, - ], - }, - ]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(1); - expect(result.verdict).toBe('pass'); - }); - - it('passes when a Read tool loads a file in .claude/skills/ containing skill name', () => { - const output: Message[] = [ - { - role: 'assistant', - toolCalls: [ - { - tool: 'Read', - input: { file_path: '/home/user/project/.claude/skills/ship/README.md' }, - }, - ], - }, - ]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(1); - expect(result.verdict).toBe('pass'); - }); - - it('fails when Read tool reads a non-skill file', () => { - const output: Message[] = [ - { - role: 'assistant', - toolCalls: [ - { - tool: 'Read', - input: { file_path: '/home/user/project/src/main.ts' }, - }, - ], - }, - ]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(0); - expect(result.verdict).toBe('fail'); - }); - - it('fails when Read tool reads from .claude/commands/ but skill name does not match', () => { - const output: Message[] = [ - { - role: 'assistant', - toolCalls: [ - { - tool: 'Read', - input: { file_path: '/home/user/project/.claude/commands/create-eval.md' }, - }, - ], - }, - ]; - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - const result = evaluator.evaluate(createContext(output)); - expect(result.score).toBe(0); - expect(result.verdict).toBe('fail'); - }); - }); - - describe('provider alias resolution metadata (integration)', () => { - it('has kind === trigger-judge', () => { - const evaluator = new TriggerJudgeEvaluator(makeConfig('ship')); - expect(evaluator.kind).toBe('trigger-judge'); - }); - }); -}); diff --git 
a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json index 58c5081d7..dafd565a0 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json +++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json @@ -53,7 +53,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -67,20 +72,29 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file"] + "enum": [ + "text", + "file" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -115,7 +129,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -129,20 +148,29 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file"] + "enum": [ + "text", + "file" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -164,7 +192,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -178,20 +211,29 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file"] + "enum": [ + "text", + "file" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false 
} } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -228,7 +270,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -280,7 +325,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -310,7 +358,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -404,7 +455,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -423,7 +477,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -483,7 +539,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -499,7 +557,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -516,7 +577,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -533,13 +597,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -569,11 +638,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -614,7 +692,12 @@ 
"anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -628,7 +711,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -639,7 +727,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -647,7 +737,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -661,7 +756,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -672,7 +772,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -702,44 +805,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -750,7 +820,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -772,17 +846,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], 
"additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -819,7 +902,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -856,7 +942,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -886,7 +975,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -901,7 +993,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -931,7 +1025,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -963,7 +1060,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -993,7 +1092,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -1047,7 +1149,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -1069,7 +1174,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1105,7 +1212,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1141,7 +1251,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" 
+ ], "additionalProperties": false }, { @@ -1171,10 +1284,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1210,7 +1328,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1291,7 +1412,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -1301,7 +1425,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -1338,7 +1465,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -1390,7 +1520,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -1420,7 +1553,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -1514,7 +1650,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -1533,7 +1672,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1593,7 +1734,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1609,7 +1752,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -1626,7 +1772,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -1643,13 +1792,18 
@@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -1679,11 +1833,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -1724,7 +1887,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1738,7 +1906,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1749,7 +1922,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -1757,7 +1932,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1771,7 +1951,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1782,7 +1967,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -1812,44 +2000,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { 
- "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -1860,7 +2015,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -1882,17 +2041,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -1929,7 +2097,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -1966,7 +2137,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -1996,7 +2170,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -2011,7 +2188,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2041,7 +2220,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -2073,7 +2255,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2103,7 +2287,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + 
"enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -2157,7 +2344,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -2179,7 +2369,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2215,7 +2407,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2251,7 +2446,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2281,10 +2479,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2320,7 +2523,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2401,7 +2607,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -2411,7 +2620,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -2448,7 +2660,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -2500,7 +2715,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -2530,7 +2748,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -2624,7 +2845,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + 
"score_range", + "outcome" + ], "additionalProperties": false } } @@ -2643,7 +2867,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2703,7 +2929,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2719,7 +2947,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -2736,7 +2967,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -2753,13 +2987,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -2789,11 +3028,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -2834,7 +3082,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -2848,7 +3101,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -2859,7 +3117,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -2867,7 +3127,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -2881,7 +3146,12 @@ "anyOf": [ { 
"type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -2892,7 +3162,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -2922,44 +3195,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -2970,7 +3210,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -2992,17 +3236,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -3039,7 +3292,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -3076,7 +3332,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -3106,7 +3365,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", 
"token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -3121,7 +3383,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3151,7 +3415,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -3183,7 +3450,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3213,7 +3482,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -3267,7 +3539,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -3289,7 +3564,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3325,7 +3602,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3361,7 +3641,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3391,10 +3674,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3430,7 +3718,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3511,7 +3802,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -3521,7 +3815,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + 
"type", + "criteria" + ], "additionalProperties": false } ] @@ -3570,7 +3867,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -3622,7 +3922,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -3652,7 +3955,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -3746,7 +4052,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -3765,7 +4074,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3825,7 +4136,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3841,7 +4154,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -3858,7 +4174,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -3875,13 +4194,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -3911,11 +4235,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -3956,7 +4289,12 @@ "anyOf": [ { "type": "string", - "enum": 
["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3970,7 +4308,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3981,7 +4324,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -3989,7 +4334,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4003,7 +4353,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4014,7 +4369,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -4044,44 +4402,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -4092,7 +4417,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -4114,17 +4443,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 
1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -4161,7 +4499,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -4198,7 +4539,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -4228,7 +4572,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -4243,7 +4590,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4273,7 +4622,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -4305,7 +4657,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4335,7 +4689,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -4389,7 +4746,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -4411,7 +4771,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4447,7 +4809,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4483,7 +4848,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false 
}, { @@ -4513,10 +4881,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4552,7 +4925,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4633,7 +5009,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -4643,7 +5022,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -4680,7 +5062,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -4732,7 +5117,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -4762,7 +5150,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -4856,7 +5247,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -4875,7 +5269,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4935,7 +5331,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4951,7 +5349,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -4968,7 +5369,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -4985,13 +5389,18 @@ "type": "string" } }, - 
"required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -5021,11 +5430,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -5066,7 +5484,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5080,7 +5503,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5091,7 +5519,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -5099,7 +5529,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5113,7 +5548,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5124,7 +5564,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -5154,44 +5597,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": 
"boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -5202,7 +5612,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -5224,17 +5638,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -5271,7 +5694,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -5308,7 +5734,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -5338,7 +5767,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -5353,7 +5785,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5383,7 +5817,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -5415,7 +5852,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5445,7 +5884,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + 
"agent_judge" + ] }, "prompt": { "type": "string" @@ -5499,7 +5941,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -5521,7 +5966,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5557,7 +6004,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -5593,7 +6043,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -5623,10 +6076,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5662,7 +6120,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -5743,7 +6204,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -5753,7 +6217,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -5790,7 +6257,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -5842,7 +6312,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -5872,7 +6345,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -5966,7 +6442,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], 
"additionalProperties": false } } @@ -5985,7 +6464,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6045,7 +6526,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6061,7 +6544,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -6078,7 +6564,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -6095,13 +6584,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -6131,11 +6625,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -6176,7 +6679,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6190,7 +6698,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6201,7 +6714,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -6209,7 +6724,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6223,7 +6743,12 @@ "anyOf": [ { "type": "string", - "enum": 
["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6234,7 +6759,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -6264,43 +6792,10 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } - ] - }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -6312,7 +6807,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -6334,17 +6833,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -6381,7 +6889,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -6418,7 +6929,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -6448,7 +6962,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + 
"token_usage" + ] }, "max_total": { "type": "number", @@ -6463,7 +6980,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6493,7 +7012,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -6525,7 +7047,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6555,7 +7079,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -6609,7 +7136,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -6631,7 +7161,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6667,7 +7199,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6703,7 +7238,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6733,10 +7271,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6772,7 +7315,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6853,7 +7399,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -6863,7 +7412,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": 
false } ] @@ -6884,7 +7436,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -6895,7 +7451,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "total_budget_usd": { @@ -6923,7 +7481,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -6947,7 +7508,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -6961,7 +7525,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -6974,7 +7541,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -7003,7 +7573,10 @@ "additionalProperties": false } }, - "required": ["path", "source"], + "required": [ + "path", + "source" + ], "additionalProperties": false } }, @@ -7039,7 +7612,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -7070,7 +7647,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -7101,7 +7682,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -7132,7 +7717,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -7142,7 +7731,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + 
"static" + ] }, "path": { "type": "string" @@ -7164,7 +7757,9 @@ "type": "string" } }, - "required": ["id"], + "required": [ + "id" + ], "additionalProperties": false } }, @@ -7202,7 +7797,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -7216,20 +7816,29 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file"] + "enum": [ + "text", + "file" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -7251,7 +7860,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -7265,20 +7879,29 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file"] + "enum": [ + "text", + "file" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -7315,7 +7938,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -7367,7 +7993,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -7397,7 +8026,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -7491,7 +8123,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + 
], "additionalProperties": false } } @@ -7510,7 +8145,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7570,7 +8207,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7586,7 +8225,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -7603,7 +8245,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -7620,13 +8265,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -7656,11 +8306,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -7701,7 +8360,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7715,7 +8379,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7726,7 +8395,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -7734,7 +8405,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7748,7 +8424,12 @@ "anyOf": [ { "type": "string", - "enum": 
["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7759,7 +8440,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -7789,44 +8473,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -7837,7 +8488,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -7859,17 +8514,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -7906,7 +8570,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -7943,7 +8610,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -7973,7 +8643,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + 
"token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -7988,7 +8661,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8018,7 +8693,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -8050,7 +8728,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8080,7 +8760,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -8134,7 +8817,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -8156,7 +8842,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8192,7 +8880,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8228,7 +8919,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8258,10 +8952,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8297,7 +8996,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8378,7 +9080,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -8388,7 +9093,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], 
"additionalProperties": false } ] @@ -8425,7 +9133,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -8477,7 +9188,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -8507,7 +9221,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -8601,7 +9318,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -8620,7 +9340,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8680,7 +9402,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8696,7 +9420,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -8713,7 +9440,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -8730,13 +9460,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -8766,11 +9501,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -8811,7 +9555,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", 
"superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8825,7 +9574,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8836,7 +9590,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -8844,7 +9600,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8858,7 +9619,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8869,7 +9635,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -8899,44 +9668,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -8947,7 +9683,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -8969,17 +9709,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": 
"string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -9016,7 +9765,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -9053,7 +9805,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -9083,7 +9838,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -9098,7 +9856,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9128,7 +9888,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -9160,7 +9923,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9190,7 +9955,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -9244,7 +10012,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -9266,7 +10037,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9302,7 +10075,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -9338,7 +10114,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -9368,10 
+10147,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9407,7 +10191,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -9488,7 +10275,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -9498,7 +10288,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -9535,7 +10328,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -9587,7 +10383,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -9617,7 +10416,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -9711,7 +10513,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -9730,7 +10535,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9790,7 +10597,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9806,7 +10615,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -9823,7 +10635,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -9840,13 +10655,18 @@ "type": "string" } }, - "required": 
["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -9876,11 +10696,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -9921,7 +10750,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9935,7 +10769,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9946,7 +10785,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -9954,7 +10795,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9968,7 +10814,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9979,7 +10830,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -10009,44 +10863,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": 
"boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -10057,7 +10878,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -10079,17 +10904,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -10126,7 +10960,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -10163,7 +11000,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -10193,7 +11033,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -10208,7 +11051,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10238,7 +11083,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -10270,7 +11118,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10300,7 +11150,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + 
"agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -10354,7 +11207,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -10376,7 +11232,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10412,7 +11270,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10448,7 +11309,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10478,10 +11342,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10517,7 +11386,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10598,7 +11470,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -10608,7 +11483,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -10657,7 +11535,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -10709,7 +11590,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -10739,7 +11623,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -10833,7 +11720,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": 
[ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -10852,7 +11742,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10912,7 +11804,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10928,7 +11822,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -10945,7 +11842,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -10962,13 +11862,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -10998,11 +11903,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -11043,7 +11957,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -11057,7 +11976,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -11068,7 +11992,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -11076,7 +12002,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -11090,7 
+12021,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -11101,7 +12037,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -11131,44 +12070,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -11179,7 +12085,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -11201,17 +12111,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -11248,7 +12167,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -11285,7 +12207,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -11315,7 +12240,10 @@ }, "type": { "type": "string", 
- "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -11330,7 +12258,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11360,7 +12290,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -11392,7 +12325,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11422,7 +12357,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -11476,7 +12414,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -11498,7 +12439,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11534,7 +12477,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11570,7 +12516,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11600,10 +12549,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11639,7 +12593,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11720,7 +12677,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -11730,7 +12690,10 @@ "minItems": 1 } }, - 
"required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -11767,7 +12730,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -11819,7 +12785,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -11849,7 +12818,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -11943,7 +12915,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -11962,7 +12937,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12022,7 +12999,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12038,7 +13017,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -12055,7 +13037,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -12072,13 +13057,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -12108,11 +13098,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": 
"object", @@ -12153,7 +13152,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12167,7 +13171,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12178,7 +13187,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -12186,7 +13197,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12200,7 +13216,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12211,7 +13232,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -12241,44 +13265,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -12289,7 +13280,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -12311,17 +13306,26 @@ } } }, - "required": ["path", 
"match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -12358,7 +13362,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -12395,7 +13402,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -12425,7 +13435,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -12440,7 +13453,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12470,7 +13485,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -12502,7 +13520,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12532,7 +13552,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -12586,7 +13609,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -12608,7 +13634,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12644,7 +13672,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12680,7 +13711,10 @@ "type": 
"string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12710,10 +13744,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12749,7 +13788,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12830,7 +13872,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -12840,7 +13885,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -12877,7 +13925,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -12929,7 +13980,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -12959,7 +14013,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -13053,7 +14110,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -13072,7 +14132,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13132,7 +14194,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13148,7 +14212,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -13165,7 +14232,10 @@ "type": "string" } }, - "required": 
["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -13182,13 +14252,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -13218,11 +14293,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -13263,7 +14347,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13277,7 +14366,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13288,7 +14382,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -13296,7 +14392,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13310,54 +14411,26 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - } - }, - "required": ["type", "mode"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, - { - "type": "number", - "exclusiveMinimum": 0, - 
"maximum": 1 + { + "type": "array", + "items": { + "type": "string" + } } ] - }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" } }, - "required": ["type", "skill"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -13387,7 +14460,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -13399,7 +14475,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -13421,17 +14501,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -13468,7 +14557,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -13505,7 +14597,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -13535,7 +14630,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -13550,7 +14648,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13580,7 +14680,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ 
-13612,7 +14715,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13642,7 +14747,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -13696,7 +14804,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -13718,7 +14829,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13754,7 +14867,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -13790,7 +14906,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -13820,10 +14939,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13859,7 +14983,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -13940,7 +15067,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -13950,7 +15080,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -13971,7 +15104,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -13982,7 +15119,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, 
"total_budget_usd": { @@ -14010,7 +15149,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -14034,7 +15176,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -14048,7 +15193,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -14061,7 +15209,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -14090,7 +15241,10 @@ "additionalProperties": false } }, - "required": ["path", "source"], + "required": [ + "path", + "source" + ], "additionalProperties": false } }, @@ -14126,7 +15280,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -14157,7 +15315,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -14188,7 +15350,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -14219,7 +15385,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -14229,7 +15399,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -14251,7 +15425,9 @@ "type": "string" } }, - "required": ["id"], + "required": [ + "id" + ], "additionalProperties": false } }, @@ -14306,7 +15482,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { 
"anyOf": [ @@ -14358,7 +15537,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -14388,7 +15570,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -14482,7 +15667,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -14501,7 +15689,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14561,7 +15751,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14577,7 +15769,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -14594,7 +15789,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -14611,13 +15809,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -14647,11 +15850,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -14692,7 +15904,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -14706,7 +15923,12 @@ "anyOf": [ { "type": "string", - "enum": 
["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -14717,7 +15939,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -14725,7 +15949,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -14739,7 +15968,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -14750,7 +15984,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -14780,44 +16017,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -14828,7 +16032,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -14850,17 +16058,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", 
"fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -14897,7 +16114,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -14934,7 +16154,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -14964,7 +16187,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -14979,7 +16205,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15009,7 +16237,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -15041,7 +16272,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15071,7 +16304,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -15125,7 +16361,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -15147,7 +16386,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15183,7 +16424,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -15219,7 +16463,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -15249,10 +16496,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - 
"required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15288,7 +16540,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -15369,7 +16624,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -15379,7 +16637,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -15416,7 +16677,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -15468,7 +16732,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -15498,7 +16765,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -15592,7 +16862,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -15611,7 +16884,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15671,7 +16946,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15687,7 +16964,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -15704,7 +16984,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -15721,13 +17004,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", 
"aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -15757,11 +17045,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -15802,7 +17099,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15816,7 +17118,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15827,7 +17134,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -15835,7 +17144,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15849,7 +17163,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15860,7 +17179,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -15890,44 +17212,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + 
"field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -15938,7 +17227,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -15960,17 +17253,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -16007,7 +17309,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -16044,7 +17349,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -16074,7 +17382,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -16089,7 +17400,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16119,7 +17432,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -16151,7 +17467,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16181,7 +17499,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -16235,7 +17556,10 @@ 
"minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -16257,7 +17581,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16293,7 +17619,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -16329,7 +17658,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -16359,10 +17691,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16398,7 +17735,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -16479,7 +17819,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -16489,7 +17832,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -16526,7 +17872,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -16578,7 +17927,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -16608,7 +17960,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -16702,7 +18057,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -16721,7 +18079,9 
@@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16781,7 +18141,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16797,7 +18159,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -16814,7 +18179,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -16831,13 +18199,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -16867,11 +18240,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -16912,7 +18294,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -16926,7 +18313,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -16937,7 +18329,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -16945,7 +18339,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -16959,7 +18358,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", 
"superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -16970,7 +18374,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -17000,44 +18407,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, "fields": { "type": "array", "items": { @@ -17048,7 +18422,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -17070,17 +18448,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -17117,7 +18504,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -17154,7 +18544,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -17184,7 +18577,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + 
"token_usage" + ] }, "max_total": { "type": "number", @@ -17199,7 +18595,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -17229,7 +18627,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -17261,7 +18662,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -17291,7 +18694,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -17345,7 +18751,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -17367,7 +18776,9 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -17403,7 +18814,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -17439,7 +18853,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -17469,10 +18886,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -17508,7 +18930,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -17589,7 +19014,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -17599,7 +19027,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], 
"additionalProperties": false } ] @@ -17620,7 +19051,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -17631,7 +19066,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "total_budget_usd": { @@ -18756,7 +20193,10 @@ }, "type": { "type": "string", - "enum": ["code-judge", "code_judge"] + "enum": [ + "code-judge", + "code_judge" + ] }, "command": { "anyOf": [ @@ -18808,7 +20248,10 @@ "additionalProperties": {} } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -18838,7 +20281,10 @@ }, "type": { "type": "string", - "enum": ["llm-judge", "llm_judge"] + "enum": [ + "llm-judge", + "llm_judge" + ] }, "prompt": { "anyOf": [ @@ -18932,7 +20378,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -18951,7 +20400,9 @@ "additionalProperties": {} } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -19011,7 +20462,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -19027,7 +20480,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -19044,7 +20500,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -19061,13 +20520,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -19097,11 +20561,20 @@ }, "type": { "type": "string", - "enum": 
["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -19142,7 +20615,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -19156,7 +20634,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -19167,7 +20650,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -19175,7 +20660,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -19189,7 +20679,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -19200,7 +20695,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -19230,44 +20728,11 @@ }, "type": { "type": "string", - "enum": ["trigger-judge", "trigger_judge"] - }, - "skill": { - "type": "string" - } - }, - "required": ["type", "skill"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "weight": { - "type": "number", - "minimum": 0 - }, - "required": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number", - "exclusiveMinimum": 0, - "maximum": 1 - } + "enum": [ + "field-accuracy", + "field_accuracy" ] }, - "negate": { - "type": "boolean" - }, - "type": { - "type": "string", - "enum": ["field-accuracy", "field_accuracy"] - }, 
"fields": { "type": "array", "items": { @@ -19278,7 +20743,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -19300,17 +20769,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -19347,7 +20825,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -19384,7 +20865,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -19414,7 +20898,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -19429,7 +20916,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -19459,7 +20948,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -19491,7 +20983,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -19521,7 +21015,10 @@ }, "type": { "type": "string", - "enum": ["agent-judge", "agent_judge"] + "enum": [ + "agent-judge", + "agent_judge" + ] }, "prompt": { "type": "string" @@ -19575,7 +21072,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -19597,7 +21097,9 @@ 
"type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -19633,7 +21135,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -19669,7 +21174,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -19699,10 +21207,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -19738,7 +21251,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -19819,7 +21335,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -19829,7 +21348,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -19845,7 +21367,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -19869,7 +21394,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -19883,7 +21411,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -19896,7 +21427,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -19925,7 +21459,10 @@ "additionalProperties": false } }, - "required": ["path", "source"], + "required": [ + "path", + "source" + ], "additionalProperties": false } }, @@ -19961,7 +21498,11 @@ }, "reset": { "type": "string", - 
"enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -19992,7 +21533,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -20023,7 +21568,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -20054,7 +21603,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -20064,7 +21617,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -20078,7 +21635,9 @@ ] } }, - "required": ["tests"], + "required": [ + "tests" + ], "additionalProperties": false } } From 3660b9d797a1d94baa6753b7e99176d641e20ab7 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Sat, 14 Mar 2026 13:13:08 +0000 Subject: [PATCH 5/8] style: format eval-schema.json with biome --- .../references/eval-schema.json | 3501 ++++------------- 1 file changed, 683 insertions(+), 2818 deletions(-) diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json index dafd565a0..9093c7e48 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json +++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json @@ -53,12 +53,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -72,29 +67,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file" - ] + "enum": ["text", "file"] }, "value": { "type": "string" } }, - "required": [ - "type", - 
"value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -129,12 +115,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -148,29 +129,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file" - ] + "enum": ["text", "file"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -192,12 +164,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -211,29 +178,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file" - ] + "enum": ["text", "file"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -270,10 +228,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -325,10 +280,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -358,10 +310,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -455,10 +404,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": 
["score_range", "outcome"], "additionalProperties": false } } @@ -477,9 +423,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -539,9 +483,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -557,10 +499,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -577,10 +516,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -597,18 +533,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -638,20 +569,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -692,12 +614,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -711,12 +628,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -727,9 +639,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -737,12 +647,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -756,12 +661,7 @@ "anyOf": [ { "type": "string", - 
"enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -772,10 +672,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -805,10 +702,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -820,11 +714,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -846,26 +736,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -902,10 +783,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -942,10 +820,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -975,10 +850,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -993,9 +865,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1025,10 +895,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -1060,9 +927,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": 
false }, { @@ -1092,10 +957,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -1149,10 +1011,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -1174,9 +1033,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1212,10 +1069,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1251,10 +1105,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1284,15 +1135,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1328,10 +1174,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1412,10 +1255,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -1425,10 +1265,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -1465,10 +1302,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -1520,10 +1354,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -1553,10 +1384,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ 
-1650,10 +1478,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -1672,9 +1497,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1734,9 +1557,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1752,10 +1573,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -1772,10 +1590,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -1792,18 +1607,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -1833,20 +1643,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -1887,12 +1688,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -1906,12 +1702,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -1922,9 +1713,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -1932,12 +1721,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": 
["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -1951,12 +1735,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -1967,10 +1746,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -2000,10 +1776,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -2015,11 +1788,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -2041,26 +1810,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -2097,10 +1857,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -2137,10 +1894,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -2170,10 +1924,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -2188,9 +1939,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2220,10 +1969,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": 
{ "type": "number", @@ -2255,9 +2001,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2287,10 +2031,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -2344,10 +2085,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -2369,9 +2107,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2407,10 +2143,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2446,10 +2179,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2479,15 +2209,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2523,10 +2248,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2607,10 +2329,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -2620,10 +2339,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -2660,10 +2376,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -2715,10 +2428,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -2748,10 +2458,7 @@ }, 
"type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -2845,10 +2552,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -2867,9 +2571,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2929,9 +2631,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2947,10 +2647,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -2967,10 +2664,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -2987,18 +2681,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -3028,20 +2717,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -3082,12 +2762,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3101,12 +2776,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3117,9 +2787,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": 
false } }, @@ -3127,12 +2795,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3146,12 +2809,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3162,10 +2820,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -3195,10 +2850,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -3210,11 +2862,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -3236,26 +2884,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -3292,10 +2931,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -3332,10 +2968,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -3365,10 +2998,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -3383,9 +3013,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3415,10 +3043,7 @@ }, "type": { "type": 
"string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -3450,9 +3075,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3482,10 +3105,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -3539,10 +3159,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -3564,9 +3181,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3602,10 +3217,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -3641,10 +3253,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -3674,15 +3283,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3718,10 +3322,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -3802,10 +3403,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -3815,10 +3413,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -3867,10 +3462,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -3922,10 +3514,7 @@ "additionalProperties": {} 
} }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -3955,10 +3544,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -4052,10 +3638,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -4074,9 +3657,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4136,9 +3717,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4154,10 +3733,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -4174,10 +3750,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -4194,18 +3767,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -4235,20 +3803,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -4289,12 +3848,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4308,12 +3862,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", 
"subset", "superset"] }, { "type": "array", @@ -4324,9 +3873,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -4334,12 +3881,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4353,12 +3895,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4369,10 +3906,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -4402,10 +3936,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -4417,11 +3948,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -4443,26 +3970,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -4499,10 +4017,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -4539,10 +4054,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -4572,10 +4084,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -4590,9 +4099,7 @@ 
"minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4622,10 +4129,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -4657,9 +4161,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4689,10 +4191,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -4746,10 +4245,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -4771,9 +4267,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4809,10 +4303,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -4848,10 +4339,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -4881,15 +4369,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4925,10 +4408,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -5009,10 +4489,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -5022,10 +4499,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -5062,10 +4536,7 @@ }, "type": { "type": "string", - "enum": [ 
- "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -5117,10 +4588,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -5150,10 +4618,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -5247,10 +4712,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -5269,9 +4731,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5331,9 +4791,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5349,10 +4807,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -5369,10 +4824,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -5389,18 +4841,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -5430,20 +4877,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -5484,12 +4922,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ 
-5503,12 +4936,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -5519,9 +4947,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -5529,12 +4955,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -5548,12 +4969,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -5564,10 +4980,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -5597,10 +5010,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -5612,11 +5022,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -5638,26 +5044,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -5694,10 +5091,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -5734,10 +5128,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -5767,10 +5158,7 @@ }, "type": { "type": "string", - "enum": 
[ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -5785,9 +5173,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5817,10 +5203,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -5852,9 +5235,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5884,10 +5265,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -5941,10 +5319,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -5966,9 +5341,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6004,10 +5377,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -6043,10 +5413,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -6076,15 +5443,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6120,10 +5482,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -6204,10 +5563,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -6217,10 +5573,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - 
], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -6257,10 +5610,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -6312,10 +5662,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -6345,10 +5692,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -6442,10 +5786,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -6464,9 +5805,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6526,9 +5865,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6544,10 +5881,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -6564,10 +5898,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -6584,18 +5915,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -6625,20 +5951,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -6679,12 +5996,7 @@ "anyOf": [ { "type": "string", - 
"enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6698,12 +6010,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6714,9 +6021,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -6724,12 +6029,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6743,12 +6043,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6759,10 +6054,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -6792,10 +6084,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -6807,11 +6096,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -6833,26 +6118,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -6889,10 +6165,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -6929,10 +6202,7 @@ "minimum": 0 } }, - "required": [ - "type", - 
"budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -6962,10 +6232,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -6980,9 +6247,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7012,10 +6277,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -7047,9 +6309,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7079,10 +6339,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -7136,10 +6393,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -7161,9 +6415,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7199,10 +6451,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -7238,10 +6487,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -7271,15 +6517,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7315,10 +6556,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -7399,10 +6637,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": 
["score_range", "outcome"], "additionalProperties": false } } @@ -7412,10 +6647,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -7436,11 +6668,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -7451,9 +6679,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "total_budget_usd": { @@ -7481,10 +6707,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -7508,10 +6731,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -7525,10 +6745,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -7541,10 +6758,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -7573,10 +6787,7 @@ "additionalProperties": false } }, - "required": [ - "path", - "source" - ], + "required": ["path", "source"], "additionalProperties": false } }, @@ -7612,11 +6823,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -7647,11 +6854,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -7682,11 +6885,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -7717,11 +6916,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - 
"strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -7731,11 +6926,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -7757,9 +6948,7 @@ "type": "string" } }, - "required": [ - "id" - ], + "required": ["id"], "additionalProperties": false } }, @@ -7797,12 +6986,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -7816,29 +7000,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file" - ] + "enum": ["text", "file"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -7860,12 +7035,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -7879,29 +7049,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file" - ] + "enum": ["text", "file"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -7938,10 +7099,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -7993,10 +7151,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -8026,10 +7181,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", 
- "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -8123,10 +7275,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -8145,9 +7294,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8207,9 +7354,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8225,10 +7370,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -8245,10 +7387,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -8265,18 +7404,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -8306,20 +7440,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -8360,12 +7485,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8379,12 +7499,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8395,9 +7510,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -8405,12 +7518,7 @@ "anyOf": [ { "type": 
"string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8424,12 +7532,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8440,10 +7543,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -8473,10 +7573,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -8488,11 +7585,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -8514,26 +7607,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -8570,10 +7654,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -8610,10 +7691,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -8643,10 +7721,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -8661,9 +7736,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8693,10 +7766,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - 
"execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -8728,9 +7798,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8760,10 +7828,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -8817,10 +7882,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -8842,9 +7904,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8880,10 +7940,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8919,10 +7976,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8952,15 +8006,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8996,10 +8045,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -9080,10 +8126,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -9093,10 +8136,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -9133,10 +8173,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -9188,10 +8225,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], 
+ "required": ["type", "command"], "additionalProperties": false }, { @@ -9221,10 +8255,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -9318,10 +8349,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -9340,9 +8368,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9402,9 +8428,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9420,10 +8444,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -9440,10 +8461,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -9460,18 +8478,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -9501,20 +8514,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -9555,12 +8559,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9574,12 +8573,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9590,9 
+8584,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -9600,12 +8592,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9619,12 +8606,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9635,10 +8617,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -9668,10 +8647,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -9683,11 +8659,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -9709,26 +8681,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -9765,10 +8728,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -9805,10 +8765,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -9838,10 +8795,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -9856,9 +8810,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": 
["type"], "additionalProperties": false }, { @@ -9888,10 +8840,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -9923,9 +8872,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9955,10 +8902,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -10012,10 +8956,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -10037,9 +8978,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10075,10 +9014,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10114,10 +9050,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10147,15 +9080,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10191,10 +9119,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10275,10 +9200,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -10288,10 +9210,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -10328,10 +9247,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": 
["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -10383,10 +9299,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -10416,10 +9329,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -10513,10 +9423,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -10535,9 +9442,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10597,9 +9502,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10615,10 +9518,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -10635,10 +9535,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -10655,18 +9552,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -10696,20 +9588,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -10750,12 +9633,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10769,12 +9647,7 @@ "anyOf": [ { 
"type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10785,9 +9658,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -10795,12 +9666,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10814,12 +9680,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10830,10 +9691,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -10863,10 +9721,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -10878,11 +9733,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -10904,26 +9755,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -10960,10 +9802,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -11000,10 +9839,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -11033,10 +9869,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - 
"token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -11051,9 +9884,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11083,10 +9914,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -11118,9 +9946,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11150,10 +9976,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -11207,10 +10030,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -11232,9 +10052,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11270,10 +10088,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -11309,10 +10124,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -11342,15 +10154,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11386,10 +10193,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -11470,10 +10274,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -11483,10 +10284,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], 
+ "required": ["type", "criteria"], "additionalProperties": false } ] @@ -11535,10 +10333,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -11590,10 +10385,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -11623,10 +10415,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -11720,10 +10509,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -11742,9 +10528,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11804,9 +10588,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11822,10 +10604,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -11842,10 +10621,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -11862,18 +10638,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -11903,20 +10674,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -11957,12 +10719,7 @@ "anyOf": [ { 
"type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11976,12 +10733,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11992,9 +10744,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -12002,12 +10752,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -12021,12 +10766,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -12037,10 +10777,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -12070,10 +10807,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -12085,11 +10819,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -12111,26 +10841,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -12167,10 +10888,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -12207,10 +10925,7 @@ "minimum": 0 
} }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -12240,10 +10955,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -12258,9 +10970,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12290,10 +11000,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -12325,9 +11032,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12357,10 +11062,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -12414,10 +11116,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -12439,9 +11138,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12477,10 +11174,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12516,10 +11210,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12549,15 +11240,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12593,10 +11279,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12677,10 +11360,7 @@ "minLength": 1 } }, - 
"required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -12690,10 +11370,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -12730,10 +11407,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -12785,10 +11459,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -12818,10 +11489,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -12915,10 +11583,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -12937,9 +11602,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12999,9 +11662,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13017,10 +11678,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -13037,10 +11695,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -13057,18 +11712,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -13098,20 +11748,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": 
[ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -13152,12 +11793,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13171,12 +11807,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13187,9 +11818,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -13197,12 +11826,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13216,12 +11840,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13232,10 +11851,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -13265,10 +11881,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -13280,11 +11893,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -13306,26 +11915,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], 
"additionalProperties": false }, { @@ -13362,10 +11962,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -13402,10 +11999,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -13435,10 +12029,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -13453,9 +12044,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13485,10 +12074,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -13520,9 +12106,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13552,10 +12136,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -13609,10 +12190,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -13634,9 +12212,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13672,10 +12248,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -13711,10 +12284,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -13744,15 +12314,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], 
"additionalProperties": false }, { @@ -13788,10 +12353,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -13872,10 +12434,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -13885,10 +12444,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -13925,10 +12481,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -13980,10 +12533,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -14013,10 +12563,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -14110,10 +12657,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -14132,9 +12676,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14194,9 +12736,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14212,10 +12752,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -14232,10 +12769,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -14252,18 +12786,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], 
"additionalProperties": false }, { @@ -14293,20 +12822,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -14347,12 +12867,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14366,12 +12881,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14382,9 +12892,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -14392,12 +12900,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14411,12 +12914,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14427,10 +12925,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -14460,10 +12955,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -14475,11 +12967,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -14501,26 +12989,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, 
"aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -14557,10 +13036,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -14597,10 +13073,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -14630,10 +13103,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -14648,9 +13118,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14680,10 +13148,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -14715,9 +13180,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14747,10 +13210,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -14804,10 +13264,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -14829,9 +13286,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14867,10 +13322,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -14906,10 +13358,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], 
"additionalProperties": false }, { @@ -14939,15 +13388,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14983,10 +13427,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -15067,10 +13508,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -15080,10 +13518,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -15104,11 +13539,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -15119,9 +13550,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "total_budget_usd": { @@ -15149,10 +13578,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -15176,10 +13602,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -15193,10 +13616,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -15209,10 +13629,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -15241,10 +13658,7 @@ "additionalProperties": false } }, - "required": [ - "path", - "source" - ], + "required": ["path", "source"], "additionalProperties": false } }, @@ -15280,11 +13694,7 @@ }, "reset": { "type": "string", - "enum": [ - 
"none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -15315,11 +13725,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -15350,11 +13756,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -15385,11 +13787,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -15399,11 +13797,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -15425,9 +13819,7 @@ "type": "string" } }, - "required": [ - "id" - ], + "required": ["id"], "additionalProperties": false } }, @@ -15482,10 +13874,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -15537,10 +13926,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -15570,10 +13956,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -15667,10 +14050,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -15689,9 +14069,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15751,9 +14129,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15769,10 +14145,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", 
"threshold"], "additionalProperties": false }, { @@ -15789,10 +14162,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -15809,18 +14179,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -15850,20 +14215,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -15904,12 +14260,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15923,12 +14274,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15939,9 +14285,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -15949,12 +14293,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15968,12 +14307,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15984,10 +14318,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -16017,10 +14348,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": 
["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -16032,11 +14360,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -16058,26 +14382,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -16114,10 +14429,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -16154,10 +14466,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -16187,10 +14496,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -16205,9 +14511,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16237,10 +14541,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -16272,9 +14573,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16304,10 +14603,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -16361,10 +14657,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } 
} @@ -16386,9 +14679,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16424,10 +14715,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -16463,10 +14751,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -16496,15 +14781,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16540,10 +14820,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -16624,10 +14901,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -16637,10 +14911,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -16677,10 +14948,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -16732,10 +15000,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -16765,10 +15030,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -16862,10 +15124,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -16884,9 +15143,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16946,9 +15203,7 
@@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16964,10 +15219,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -16984,10 +15236,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -17004,18 +15253,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -17045,20 +15289,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -17099,12 +15334,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -17118,12 +15348,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -17134,9 +15359,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -17144,12 +15367,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -17163,12 +15381,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -17179,10 +15392,7 @@ ] } }, - "required": [ - 
"type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -17212,10 +15422,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -17227,11 +15434,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -17253,26 +15456,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -17309,10 +15503,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -17349,10 +15540,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -17382,10 +15570,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -17400,9 +15585,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17432,10 +15615,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -17467,9 +15647,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17499,10 +15677,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", 
"agent_judge"] }, "prompt": { "type": "string" @@ -17556,10 +15731,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -17581,9 +15753,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17619,10 +15789,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -17658,10 +15825,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -17691,15 +15855,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17735,10 +15894,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -17819,10 +15975,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -17832,10 +15985,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -17872,10 +16022,7 @@ }, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -17927,10 +16074,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -17960,10 +16104,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -18057,10 +16198,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": 
["score_range", "outcome"], "additionalProperties": false } } @@ -18079,9 +16217,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -18141,9 +16277,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -18159,10 +16293,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -18179,10 +16310,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -18199,18 +16327,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -18240,20 +16363,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -18294,12 +16408,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -18313,12 +16422,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -18329,9 +16433,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -18339,12 +16441,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -18358,12 
+16455,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -18374,10 +16466,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -18407,10 +16496,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -18422,11 +16508,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -18448,26 +16530,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -18504,10 +16577,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -18544,10 +16614,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -18577,10 +16644,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -18595,9 +16659,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -18627,10 +16689,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -18662,9 +16721,7 @@ "minimum": 0 
} }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -18694,10 +16751,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -18751,10 +16805,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -18776,9 +16827,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -18814,10 +16863,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -18853,10 +16899,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -18886,15 +16929,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -18930,10 +16968,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -19014,10 +17049,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -19027,10 +17059,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -19051,11 +17080,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -19066,9 +17091,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "total_budget_usd": { @@ -20193,10 +18216,7 @@ 
}, "type": { "type": "string", - "enum": [ - "code-judge", - "code_judge" - ] + "enum": ["code-judge", "code_judge"] }, "command": { "anyOf": [ @@ -20248,10 +18268,7 @@ "additionalProperties": {} } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -20281,10 +18298,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-judge", - "llm_judge" - ] + "enum": ["llm-judge", "llm_judge"] }, "prompt": { "anyOf": [ @@ -20378,10 +18392,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -20400,9 +18411,7 @@ "additionalProperties": {} } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -20462,9 +18471,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -20480,10 +18487,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -20500,10 +18504,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -20520,18 +18521,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -20561,20 +18557,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -20615,12 +18602,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": 
["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -20634,12 +18616,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -20650,9 +18627,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -20660,12 +18635,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -20679,12 +18649,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -20695,10 +18660,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -20728,10 +18690,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -20743,11 +18702,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -20769,26 +18724,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -20825,10 +18771,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -20865,10 +18808,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], 
"additionalProperties": false }, { @@ -20898,10 +18838,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -20916,9 +18853,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -20948,10 +18883,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -20983,9 +18915,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -21015,10 +18945,7 @@ }, "type": { "type": "string", - "enum": [ - "agent-judge", - "agent_judge" - ] + "enum": ["agent-judge", "agent_judge"] }, "prompt": { "type": "string" @@ -21072,10 +18999,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -21097,9 +19021,7 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -21135,10 +19057,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -21174,10 +19093,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -21207,15 +19123,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -21251,10 +19162,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -21335,10 +19243,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", 
"outcome"], "additionalProperties": false } } @@ -21348,10 +19253,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -21367,10 +19269,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -21394,10 +19293,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -21411,10 +19307,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -21427,10 +19320,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -21459,10 +19349,7 @@ "additionalProperties": false } }, - "required": [ - "path", - "source" - ], + "required": ["path", "source"], "additionalProperties": false } }, @@ -21498,11 +19385,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -21533,11 +19416,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -21568,11 +19447,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -21603,11 +19478,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -21617,11 +19488,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -21635,9 +19502,7 @@ ] } }, - "required": [ - "tests" - ], + "required": ["tests"], "additionalProperties": 
false } } From 50589991680582a855a66fdb8c33dcf29acc8c36 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Sat, 14 Mar 2026 21:31:35 +0000 Subject: [PATCH 6/8] docs: use assert instead of evaluators in trigger-judge example comment --- .../features/agent-skills-evals/.agentv/judges/trigger-judge.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts index dc41e0d10..461a9dd41 100644 --- a/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts +++ b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts @@ -3,7 +3,7 @@ * trigger-judge: detects whether the agent invoked a named Claude Code skill. * * Usage in eval YAML: - * evaluators: + * assert: * - type: trigger-judge # discovered from .agentv/judges/ * skill: my-skill-name # passed via config * From 481ab33cb020e95fa9b698682b2553352ce6ae94 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Sun, 15 Mar 2026 03:21:18 +0000 Subject: [PATCH 7/8] refactor(judges): align trigger-judge detection with skill-creator run_eval.py --- .../.agentv/judges/trigger-judge.ts | 98 +++++++++++-------- 1 file changed, 59 insertions(+), 39 deletions(-) diff --git a/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts index 461a9dd41..f426b87bb 100644 --- a/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts +++ b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts @@ -2,68 +2,88 @@ /** * trigger-judge: detects whether the agent invoked a named Claude Code skill. * + * Mirrors the post-hoc fallback detection in skill-creator's run_eval.py: + * - Only the FIRST tool call matters. Any non-Skill/Read tool as the first + * call means the skill was not triggered (mirrors run_eval.py's early-exit). 
+ * - Skill tool: checks input.skill contains the skill name (case-sensitive). + * - Read tool: checks input.file_path contains the skill name (case-sensitive). + * - Supports negative cases via should_trigger: false. + * * Usage in eval YAML: * assert: * - type: trigger-judge # discovered from .agentv/judges/ - * skill: my-skill-name # passed via config + * skill: my-skill-name # required: exact name as installed in .claude/commands/ + * should_trigger: true # optional: expected behaviour (default: true) * - * Checks: - * - Skill tool call where args.skill matches the configured skill name - * - Read tool call loading a file from .claude/commands/ or .claude/skills/ - * whose path contains the skill name + * Positive case (should_trigger: true): passes when skill fires. + * Negative case (should_trigger: false): passes when skill does NOT fire. */ import { defineCodeJudge } from '@agentv/eval'; export default defineCodeJudge(({ output, config }) => { const skillName = config?.skill as string | undefined; + const shouldTrigger = (config?.should_trigger ?? true) as boolean; + if (!skillName) { - return { score: 0, misses: ['config.skill is required'], reasoning: 'No skill name configured' }; + return { + score: 0, + misses: ['config.skill is required'], + reasoning: 'No skill name configured', + }; } - const allToolCalls = (output ?? []).flatMap((msg) => msg.toolCalls ?? []); + // Flatten all tool calls across messages and take only the first one. + // run_eval.py returns false as soon as a non-Skill/Read tool starts, so + // only the first tool call is relevant. + const firstTool = (output ?? []).flatMap((msg) => msg.toolCalls ?? [])[0]; - // Check for Skill tool invocation - const skillTrigger = allToolCalls.find( - (tc) => - tc.tool === 'Skill' && - typeof tc.input === 'object' && - tc.input !== null && - String((tc.input as Record<string, unknown>).skill ??
'').toLowerCase().includes(skillName.toLowerCase()), - ); + let triggered = false; + let evidence = ''; - if (skillTrigger) { - return { - score: 1, - hits: [`Skill tool invoked with skill="${(skillTrigger.input as Record<string, unknown>).skill}"`], - reasoning: `Agent triggered skill "${skillName}"`, - }; + if (firstTool) { + const input = (firstTool.input ?? {}) as Record<string, unknown>; + + if (firstTool.tool === 'Skill') { + const skillArg = String(input.skill ?? ''); + if (skillArg.includes(skillName)) { + triggered = true; + evidence = `Skill tool invoked with skill="${skillArg}"`; + } + } else if (firstTool.tool === 'Read') { + const filePath = String(input.file_path ?? ''); + if (filePath.includes(skillName)) { + triggered = true; + evidence = `Read tool loaded skill file: ${filePath}`; + } + } + // Any other tool as first call: triggered remains false } - // Check for Read tool loading a skill file - const readTrigger = allToolCalls.find((tc) => { - if (tc.tool !== 'Read') return false; - const filePath = String( - (tc.input as Record<string, unknown> | null)?.file_path ?? - (tc.input as Record<string, unknown> | null)?.path ?? - '', - ).toLowerCase(); - return ( - (filePath.includes('.claude/commands/') || filePath.includes('.claude/skills/')) && - filePath.includes(skillName.toLowerCase()) - ); - }); + const pass = triggered === shouldTrigger; - if (readTrigger) { + if (pass) { return { score: 1, - hits: [`Read tool loaded skill file: ${(readTrigger.input as Record<string, unknown>)?.file_path ?? (readTrigger.input as Record<string, unknown>)?.path}`], - reasoning: `Agent read skill "${skillName}" definition`, + hits: [ + shouldTrigger + ? evidence || `Skill "${skillName}" triggered as expected` + : `Skill "${skillName}" correctly did not trigger`, + ], + reasoning: shouldTrigger ? 'Skill triggered correctly' : 'No false trigger', }; } return { score: 0, - misses: [`Skill "${skillName}" was not triggered`], - reasoning: `No Skill or Read tool call matched "${skillName}"`, + misses: [ + shouldTrigger + ? firstTool + ?
`First tool was "${firstTool.tool}" — not Skill/Read for "${skillName}"` + : `No tool calls recorded` + : evidence || `Skill "${skillName}" triggered unexpectedly`, + ], + reasoning: shouldTrigger + ? `Skill "${skillName}" was not triggered` + : `False trigger: skill fired when it should not have`, }; }); From 5008ce54450a1f9afa70ea181550198ae7a380ff Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Sun, 15 Mar 2026 03:58:36 +0000 Subject: [PATCH 8/8] =?UTF-8?q?docs(judges):=20update=20trigger-judge=20ex?= =?UTF-8?q?ample=20comment=20assert:=20=E2=86=92=20assertions:?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align with the global rename from PR #604. --- .../features/agent-skills-evals/.agentv/judges/trigger-judge.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts index f426b87bb..fb2d4d866 100644 --- a/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts +++ b/examples/features/agent-skills-evals/.agentv/judges/trigger-judge.ts @@ -10,7 +10,7 @@ * - Supports negative cases via should_trigger: false. * * Usage in eval YAML: - * assert: + * assertions: * - type: trigger-judge # discovered from .agentv/judges/ * skill: my-skill-name # required: exact name as installed in .claude/commands/ * should_trigger: true # optional: expected behaviour (default: true)