Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
},
"dependencies": {
"@anthropic-ai/claude-agent-sdk": "^0.2.77",
"@anthropic-ai/sdk": "^0.80.0",
"@modelcontextprotocol/sdk": "^1.28.0",
"@slack/bolt": "^4.6.0",
"croner": "^10.0.1",
Expand All @@ -26,8 +25,7 @@
"resend": "^6.9.4",
"telegraf": "^4.16.3",
"yaml": "^2.6.0",
"zod": "^3.24.0",
"zod-to-json-schema": "^3.25.1"
"zod": "^3.24.0"
},
"devDependencies": {
"@biomejs/biome": "^1.9.0",
Expand Down
95 changes: 95 additions & 0 deletions src/agent/__tests__/judge-query.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { describe, expect, test } from "bun:test";
import { z } from "zod/v4";
import { parseJsonFromResponse } from "../judge-query.ts";

// parseJsonFromResponse is the shape-normalization layer for judge subprocess output.
// Models sometimes return markdown fences, leading prose, or trailing whitespace even
// when asked for raw JSON. These tests lock in the tolerance window: we accept the
// well-formed common cases and reject anything that cannot be safely parsed.

const VerdictSchema = z.object({
  verdict: z.enum(["pass", "fail"]),
  confidence: z.number().min(0).max(1),
  reasoning: z.string(),
});

describe("parseJsonFromResponse", () => {
  // --- Accepted shapes ---------------------------------------------------

  test("parses raw JSON object", () => {
    const parsed = parseJsonFromResponse(
      '{"verdict":"pass","confidence":0.95,"reasoning":"Looks clean."}',
      VerdictSchema,
    );
    expect(parsed.verdict).toBe("pass");
    expect(parsed.confidence).toBe(0.95);
  });

  test("parses JSON wrapped in markdown json code fence", () => {
    const parsed = parseJsonFromResponse(
      '```json\n{"verdict":"fail","confidence":0.8,"reasoning":"Issue detected."}\n```',
      VerdictSchema,
    );
    expect(parsed.verdict).toBe("fail");
    expect(parsed.reasoning).toBe("Issue detected.");
  });

  test("parses JSON wrapped in plain markdown code fence", () => {
    const parsed = parseJsonFromResponse(
      '```\n{"verdict":"pass","confidence":1,"reasoning":"ok"}\n```',
      VerdictSchema,
    );
    expect(parsed.verdict).toBe("pass");
  });

  test("handles leading/trailing whitespace", () => {
    const parsed = parseJsonFromResponse(
      '\n\n {"verdict":"pass","confidence":0.5,"reasoning":"fine"} \n',
      VerdictSchema,
    );
    expect(parsed.verdict).toBe("pass");
  });

  test("recovers JSON from surrounding prose via brace scan", () => {
    const parsed = parseJsonFromResponse(
      'Here is my analysis: {"verdict":"fail","confidence":0.72,"reasoning":"Unsafe pattern"}. Thank you.',
      VerdictSchema,
    );
    expect(parsed.verdict).toBe("fail");
    expect(parsed.confidence).toBe(0.72);
  });

  // --- Rejected shapes ---------------------------------------------------

  test("throws a clear error on empty response", () => {
    for (const blank of ["", " \n\n "]) {
      expect(() => parseJsonFromResponse(blank, VerdictSchema)).toThrow(/empty/i);
    }
  });

  test("throws on text with no JSON object at all", () => {
    expect(() => parseJsonFromResponse("I cannot comply with this request.", VerdictSchema)).toThrow(
      /non-JSON|invalid/i,
    );
  });

  test("throws on malformed JSON", () => {
    expect(() => parseJsonFromResponse('{"verdict":"pass", "confidence":', VerdictSchema)).toThrow(
      /invalid JSON|non-JSON/i,
    );
  });

  test("throws on JSON that violates the schema", () => {
    expect(() => parseJsonFromResponse('{"verdict":"maybe","confidence":0.9,"reasoning":"..."}', VerdictSchema)).toThrow(
      /schema validation/i,
    );
  });

  test("throws on JSON missing required fields", () => {
    expect(() => parseJsonFromResponse('{"verdict":"pass"}', VerdictSchema)).toThrow(/schema validation/i);
  });

  test("throws on confidence out of range", () => {
    expect(() => parseJsonFromResponse('{"verdict":"pass","confidence":1.5,"reasoning":"over"}', VerdictSchema)).toThrow(
      /schema validation/i,
    );
  });

  test("error message includes truncated response for debugging", () => {
    expect(() => parseJsonFromResponse("not json at all, just prose with no object", VerdictSchema)).toThrow(
      /not json/i,
    );
  });

  test("parses nested structures", () => {
    const FlagsSchema = z.object({
      flags: z.array(z.object({ category: z.string(), severity: z.enum(["critical", "warning", "info"]) })),
      verdict: z.enum(["pass", "fail"]),
    });
    const parsed = parseJsonFromResponse(
      '```json\n{"flags":[{"category":"safety","severity":"critical"}],"verdict":"fail"}\n```',
      FlagsSchema,
    );
    expect(parsed.flags).toHaveLength(1);
    expect(parsed.flags[0].severity).toBe("critical");
    expect(parsed.verdict).toBe("fail");
  });
});
206 changes: 206 additions & 0 deletions src/agent/judge-query.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
import { query } from "@anthropic-ai/claude-agent-sdk";
import { z } from "zod/v4";
import type { PhantomConfig } from "../config/types.ts";
import { extractTextFromMessage } from "./message-utils.ts";

// Judge subprocess integration. Routes LLM judge calls through the same
// Agent SDK `query()` subprocess as the main agent so that auth, provider,
// and base URL flow through a single path. The older raw Anthropic SDK
// integration (`client.messages.parse`) is gone; structured output is now
// produced by prompt instruction + JSON.parse + Zod validation.

// Inputs for a single judge evaluation call.
export type JudgeQueryOptions<T> = {
  // Evaluation instructions appended to the subprocess system prompt.
  systemPrompt: string;
  // The content being judged.
  userMessage: string;
  // Zod schema the judge's JSON output must satisfy.
  schema: z.ZodType<T>;
  // Per-call model override; falls back to config.judge_model, then config.model.
  model?: string;
  // Output-token cap for the judge response.
  maxTokens?: number;
};

// Envelope returned to judge callers: normalized verdict fields plus the
// schema-validated payload and usage accounting for the subprocess run.
export type JudgeQueryResult<T> = {
  verdict: "pass" | "fail";
  // 0..1; defaults to 1.0 when the schema does not expose a confidence field.
  confidence: number;
  reasoning: string;
  // The full schema-validated judge output.
  data: T;
  // Model the query was resolved to run on.
  model: string;
  inputTokens: number;
  outputTokens: number;
  costUsd: number;
  durationMs: number;
};

// Minimum permissive schema shape so we can surface verdict/confidence/reasoning
// on the envelope when the concrete schema opts into those fields.
type JudgeEnvelopeFields = {
  verdict?: "pass" | "fail";
  confidence?: number;
  reasoning?: string;
  // Some judge schemas name the reasoning field `overall_reasoning` instead.
  overall_reasoning?: string;
};

// Markdown fence delimiters that models sometimes wrap JSON in, despite the
// raw-JSON instruction in the judge prompt.
const JSON_BLOCK = /^```(?:json)?\s*\n?/;
const TRAILING_BLOCK = /\n?```\s*$/;

/**
 * Parse and validate a JSON response returned by a judge subprocess.
 *
 * Tolerates three output shapes observed from models:
 *   1. a bare JSON object (the shape the prompt asks for),
 *   2. a JSON object inside a ``` / ```json markdown fence,
 *   3. a JSON object buried in prose, recovered by slicing from the first
 *      `{` to the last `}`.
 *
 * Anything else raises with a truncated copy of the response for debugging,
 * and Zod enforces the structural contract. There is no silent fallback to
 * partial data.
 */
export function parseJsonFromResponse<T>(text: string, schema: z.ZodType<T>): T {
  if (!text || !text.trim()) {
    throw new Error("Judge returned empty response");
  }

  let candidate = text.trim();
  if (candidate.startsWith("```")) {
    candidate = candidate.replace(JSON_BLOCK, "").replace(TRAILING_BLOCK, "").trim();
  }

  let payload: unknown;
  try {
    payload = JSON.parse(candidate);
  } catch {
    // Recovery pass: models occasionally surround the object with commentary
    // despite the prompt, so slice out the outermost brace-delimited region
    // and try once more.
    const open = candidate.indexOf("{");
    const close = candidate.lastIndexOf("}");
    if (open === -1 || close === -1 || close <= open) {
      throw new Error(`Judge returned non-JSON response: ${truncate(text, 200)}`);
    }
    try {
      payload = JSON.parse(candidate.slice(open, close + 1));
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      throw new Error(`Judge returned invalid JSON: ${detail}. Response: ${truncate(text, 200)}`);
    }
  }

  const checked = schema.safeParse(payload);
  if (checked.success) {
    return checked.data;
  }
  throw new Error(`Judge output failed schema validation: ${formatZodError(checked.error)}`);
}

/**
* Run a focused evaluation query through the Agent SDK subprocess.
*
* The judge prompt is assembled from the caller's system prompt plus a JSON
* schema contract. `maxTurns: 1` and `effort: "low"` keep judge latency and
* cost bounded; MCP servers, hooks, and session persistence are all disabled
* because judges are stateless evaluators, not interactive agents.
*/
export async function runJudgeQuery<T>(
config: PhantomConfig,
options: JudgeQueryOptions<T>,
): Promise<JudgeQueryResult<T>> {
const startTime = Date.now();
const resolvedModel = options.model ?? config.judge_model ?? config.model;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Honor judge_model override in judge selection

runJudgeQuery resolves the model as options.model ?? config.judge_model ?? config.model, but every judge wrapper still passes a hard-coded model into callJudge (for example, the Sonnet/Haiku constants in the judge modules), so config.judge_model is never reached in practice. Operators who set judge_model expecting to shift judge traffic to a cheaper/faster tier will see no behavior change and continue paying for the hard-coded models.

Useful? React with 👍 / 👎.


const schemaJson = z.toJSONSchema(options.schema);
const judgePrompt = buildJudgePrompt(options.systemPrompt, schemaJson);

const queryStream = query({
prompt: options.userMessage,
options: {
model: resolvedModel,
permissionMode: "bypassPermissions",
allowDangerouslySkipPermissions: true,
systemPrompt: {
type: "preset" as const,
preset: "claude_code" as const,
append: judgePrompt,
},
maxTurns: 1,
effort: "low",
persistSession: false,
Comment on lines +121 to +123
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Apply maxTokens when issuing judge subprocess queries

The new judge path keeps maxTokens in the public options shape and forwards it from callJudge, but runJudgeQuery never includes that value in the SDK query() options. This silently drops token caps that previously bounded judge responses, which can increase latency/cost or make long judge outputs fail unpredictably when callers rely on that limit.

Useful? React with 👍 / 👎.

},
});

let responseText = "";
let inputTokens = 0;
let outputTokens = 0;
let resultCostUsd = 0;
let errored: string | null = null;

for await (const message of queryStream) {
switch (message.type) {
case "assistant": {
const content = extractTextFromMessage(message.message);
if (content) responseText = content;
break;
}
case "result": {
const msg = message as {
subtype: string;
result?: string;
total_cost_usd?: number;
usage?: { input_tokens?: number; output_tokens?: number };
};
if (msg.subtype === "success" && msg.result) {
responseText = msg.result;
}
if (msg.subtype !== "success") {
errored = msg.subtype;
}
inputTokens = msg.usage?.input_tokens ?? 0;
outputTokens = msg.usage?.output_tokens ?? 0;
resultCostUsd = msg.total_cost_usd ?? 0;
break;
}
}
}

if (errored) {
throw new Error(`Judge subprocess ended with ${errored}`);
}

const parsed = parseJsonFromResponse<T>(responseText, options.schema);
const envelope = parsed as T & JudgeEnvelopeFields;

return {
verdict: envelope.verdict ?? "pass",
confidence: typeof envelope.confidence === "number" ? envelope.confidence : 1.0,
reasoning: envelope.reasoning ?? envelope.overall_reasoning ?? "",
data: parsed,
model: resolvedModel,
inputTokens,
outputTokens,
costUsd: resultCostUsd,
durationMs: Date.now() - startTime,
};
}

/**
 * Assemble the judge system prompt: the caller's evaluation instructions,
 * a strict raw-JSON output contract, and the pretty-printed JSON schema the
 * response must conform to.
 */
function buildJudgePrompt(systemPrompt: string, schemaJson: unknown): string {
  const contract =
    "You MUST respond with ONLY a JSON object that conforms to the schema below.\n" +
    "Do not include markdown code fences, prose, explanations, or any text outside the JSON object.\n" +
    "The first character of your response must be `{` and the last must be `}`.";
  const schemaBlock = `Schema:\n${JSON.stringify(schemaJson, null, 2)}`;
  return `${systemPrompt}\n\n${contract}\n\n${schemaBlock}`;
}

/**
 * Compact, human-readable summary of a Zod error: at most the first three
 * issues as "path: message" pairs, with a "(+N more)" suffix when truncated.
 */
function formatZodError(error: z.ZodError): string {
  const parts: string[] = [];
  for (const issue of error.issues.slice(0, 3)) {
    const where = issue.path.length > 0 ? issue.path.join(".") : "(root)";
    parts.push(`${where}: ${issue.message}`);
  }
  const hidden = error.issues.length - 3;
  const summary = parts.join("; ");
  return hidden > 0 ? `${summary} (+${hidden} more)` : summary;
}

/** Cap `text` at `max` characters, appending "..." only when shortened. */
function truncate(text: string, max: number): string {
  return text.length > max ? `${text.slice(0, max)}...` : text;
}
Loading
Loading