Gate perf benchmarks behind VITEST_PERF; make Claude effort resolution capability-aware

* apps/server/package.json
    The `test:perf` script now sets VITEST_PERF=1 before running
    `vitest run integration/perf`.

* apps/server/vitest.config.ts
    `integration/perf/**` is appended to `configDefaults.exclude` unless
    `process.env.VITEST_PERF` is exactly "1".

* apps/server/src/provider/Layers/ClaudeAdapter.ts
    `getEffectiveClaudeCodeEffort` now receives the model capabilities and the
    raw effort string instead of a pre-resolved effort:
      1. if `trimOrNull(rawEffort)` is one of the model's reasoning effort
         levels that is not prompt-injected, it is returned unchanged;
      2. if `trimOrNull(rawEffort)` is falsy, the level flagged `isDefault` is
         returned, or null when there is none or it is prompt-injected;
      3. otherwise the last non-prompt-injected level is returned, or null
         when the model has none.
    The `resolveEffort` import and its call site are removed.

NOTE(review): step 3 takes the LAST element of `caps.reasoningEffortLevels`
(after filtering) as "the highest" level. That presumably relies on the
capability table being ordered lowest -> highest; confirm against the table.

NOTE(review): this patch text was recovered from a copy in which line breaks,
leading whitespace and angle-bracketed type arguments had been lost. Line
structure was rebuilt to match the hunk headers, and
`Exclude<ClaudeCodeEffort, "ultrathink">` was restored from the one intact
occurrence in the same function. The type arguments of `Record` on the context
line of the last ClaudeAdapter.ts hunk could not be recovered and are left as
found; indentation is reconstructed. Verify both against the target files
(or apply with whitespace tolerance) before use.

diff --git a/apps/server/package.json b/apps/server/package.json
index c63836d95d5..87e98ada112 100644
--- a/apps/server/package.json
+++ b/apps/server/package.json
@@ -25,7 +25,7 @@
     "typecheck": "tsc --noEmit",
     "test": "vitest run",
     "test:process-reaper": "vitest run src/server.test.ts src/provider/Layers/ClaudeAdapter.test.ts src/provider/Layers/ProviderSessionDirectory.test.ts src/provider/Layers/ProviderSessionReaper.test.ts src/provider/Layers/CodexAdapter.test.ts",
-    "test:perf": "vitest run integration/perf"
+    "test:perf": "VITEST_PERF=1 vitest run integration/perf"
   },
   "dependencies": {
     "@anthropic-ai/claude-agent-sdk": "^0.2.111",
diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts
index 7ea4b6be2b4..9bf918aaebe 100644
--- a/apps/server/src/provider/Layers/ClaudeAdapter.ts
+++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts
@@ -42,12 +42,12 @@ import {
   TurnId,
   type UserInputQuestion,
   ClaudeCodeEffort,
+  type ModelCapabilities,
   RuntimeMode,
 } from "@t3tools/contracts";
 import {
   applyClaudePromptEffortPrefix,
   resolveApiModelId,
-  resolveEffort,
   trimOrNull,
 } from "@t3tools/shared/model";
 import {
@@ -222,12 +222,36 @@ function isSyntheticClaudeThreadId(value: string): boolean {
 }
 
 function getEffectiveClaudeCodeEffort(
-  effort: ClaudeCodeEffort | null | undefined,
+  caps: ModelCapabilities,
+  rawEffort: string | null | undefined,
 ): Exclude<ClaudeCodeEffort, "ultrathink"> | null {
-  if (!effort) {
+  const promptInjected = new Set(caps.promptInjectedEffortLevels);
+  const supportedNonPromptLevels = caps.reasoningEffortLevels
+    .map((level) => level.value)
+    .filter((value) => !promptInjected.has(value));
+
+  const trimmed = trimOrNull(rawEffort);
+
+  if (trimmed && supportedNonPromptLevels.includes(trimmed)) {
+    return trimmed as Exclude<ClaudeCodeEffort, "ultrathink">;
+  }
+
+  if (!trimmed) {
+    const defaultValue = caps.reasoningEffortLevels.find((level) => level.isDefault)?.value;
+    return defaultValue && !promptInjected.has(defaultValue)
+      ? (defaultValue as Exclude<ClaudeCodeEffort, "ultrathink">)
+      : null;
+  }
+
+  // Raw effort is unsupported (e.g. "max" on Sonnet 4.6) or prompt-injected
+  // (e.g. "ultrathink"); cap to the highest supported non-prompt-injected level.
+  if (supportedNonPromptLevels.length === 0) {
     return null;
   }
-  return effort === "ultrathink" ? null : effort;
+  return supportedNonPromptLevels[supportedNonPromptLevels.length - 1] as Exclude<
+    ClaudeCodeEffort,
+    "ultrathink"
+  >;
 }
 
 function isClaudeInterruptedMessage(message: string): boolean {
@@ -2927,14 +2951,12 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
         input.modelSelection?.provider === "claudeAgent" ? input.modelSelection : undefined;
       const caps = getClaudeModelCapabilities(modelSelection?.model);
       const apiModelId = modelSelection ? resolveApiModelId(modelSelection) : undefined;
-      const effort = (resolveEffort(caps, modelSelection?.options?.effort) ??
-        null) as ClaudeCodeEffort | null;
       const fastMode = modelSelection?.options?.fastMode === true && caps.supportsFastMode;
       const thinking =
         typeof modelSelection?.options?.thinking === "boolean" && caps.supportsThinkingToggle
           ? modelSelection.options.thinking
           : undefined;
-      const effectiveEffort = getEffectiveClaudeCodeEffort(effort);
+      const effectiveEffort = getEffectiveClaudeCodeEffort(caps, modelSelection?.options?.effort);
       const runtimeModeToPermission: Record = {
         "approval-required": "default",
         "auto-accept-edits": "acceptEdits",
diff --git a/apps/server/vitest.config.ts b/apps/server/vitest.config.ts
index 1c5b2f0d38d..2df4e464fd3 100644
--- a/apps/server/vitest.config.ts
+++ b/apps/server/vitest.config.ts
@@ -1,7 +1,11 @@
-import { defineConfig, mergeConfig } from "vitest/config";
+import { configDefaults, defineConfig, mergeConfig } from "vitest/config";
 
 import baseConfig from "../../vitest.config";
 
+// Perf benchmarks are wall-clock sensitive and unreliable on shared CI hardware.
+// `bun run test:perf` opts back in by setting VITEST_PERF=1.
+const includePerf = process.env.VITEST_PERF === "1";
+
 export default mergeConfig(
   baseConfig,
   defineConfig({
@@ -13,6 +17,9 @@ export default mergeConfig(
       // Under package-wide parallel runs they regularly exceed the default 15s budget.
       testTimeout: 60_000,
       hookTimeout: 60_000,
+      exclude: includePerf
+        ? [...configDefaults.exclude]
+        : [...configDefaults.exclude, "integration/perf/**"],
     },
   }),
 );