diff --git a/docs/config/models.mdx b/docs/config/models.mdx index e46b083c65..0a2e0d431e 100644 --- a/docs/config/models.mdx +++ b/docs/config/models.mdx @@ -11,23 +11,25 @@ Mux ships with curated models kept up to date with the frontier. Use any custom {/* BEGIN KNOWN_MODELS_TABLE */} -| Model | ID | Aliases | Default | -| ---------------------- | ----------------------------- | ---------------------------------------- | ------- | -| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ | -| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | | -| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | | -| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | | -| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | | -| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | | -| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | | -| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | | -| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | | -| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | | -| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | | -| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | | -| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | | -| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | | -| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | | +| Model | ID | Aliases | Default | +| ---------------------- | ----------------------------- | ------------------------------------------------------------ | ------- | +| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ | +| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | | +| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | | +| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | | +| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | | +| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | | +| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | | +| Codex 5.3 | openai:gpt-5.3-codex | 
`codex`, `codex-5.3` | | +| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | | +| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | | +| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | | +| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | | +| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | | +| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | | +| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | | +| DeepSeek V4 Pro | deepseek:deepseek-v4-pro | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` | | +| DeepSeek V4 Flash | deepseek:deepseek-v4-flash | `deepseek-flash`, `deepseek-v4-flash` | | {/* END KNOWN_MODELS_TABLE */} diff --git a/src/common/constants/knownModels.ts b/src/common/constants/knownModels.ts index 7d203a5768..cdcb2e3c1a 100644 --- a/src/common/constants/knownModels.ts +++ b/src/common/constants/knownModels.ts @@ -4,7 +4,7 @@ import { formatModelDisplayName } from "../utils/ai/modelDisplay"; -type ModelProvider = "anthropic" | "openai" | "google" | "xai"; +type ModelProvider = "anthropic" | "openai" | "google" | "xai" | "deepseek"; interface KnownModelDefinition { /** Provider identifier used by SDK factories */ @@ -131,6 +131,27 @@ const MODEL_DEFINITIONS = { providerModelId: "grok-code-fast-1", aliases: ["grok-code"], }, + // DeepSeek V4 Pro is the flagship V4 tier (1.6T total / 49B active params, 1M context, + // 384K max output). Bare `deepseek` alias points here per the convention that the + // shortest alias tracks each provider's flagship model (mirrors `gemini` → Gemini Pro, + // `grok` → Grok 4.1). 
+ DEEPSEEK_V4_PRO: { + provider: "deepseek", + providerModelId: "deepseek-v4-pro", + aliases: ["deepseek", "deepseek-pro", "deepseek-v4", "deepseek-v4-pro"], + // V4 ships a custom `encoding_dsv4` tokenizer that isn't published upstream yet; + // reuse v3.1 (the latest available DeepSeek tokenizer in ai-tokenizer) for + // approximate token counting until V4 weights land in the registry. + tokenizerOverride: "deepseek/deepseek-v3.1", + }, + // DeepSeek V4 Flash is the fast/economical V4 tier (284B total / 13B active params). + // Same 1M context + 384K output as Pro; lower cost, smaller scale. + DEEPSEEK_V4_FLASH: { + provider: "deepseek", + providerModelId: "deepseek-v4-flash", + aliases: ["deepseek-flash", "deepseek-v4-flash"], + tokenizerOverride: "deepseek/deepseek-v3.1", + }, } as const satisfies Record; export type KnownModelKey = keyof typeof MODEL_DEFINITIONS; diff --git a/src/common/utils/ai/modelDisplay.test.ts b/src/common/utils/ai/modelDisplay.test.ts index 352004695a..12ddd780be 100644 --- a/src/common/utils/ai/modelDisplay.test.ts +++ b/src/common/utils/ai/modelDisplay.test.ts @@ -49,6 +49,30 @@ describe("formatModelDisplayName", () => { }); }); + describe("DeepSeek models", () => { + test("preserves DeepSeek camel-case branding and uppercases version tags", () => { + expect(formatModelDisplayName("deepseek-v4-pro")).toBe("DeepSeek V4 Pro"); + expect(formatModelDisplayName("deepseek-v4-flash")).toBe("DeepSeek V4 Flash"); + expect(formatModelDisplayName("deepseek-r1")).toBe("DeepSeek R1"); + expect(formatModelDisplayName("deepseek-chat")).toBe("DeepSeek Chat"); + }); + + test("strips provider prefix when DeepSeek model is gateway-scoped", () => { + // OpenRouter exposes the same models under "deepseek/deepseek-v4-pro"; the + // existing slash-stripping branch should route through the DeepSeek handler. 
+ expect(formatModelDisplayName("deepseek/deepseek-v4-pro")).toBe("DeepSeek V4 Pro"); + }); + + test("colon-suffixed Ollama IDs preserve DeepSeek branding and size", () => { + // Locally-pulled DeepSeek models use Ollama tags like "deepseek-r1:8b". + // Both the DeepSeek brand casing and the parenthesized size suffix must + // be preserved; the generic digit-split formatter would otherwise render + // "Deepseek-r 1 (8B)". + expect(formatModelDisplayName("deepseek-r1:8b")).toBe("DeepSeek R1 (8B)"); + expect(formatModelDisplayName("deepseek-coder:6.7b")).toBe("DeepSeek Coder (6.7B)"); + }); + }); + describe("Ollama models", () => { test("formats Llama models with size", () => { expect(formatModelDisplayName("llama3.2:7b")).toBe("Llama 3.2 (7B)"); diff --git a/src/common/utils/ai/modelDisplay.ts b/src/common/utils/ai/modelDisplay.ts index 8ad9d3f9ab..216176ec5f 100644 --- a/src/common/utils/ai/modelDisplay.ts +++ b/src/common/utils/ai/modelDisplay.ts @@ -143,10 +143,38 @@ export function formatModelDisplayName(modelName: string): string { } } + // DeepSeek models - keep camel-cased "DeepSeek" branding and uppercase the + // version segment (e.g. "v4-pro" -> "V4 Pro") since "Deepseek V4 Pro" mis-cases + // the brand name. + // + // Skip when the name carries a colon-suffixed size tag like "deepseek-r1:8b" — + // those are Ollama-style local model IDs and must fall through to the colon-size + // handler below so the size renders as "(8B)" rather than being concatenated + // verbatim. + if (lower.startsWith("deepseek-") && !modelName.includes(":")) { + const parts = lower.replace("deepseek-", "").split("-"); + const formatted = parts + .map((part) => { + // Uppercase short tokens that look like a version tag (e.g. "v4", "r1"). + if (/^[a-z]\d+(?:\.\d+)?$/.test(part)) return part.toUpperCase(); + return capitalize(part); + }) + .join(" "); + return formatted ? 
`DeepSeek ${formatted}` : "DeepSeek"; + } + // Ollama models - handle format like "llama3.2:7b" or "codellama:13b" // Split by colon to handle quantization/size suffix const [baseName, size] = modelName.split(":"); if (size) { + // DeepSeek IDs published as Ollama tags (e.g. "deepseek-r1:8b") need to + // preserve the DeepSeek brand casing before the size suffix is appended. + // Recurse into the formatter for the colon-stripped base so the DeepSeek + // branch above produces "DeepSeek R1", then append "(8B)". Without this, + // the generic digit-split below would render "Deepseek-r 1 (8B)". + if (baseName.toLowerCase().startsWith("deepseek-")) { + return `${formatModelDisplayName(baseName)} (${size.toUpperCase()})`; + } // "llama3.2:7b" -> "Llama 3.2 (7B)" // "codellama:13b" -> "Codellama (13B)" const formatted = baseName diff --git a/src/common/utils/tokens/modelStats.test.ts b/src/common/utils/tokens/modelStats.test.ts index b1b85b5266..6ca42ff14c 100644 --- a/src/common/utils/tokens/modelStats.test.ts +++ b/src/common/utils/tokens/modelStats.test.ts @@ -76,6 +76,25 @@ describe("getModelStats", () => { expect(uncached.cache_read_input_token_cost).toBeUndefined(); }); + test("resolves DeepSeek V4 pricing and limits via direct and gateway forms", () => { + // Direct provider id wires up to the modelsExtra entry. + const pro = expectStats("deepseek:deepseek-v4-pro"); + expect(pro.max_input_tokens).toBe(1_000_000); + expect(pro.max_output_tokens).toBe(384_000); + expect(pro.input_cost_per_token).toBe(0.00000174); + expect(pro.output_cost_per_token).toBe(0.00000348); + expect(pro.cache_read_input_token_cost).toBe(0.000000174); + + // OpenRouter routes "deepseek/deepseek-v4-pro" back to the direct DeepSeek + // entry via normalizeToCanonical, so pricing must match the direct lookup. 
+ expect(expectStats("openrouter:deepseek/deepseek-v4-pro")).toEqual(pro); + + const flash = expectStats("deepseek:deepseek-v4-flash"); + expect(flash.input_cost_per_token).toBe(0.00000014); + expect(flash.output_cost_per_token).toBe(0.00000028); + expect(flash.cache_read_input_token_cost).toBe(0.000000014); + }); + test("returns null for unknown models across direct and gateway forms", () => { expect(getModelStats("unknown:fake-model-9000")).toBeNull(); expect(getModelStats("ollama:this-model-does-not-exist")).toBeNull(); diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index 192fa8e8e7..fafed74964 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -342,4 +342,41 @@ export const modelsExtra: Record = { supports_response_schema: true, supported_endpoints: ["/v1/responses"], }, + + // DeepSeek V4 Pro - Released April 24, 2026 (Preview) + // 1.6T total / 49B active MoE params; 1M context, 384K max output. + // Standard pricing: $1.74/M input, $3.48/M output (full price; an introductory 75% + // discount runs through 2026/05/05 but we record the undiscounted list price so + // billing/forecasts don't silently regress when the promo ends). + // Cache-hit input pricing is documented at 1/10 of input price. + "deepseek-v4-pro": { + max_input_tokens: 1000000, + max_output_tokens: 384000, + input_cost_per_token: 0.00000174, // $1.74 per million input tokens + output_cost_per_token: 0.00000348, // $3.48 per million output tokens + cache_read_input_token_cost: 0.000000174, // 1/10 of input price + litellm_provider: "deepseek", + mode: "chat", + supports_function_calling: true, + supports_reasoning: true, + supports_response_schema: true, + }, + + // DeepSeek V4 Flash - Released April 24, 2026 (Preview) + // 284B total / 13B active MoE params; 1M context, 384K max output. + // Pricing: $0.14/M input, $0.28/M output. Cache-hit input is 1/10 of input price. 
+ // Legacy `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) currently + // route to V4-Flash compatibility modes and retire 2026-07-24. + "deepseek-v4-flash": { + max_input_tokens: 1000000, + max_output_tokens: 384000, + input_cost_per_token: 0.00000014, // $0.14 per million input tokens + output_cost_per_token: 0.00000028, // $0.28 per million output tokens + cache_read_input_token_cost: 0.000000014, // 1/10 of input price + litellm_provider: "deepseek", + mode: "chat", + supports_function_calling: true, + supports_reasoning: true, + supports_response_schema: true, + }, }; diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index d296aedd2f..187e714652 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -2043,23 +2043,25 @@ export const BUILTIN_SKILL_FILES: Record> = { "", "{/* BEGIN KNOWN_MODELS_TABLE */}", "", - "| Model | ID | Aliases | Default |", - "| ---------------------- | ----------------------------- | ---------------------------------------- | ------- |", - "| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ |", - "| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | |", - "| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | |", - "| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | |", - "| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | |", - "| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | |", - "| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | |", - "| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | |", - "| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | |", - "| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |", - "| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |", - "| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |", - "| Gemini 3 Flash Preview | 
google:gemini-3-flash-preview | `gemini-flash` | |", - "| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | |", - "| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |", + "| Model | ID | Aliases | Default |", + "| ---------------------- | ----------------------------- | ------------------------------------------------------------ | ------- |", + "| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ |", + "| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | |", + "| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | |", + "| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | |", + "| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | |", + "| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | |", + "| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | |", + "| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | |", + "| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | |", + "| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |", + "| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |", + "| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |", + "| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | |", + "| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | |", + "| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |", + "| DeepSeek V4 Pro | deepseek:deepseek-v4-pro | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` | |", + "| DeepSeek V4 Flash | deepseek:deepseek-v4-flash | `deepseek-flash`, `deepseek-v4-flash` | |", "", "{/* END KNOWN_MODELS_TABLE */}", "",