From 7b91c16ca552e76c979da74fd98cbb180d23d04f Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 13:10:10 -0500 Subject: [PATCH 1/3] feat: first-class DeepSeek V4 support - Add deepseek to ModelProvider union - Add DEEPSEEK_V4_PRO + DEEPSEEK_V4_FLASH to knownModels with aliases - Add deepseek-v4-pro / deepseek-v4-flash entries to models-extra.ts - Add DeepSeek branch to formatModelDisplayName for proper branding - Regenerate docs/config/models.mdx --- docs/config/models.mdx | 36 +++++++++--------- src/common/constants/knownModels.ts | 23 +++++++++++- src/common/utils/ai/modelDisplay.test.ts | 15 ++++++++ src/common/utils/ai/modelDisplay.ts | 15 ++++++++ src/common/utils/tokens/models-extra.ts | 37 +++++++++++++++++++ .../builtInSkillContent.generated.ts | 36 +++++++++--------- 6 files changed, 127 insertions(+), 35 deletions(-) diff --git a/docs/config/models.mdx b/docs/config/models.mdx index e46b083c65..0a2e0d431e 100644 --- a/docs/config/models.mdx +++ b/docs/config/models.mdx @@ -11,23 +11,25 @@ Mux ships with curated models kept up to date with the frontier. 
Use any custom {/* BEGIN KNOWN_MODELS_TABLE */} -| Model | ID | Aliases | Default | -| ---------------------- | ----------------------------- | ---------------------------------------- | ------- | -| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ | -| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | | -| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | | -| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | | -| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | | -| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | | -| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | | -| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | | -| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | | -| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | | -| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | | -| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | | -| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | | -| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | | -| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | | +| Model | ID | Aliases | Default | +| ---------------------- | ----------------------------- | ------------------------------------------------------------ | ------- | +| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ | +| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | | +| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | | +| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | | +| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | | +| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | | +| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | | +| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | | +| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | | +| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | | +| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | | +| Gemini 3.1 Pro Preview | 
google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | | +| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | | +| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | | +| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | | +| DeepSeek V4 Pro | deepseek:deepseek-v4-pro | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` | | +| DeepSeek V4 Flash | deepseek:deepseek-v4-flash | `deepseek-flash`, `deepseek-v4-flash` | | {/* END KNOWN_MODELS_TABLE */} diff --git a/src/common/constants/knownModels.ts b/src/common/constants/knownModels.ts index 7d203a5768..cdcb2e3c1a 100644 --- a/src/common/constants/knownModels.ts +++ b/src/common/constants/knownModels.ts @@ -4,7 +4,7 @@ import { formatModelDisplayName } from "../utils/ai/modelDisplay"; -type ModelProvider = "anthropic" | "openai" | "google" | "xai"; +type ModelProvider = "anthropic" | "openai" | "google" | "xai" | "deepseek"; interface KnownModelDefinition { /** Provider identifier used by SDK factories */ @@ -131,6 +131,27 @@ const MODEL_DEFINITIONS = { providerModelId: "grok-code-fast-1", aliases: ["grok-code"], }, + // DeepSeek V4 Pro is the flagship V4 tier (1.6T total / 49B active params, 1M context, + // 384K max output). Bare `deepseek` alias points here per the convention that the + // shortest alias tracks each provider's flagship model (mirrors `gemini` → Gemini Pro, + // `grok` → Grok 4.1). + DEEPSEEK_V4_PRO: { + provider: "deepseek", + providerModelId: "deepseek-v4-pro", + aliases: ["deepseek", "deepseek-pro", "deepseek-v4", "deepseek-v4-pro"], + // V4 ships a custom `encoding_dsv4` tokenizer that isn't published upstream yet; + // reuse v3.1 (the latest available DeepSeek tokenizer in ai-tokenizer) for + // approximate token counting until V4 weights land in the registry. + tokenizerOverride: "deepseek/deepseek-v3.1", + }, + // DeepSeek V4 Flash is the fast/economical V4 tier (284B total / 13B active params). 
+ // Same 1M context + 384K output as Pro; lower cost, smaller scale. + DEEPSEEK_V4_FLASH: { + provider: "deepseek", + providerModelId: "deepseek-v4-flash", + aliases: ["deepseek-flash", "deepseek-v4-flash"], + tokenizerOverride: "deepseek/deepseek-v3.1", + }, } as const satisfies Record; export type KnownModelKey = keyof typeof MODEL_DEFINITIONS; diff --git a/src/common/utils/ai/modelDisplay.test.ts b/src/common/utils/ai/modelDisplay.test.ts index 352004695a..467bd35975 100644 --- a/src/common/utils/ai/modelDisplay.test.ts +++ b/src/common/utils/ai/modelDisplay.test.ts @@ -49,6 +49,21 @@ describe("formatModelDisplayName", () => { }); }); + describe("DeepSeek models", () => { + test("preserves DeepSeek camel-case branding and uppercases version tags", () => { + expect(formatModelDisplayName("deepseek-v4-pro")).toBe("DeepSeek V4 Pro"); + expect(formatModelDisplayName("deepseek-v4-flash")).toBe("DeepSeek V4 Flash"); + expect(formatModelDisplayName("deepseek-r1")).toBe("DeepSeek R1"); + expect(formatModelDisplayName("deepseek-chat")).toBe("DeepSeek Chat"); + }); + + test("strips provider prefix when DeepSeek model is gateway-scoped", () => { + // OpenRouter exposes the same models under "deepseek/deepseek-v4-pro"; the + // existing slash-stripping branch should route through the DeepSeek handler. + expect(formatModelDisplayName("deepseek/deepseek-v4-pro")).toBe("DeepSeek V4 Pro"); + }); + }); + describe("Ollama models", () => { test("formats Llama models with size", () => { expect(formatModelDisplayName("llama3.2:7b")).toBe("Llama 3.2 (7B)"); diff --git a/src/common/utils/ai/modelDisplay.ts b/src/common/utils/ai/modelDisplay.ts index 8ad9d3f9ab..97af08f136 100644 --- a/src/common/utils/ai/modelDisplay.ts +++ b/src/common/utils/ai/modelDisplay.ts @@ -143,6 +143,21 @@ export function formatModelDisplayName(modelName: string): string { } } + // DeepSeek models - keep camel-cased "DeepSeek" branding and uppercase the + // version segment (e.g. 
"v4-pro" -> "V4 Pro") since "Deepseek V4 Pro" mis-cases + // the brand name. + if (lower.startsWith("deepseek-")) { + const parts = lower.replace("deepseek-", "").split("-"); + const formatted = parts + .map((part) => { + // Uppercase short tokens that look like a version tag (e.g. "v4", "r1"). + if (/^[a-z]\d+(?:\.\d+)?$/.test(part)) return part.toUpperCase(); + return capitalize(part); + }) + .join(" "); + return formatted ? `DeepSeek ${formatted}` : "DeepSeek"; + } + // Ollama models - handle format like "llama3.2:7b" or "codellama:13b" // Split by colon to handle quantization/size suffix const [baseName, size] = modelName.split(":"); diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index 192fa8e8e7..fafed74964 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -342,4 +342,41 @@ export const modelsExtra: Record = { supports_response_schema: true, supported_endpoints: ["/v1/responses"], }, + + // DeepSeek V4 Pro - Released April 24, 2026 (Preview) + // 1.6T total / 49B active MoE params; 1M context, 384K max output. + // Standard pricing: $1.74/M input, $3.48/M output (full price; an introductory 75% + // discount runs through 2026-05-05 but we record the undiscounted list price so + // billing/forecasts don't silently regress when the promo ends). + // Cache-hit input pricing is documented at 1/10 of input price.
+ "deepseek-v4-pro": { + max_input_tokens: 1000000, + max_output_tokens: 384000, + input_cost_per_token: 0.00000174, // $1.74 per million input tokens + output_cost_per_token: 0.00000348, // $3.48 per million output tokens + cache_read_input_token_cost: 0.000000174, // 1/10 of input price + litellm_provider: "deepseek", + mode: "chat", + supports_function_calling: true, + supports_reasoning: true, + supports_response_schema: true, + }, + + // DeepSeek V4 Flash - Released April 24, 2026 (Preview) + // 284B total / 13B active MoE params; 1M context, 384K max output. + // Pricing: $0.14/M input, $0.28/M output. Cache-hit input is 1/10 of input price. + // Legacy `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) currently + // route to V4-Flash compatibility modes and retire 2026-07-24. + "deepseek-v4-flash": { + max_input_tokens: 1000000, + max_output_tokens: 384000, + input_cost_per_token: 0.00000014, // $0.14 per million input tokens + output_cost_per_token: 0.00000028, // $0.28 per million output tokens + cache_read_input_token_cost: 0.000000014, // 1/10 of input price + litellm_provider: "deepseek", + mode: "chat", + supports_function_calling: true, + supports_reasoning: true, + supports_response_schema: true, + }, }; diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index d296aedd2f..187e714652 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -2043,23 +2043,25 @@ export const BUILTIN_SKILL_FILES: Record> = { "", "{/* BEGIN KNOWN_MODELS_TABLE */}", "", - "| Model | ID | Aliases | Default |", - "| ---------------------- | ----------------------------- | ---------------------------------------- | ------- |", - "| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ |", - "| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | |", - "| Haiku 4.5 | 
anthropic:claude-haiku-4-5 | `haiku` | |", - "| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | |", - "| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | |", - "| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | |", - "| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | |", - "| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | |", - "| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | |", - "| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |", - "| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |", - "| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |", - "| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | |", - "| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | |", - "| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |", + "| Model | ID | Aliases | Default |", + "| ---------------------- | ----------------------------- | ------------------------------------------------------------ | ------- |", + "| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ |", + "| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | |", + "| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | |", + "| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | |", + "| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | |", + "| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | |", + "| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | |", + "| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | |", + "| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | |", + "| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |", + "| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |", + "| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |", + "| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | |", + "| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | 
|", + "| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |", + "| DeepSeek V4 Pro | deepseek:deepseek-v4-pro | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` | |", + "| DeepSeek V4 Flash | deepseek:deepseek-v4-flash | `deepseek-flash`, `deepseek-v4-flash` | |", "", "{/* END KNOWN_MODELS_TABLE */}", "", From fb40a6d1f8e9cb8b0bd5906560b407cb7a209ceb Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 13:16:28 -0500 Subject: [PATCH 2/3] fix: skip DeepSeek brand path for colon-suffixed Ollama IDs Codex flagged that 'deepseek-r1:8b' (Ollama-style local tag) was rendering as 'DeepSeek R1:8b' instead of falling through to the colon-size handler that produces '(8B)'. Gate the DeepSeek branch on absence of ':' and add a regression test. --- src/common/utils/ai/modelDisplay.test.ts | 7 +++++++ src/common/utils/ai/modelDisplay.ts | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/common/utils/ai/modelDisplay.test.ts b/src/common/utils/ai/modelDisplay.test.ts index 467bd35975..b044361303 100644 --- a/src/common/utils/ai/modelDisplay.test.ts +++ b/src/common/utils/ai/modelDisplay.test.ts @@ -62,6 +62,13 @@ describe("formatModelDisplayName", () => { // existing slash-stripping branch should route through the DeepSeek handler. expect(formatModelDisplayName("deepseek/deepseek-v4-pro")).toBe("DeepSeek V4 Pro"); }); + + test("colon-suffixed Ollama IDs fall through to the size handler", () => { + // Locally-pulled DeepSeek models use Ollama tags like "deepseek-r1:8b". The + // DeepSeek brand branch must not preempt the colon-size formatter; otherwise + // the size renders verbatim instead of as "(8B)". 
+ expect(formatModelDisplayName("deepseek-r1:8b")).toContain("(8B)"); + }); }); describe("Ollama models", () => { diff --git a/src/common/utils/ai/modelDisplay.ts b/src/common/utils/ai/modelDisplay.ts index 97af08f136..31d38b757a 100644 --- a/src/common/utils/ai/modelDisplay.ts +++ b/src/common/utils/ai/modelDisplay.ts @@ -146,7 +146,12 @@ export function formatModelDisplayName(modelName: string): string { // DeepSeek models - keep camel-cased "DeepSeek" branding and uppercase the // version segment (e.g. "v4-pro" -> "V4 Pro") since "Deepseek V4 Pro" mis-cases // the brand name. - if (lower.startsWith("deepseek-")) { + // + // Skip when the name carries a colon-suffixed size tag like "deepseek-r1:8b" — + // those are Ollama-style local model IDs and must fall through to the colon-size + // handler below so the size renders as "(8B)" rather than being concatenated + // verbatim. + if (lower.startsWith("deepseek-") && !modelName.includes(":")) { const parts = lower.replace("deepseek-", "").split("-"); const formatted = parts .map((part) => { From fd572c794ed939304a62b885f1b1abd10896e51b Mon Sep 17 00:00:00 2001 From: Ammar Date: Tue, 5 May 2026 13:26:31 -0500 Subject: [PATCH 3/3] fix: preserve DeepSeek branding for colon-tagged Ollama IDs + pricing test Codex round-2 noted that gating the DeepSeek branch on absence of `:` makes deepseek-r1:8b fall through to the generic digit-split formatter, producing 'Deepseek-r 1 (8B)' (lost casing, broken token split). Compose the two paths instead: when a colon-suffixed name starts with 'deepseek-', recurse into the formatter for the colon-stripped base (which routes through the DeepSeek brand path) and then append the size, yielding 'DeepSeek R1 (8B)'. Also add an explicit getModelStats regression test that asserts V4 Pro and Flash pricing resolves correctly through both direct (deepseek:) and OpenRouter-style (openrouter:deepseek/) ids, so pricing can't silently regress.
--- src/common/utils/ai/modelDisplay.test.ts | 12 +++++++----- src/common/utils/ai/modelDisplay.ts | 8 ++++++++ src/common/utils/tokens/modelStats.test.ts | 19 +++++++++++++++++++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/common/utils/ai/modelDisplay.test.ts b/src/common/utils/ai/modelDisplay.test.ts index b044361303..12ddd780be 100644 --- a/src/common/utils/ai/modelDisplay.test.ts +++ b/src/common/utils/ai/modelDisplay.test.ts @@ -63,11 +63,13 @@ describe("formatModelDisplayName", () => { expect(formatModelDisplayName("deepseek/deepseek-v4-pro")).toBe("DeepSeek V4 Pro"); }); - test("colon-suffixed Ollama IDs fall through to the size handler", () => { - // Locally-pulled DeepSeek models use Ollama tags like "deepseek-r1:8b". The - // DeepSeek brand branch must not preempt the colon-size formatter; otherwise - // the size renders verbatim instead of as "(8B)". - expect(formatModelDisplayName("deepseek-r1:8b")).toContain("(8B)"); + test("colon-suffixed Ollama IDs preserve DeepSeek branding and size", () => { + // Locally-pulled DeepSeek models use Ollama tags like "deepseek-r1:8b". + // Both the DeepSeek brand casing and the parenthesized size suffix must + // be preserved; the generic digit-split formatter would otherwise render + // "Deepseek-r 1 (8B)". + expect(formatModelDisplayName("deepseek-r1:8b")).toBe("DeepSeek R1 (8B)"); + expect(formatModelDisplayName("deepseek-coder:6.7b")).toBe("DeepSeek Coder (6.7B)"); }); }); diff --git a/src/common/utils/ai/modelDisplay.ts b/src/common/utils/ai/modelDisplay.ts index 31d38b757a..216176ec5f 100644 --- a/src/common/utils/ai/modelDisplay.ts +++ b/src/common/utils/ai/modelDisplay.ts @@ -167,6 +167,14 @@ export function formatModelDisplayName(modelName: string): string { // Split by colon to handle quantization/size suffix const [baseName, size] = modelName.split(":"); if (size) { + // DeepSeek IDs published as Ollama tags (e.g. 
"deepseek-r1:8b") need to + // preserve the DeepSeek brand casing before the size suffix is appended. + // Recurse into the formatter for the colon-stripped base so the DeepSeek + // branch above produces "DeepSeek R1", then append "(8B)". Without this, + // the generic digit-split below would render "Deepseek-r 1 (8B)". + if (baseName.toLowerCase().startsWith("deepseek-")) { + return `${formatModelDisplayName(baseName)} (${size.toUpperCase()})`; + } // "llama3.2:7b" -> "Llama 3.2 (7B)" // "codellama:13b" -> "Codellama (13B)" const formatted = baseName diff --git a/src/common/utils/tokens/modelStats.test.ts b/src/common/utils/tokens/modelStats.test.ts index b1b85b5266..6ca42ff14c 100644 --- a/src/common/utils/tokens/modelStats.test.ts +++ b/src/common/utils/tokens/modelStats.test.ts @@ -76,6 +76,25 @@ describe("getModelStats", () => { expect(uncached.cache_read_input_token_cost).toBeUndefined(); }); + test("resolves DeepSeek V4 pricing and limits via direct and gateway forms", () => { + // Direct provider id wires up to the modelsExtra entry. + const pro = expectStats("deepseek:deepseek-v4-pro"); + expect(pro.max_input_tokens).toBe(1_000_000); + expect(pro.max_output_tokens).toBe(384_000); + expect(pro.input_cost_per_token).toBe(0.00000174); + expect(pro.output_cost_per_token).toBe(0.00000348); + expect(pro.cache_read_input_token_cost).toBe(0.000000174); + + // OpenRouter routes "deepseek/deepseek-v4-pro" back to the direct DeepSeek + // entry via normalizeToCanonical, so pricing must match the direct lookup. 
+ expect(expectStats("openrouter:deepseek/deepseek-v4-pro")).toEqual(pro); + + const flash = expectStats("deepseek:deepseek-v4-flash"); + expect(flash.input_cost_per_token).toBe(0.00000014); + expect(flash.output_cost_per_token).toBe(0.00000028); + expect(flash.cache_read_input_token_cost).toBe(0.000000014); + }); + test("returns null for unknown models across direct and gateway forms", () => { expect(getModelStats("unknown:fake-model-9000")).toBeNull(); expect(getModelStats("ollama:this-model-does-not-exist")).toBeNull();