Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 19 additions & 17 deletions docs/config/models.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,25 @@ Mux ships with curated models kept up to date with the frontier. Use any custom

{/* BEGIN KNOWN_MODELS_TABLE */}

| Model | ID | Aliases | Default |
| ---------------------- | ----------------------------- | ---------------------------------------- | ------- |
| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ |
| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | |
| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | |
| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | |
| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | |
| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | |
| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | |
| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | |
| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | |
| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |
| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |
| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |
| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | |
| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | |
| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |
| Model | ID | Aliases | Default |
| ---------------------- | ----------------------------- | ------------------------------------------------------------ | ------- |
| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | ✓ |
| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | |
| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | |
| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | |
| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | |
| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | |
| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | |
| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | |
| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | |
| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |
| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |
| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |
| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | |
| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | |
| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |
| DeepSeek V4 Pro | deepseek:deepseek-v4-pro | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` | |
| DeepSeek V4 Flash | deepseek:deepseek-v4-flash | `deepseek-flash`, `deepseek-v4-flash` | |

{/* END KNOWN_MODELS_TABLE */}

Expand Down
23 changes: 22 additions & 1 deletion src/common/constants/knownModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import { formatModelDisplayName } from "../utils/ai/modelDisplay";

type ModelProvider = "anthropic" | "openai" | "google" | "xai";
/** Closed set of provider identifier strings recognized by this module. */
type ModelProvider =
  | "anthropic"
  | "openai"
  | "google"
  | "xai"
  | "deepseek";

interface KnownModelDefinition {
/** Provider identifier used by SDK factories */
Expand Down Expand Up @@ -131,6 +131,27 @@ const MODEL_DEFINITIONS = {
providerModelId: "grok-code-fast-1",
aliases: ["grok-code"],
},
// DeepSeek V4 Pro is the flagship V4 tier (1.6T total / 49B active params, 1M context,
// 384K max output). Bare `deepseek` alias points here per the convention that the
// shortest alias tracks each provider's flagship model (mirrors `gemini` → Gemini Pro,
// `grok` → Grok 4.1).
DEEPSEEK_V4_PRO: {
provider: "deepseek",
providerModelId: "deepseek-v4-pro",
aliases: ["deepseek", "deepseek-pro", "deepseek-v4", "deepseek-v4-pro"],
// V4 ships a custom `encoding_dsv4` tokenizer that isn't published upstream yet;
// reuse v3.1 (the latest available DeepSeek tokenizer in ai-tokenizer) for
// approximate token counting until V4 weights land in the registry.
tokenizerOverride: "deepseek/deepseek-v3.1",
},
// DeepSeek V4 Flash is the fast/economical V4 tier (284B total / 13B active params).
// Same 1M context + 384K output as Pro; lower cost, smaller scale.
DEEPSEEK_V4_FLASH: {
provider: "deepseek",
providerModelId: "deepseek-v4-flash",
aliases: ["deepseek-flash", "deepseek-v4-flash"],
tokenizerOverride: "deepseek/deepseek-v3.1",
},
} as const satisfies Record<string, KnownModelDefinition>;

export type KnownModelKey = keyof typeof MODEL_DEFINITIONS;
Expand Down
24 changes: 24 additions & 0 deletions src/common/utils/ai/modelDisplay.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,30 @@ describe("formatModelDisplayName", () => {
});
});

describe("DeepSeek models", () => {
  test("preserves DeepSeek camel-case branding and uppercases version tags", () => {
    // Bare provider model IDs paired with the display name each one must produce:
    // the brand keeps its "DeepSeek" casing and tags like "v4"/"r1" are uppercased.
    const expectedDisplayNames: Array<[string, string]> = [
      ["deepseek-v4-pro", "DeepSeek V4 Pro"],
      ["deepseek-v4-flash", "DeepSeek V4 Flash"],
      ["deepseek-r1", "DeepSeek R1"],
      ["deepseek-chat", "DeepSeek Chat"],
    ];
    for (const [modelId, displayName] of expectedDisplayNames) {
      expect(formatModelDisplayName(modelId)).toBe(displayName);
    }
  });

  test("strips provider prefix when DeepSeek model is gateway-scoped", () => {
    // Gateways such as OpenRouter expose the model as "deepseek/deepseek-v4-pro";
    // the slash-prefixed form must render the same name as the bare model ID.
    const gatewayScopedId = "deepseek/deepseek-v4-pro";
    expect(formatModelDisplayName(gatewayScopedId)).toBe("DeepSeek V4 Pro");
  });

  test("colon-suffixed Ollama IDs preserve DeepSeek branding and size", () => {
    // Ollama-style tags ("<model>:<size>") for locally pulled DeepSeek models.
    // The result must keep the DeepSeek brand casing and append the size in
    // parentheses, rather than going through the generic digit-split formatting.
    const expectedOllamaNames: Array<[string, string]> = [
      ["deepseek-r1:8b", "DeepSeek R1 (8B)"],
      ["deepseek-coder:6.7b", "DeepSeek Coder (6.7B)"],
    ];
    for (const [ollamaTag, displayName] of expectedOllamaNames) {
      expect(formatModelDisplayName(ollamaTag)).toBe(displayName);
    }
  });
});

describe("Ollama models", () => {
test("formats Llama models with size", () => {
expect(formatModelDisplayName("llama3.2:7b")).toBe("Llama 3.2 (7B)");
Expand Down
28 changes: 28 additions & 0 deletions src/common/utils/ai/modelDisplay.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,38 @@ export function formatModelDisplayName(modelName: string): string {
}
}

// DeepSeek models - keep camel-cased "DeepSeek" branding and uppercase the
// version segment (e.g. "v4-pro" -> "V4 Pro") since "Deepseek V4 Pro" mis-cases
// the brand name.
//
// Skip when the name carries a colon-suffixed size tag like "deepseek-r1:8b" —
// those are Ollama-style local model IDs and must fall through to the colon-size
// handler below so the size renders as "(8B)" rather than being concatenated
// verbatim.
if (lower.startsWith("deepseek-") && !modelName.includes(":")) {
Comment thread
ammar-agent marked this conversation as resolved.
const parts = lower.replace("deepseek-", "").split("-");
const formatted = parts
.map((part) => {
// Uppercase short tokens that look like a version tag (e.g. "v4", "r1").
if (/^[a-z]\d+(?:\.\d+)?$/.test(part)) return part.toUpperCase();
return capitalize(part);
})
.join(" ");
return formatted ? `DeepSeek ${formatted}` : "DeepSeek";
}

// Ollama models - handle format like "llama3.2:7b" or "codellama:13b"
// Split by colon to handle quantization/size suffix
const [baseName, size] = modelName.split(":");
if (size) {
// DeepSeek IDs published as Ollama tags (e.g. "deepseek-r1:8b") need to
// preserve the DeepSeek brand casing before the size suffix is appended.
// Recurse into the formatter for the colon-stripped base so the DeepSeek
// branch above produces "DeepSeek R1", then append "(8B)". Without this,
// the generic digit-split below would render "Deepseek-r 1 (8B)".
if (baseName.toLowerCase().startsWith("deepseek-")) {
return `${formatModelDisplayName(baseName)} (${size.toUpperCase()})`;
}
// "llama3.2:7b" -> "Llama 3.2 (7B)"
// "codellama:13b" -> "Codellama (13B)"
const formatted = baseName
Expand Down
19 changes: 19 additions & 0 deletions src/common/utils/tokens/modelStats.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,25 @@ describe("getModelStats", () => {
expect(uncached.cache_read_input_token_cost).toBeUndefined();
});

test("resolves DeepSeek V4 pricing and limits via direct and gateway forms", () => {
  // Pro tier, looked up by its direct provider ID: pin the context/output limits
  // and the per-token input, output, and cache-read prices.
  const proStats = expectStats("deepseek:deepseek-v4-pro");
  expect(proStats.max_input_tokens).toBe(1_000_000);
  expect(proStats.max_output_tokens).toBe(384_000);
  expect(proStats.input_cost_per_token).toBe(0.00000174);
  expect(proStats.output_cost_per_token).toBe(0.00000348);
  expect(proStats.cache_read_input_token_cost).toBe(0.000000174);

  // The OpenRouter-scoped ID is expected to be canonicalized back to the direct
  // DeepSeek entry, so its stats must be deep-equal to the direct lookup.
  const proViaOpenRouter = expectStats("openrouter:deepseek/deepseek-v4-pro");
  expect(proViaOpenRouter).toEqual(proStats);

  // Flash tier: only the per-token prices are pinned here.
  const flashStats = expectStats("deepseek:deepseek-v4-flash");
  expect(flashStats.input_cost_per_token).toBe(0.00000014);
  expect(flashStats.output_cost_per_token).toBe(0.00000028);
  expect(flashStats.cache_read_input_token_cost).toBe(0.000000014);
});

test("returns null for unknown models across direct and gateway forms", () => {
expect(getModelStats("unknown:fake-model-9000")).toBeNull();
expect(getModelStats("ollama:this-model-does-not-exist")).toBeNull();
Expand Down
37 changes: 37 additions & 0 deletions src/common/utils/tokens/models-extra.ts
Original file line number Diff line number Diff line change
Expand Up @@ -342,4 +342,41 @@ export const modelsExtra: Record<string, ModelData> = {
supports_response_schema: true,
supported_endpoints: ["/v1/responses"],
},

// DeepSeek V4 Pro - Released April 24, 2026 (Preview)
// 1.6T total / 49B active MoE params; 1M context, 384K max output.
// Standard pricing: $1.74/M input, $3.48/M output. An introductory 75% discount runs
// through 2026/05/05, but we record the full (undiscounted) list price so
// billing/forecasts don't silently regress when the promo ends.
// Cache-hit input pricing is documented at 1/10 of input price.
"deepseek-v4-pro": {
max_input_tokens: 1000000,
max_output_tokens: 384000,
input_cost_per_token: 0.00000174, // $1.74 per million input tokens
output_cost_per_token: 0.00000348, // $3.48 per million output tokens
cache_read_input_token_cost: 0.000000174, // 1/10 of input price
litellm_provider: "deepseek",
mode: "chat",
supports_function_calling: true,
supports_reasoning: true,
supports_response_schema: true,
},

// DeepSeek V4 Flash - Released April 24, 2026 (Preview)
// 284B total / 13B active MoE params; 1M context, 384K max output.
// Pricing: $0.14/M input, $0.28/M output. Cache-hit input is 1/10 of input price.
// Legacy `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) currently
// route to V4-Flash compatibility modes and retire 2026-07-24.
"deepseek-v4-flash": {
max_input_tokens: 1000000,
max_output_tokens: 384000,
input_cost_per_token: 0.00000014, // $0.14 per million input tokens
output_cost_per_token: 0.00000028, // $0.28 per million output tokens
cache_read_input_token_cost: 0.000000014, // 1/10 of input price
litellm_provider: "deepseek",
mode: "chat",
supports_function_calling: true,
supports_reasoning: true,
supports_response_schema: true,
},
};
36 changes: 19 additions & 17 deletions src/node/services/agentSkills/builtInSkillContent.generated.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2043,23 +2043,25 @@ export const BUILTIN_SKILL_FILES: Record<string, Record<string, string>> = {
"",
"{/* BEGIN KNOWN_MODELS_TABLE */}",
"",
"| Model | ID | Aliases | Default |",
"| ---------------------- | ----------------------------- | ---------------------------------------- | ------- |",
"| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | βœ“ |",
"| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | |",
"| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | |",
"| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | |",
"| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | |",
"| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | |",
"| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | |",
"| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | |",
"| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | |",
"| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |",
"| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |",
"| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |",
"| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | |",
"| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | |",
"| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |",
"| Model | ID | Aliases | Default |",
"| ---------------------- | ----------------------------- | ------------------------------------------------------------ | ------- |",
"| Opus 4.7 | anthropic:claude-opus-4-7 | `opus` | βœ“ |",
"| Sonnet 4.6 | anthropic:claude-sonnet-4-6 | `sonnet` | |",
"| Haiku 4.5 | anthropic:claude-haiku-4-5 | `haiku` | |",
"| GPT-5.5 | openai:gpt-5.5 | `gpt`, `gpt-5.5` | |",
"| GPT-5.5 Pro | openai:gpt-5.5-pro | `gpt-pro`, `gpt-5.5-pro` | |",
"| GPT-5.4 Mini | openai:gpt-5.4-mini | `gpt-mini` | |",
"| GPT-5.4 Nano | openai:gpt-5.4-nano | `gpt-nano` | |",
"| Codex 5.3 | openai:gpt-5.3-codex | `codex`, `codex-5.3` | |",
"| Spark 5.3 | openai:gpt-5.3-codex-spark | `spark` | |",
"| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |",
"| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |",
"| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |",
"| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | |",
"| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | |",
"| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |",
"| DeepSeek V4 Pro | deepseek:deepseek-v4-pro | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` | |",
"| DeepSeek V4 Flash | deepseek:deepseek-v4-flash | `deepseek-flash`, `deepseek-v4-flash` | |",
"",
"{/* END KNOWN_MODELS_TABLE */}",
"",
Expand Down
Loading