Skip to content

Commit 0612739

Browse files
Add OpenAI GPT-5.3 chat latest and GPT-5.4 model support (#11848)
1 parent 52ce796 commit 0612739

File tree

9 files changed

+374
-1
lines changed

9 files changed

+374
-1
lines changed

.changeset/gpt-5-3-chat-latest.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"roo-cline": patch
3+
---
4+
5+
Add OpenAI's GPT-5.3-Chat-Latest model support

.changeset/gpt-5-4.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"roo-cline": patch
3+
---
4+
5+
Add OpenAI's GPT-5.4 model support

packages/types/src/model.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,16 @@ export const modelInfoSchema = z.object({
9898
outputPrice: z.number().optional(),
9999
cacheWritesPrice: z.number().optional(),
100100
cacheReadsPrice: z.number().optional(),
101+
longContextPricing: z
102+
.object({
103+
thresholdTokens: z.number(),
104+
inputPriceMultiplier: z.number().optional(),
105+
outputPriceMultiplier: z.number().optional(),
106+
cacheWritesPriceMultiplier: z.number().optional(),
107+
cacheReadsPriceMultiplier: z.number().optional(),
108+
appliesToServiceTiers: z.array(serviceTierSchema).optional(),
109+
})
110+
.optional(),
101111
description: z.string().optional(),
102112
// Default effort value for models that support reasoning effort
103113
reasoningEffort: reasoningEffortExtendedSchema.optional(),

packages/types/src/providers/openai.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,32 @@ export const openAiNativeModels = {
2424
description:
2525
"GPT-5.1 Codex Max: Our most intelligent coding model optimized for long-horizon, agentic coding tasks",
2626
},
27+
"gpt-5.4": {
28+
maxTokens: 128000,
29+
contextWindow: 1_050_000,
30+
includedTools: ["apply_patch"],
31+
excludedTools: ["apply_diff", "write_to_file"],
32+
supportsImages: true,
33+
supportsPromptCache: true,
34+
supportsReasoningEffort: ["none", "low", "medium", "high", "xhigh"],
35+
reasoningEffort: "none",
36+
inputPrice: 2.5,
37+
outputPrice: 15.0,
38+
cacheReadsPrice: 0.25,
39+
longContextPricing: {
40+
thresholdTokens: 272_000,
41+
inputPriceMultiplier: 2,
42+
outputPriceMultiplier: 1.5,
43+
appliesToServiceTiers: ["default", "flex"],
44+
},
45+
supportsVerbosity: true,
46+
supportsTemperature: false,
47+
tiers: [
48+
{ name: "flex", contextWindow: 1_050_000, inputPrice: 1.25, outputPrice: 7.5, cacheReadsPrice: 0.125 },
49+
{ name: "priority", contextWindow: 1_050_000, inputPrice: 5.0, outputPrice: 30.0, cacheReadsPrice: 0.5 },
50+
],
51+
description: "GPT-5.4: Our most capable model for professional work",
52+
},
2753
"gpt-5.2": {
2854
maxTokens: 128000,
2955
contextWindow: 400000,
@@ -93,6 +119,18 @@ export const openAiNativeModels = {
93119
cacheReadsPrice: 0.175,
94120
description: "GPT-5.2 Chat: Optimized for conversational AI and chat use cases",
95121
},
122+
"gpt-5.3-chat-latest": {
123+
maxTokens: 16_384,
124+
contextWindow: 128_000,
125+
includedTools: ["apply_patch"],
126+
excludedTools: ["apply_diff", "write_to_file"],
127+
supportsImages: true,
128+
supportsPromptCache: true,
129+
inputPrice: 1.75,
130+
outputPrice: 14.0,
131+
cacheReadsPrice: 0.175,
132+
description: "GPT-5.3 Chat: Optimized for conversational AI and chat use cases",
133+
},
96134
"gpt-5.1": {
97135
maxTokens: 128000,
98136
contextWindow: 400000,

src/api/providers/__tests__/openai-native-usage.spec.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
88
id: "gpt-4o",
99
info: openAiNativeModels["gpt-4o"],
1010
}
11+
const gpt54Model = {
12+
id: "gpt-5.4",
13+
info: openAiNativeModels["gpt-5.4"],
14+
}
1115

1216
beforeEach(() => {
1317
handler = new OpenAiNativeHandler({
@@ -378,6 +382,12 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
378382

379383
const fourOBody = buildRequestBodyForModel("gpt-4o")
380384
expect(fourOBody.prompt_cache_retention).toBeUndefined()
385+
386+
const gpt54Body = buildRequestBodyForModel("gpt-5.4")
387+
expect(gpt54Body.prompt_cache_retention).toBeUndefined()
388+
389+
const chatModelBody = buildRequestBodyForModel("gpt-5.3-chat-latest")
390+
expect(chatModelBody.prompt_cache_retention).toBeUndefined()
381391
})
382392

383393
it("should not set prompt_cache_retention when the model does not support prompt caching", () => {
@@ -418,5 +428,64 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
418428
expect(result.totalCost).toBeGreaterThan(0)
419429
// Cost should be calculated with full input tokens since no cache reads
420430
})
431+
432+
it("should use standard GPT-5.4 pricing within the base context threshold", () => {
433+
const usage = {
434+
input_tokens: 100_000,
435+
output_tokens: 1_000,
436+
cache_read_input_tokens: 20_000,
437+
}
438+
439+
const result = (handler as any).normalizeUsage(usage, gpt54Model)
440+
441+
expect(result).toMatchObject({
442+
type: "usage",
443+
inputTokens: 100_000,
444+
outputTokens: 1_000,
445+
cacheReadTokens: 20_000,
446+
})
447+
expect(result.totalCost).toBeCloseTo(0.22, 6)
448+
})
449+
450+
it("should apply GPT-5.4 long-context pricing above the threshold", () => {
451+
const usage = {
452+
input_tokens: 300_000,
453+
output_tokens: 1_000,
454+
cache_read_input_tokens: 100_000,
455+
}
456+
457+
const result = (handler as any).normalizeUsage(usage, gpt54Model)
458+
459+
expect(result).toMatchObject({
460+
type: "usage",
461+
inputTokens: 300_000,
462+
outputTokens: 1_000,
463+
cacheReadTokens: 100_000,
464+
})
465+
expect(result.totalCost).toBeCloseTo(1.0475, 6)
466+
})
467+
468+
it("should not apply GPT-5.4 long-context pricing to priority tier", () => {
469+
handler = new OpenAiNativeHandler({
470+
openAiNativeApiKey: "test-key",
471+
openAiNativeServiceTier: "priority",
472+
})
473+
474+
const usage = {
475+
input_tokens: 300_000,
476+
output_tokens: 1_000,
477+
cache_read_input_tokens: 100_000,
478+
}
479+
480+
const result = (handler as any).normalizeUsage(usage, gpt54Model)
481+
482+
expect(result).toMatchObject({
483+
type: "usage",
484+
inputTokens: 300_000,
485+
outputTokens: 1_000,
486+
cacheReadTokens: 100_000,
487+
})
488+
expect(result.totalCost).toBeCloseTo(1.08, 6)
489+
})
421490
})
422491
})

src/api/providers/__tests__/openai-native.spec.ts

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,34 @@ describe("OpenAiNativeHandler", () => {
249249
expect(modelInfo.info.supportsReasoningEffort).toEqual(["low", "medium", "high", "xhigh"])
250250
})
251251

252+
it("should return GPT-5.4 model info when selected", () => {
253+
const gpt54Handler = new OpenAiNativeHandler({
254+
...mockOptions,
255+
apiModelId: "gpt-5.4",
256+
})
257+
258+
const modelInfo = gpt54Handler.getModel()
259+
expect(modelInfo.id).toBe("gpt-5.4")
260+
expect(modelInfo.info.maxTokens).toBe(128000)
261+
expect(modelInfo.info.contextWindow).toBe(1_050_000)
262+
expect(modelInfo.info.supportsVerbosity).toBe(true)
263+
expect(modelInfo.info.supportsReasoningEffort).toEqual(["none", "low", "medium", "high", "xhigh"])
264+
expect(modelInfo.info.reasoningEffort).toBe("none")
265+
})
266+
267+
it("should return GPT-5.3 Chat model info when selected", () => {
268+
const chatHandler = new OpenAiNativeHandler({
269+
...mockOptions,
270+
apiModelId: "gpt-5.3-chat-latest",
271+
})
272+
273+
const modelInfo = chatHandler.getModel()
274+
expect(modelInfo.id).toBe("gpt-5.3-chat-latest")
275+
expect(modelInfo.info.maxTokens).toBe(16_384)
276+
expect(modelInfo.info.contextWindow).toBe(128000)
277+
expect(modelInfo.info.supportsImages).toBe(true)
278+
})
279+
252280
it("should handle undefined model ID", () => {
253281
const handlerWithoutModel = new OpenAiNativeHandler({
254282
openAiNativeApiKey: "test-api-key",
@@ -345,6 +373,107 @@ describe("OpenAiNativeHandler", () => {
345373
expect(textChunks[1].text).toBe(" world")
346374
})
347375

376+
it("should handle GPT-5.4 model with Responses API", async () => {
377+
const mockFetch = vitest.fn().mockResolvedValue({
378+
ok: true,
379+
body: new ReadableStream({
380+
start(controller) {
381+
controller.enqueue(
382+
new TextEncoder().encode(
383+
'data: {"type":"response.output_item.added","item":{"type":"text","text":"GPT-5.4 reply"}}\n\n',
384+
),
385+
)
386+
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
387+
controller.close()
388+
},
389+
}),
390+
})
391+
global.fetch = mockFetch as any
392+
393+
mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
394+
395+
handler = new OpenAiNativeHandler({
396+
...mockOptions,
397+
apiModelId: "gpt-5.4",
398+
})
399+
400+
const stream = handler.createMessage(systemPrompt, messages)
401+
const chunks: any[] = []
402+
for await (const chunk of stream) {
403+
chunks.push(chunk)
404+
}
405+
406+
expect(mockFetch).toHaveBeenCalledWith(
407+
"https://api.openai.com/v1/responses",
408+
expect.objectContaining({
409+
body: expect.any(String),
410+
}),
411+
)
412+
const body = (mockFetch.mock.calls[0][1] as any).body as string
413+
const parsedBody = JSON.parse(body)
414+
expect(parsedBody.model).toBe("gpt-5.4")
415+
expect(parsedBody.max_output_tokens).toBe(128000)
416+
expect(parsedBody.temperature).toBeUndefined()
417+
expect(parsedBody.include).toEqual(["reasoning.encrypted_content"])
418+
expect(parsedBody.reasoning?.effort).toBe("none")
419+
expect(parsedBody.text?.verbosity).toBe("medium")
420+
421+
const textChunks = chunks.filter((chunk) => chunk.type === "text")
422+
expect(textChunks).toHaveLength(1)
423+
expect(textChunks[0].text).toBe("GPT-5.4 reply")
424+
})
425+
426+
it("should handle GPT-5.3 Chat model with Responses API", async () => {
427+
// Mock fetch for Responses API
428+
const mockFetch = vitest.fn().mockResolvedValue({
429+
ok: true,
430+
body: new ReadableStream({
431+
start(controller) {
432+
controller.enqueue(
433+
new TextEncoder().encode(
434+
'data: {"type":"response.output_item.added","item":{"type":"text","text":"Chat reply"}}\n\n',
435+
),
436+
)
437+
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
438+
controller.close()
439+
},
440+
}),
441+
})
442+
global.fetch = mockFetch as any
443+
444+
// Mock SDK to fail so it uses fetch
445+
mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
446+
447+
handler = new OpenAiNativeHandler({
448+
...mockOptions,
449+
apiModelId: "gpt-5.3-chat-latest",
450+
})
451+
452+
const stream = handler.createMessage(systemPrompt, messages)
453+
const chunks: any[] = []
454+
for await (const chunk of stream) {
455+
chunks.push(chunk)
456+
}
457+
458+
expect(mockFetch).toHaveBeenCalledWith(
459+
"https://api.openai.com/v1/responses",
460+
expect.objectContaining({
461+
body: expect.any(String),
462+
}),
463+
)
464+
const body = (mockFetch.mock.calls[0][1] as any).body as string
465+
const parsedBody = JSON.parse(body)
466+
expect(parsedBody.model).toBe("gpt-5.3-chat-latest")
467+
expect(parsedBody.max_output_tokens).toBe(16_384)
468+
expect(parsedBody.temperature).toBe(0)
469+
expect(parsedBody.reasoning?.effort).toBeUndefined()
470+
expect(parsedBody.text?.verbosity).toBeUndefined()
471+
472+
const textChunks = chunks.filter((chunk) => chunk.type === "text")
473+
expect(textChunks).toHaveLength(1)
474+
expect(textChunks[0].text).toBe("Chat reply")
475+
})
476+
348477
it("should handle GPT-5-mini model with Responses API", async () => {
349478
// Mock fetch for Responses API
350479
const mockFetch = vitest.fn().mockResolvedValue({

src/api/providers/openai-native.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
148148
totalOutputTokens,
149149
cacheWriteTokens,
150150
cacheReadTokens,
151+
effectiveTier,
151152
)
152153

153154
const reasoningTokens =

src/shared/cost.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,44 @@
11
import type { ModelInfo } from "@roo-code/types"
2+
import type { ServiceTier } from "@roo-code/types"
23

34
export interface ApiCostResult {
45
totalInputTokens: number
56
totalOutputTokens: number
67
totalCost: number
78
}
89

10+
function applyLongContextPricing(modelInfo: ModelInfo, totalInputTokens: number, serviceTier?: ServiceTier): ModelInfo {
11+
const pricing = modelInfo.longContextPricing
12+
if (!pricing || totalInputTokens <= pricing.thresholdTokens) {
13+
return modelInfo
14+
}
15+
16+
const effectiveServiceTier = serviceTier ?? "default"
17+
if (pricing.appliesToServiceTiers && !pricing.appliesToServiceTiers.includes(effectiveServiceTier)) {
18+
return modelInfo
19+
}
20+
21+
return {
22+
...modelInfo,
23+
inputPrice:
24+
modelInfo.inputPrice !== undefined && pricing.inputPriceMultiplier !== undefined
25+
? modelInfo.inputPrice * pricing.inputPriceMultiplier
26+
: modelInfo.inputPrice,
27+
outputPrice:
28+
modelInfo.outputPrice !== undefined && pricing.outputPriceMultiplier !== undefined
29+
? modelInfo.outputPrice * pricing.outputPriceMultiplier
30+
: modelInfo.outputPrice,
31+
cacheWritesPrice:
32+
modelInfo.cacheWritesPrice !== undefined && pricing.cacheWritesPriceMultiplier !== undefined
33+
? modelInfo.cacheWritesPrice * pricing.cacheWritesPriceMultiplier
34+
: modelInfo.cacheWritesPrice,
35+
cacheReadsPrice:
36+
modelInfo.cacheReadsPrice !== undefined && pricing.cacheReadsPriceMultiplier !== undefined
37+
? modelInfo.cacheReadsPrice * pricing.cacheReadsPriceMultiplier
38+
: modelInfo.cacheReadsPrice,
39+
}
40+
}
41+
942
function calculateApiCostInternal(
1043
modelInfo: ModelInfo,
1144
inputTokens: number,
@@ -62,15 +95,17 @@ export function calculateApiCostOpenAI(
6295
outputTokens: number,
6396
cacheCreationInputTokens?: number,
6497
cacheReadInputTokens?: number,
98+
serviceTier?: ServiceTier,
6599
): ApiCostResult {
66100
const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
67101
const cacheReadInputTokensNum = cacheReadInputTokens || 0
68102
const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
103+
const effectiveModelInfo = applyLongContextPricing(modelInfo, inputTokens, serviceTier)
69104

70105
// For OpenAI: inputTokens ALREADY includes all tokens (cached + non-cached)
71106
// So we pass the original inputTokens as the total
72107
return calculateApiCostInternal(
73-
modelInfo,
108+
effectiveModelInfo,
74109
nonCachedInputTokens,
75110
outputTokens,
76111
cacheCreationInputTokensNum,

0 commit comments

Comments (0)