Skip to content

Commit 0612739

Browse files
Add OpenAI GPT-5.3 chat latest and GPT-5.4 model support (#11848)
1 parent 52ce796 commit 0612739

File tree

9 files changed

+374
-1
lines changed

9 files changed

+374
-1
lines changed

.changeset/gpt-5-3-chat-latest.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"roo-cline": patch
3+
---
4+
5+
Add OpenAI's GPT-5.3-Chat-Latest model support

.changeset/gpt-5-4.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"roo-cline": patch
3+
---
4+
5+
Add OpenAI's GPT-5.4 model support

packages/types/src/model.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,16 @@ export const modelInfoSchema = z.object({
9898
outputPrice: z.number().optional(),
9999
cacheWritesPrice: z.number().optional(),
100100
cacheReadsPrice: z.number().optional(),
101+
longContextPricing: z
102+
.object({
103+
thresholdTokens: z.number(),
104+
inputPriceMultiplier: z.number().optional(),
105+
outputPriceMultiplier: z.number().optional(),
106+
cacheWritesPriceMultiplier: z.number().optional(),
107+
cacheReadsPriceMultiplier: z.number().optional(),
108+
appliesToServiceTiers: z.array(serviceTierSchema).optional(),
109+
})
110+
.optional(),
101111
description: z.string().optional(),
102112
// Default effort value for models that support reasoning effort
103113
reasoningEffort: reasoningEffortExtendedSchema.optional(),

packages/types/src/providers/openai.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,32 @@ export const openAiNativeModels = {
2424
description:
2525
"GPT-5.1 Codex Max: Our most intelligent coding model optimized for long-horizon, agentic coding tasks",
2626
},
27+
"gpt-5.4": {
28+
maxTokens: 128000,
29+
contextWindow: 1_050_000,
30+
includedTools: ["apply_patch"],
31+
excludedTools: ["apply_diff", "write_to_file"],
32+
supportsImages: true,
33+
supportsPromptCache: true,
34+
supportsReasoningEffort: ["none", "low", "medium", "high", "xhigh"],
35+
reasoningEffort: "none",
36+
inputPrice: 2.5,
37+
outputPrice: 15.0,
38+
cacheReadsPrice: 0.25,
39+
longContextPricing: {
40+
thresholdTokens: 272_000,
41+
inputPriceMultiplier: 2,
42+
outputPriceMultiplier: 1.5,
43+
appliesToServiceTiers: ["default", "flex"],
44+
},
45+
supportsVerbosity: true,
46+
supportsTemperature: false,
47+
tiers: [
48+
{ name: "flex", contextWindow: 1_050_000, inputPrice: 1.25, outputPrice: 7.5, cacheReadsPrice: 0.125 },
49+
{ name: "priority", contextWindow: 1_050_000, inputPrice: 5.0, outputPrice: 30.0, cacheReadsPrice: 0.5 },
50+
],
51+
description: "GPT-5.4: Our most capable model for professional work",
52+
},
2753
"gpt-5.2": {
2854
maxTokens: 128000,
2955
contextWindow: 400000,
@@ -93,6 +119,18 @@ export const openAiNativeModels = {
93119
cacheReadsPrice: 0.175,
94120
description: "GPT-5.2 Chat: Optimized for conversational AI and chat use cases",
95121
},
122+
"gpt-5.3-chat-latest": {
123+
maxTokens: 16_384,
124+
contextWindow: 128_000,
125+
includedTools: ["apply_patch"],
126+
excludedTools: ["apply_diff", "write_to_file"],
127+
supportsImages: true,
128+
supportsPromptCache: true,
129+
inputPrice: 1.75,
130+
outputPrice: 14.0,
131+
cacheReadsPrice: 0.175,
132+
description: "GPT-5.3 Chat: Optimized for conversational AI and chat use cases",
133+
},
96134
"gpt-5.1": {
97135
maxTokens: 128000,
98136
contextWindow: 400000,

src/api/providers/__tests__/openai-native-usage.spec.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
88
id: "gpt-4o",
99
info: openAiNativeModels["gpt-4o"],
1010
}
11+
const gpt54Model = {
12+
id: "gpt-5.4",
13+
info: openAiNativeModels["gpt-5.4"],
14+
}
1115

1216
beforeEach(() => {
1317
handler = new OpenAiNativeHandler({
@@ -378,6 +382,12 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
378382

379383
const fourOBody = buildRequestBodyForModel("gpt-4o")
380384
expect(fourOBody.prompt_cache_retention).toBeUndefined()
385+
386+
const gpt54Body = buildRequestBodyForModel("gpt-5.4")
387+
expect(gpt54Body.prompt_cache_retention).toBeUndefined()
388+
389+
const chatModelBody = buildRequestBodyForModel("gpt-5.3-chat-latest")
390+
expect(chatModelBody.prompt_cache_retention).toBeUndefined()
381391
})
382392

383393
it("should not set prompt_cache_retention when the model does not support prompt caching", () => {
@@ -418,5 +428,64 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
418428
expect(result.totalCost).toBeGreaterThan(0)
419429
// Cost should be calculated with full input tokens since no cache reads
420430
})
431+
432+
it("should use standard GPT-5.4 pricing within the base context threshold", () => {
433+
const usage = {
434+
input_tokens: 100_000,
435+
output_tokens: 1_000,
436+
cache_read_input_tokens: 20_000,
437+
}
438+
439+
const result = (handler as any).normalizeUsage(usage, gpt54Model)
440+
441+
expect(result).toMatchObject({
442+
type: "usage",
443+
inputTokens: 100_000,
444+
outputTokens: 1_000,
445+
cacheReadTokens: 20_000,
446+
})
447+
expect(result.totalCost).toBeCloseTo(0.22, 6)
448+
})
449+
450+
it("should apply GPT-5.4 long-context pricing above the threshold", () => {
451+
const usage = {
452+
input_tokens: 300_000,
453+
output_tokens: 1_000,
454+
cache_read_input_tokens: 100_000,
455+
}
456+
457+
const result = (handler as any).normalizeUsage(usage, gpt54Model)
458+
459+
expect(result).toMatchObject({
460+
type: "usage",
461+
inputTokens: 300_000,
462+
outputTokens: 1_000,
463+
cacheReadTokens: 100_000,
464+
})
465+
expect(result.totalCost).toBeCloseTo(1.0475, 6)
466+
})
467+
468+
it("should not apply GPT-5.4 long-context pricing to priority tier", () => {
469+
handler = new OpenAiNativeHandler({
470+
openAiNativeApiKey: "test-key",
471+
openAiNativeServiceTier: "priority",
472+
})
473+
474+
const usage = {
475+
input_tokens: 300_000,
476+
output_tokens: 1_000,
477+
cache_read_input_tokens: 100_000,
478+
}
479+
480+
const result = (handler as any).normalizeUsage(usage, gpt54Model)
481+
482+
expect(result).toMatchObject({
483+
type: "usage",
484+
inputTokens: 300_000,
485+
outputTokens: 1_000,
486+
cacheReadTokens: 100_000,
487+
})
488+
expect(result.totalCost).toBeCloseTo(1.08, 6)
489+
})
421490
})
422491
})

src/api/providers/__tests__/openai-native.spec.ts

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,34 @@ describe("OpenAiNativeHandler", () => {
249249
expect(modelInfo.info.supportsReasoningEffort).toEqual(["low", "medium", "high", "xhigh"])
250250
})
251251

252+
it("should return GPT-5.4 model info when selected", () => {
253+
const gpt54Handler = new OpenAiNativeHandler({
254+
...mockOptions,
255+
apiModelId: "gpt-5.4",
256+
})
257+
258+
const modelInfo = gpt54Handler.getModel()
259+
expect(modelInfo.id).toBe("gpt-5.4")
260+
expect(modelInfo.info.maxTokens).toBe(128000)
261+
expect(modelInfo.info.contextWindow).toBe(1_050_000)
262+
expect(modelInfo.info.supportsVerbosity).toBe(true)
263+
expect(modelInfo.info.supportsReasoningEffort).toEqual(["none", "low", "medium", "high", "xhigh"])
264+
expect(modelInfo.info.reasoningEffort).toBe("none")
265+
})
266+
267+
it("should return GPT-5.3 Chat model info when selected", () => {
268+
const chatHandler = new OpenAiNativeHandler({
269+
...mockOptions,
270+
apiModelId: "gpt-5.3-chat-latest",
271+
})
272+
273+
const modelInfo = chatHandler.getModel()
274+
expect(modelInfo.id).toBe("gpt-5.3-chat-latest")
275+
expect(modelInfo.info.maxTokens).toBe(16_384)
276+
expect(modelInfo.info.contextWindow).toBe(128000)
277+
expect(modelInfo.info.supportsImages).toBe(true)
278+
})
279+
252280
it("should handle undefined model ID", () => {
253281
const handlerWithoutModel = new OpenAiNativeHandler({
254282
openAiNativeApiKey: "test-api-key",
@@ -345,6 +373,107 @@ describe("OpenAiNativeHandler", () => {
345373
expect(textChunks[1].text).toBe(" world")
346374
})
347375

376+
it("should handle GPT-5.4 model with Responses API", async () => {
377+
const mockFetch = vitest.fn().mockResolvedValue({
378+
ok: true,
379+
body: new ReadableStream({
380+
start(controller) {
381+
controller.enqueue(
382+
new TextEncoder().encode(
383+
'data: {"type":"response.output_item.added","item":{"type":"text","text":"GPT-5.4 reply"}}\n\n',
384+
),
385+
)
386+
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
387+
controller.close()
388+
},
389+
}),
390+
})
391+
global.fetch = mockFetch as any
392+
393+
mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
394+
395+
handler = new OpenAiNativeHandler({
396+
...mockOptions,
397+
apiModelId: "gpt-5.4",
398+
})
399+
400+
const stream = handler.createMessage(systemPrompt, messages)
401+
const chunks: any[] = []
402+
for await (const chunk of stream) {
403+
chunks.push(chunk)
404+
}
405+
406+
expect(mockFetch).toHaveBeenCalledWith(
407+
"https://api.openai.com/v1/responses",
408+
expect.objectContaining({
409+
body: expect.any(String),
410+
}),
411+
)
412+
const body = (mockFetch.mock.calls[0][1] as any).body as string
413+
const parsedBody = JSON.parse(body)
414+
expect(parsedBody.model).toBe("gpt-5.4")
415+
expect(parsedBody.max_output_tokens).toBe(128000)
416+
expect(parsedBody.temperature).toBeUndefined()
417+
expect(parsedBody.include).toEqual(["reasoning.encrypted_content"])
418+
expect(parsedBody.reasoning?.effort).toBe("none")
419+
expect(parsedBody.text?.verbosity).toBe("medium")
420+
421+
const textChunks = chunks.filter((chunk) => chunk.type === "text")
422+
expect(textChunks).toHaveLength(1)
423+
expect(textChunks[0].text).toBe("GPT-5.4 reply")
424+
})
425+
426+
it("should handle GPT-5.3 Chat model with Responses API", async () => {
427+
// Mock fetch for Responses API
428+
const mockFetch = vitest.fn().mockResolvedValue({
429+
ok: true,
430+
body: new ReadableStream({
431+
start(controller) {
432+
controller.enqueue(
433+
new TextEncoder().encode(
434+
'data: {"type":"response.output_item.added","item":{"type":"text","text":"Chat reply"}}\n\n',
435+
),
436+
)
437+
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
438+
controller.close()
439+
},
440+
}),
441+
})
442+
global.fetch = mockFetch as any
443+
444+
// Mock SDK to fail so it uses fetch
445+
mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
446+
447+
handler = new OpenAiNativeHandler({
448+
...mockOptions,
449+
apiModelId: "gpt-5.3-chat-latest",
450+
})
451+
452+
const stream = handler.createMessage(systemPrompt, messages)
453+
const chunks: any[] = []
454+
for await (const chunk of stream) {
455+
chunks.push(chunk)
456+
}
457+
458+
expect(mockFetch).toHaveBeenCalledWith(
459+
"https://api.openai.com/v1/responses",
460+
expect.objectContaining({
461+
body: expect.any(String),
462+
}),
463+
)
464+
const body = (mockFetch.mock.calls[0][1] as any).body as string
465+
const parsedBody = JSON.parse(body)
466+
expect(parsedBody.model).toBe("gpt-5.3-chat-latest")
467+
expect(parsedBody.max_output_tokens).toBe(16_384)
468+
expect(parsedBody.temperature).toBe(0)
469+
expect(parsedBody.reasoning?.effort).toBeUndefined()
470+
expect(parsedBody.text?.verbosity).toBeUndefined()
471+
472+
const textChunks = chunks.filter((chunk) => chunk.type === "text")
473+
expect(textChunks).toHaveLength(1)
474+
expect(textChunks[0].text).toBe("Chat reply")
475+
})
476+
348477
it("should handle GPT-5-mini model with Responses API", async () => {
349478
// Mock fetch for Responses API
350479
const mockFetch = vitest.fn().mockResolvedValue({

src/api/providers/openai-native.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
148148
totalOutputTokens,
149149
cacheWriteTokens,
150150
cacheReadTokens,
151+
effectiveTier,
151152
)
152153

153154
const reasoningTokens =

src/shared/cost.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,44 @@
11
import type { ModelInfo } from "@roo-code/types"
2+
import type { ServiceTier } from "@roo-code/types"
23

34
export interface ApiCostResult {
45
totalInputTokens: number
56
totalOutputTokens: number
67
totalCost: number
78
}
89

10+
function applyLongContextPricing(modelInfo: ModelInfo, totalInputTokens: number, serviceTier?: ServiceTier): ModelInfo {
11+
const pricing = modelInfo.longContextPricing
12+
if (!pricing || totalInputTokens <= pricing.thresholdTokens) {
13+
return modelInfo
14+
}
15+
16+
const effectiveServiceTier = serviceTier ?? "default"
17+
if (pricing.appliesToServiceTiers && !pricing.appliesToServiceTiers.includes(effectiveServiceTier)) {
18+
return modelInfo
19+
}
20+
21+
return {
22+
...modelInfo,
23+
inputPrice:
24+
modelInfo.inputPrice !== undefined && pricing.inputPriceMultiplier !== undefined
25+
? modelInfo.inputPrice * pricing.inputPriceMultiplier
26+
: modelInfo.inputPrice,
27+
outputPrice:
28+
modelInfo.outputPrice !== undefined && pricing.outputPriceMultiplier !== undefined
29+
? modelInfo.outputPrice * pricing.outputPriceMultiplier
30+
: modelInfo.outputPrice,
31+
cacheWritesPrice:
32+
modelInfo.cacheWritesPrice !== undefined && pricing.cacheWritesPriceMultiplier !== undefined
33+
? modelInfo.cacheWritesPrice * pricing.cacheWritesPriceMultiplier
34+
: modelInfo.cacheWritesPrice,
35+
cacheReadsPrice:
36+
modelInfo.cacheReadsPrice !== undefined && pricing.cacheReadsPriceMultiplier !== undefined
37+
? modelInfo.cacheReadsPrice * pricing.cacheReadsPriceMultiplier
38+
: modelInfo.cacheReadsPrice,
39+
}
40+
}
41+
942
function calculateApiCostInternal(
1043
modelInfo: ModelInfo,
1144
inputTokens: number,
@@ -62,15 +95,17 @@ export function calculateApiCostOpenAI(
6295
outputTokens: number,
6396
cacheCreationInputTokens?: number,
6497
cacheReadInputTokens?: number,
98+
serviceTier?: ServiceTier,
6599
): ApiCostResult {
66100
const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
67101
const cacheReadInputTokensNum = cacheReadInputTokens || 0
68102
const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
103+
const effectiveModelInfo = applyLongContextPricing(modelInfo, inputTokens, serviceTier)
69104

70105
// For OpenAI: inputTokens ALREADY includes all tokens (cached + non-cached)
71106
// So we pass the original inputTokens as the total
72107
return calculateApiCostInternal(
73-
modelInfo,
108+
effectiveModelInfo,
74109
nonCachedInputTokens,
75110
outputTokens,
76111
cacheCreationInputTokensNum,

0 commit comments

Comments (0)