From cc3b3b9adbd3a6556ae2901646be9372d73866f1 Mon Sep 17 00:00:00 2001 From: krishnatejaswis Date: Wed, 6 May 2026 13:11:17 +0530 Subject: [PATCH 1/3] fix: inject cache_control on content blocks for openai-compatible Bedrock proxies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When setCacheKey: true is set on an @ai-sdk/openai-compatible provider and the model ID contains 'bedrock/', or when cacheStrategy: 'bedrock' is explicitly set, OpenCode now injects cache_control: {type:'ephemeral'} onto message content blocks instead of sending a promptCacheKey request option. promptCacheKey is an OpenAI-native mechanism that Bifrost, LiteLLM, and other proxies routing to AWS Bedrock/Anthropic ignore entirely. These proxies require cache_control on individual content blocks (Anthropic-style), which they then translate to the native backend caching format. Key changes: - applyCompatCaching(): new function that converts string system messages to content block arrays and annotates the last block of system/user messages with cache_control via providerOptions.openaiCompatible — matching what Bifrost and LiteLLM expect on the wire - Guards applyCaching() from running on @ai-sdk/openai-compatible models to prevent the 'claude' model-id heuristic from triggering the wrong caching path - Passes provider options (item.options) into ProviderTransform.message() so setCacheKey / cacheStrategy are available at message-transform time - Adds cacheStrategy: 'bedrock' option to provider config schema - Docs: new section explaining caching for openai-compatible Bedrock proxies --- packages/opencode/src/config/provider.ts | 4 + packages/opencode/src/provider/transform.ts | 65 +++++++++- packages/opencode/src/session/llm.ts | 2 +- .../opencode/test/provider/transform.test.ts | 115 ++++++++++++++++++ packages/web/src/content/docs/config.mdx | 31 ++++- 5 files changed, 213 insertions(+), 4 deletions(-) diff --git 
a/packages/opencode/src/config/provider.ts b/packages/opencode/src/config/provider.ts index 7821bca5a937..10693c25da6d 100644 --- a/packages/opencode/src/config/provider.ts +++ b/packages/opencode/src/config/provider.ts @@ -87,6 +87,10 @@ export const Info = Schema.Struct({ setCacheKey: Schema.optional(Schema.Boolean).annotate({ description: "Enable promptCacheKey for this provider (default false)", }), + cacheStrategy: Schema.optional(Schema.Literals(["bedrock"])).annotate({ + description: + "Cache strategy for openai-compatible providers. Set to 'bedrock' when routing through a proxy (e.g. Bifrost, LiteLLM) to AWS Bedrock Claude models to inject cache_control: {type: 'ephemeral'} into message content blocks instead of promptCacheKey.", + }), timeout: Schema.optional( Schema.Union([PositiveInt, Schema.Literal(false)]).annotate({ description: diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index cd29e40822da..24e345b82f02 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -414,7 +414,55 @@ function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMes }) } -export function message(msgs: ModelMessage[], model: Provider.Model, options: Record) { +// Injects cache_control: { type: "ephemeral" } as a content block property for +// openai-compatible providers (e.g. Bifrost, LiteLLM) that route to Anthropic-capable +// backends. Both proxies accept and forward this to the upstream provider. +// +// Key difference from applyCaching: string system/user messages are converted to +// content block arrays so that cache_control lands on the block itself — not as a +// top-level message field (which proxies ignore for caching purposes). 
+function applyCompatCaching(msgs: ModelMessage[]): ModelMessage[] { + const cacheOpt = { openaiCompatible: { cache_control: { type: "ephemeral" } } } + const system = msgs.filter((msg) => msg.role === "system").slice(0, 2) + const final = msgs.filter((msg) => msg.role !== "system").slice(-2) + const targets = unique([...system, ...final]) + + return msgs.map((msg) => { + if (!targets.includes(msg)) return msg + + if (msg.role === "system") { + // Convert string system message to a content block array so cache_control + // is on the block, matching what Bifrost/LiteLLM expect: + // content: [{ type: "text", text: "...", providerOptions: { openaiCompatible: { cache_control: {...} } } }] + const block = { type: "text" as const, text: msg.content, providerOptions: cacheOpt } + // Cast: @ai-sdk types content as string but openai-compatible serialises array blocks correctly + return { ...msg, content: [block] } as unknown as ModelMessage + } + + if (msg.role === "user") { + // Normalise to array so cache_control goes on the last content block + const parts: any[] = + typeof msg.content === "string" + ? [{ type: "text" as const, text: msg.content }] + : Array.isArray(msg.content) + ? [...msg.content] + : [] + if (parts.length === 0) return msg + const last = parts[parts.length - 1] + parts[parts.length - 1] = { ...last, providerOptions: mergeDeep(last.providerOptions ?? 
{}, cacheOpt) } + return { ...msg, content: parts } as ModelMessage + } + + return msg + }) +} + +export function message( + msgs: ModelMessage[], + model: Provider.Model, + options: Record, + providerOpts?: Record, +) { msgs = unsupportedParts(msgs, model) msgs = normalizeMessages(msgs, model, options) if ( @@ -426,10 +474,23 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re model.id.includes("claude") || model.api.npm === "@ai-sdk/anthropic" || model.api.npm === "@ai-sdk/alibaba") && - model.api.npm !== "@ai-sdk/gateway" + model.api.npm !== "@ai-sdk/gateway" && + model.api.npm !== "@ai-sdk/openai-compatible" ) { msgs = applyCaching(msgs, model) } + // For openai-compatible providers (e.g. Bifrost, LiteLLM routing to Bedrock/Anthropic), + // inject cache_control into content blocks when cacheStrategy is "bedrock" or when + // setCacheKey is true and the model id contains "bedrock/". + // This uses Anthropic-style cache_control: { type: "ephemeral" } on content blocks, + // which both Bifrost and LiteLLM accept and translate to the native backend format. 
+ if ( + model.api.npm === "@ai-sdk/openai-compatible" && + (providerOpts?.cacheStrategy === "bedrock" || + (providerOpts?.setCacheKey === true && model.id.toLowerCase().includes("bedrock/"))) + ) { + msgs = applyCompatCaching(msgs) + } // Remap providerOptions keys from stored providerID to expected SDK key const key = sdkKey(model.api.npm) diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index e76583f2d347..abc74ad16247 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -396,7 +396,7 @@ const live: Layer.Layer< async transformParams(args) { if (args.type === "stream") { // @ts-expect-error - args.params.prompt = ProviderTransform.message(args.params.prompt, input.model, options) + args.params.prompt = ProviderTransform.message(args.params.prompt, input.model, options, item.options) } return args.params }, diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index c7a321d57199..ce98a86137bf 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -120,6 +120,121 @@ describe("ProviderTransform.options - setCacheKey", () => { }) }) +describe("ProviderTransform.message - openai-compatible Bedrock caching", () => { + const createCompatModel = (modelId: string, providerID = "my-bifrost") => + ({ + id: modelId, + providerID, + api: { + id: modelId, + url: "https://bifrost.example.com/api/v1", + npm: "@ai-sdk/openai-compatible", + }, + name: "Claude via Bifrost", + capabilities: { + temperature: true, + reasoning: false, + attachment: true, + toolcall: true, + input: { text: true, audio: false, image: true, video: false, pdf: true }, + output: { text: true, audio: false, image: false, video: false, pdf: false }, + interleaved: false, + }, + cost: { input: 0.003, output: 0.015, cache: { read: 0.0003, write: 0.00375 } }, + limit: { context: 200000, output: 8192 }, 
+ status: "active", + options: {}, + headers: {}, + }) as any + + const makeMsgs = () => + [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: [{ type: "text", text: "Hello" }] }, + ] as any[] + + const cacheOpt = { type: "ephemeral" } + + test("string system message is converted to content block array with cache_control on last block", () => { + const model = createCompatModel("my-bifrost/bedrock/anthropic.claude-sonnet-4") + const result = ProviderTransform.message(makeMsgs(), model, {}, { cacheStrategy: "bedrock" }) as any[] + const sysMsg = result.find((m: any) => m.role === "system") + // String content must become a content block array so cache_control is on the block, + // not a top-level message field (which proxies like Bifrost ignore for caching) + expect(Array.isArray(sysMsg?.content)).toBe(true) + expect(sysMsg.content[0].type).toBe("text") + expect(sysMsg.content[0].text).toBe("You are a helpful assistant.") + expect(sysMsg.content[0].providerOptions?.openaiCompatible?.cache_control).toEqual(cacheOpt) + // must NOT be on the message object itself + expect(sysMsg.providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + }) + + test("user message content block gets cache_control on last block", () => { + const model = createCompatModel("my-bifrost/bedrock/anthropic.claude-sonnet-4") + const result = ProviderTransform.message(makeMsgs(), model, {}, { cacheStrategy: "bedrock" }) as any[] + const userMsg = result.find((m: any) => m.role === "user") + expect(Array.isArray(userMsg?.content)).toBe(true) + const lastBlock = userMsg.content[userMsg.content.length - 1] + expect(lastBlock.providerOptions?.openaiCompatible?.cache_control).toEqual(cacheOpt) + // must NOT be on the message object itself + expect(userMsg.providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + }) + + test("auto-triggers on setCacheKey: true when model id contains bedrock/", () => { + const model = 
createCompatModel("my-bifrost/bedrock/anthropic.claude-sonnet-4") + const result = ProviderTransform.message(makeMsgs(), model, {}, { setCacheKey: true }) as any[] + const sysMsg = result.find((m: any) => m.role === "system") + expect(Array.isArray(sysMsg?.content)).toBe(true) + expect(sysMsg.content[0].providerOptions?.openaiCompatible?.cache_control).toEqual(cacheOpt) + }) + + test("does not inject cache_control for openai-compatible without cacheStrategy or bedrock model id", () => { + const model = createCompatModel("my-provider/gpt-4o") + const result = ProviderTransform.message(makeMsgs(), model, {}, { setCacheKey: true }) as any[] + const sysMsg = result.find((m: any) => m.role === "system") + // system message must stay as a plain string — no content block conversion + expect(typeof sysMsg?.content).toBe("string") + expect(sysMsg?.providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + }) + + test("does not inject cache_control when no providerOpts are passed", () => { + const model = createCompatModel("my-bifrost/bedrock/amazon.nova-pro") + const result = ProviderTransform.message(makeMsgs(), model, {}) as any[] + const sysMsg = result.find((m: any) => m.role === "system") + expect(typeof sysMsg?.content).toBe("string") + expect(sysMsg?.providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + }) + + test("native anthropic provider is unaffected by cacheStrategy option", () => { + const anthropicModel = { + ...createCompatModel("anthropic/claude-sonnet-4", "anthropic"), + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + } + const result = ProviderTransform.message(makeMsgs(), anthropicModel, {}, { cacheStrategy: "bedrock" }) as any[] + const sysMsg = result.find((m: any) => m.role === "system") + // Native anthropic path: cache hint is at message-level providerOptions, not converted to block + expect(sysMsg?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" }) + }) + + 
test("multi-part user message: only last content block gets cache_control", () => { + const model = createCompatModel("my-bifrost/bedrock/anthropic.claude-sonnet-4") + const msgs = [ + { role: "system", content: "System prompt." }, + { + role: "user", + content: [ + { type: "text", text: "First part" }, + { type: "text", text: "Second part" }, + ], + }, + ] as any[] + const result = ProviderTransform.message(msgs, model, {}, { cacheStrategy: "bedrock" }) as any[] + const userMsg = result.find((m: any) => m.role === "user") + expect(userMsg.content[0].providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + expect(userMsg.content[1].providerOptions?.openaiCompatible?.cache_control).toEqual(cacheOpt) + }) +}) + describe("ProviderTransform.options - zai/zhipuai thinking", () => { const sessionID = "test-session-123" diff --git a/packages/web/src/content/docs/config.mdx b/packages/web/src/content/docs/config.mdx index 8568ffbb9e08..451a824ac9f5 100644 --- a/packages/web/src/content/docs/config.mdx +++ b/packages/web/src/content/docs/config.mdx @@ -360,7 +360,7 @@ You can configure the providers and models you want to use in your OpenCode conf The `small_model` option configures a separate model for lightweight tasks like title generation. By default, OpenCode tries to use a cheaper model if one is available from your provider, otherwise it falls back to your main model. -Provider options can include `timeout`, `chunkTimeout`, and `setCacheKey`: +Provider options can include `timeout`, `chunkTimeout`, `setCacheKey`, and `cacheStrategy`: ```json title="opencode.json" { @@ -380,6 +380,7 @@ Provider options can include `timeout`, `chunkTimeout`, and `setCacheKey`: - `timeout` - Request timeout in milliseconds (default: 300000). Set to `false` to disable. - `chunkTimeout` - Timeout in milliseconds between streamed response chunks. If no chunk arrives in time, the request is aborted. - `setCacheKey` - Ensure a cache key is always set for designated provider. 
+- `cacheStrategy` - Cache strategy for openai-compatible providers. Set to `"bedrock"` when routing through a proxy (e.g. Bifrost, LiteLLM) to AWS Bedrock Claude models. This injects `cache_control: {type: "ephemeral"}` into message content blocks instead of `promptCacheKey`, matching how native Bedrock/Anthropic providers handle caching. See [Caching with OpenAI-Compatible Bedrock Proxies](#caching-with-openai-compatible-bedrock-proxies) below. You can also configure [local models](/docs/models#local). [Learn more](/docs/models). @@ -420,6 +421,34 @@ Bearer tokens (`AWS_BEARER_TOKEN_BEDROCK` or `/connect`) take precedence over pr --- +##### Caching with OpenAI-Compatible Bedrock Proxies + +When routing Claude models through an OpenAI-compatible proxy (e.g. [Bifrost](https://github.com/maximhq/bifrost), [LiteLLM](https://github.com/BerriAI/litellm)) to AWS Bedrock, standard `setCacheKey` sends a `promptCacheKey` request option — an OpenAI-native mechanism that these proxies ignore. They instead expect Anthropic-style `cache_control: {type: "ephemeral"}` on individual message content blocks, which they translate to the backend's native caching format. + +Set `cacheStrategy: "bedrock"` to enable this: + +```json title="opencode.json" +{ + "$schema": "https://opencode.ai/config.json", + "provider": { + "my-bifrost": { + "npm": "@ai-sdk/openai-compatible", + "options": { + "baseURL": "https://your-bifrost-endpoint/api/v1", + "apiKey": "your-key", + "cacheStrategy": "bedrock" + } + } + } +} +``` + +This is equivalent to how native `@ai-sdk/anthropic` and `@ai-sdk/amazon-bedrock` providers handle caching internally. + +You can also trigger the bedrock cache strategy automatically without `cacheStrategy` by setting `setCacheKey: true` and using a model ID that contains `bedrock/` (matched case-insensitively, e.g. `my-bifrost/bedrock/anthropic.claude-sonnet-4`). + +--- + ### Themes Set your UI theme in `tui.json`. 
From 96371fbb9dc370246909619a79d130f492217c2a Mon Sep 17 00:00:00 2001 From: krishnatejaswis Date: Sun, 10 May 2026 10:39:46 +0530 Subject: [PATCH 2/3] fix: replace any[] with proper UserContent types in applyCompatCaching --- packages/opencode/src/provider/transform.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index bb90eb5ad524..2941ce468fa5 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -1,4 +1,4 @@ -import type { ModelMessage, ToolResultPart } from "ai" +import type { ModelMessage, TextPart, ImagePart, FilePart, ToolResultPart } from "ai" import { mergeDeep, unique } from "remeda" import type { JSONSchema7 } from "@ai-sdk/provider" import type { JSONSchema } from "zod/v4/core" @@ -455,7 +455,7 @@ function applyCompatCaching(msgs: ModelMessage[]): ModelMessage[] { if (msg.role === "user") { // Normalise to array so cache_control goes on the last content block - const parts: any[] = + const parts: Array = typeof msg.content === "string" ? 
[{ type: "text" as const, text: msg.content }] : Array.isArray(msg.content) From cc502c674ccf7eb490e099176b91e59255c0d115 Mon Sep 17 00:00:00 2001 From: krishnatejaswis Date: Sun, 10 May 2026 11:18:46 +0530 Subject: [PATCH 3/3] refactor: align applyCompatCaching with project style guide --- packages/opencode/src/config/provider.ts | 5 ++- packages/opencode/src/provider/transform.ts | 37 ++++++++------------- 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/packages/opencode/src/config/provider.ts b/packages/opencode/src/config/provider.ts index 28d288dbd980..638303687838 100644 --- a/packages/opencode/src/config/provider.ts +++ b/packages/opencode/src/config/provider.ts @@ -88,9 +88,8 @@ export const Info = Schema.Struct({ setCacheKey: Schema.optional(Schema.Boolean).annotate({ description: "Enable promptCacheKey for this provider (default false)", }), - cacheStrategy: Schema.optional(Schema.Literals(["bedrock"])).annotate({ - description: - "Cache strategy for openai-compatible providers. Set to 'bedrock' when routing through a proxy (e.g. Bifrost, LiteLLM) to AWS Bedrock Claude models to inject cache_control: {type: 'ephemeral'} into message content blocks instead of promptCacheKey.", + cacheStrategy: Schema.optional(Schema.Literal("bedrock")).annotate({ + description: "Cache injection strategy for openai-compatible providers proxying Bedrock. 
'bedrock' injects cache_control blocks instead of promptCacheKey.", }), timeout: Schema.optional( Schema.Union([PositiveInt, Schema.Literal(false)]).annotate({ diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index 2941ce468fa5..4f6b06adf674 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -428,33 +428,25 @@ function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMes }) } -// Injects cache_control: { type: "ephemeral" } as a content block property for -// openai-compatible providers (e.g. Bifrost, LiteLLM) that route to Anthropic-capable -// backends. Both proxies accept and forward this to the upstream provider. -// -// Key difference from applyCaching: string system/user messages are converted to -// content block arrays so that cache_control lands on the block itself — not as a -// top-level message field (which proxies ignore for caching purposes). +// Injects cache_control on content blocks for openai-compatible providers routing to +// Anthropic-capable backends (e.g. Bifrost, LiteLLM, Databricks). +// Unlike applyCaching (which mutates messages in-place), this returns a remapped array +// with new message objects so that string system/user content is converted to block arrays — +// proxies only honour cache_control when it is on the block itself, not a top-level field. 
function applyCompatCaching(msgs: ModelMessage[]): ModelMessage[] { const cacheOpt = { openaiCompatible: { cache_control: { type: "ephemeral" } } } const system = msgs.filter((msg) => msg.role === "system").slice(0, 2) const final = msgs.filter((msg) => msg.role !== "system").slice(-2) - const targets = unique([...system, ...final]) return msgs.map((msg) => { - if (!targets.includes(msg)) return msg + if (!unique([...system, ...final]).includes(msg)) return msg if (msg.role === "system") { - // Convert string system message to a content block array so cache_control - // is on the block, matching what Bifrost/LiteLLM expect: - // content: [{ type: "text", text: "...", providerOptions: { openaiCompatible: { cache_control: {...} } } }] - const block = { type: "text" as const, text: msg.content, providerOptions: cacheOpt } - // Cast: @ai-sdk types content as string but openai-compatible serialises array blocks correctly - return { ...msg, content: [block] } as unknown as ModelMessage + // @ai-sdk types system content as string; cast is intentional — proxies serialise block arrays correctly + return { ...msg, content: [{ type: "text" as const, text: msg.content, providerOptions: cacheOpt }] } as unknown as ModelMessage } if (msg.role === "user") { - // Normalise to array so cache_control goes on the last content block const parts: Array = typeof msg.content === "string" ? [{ type: "text" as const, text: msg.content }] @@ -462,11 +454,11 @@ function applyCompatCaching(msgs: ModelMessage[]): ModelMessage[] { ? [...msg.content] : [] if (parts.length === 0) return msg - const last = parts[parts.length - 1] - parts[parts.length - 1] = { ...last, providerOptions: mergeDeep(last.providerOptions ?? {}, cacheOpt) } + parts[parts.length - 1] = { ...parts[parts.length - 1], providerOptions: mergeDeep(parts[parts.length - 1].providerOptions ?? 
{}, cacheOpt) } return { ...msg, content: parts } as ModelMessage } + // assistant/tool messages in targets are intentionally skipped return msg }) } @@ -475,7 +467,7 @@ export function message( msgs: ModelMessage[], model: Provider.Model, options: Record, - providerOpts?: Record, + providerOpts?: Record, ) { msgs = unsupportedParts(msgs, model) msgs = normalizeMessages(msgs, model, options) @@ -493,11 +485,8 @@ export function message( ) { msgs = applyCaching(msgs, model) } - // For openai-compatible providers (e.g. Bifrost, LiteLLM routing to Bedrock/Anthropic), - // inject cache_control into content blocks when cacheStrategy is "bedrock" or when - // setCacheKey is true and the model id contains "bedrock/". - // This uses Anthropic-style cache_control: { type: "ephemeral" } on content blocks, - // which both Bifrost and LiteLLM accept and translate to the native backend format. + // For openai-compatible providers, inject cache_control on content blocks when + // cacheStrategy is "bedrock" or setCacheKey is true with a bedrock/ model id. if ( model.api.npm === "@ai-sdk/openai-compatible" && (providerOpts?.cacheStrategy === "bedrock" ||