diff --git a/packages/opencode/src/config/provider.ts b/packages/opencode/src/config/provider.ts index a7b6fefc5b21..6b5ab377991d 100644 --- a/packages/opencode/src/config/provider.ts +++ b/packages/opencode/src/config/provider.ts @@ -89,6 +89,9 @@ export const Info = Schema.Struct({ setCacheKey: Schema.optional(Schema.Boolean).annotate({ description: "Enable promptCacheKey for this provider (default false)", }), + cacheStrategy: Schema.optional(Schema.Literal("bedrock")).annotate({ + description: "Cache injection strategy for openai-compatible providers proxying Bedrock. 'bedrock' injects cache_control blocks instead of promptCacheKey.", + }), timeout: Schema.optional( Schema.Union([PositiveInt, Schema.Literal(false)]).annotate({ description: "Timeout in milliseconds for full requests to this provider. Set to false to disable timeout.", diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index c791aebf9713..09a63ae211b6 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -1,4 +1,4 @@ -import type { ModelMessage, ToolResultPart } from "ai" +import type { ModelMessage, TextPart, ImagePart, FilePart, ToolResultPart } from "ai" import { mergeDeep, unique } from "remeda" import type { JSONSchema7 } from "@ai-sdk/provider" import type * as Provider from "./provider" @@ -431,7 +431,47 @@ function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMes }) } -export function message(msgs: ModelMessage[], model: Provider.Model, options: Record) { +// Injects cache_control on content blocks for openai-compatible providers routing to +// Anthropic-capable backends (e.g. Bifrost, LiteLLM, Databricks). 
+// Unlike applyCaching (which mutates messages in-place), this returns a remapped array +// with new message objects so that string system/user content is converted to block arrays — +// proxies only honour cache_control when it is on the block itself, not a top-level field. +function applyCompatCaching(msgs: ModelMessage[]): ModelMessage[] { + const cacheOpt = { openaiCompatible: { cache_control: { type: "ephemeral" } } } + const system = msgs.filter((msg) => msg.role === "system").slice(0, 2) + const final = msgs.filter((msg) => msg.role !== "system").slice(-2) + + return msgs.map((msg) => { + if (!unique([...system, ...final]).includes(msg)) return msg + + if (msg.role === "system") { + // @ai-sdk types system content as string; cast is intentional — proxies serialise block arrays correctly + return { ...msg, content: [{ type: "text" as const, text: msg.content, providerOptions: cacheOpt }] } as unknown as ModelMessage + } + + if (msg.role === "user") { + const parts: Array = + typeof msg.content === "string" + ? [{ type: "text" as const, text: msg.content }] + : Array.isArray(msg.content) + ? [...msg.content] + : [] + if (parts.length === 0) return msg + parts[parts.length - 1] = { ...parts[parts.length - 1], providerOptions: mergeDeep(parts[parts.length - 1].providerOptions ?? 
{}, cacheOpt) } + return { ...msg, content: parts } as ModelMessage + } + + // assistant/tool messages in targets are intentionally skipped + return msg + }) +} + +export function message( + msgs: ModelMessage[], + model: Provider.Model, + options: Record, + providerOpts?: Record, +) { msgs = unsupportedParts(msgs, model) msgs = normalizeMessages(msgs, model, options) if ( @@ -443,10 +483,20 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re model.id.includes("claude") || model.api.npm === "@ai-sdk/anthropic" || model.api.npm === "@ai-sdk/alibaba") && - model.api.npm !== "@ai-sdk/gateway" + model.api.npm !== "@ai-sdk/gateway" && + model.api.npm !== "@ai-sdk/openai-compatible" ) { msgs = applyCaching(msgs, model) } + // For openai-compatible providers, inject cache_control on content blocks when + // cacheStrategy is "bedrock" or setCacheKey is true with a bedrock/ model id. + if ( + model.api.npm === "@ai-sdk/openai-compatible" && + (providerOpts?.cacheStrategy === "bedrock" || + (providerOpts?.setCacheKey === true && model.id.toLowerCase().includes("bedrock/"))) + ) { + msgs = applyCompatCaching(msgs) + } // Remap providerOptions keys from stored providerID to expected SDK key const key = sdkKey(model.api.npm) diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index ae790a50f1bf..124e8c2119f2 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -305,6 +305,22 @@ const live: Layer.Layer< tool: failed.toolCall.toolName, error: failed.error.message, }), + ...input.model.headers, + ...headers, + }, + maxRetries: input.retries ?? 
0, + messages, + model: wrapLanguageModel({ + model: language, + middleware: [ + { + specificationVersion: "v3" as const, + async transformParams(args) { + if (args.type === "stream") { + // @ts-expect-error + args.params.prompt = ProviderTransform.message(args.params.prompt, input.model, options, item.options) + } + return args.params toolName: "invalid", } }, diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index 7fb22ddf5770..6e518ed460b3 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -120,6 +120,121 @@ describe("ProviderTransform.options - setCacheKey", () => { }) }) +describe("ProviderTransform.message - openai-compatible Bedrock caching", () => { + const createCompatModel = (modelId: string, providerID = "my-bifrost") => + ({ + id: modelId, + providerID, + api: { + id: modelId, + url: "https://bifrost.example.com/api/v1", + npm: "@ai-sdk/openai-compatible", + }, + name: "Claude via Bifrost", + capabilities: { + temperature: true, + reasoning: false, + attachment: true, + toolcall: true, + input: { text: true, audio: false, image: true, video: false, pdf: true }, + output: { text: true, audio: false, image: false, video: false, pdf: false }, + interleaved: false, + }, + cost: { input: 0.003, output: 0.015, cache: { read: 0.0003, write: 0.00375 } }, + limit: { context: 200000, output: 8192 }, + status: "active", + options: {}, + headers: {}, + }) as any + + const makeMsgs = () => + [ + { role: "system", content: "You are a helpful assistant." 
}, + { role: "user", content: [{ type: "text", text: "Hello" }] }, + ] as any[] + + const cacheOpt = { type: "ephemeral" } + + test("string system message is converted to content block array with cache_control on last block", () => { + const model = createCompatModel("my-bifrost/bedrock/anthropic.claude-sonnet-4") + const result = ProviderTransform.message(makeMsgs(), model, {}, { cacheStrategy: "bedrock" }) as any[] + const sysMsg = result.find((m: any) => m.role === "system") + // String content must become a content block array so cache_control is on the block, + // not a top-level message field (which proxies like Bifrost ignore for caching) + expect(Array.isArray(sysMsg?.content)).toBe(true) + expect(sysMsg.content[0].type).toBe("text") + expect(sysMsg.content[0].text).toBe("You are a helpful assistant.") + expect(sysMsg.content[0].providerOptions?.openaiCompatible?.cache_control).toEqual(cacheOpt) + // must NOT be on the message object itself + expect(sysMsg.providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + }) + + test("user message content block gets cache_control on last block", () => { + const model = createCompatModel("my-bifrost/bedrock/anthropic.claude-sonnet-4") + const result = ProviderTransform.message(makeMsgs(), model, {}, { cacheStrategy: "bedrock" }) as any[] + const userMsg = result.find((m: any) => m.role === "user") + expect(Array.isArray(userMsg?.content)).toBe(true) + const lastBlock = userMsg.content[userMsg.content.length - 1] + expect(lastBlock.providerOptions?.openaiCompatible?.cache_control).toEqual(cacheOpt) + // must NOT be on the message object itself + expect(userMsg.providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + }) + + test("auto-triggers on setCacheKey: true when model id contains bedrock/", () => { + const model = createCompatModel("my-bifrost/bedrock/anthropic.claude-sonnet-4") + const result = ProviderTransform.message(makeMsgs(), model, {}, { setCacheKey: true }) as any[] + const sysMsg = 
result.find((m: any) => m.role === "system") + expect(Array.isArray(sysMsg?.content)).toBe(true) + expect(sysMsg.content[0].providerOptions?.openaiCompatible?.cache_control).toEqual(cacheOpt) + }) + + test("does not inject cache_control for openai-compatible without cacheStrategy or bedrock model id", () => { + const model = createCompatModel("my-provider/gpt-4o") + const result = ProviderTransform.message(makeMsgs(), model, {}, { setCacheKey: true }) as any[] + const sysMsg = result.find((m: any) => m.role === "system") + // system message must stay as a plain string — no content block conversion + expect(typeof sysMsg?.content).toBe("string") + expect(sysMsg?.providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + }) + + test("does not inject cache_control when no providerOpts are passed", () => { + const model = createCompatModel("my-bifrost/bedrock/amazon.nova-pro") + const result = ProviderTransform.message(makeMsgs(), model, {}) as any[] + const sysMsg = result.find((m: any) => m.role === "system") + expect(typeof sysMsg?.content).toBe("string") + expect(sysMsg?.providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + }) + + test("native anthropic provider is unaffected by cacheStrategy option", () => { + const anthropicModel = { + ...createCompatModel("anthropic/claude-sonnet-4", "anthropic"), + api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" }, + } + const result = ProviderTransform.message(makeMsgs(), anthropicModel, {}, { cacheStrategy: "bedrock" }) as any[] + const sysMsg = result.find((m: any) => m.role === "system") + // Native anthropic path: cache hint is at message-level providerOptions, not converted to block + expect(sysMsg?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" }) + }) + + test("multi-part user message: only last content block gets cache_control", () => { + const model = createCompatModel("my-bifrost/bedrock/anthropic.claude-sonnet-4") + const msgs = [ 
+ { role: "system", content: "System prompt." }, + { + role: "user", + content: [ + { type: "text", text: "First part" }, + { type: "text", text: "Second part" }, + ], + }, + ] as any[] + const result = ProviderTransform.message(msgs, model, {}, { cacheStrategy: "bedrock" }) as any[] + const userMsg = result.find((m: any) => m.role === "user") + expect(userMsg.content[0].providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + expect(userMsg.content[1].providerOptions?.openaiCompatible?.cache_control).toEqual(cacheOpt) + }) +}) + describe("ProviderTransform.options - zai/zhipuai thinking", () => { const sessionID = "test-session-123" diff --git a/packages/web/src/content/docs/config.mdx b/packages/web/src/content/docs/config.mdx index 346ce63805ee..d5bcaa8abd5e 100644 --- a/packages/web/src/content/docs/config.mdx +++ b/packages/web/src/content/docs/config.mdx @@ -368,7 +368,7 @@ You can configure the providers and models you want to use in your OpenCode conf The `small_model` option configures a separate model for lightweight tasks like title generation. By default, OpenCode tries to use a cheaper model if one is available from your provider, otherwise it falls back to your main model. -Provider options can include `timeout`, `chunkTimeout`, and `setCacheKey`: +Provider options can include `timeout`, `chunkTimeout`, `setCacheKey`, and `cacheStrategy`: ```json title="opencode.json" { @@ -388,6 +388,7 @@ Provider options can include `timeout`, `chunkTimeout`, and `setCacheKey`: - `timeout` - Request timeout in milliseconds (default: 300000). Set to `false` to disable. - `chunkTimeout` - Timeout in milliseconds between streamed response chunks. If no chunk arrives in time, the request is aborted. - `setCacheKey` - Ensure a cache key is always set for designated provider. +- `cacheStrategy` - Cache strategy for openai-compatible providers. Set to `"bedrock"` when routing through a proxy (e.g. Bifrost, LiteLLM) to AWS Bedrock Claude models. 
This injects `cache_control: {type: "ephemeral"}` into message content blocks instead of `promptCacheKey`, matching how native Bedrock/Anthropic providers handle caching. See [Caching with OpenAI-Compatible Bedrock Proxies](#caching-with-openai-compatible-bedrock-proxies) below. You can also configure [local models](/docs/models#local). [Learn more](/docs/models). @@ -480,6 +481,34 @@ Bearer tokens (`AWS_BEARER_TOKEN_BEDROCK` or `/connect`) take precedence over pr --- +##### Caching with OpenAI-Compatible Bedrock Proxies + +When routing Claude models through an OpenAI-compatible proxy (e.g. [Bifrost](https://github.com/maximhq/bifrost), [LiteLLM](https://github.com/BerriAI/litellm)) to AWS Bedrock, the standard `setCacheKey` option sends a `promptCacheKey` request parameter — which is not the correct caching mechanism for Bedrock Claude. Bedrock requires `cache_control: {type: "ephemeral"}` injected into message content blocks. + +Set `cacheStrategy: "bedrock"` to enable this: + +```json title="opencode.json" +{ + "$schema": "https://opencode.ai/config.json", + "provider": { + "my-bifrost": { + "npm": "@ai-sdk/openai-compatible", + "options": { + "baseURL": "https://your-bifrost-endpoint/api/v1", + "apiKey": "your-key", + "cacheStrategy": "bedrock" + } + } + } +} +``` + +This is equivalent to how native `@ai-sdk/anthropic` and `@ai-sdk/amazon-bedrock` providers handle caching internally. + +You can also trigger the bedrock cache strategy automatically without `cacheStrategy` by setting `setCacheKey: true` and using a model ID that contains `bedrock/` (e.g. `my-bifrost/bedrock/anthropic.claude-sonnet-4`). + +--- + ### Themes Set your UI theme in `tui.json`.