Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packages/opencode/src/config/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ export const Info = Schema.Struct({
setCacheKey: Schema.optional(Schema.Boolean).annotate({
description: "Enable promptCacheKey for this provider (default false)",
}),
cacheStrategy: Schema.optional(Schema.Literal("bedrock")).annotate({
description: "Cache injection strategy for openai-compatible providers proxying Bedrock. 'bedrock' injects cache_control blocks instead of promptCacheKey.",
}),
timeout: Schema.optional(
Schema.Union([PositiveInt, Schema.Literal(false)]).annotate({
description: "Timeout in milliseconds for full requests to this provider. Set to false to disable timeout.",
Expand Down
56 changes: 53 additions & 3 deletions packages/opencode/src/provider/transform.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { ModelMessage, ToolResultPart } from "ai"
import type { ModelMessage, TextPart, ImagePart, FilePart, ToolResultPart } from "ai"
import { mergeDeep, unique } from "remeda"
import type { JSONSchema7 } from "@ai-sdk/provider"
import type * as Provider from "./provider"
Expand Down Expand Up @@ -431,7 +431,47 @@ function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMes
})
}

export function message(msgs: ModelMessage[], model: Provider.Model, options: Record<string, unknown>) {
/**
 * Injects `cache_control` on content blocks for openai-compatible providers routing to
 * Anthropic-capable backends (e.g. Bifrost, LiteLLM, Databricks).
 *
 * The targets are the first two system messages and the last two non-system messages.
 * Unlike applyCaching (which mutates messages in-place), this returns a remapped array:
 * targeted system/user messages are replaced by shallow copies whose content is a block
 * array, because proxies only honour `cache_control` when it is on the block itself,
 * not on a top-level message field.
 *
 * @param msgs - Conversation messages in request order. Neither the array nor its
 *   message objects are modified.
 * @returns A new array of the same length. Messages that are not targets, targeted
 *   assistant/tool messages, and user messages with no content parts are returned
 *   by reference.
 */
function applyCompatCaching(msgs: ModelMessage[]): ModelMessage[] {
  const cacheOpt = { openaiCompatible: { cache_control: { type: "ephemeral" } } }

  // The target set does not depend on the message being mapped, so build it once
  // instead of re-deriving it per element. Set membership is by reference identity,
  // matching how the targets are selected from `msgs` in the first place.
  const targets = new Set<ModelMessage>([
    ...msgs.filter((msg) => msg.role === "system").slice(0, 2),
    ...msgs.filter((msg) => msg.role !== "system").slice(-2),
  ])

  return msgs.map((msg) => {
    if (!targets.has(msg)) return msg

    if (msg.role === "system") {
      // @ai-sdk types system content as string; cast is intentional — proxies serialise block arrays correctly
      return {
        ...msg,
        content: [{ type: "text" as const, text: msg.content, providerOptions: cacheOpt }],
      } as unknown as ModelMessage
    }

    if (msg.role === "user") {
      // Normalise string content to a single text block; copy arrays so the input stays untouched.
      const parts: Array<TextPart | ImagePart | FilePart> =
        typeof msg.content === "string"
          ? [{ type: "text" as const, text: msg.content }]
          : Array.isArray(msg.content)
            ? [...msg.content]
            : []
      if (parts.length === 0) return msg

      // Only the last block carries the cache hint; any providerOptions already on it are preserved.
      const lastIndex = parts.length - 1
      const last = parts[lastIndex]
      parts[lastIndex] = { ...last, providerOptions: mergeDeep(last.providerOptions ?? {}, cacheOpt) }
      return { ...msg, content: parts } as ModelMessage
    }

    // assistant/tool messages in targets are intentionally skipped
    return msg
  })
}

export function message(
msgs: ModelMessage[],
model: Provider.Model,
options: Record<string, unknown>,
providerOpts?: Record<string, any>,
) {
msgs = unsupportedParts(msgs, model)
msgs = normalizeMessages(msgs, model, options)
if (
Expand All @@ -443,10 +483,20 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re
model.id.includes("claude") ||
model.api.npm === "@ai-sdk/anthropic" ||
model.api.npm === "@ai-sdk/alibaba") &&
model.api.npm !== "@ai-sdk/gateway"
model.api.npm !== "@ai-sdk/gateway" &&
model.api.npm !== "@ai-sdk/openai-compatible"
) {
msgs = applyCaching(msgs, model)
}
// For openai-compatible providers, inject cache_control on content blocks when
// cacheStrategy is "bedrock" or setCacheKey is true with a bedrock/ model id.
if (
model.api.npm === "@ai-sdk/openai-compatible" &&
(providerOpts?.cacheStrategy === "bedrock" ||
(providerOpts?.setCacheKey === true && model.id.toLowerCase().includes("bedrock/")))
) {
msgs = applyCompatCaching(msgs)
}

// Remap providerOptions keys from stored providerID to expected SDK key
const key = sdkKey(model.api.npm)
Expand Down
16 changes: 16 additions & 0 deletions packages/opencode/src/session/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,22 @@ const live: Layer.Layer<
tool: failed.toolCall.toolName,
error: failed.error.message,
}),
...input.model.headers,
...headers,
},
maxRetries: input.retries ?? 0,
messages,
model: wrapLanguageModel({
model: language,
middleware: [
{
specificationVersion: "v3" as const,
async transformParams(args) {
if (args.type === "stream") {
// @ts-expect-error
args.params.prompt = ProviderTransform.message(args.params.prompt, input.model, options, item.options)
}
return args.params
toolName: "invalid",
}
},
Expand Down
115 changes: 115 additions & 0 deletions packages/opencode/test/provider/transform.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,121 @@ describe("ProviderTransform.options - setCacheKey", () => {
})
})

describe("ProviderTransform.message - openai-compatible Bedrock caching", () => {
  // Model id used by every case that routes a Claude model through a Bedrock proxy.
  const bedrockClaudeId = "my-bifrost/bedrock/anthropic.claude-sonnet-4"

  // Expected value of the cache_control hint wherever it is injected.
  const ephemeral = { type: "ephemeral" }

  /** Builds a minimal openai-compatible model fixture for the given model id. */
  function createCompatModel(modelId: string, providerID = "my-bifrost") {
    const fixture = {
      id: modelId,
      providerID,
      api: {
        id: modelId,
        url: "https://bifrost.example.com/api/v1",
        npm: "@ai-sdk/openai-compatible",
      },
      name: "Claude via Bifrost",
      capabilities: {
        temperature: true,
        reasoning: false,
        attachment: true,
        toolcall: true,
        input: { text: true, audio: false, image: true, video: false, pdf: true },
        output: { text: true, audio: false, image: false, video: false, pdf: false },
        interleaved: false,
      },
      cost: { input: 0.003, output: 0.015, cache: { read: 0.0003, write: 0.00375 } },
      limit: { context: 200000, output: 8192 },
      status: "active",
      options: {},
      headers: {},
    }
    return fixture as any
  }

  /** Fresh two-message conversation: one string system prompt and one block-array user turn. */
  function defaultConversation() {
    return [
      { role: "system", content: "You are a helpful assistant." },
      { role: "user", content: [{ type: "text", text: "Hello" }] },
    ] as any[]
  }

  /** Runs the transform with empty call options and the given provider options. */
  function transform(model: any, providerOpts: Record<string, any>, conversation: any[] = defaultConversation()) {
    return ProviderTransform.message(conversation, model, {}, providerOpts) as any[]
  }

  /** Returns the first message with the given role, if any. */
  function firstWithRole(messages: any[], role: string) {
    return messages.find((m: any) => m.role === role)
  }

  test("string system message is converted to content block array with cache_control on last block", () => {
    const output = transform(createCompatModel(bedrockClaudeId), { cacheStrategy: "bedrock" })
    const systemMessage = firstWithRole(output, "system")

    // String content must become a content block array so cache_control is on the block,
    // not a top-level message field (which proxies like Bifrost ignore for caching)
    expect(Array.isArray(systemMessage?.content)).toBe(true)
    const [block] = systemMessage.content
    expect(block.type).toBe("text")
    expect(block.text).toBe("You are a helpful assistant.")
    expect(block.providerOptions?.openaiCompatible?.cache_control).toEqual(ephemeral)
    // must NOT be on the message object itself
    expect(systemMessage.providerOptions?.openaiCompatible?.cache_control).toBeUndefined()
  })

  test("user message content block gets cache_control on last block", () => {
    const output = transform(createCompatModel(bedrockClaudeId), { cacheStrategy: "bedrock" })
    const userMessage = firstWithRole(output, "user")

    expect(Array.isArray(userMessage?.content)).toBe(true)
    const tail = userMessage.content[userMessage.content.length - 1]
    expect(tail.providerOptions?.openaiCompatible?.cache_control).toEqual(ephemeral)
    // must NOT be on the message object itself
    expect(userMessage.providerOptions?.openaiCompatible?.cache_control).toBeUndefined()
  })

  test("auto-triggers on setCacheKey: true when model id contains bedrock/", () => {
    const output = transform(createCompatModel(bedrockClaudeId), { setCacheKey: true })
    const systemMessage = firstWithRole(output, "system")

    expect(Array.isArray(systemMessage?.content)).toBe(true)
    expect(systemMessage.content[0].providerOptions?.openaiCompatible?.cache_control).toEqual(ephemeral)
  })

  test("does not inject cache_control for openai-compatible without cacheStrategy or bedrock model id", () => {
    const output = transform(createCompatModel("my-provider/gpt-4o"), { setCacheKey: true })
    const systemMessage = firstWithRole(output, "system")

    // system message must stay as a plain string — no content block conversion
    expect(typeof systemMessage?.content).toBe("string")
    expect(systemMessage?.providerOptions?.openaiCompatible?.cache_control).toBeUndefined()
  })

  test("does not inject cache_control when no providerOpts are passed", () => {
    const model = createCompatModel("my-bifrost/bedrock/amazon.nova-pro")
    // Called with three arguments on purpose: the provider options parameter is omitted entirely.
    const output = ProviderTransform.message(defaultConversation(), model, {}) as any[]
    const systemMessage = firstWithRole(output, "system")

    expect(typeof systemMessage?.content).toBe("string")
    expect(systemMessage?.providerOptions?.openaiCompatible?.cache_control).toBeUndefined()
  })

  test("native anthropic provider is unaffected by cacheStrategy option", () => {
    const nativeModel = {
      ...createCompatModel("anthropic/claude-sonnet-4", "anthropic"),
      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
    }
    const output = transform(nativeModel, { cacheStrategy: "bedrock" })
    const systemMessage = firstWithRole(output, "system")

    // Native anthropic path: cache hint is at message-level providerOptions, not converted to block
    expect(systemMessage?.providerOptions?.anthropic?.cacheControl).toEqual({ type: "ephemeral" })
  })

  test("multi-part user message: only last content block gets cache_control", () => {
    const conversation = [
      { role: "system", content: "System prompt." },
      {
        role: "user",
        content: [
          { type: "text", text: "First part" },
          { type: "text", text: "Second part" },
        ],
      },
    ] as any[]
    const output = transform(createCompatModel(bedrockClaudeId), { cacheStrategy: "bedrock" }, conversation)
    const [firstPart, secondPart] = firstWithRole(output, "user").content

    expect(firstPart.providerOptions?.openaiCompatible?.cache_control).toBeUndefined()
    expect(secondPart.providerOptions?.openaiCompatible?.cache_control).toEqual(ephemeral)
  })
})

describe("ProviderTransform.options - zai/zhipuai thinking", () => {
const sessionID = "test-session-123"

Expand Down
31 changes: 30 additions & 1 deletion packages/web/src/content/docs/config.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ You can configure the providers and models you want to use in your OpenCode conf

The `small_model` option configures a separate model for lightweight tasks like title generation. By default, OpenCode tries to use a cheaper model if one is available from your provider, otherwise it falls back to your main model.

Provider options can include `timeout`, `chunkTimeout`, and `setCacheKey`:
Provider options can include `timeout`, `chunkTimeout`, `setCacheKey`, and `cacheStrategy`:

```json title="opencode.json"
{
Expand All @@ -388,6 +388,7 @@ Provider options can include `timeout`, `chunkTimeout`, and `setCacheKey`:
- `timeout` - Request timeout in milliseconds (default: 300000). Set to `false` to disable.
- `chunkTimeout` - Timeout in milliseconds between streamed response chunks. If no chunk arrives in time, the request is aborted.
- `setCacheKey` - Ensure a cache key is always set for the designated provider.
- `cacheStrategy` - Cache strategy for openai-compatible providers. Set to `"bedrock"` when routing through a proxy (e.g. Bifrost, LiteLLM) to AWS Bedrock Claude models. This injects `cache_control: {type: "ephemeral"}` into message content blocks instead of `promptCacheKey`, matching how native Bedrock/Anthropic providers handle caching. See [Caching with OpenAI-Compatible Bedrock Proxies](#caching-with-openai-compatible-bedrock-proxies) below.

You can also configure [local models](/docs/models#local). [Learn more](/docs/models).

Expand Down Expand Up @@ -480,6 +481,34 @@ Bearer tokens (`AWS_BEARER_TOKEN_BEDROCK` or `/connect`) take precedence over pr

---

##### Caching with OpenAI-Compatible Bedrock Proxies

When routing Claude models through an OpenAI-compatible proxy (e.g. [Bifrost](https://github.com/maximhq/bifrost), [LiteLLM](https://github.com/BerriAI/litellm)) to AWS Bedrock, the standard `setCacheKey` option sends a `promptCacheKey` request parameter, which is not the caching mechanism Bedrock Claude uses. Bedrock instead requires `cache_control: {type: "ephemeral"}` to be injected into message content blocks.

Set `cacheStrategy: "bedrock"` to enable this:

```json title="opencode.json"
{
"$schema": "https://opencode.ai/config.json",
"provider": {
"my-bifrost": {
"npm": "@ai-sdk/openai-compatible",
"options": {
"baseURL": "https://your-bifrost-endpoint/api/v1",
"apiKey": "your-key",
"cacheStrategy": "bedrock"
}
}
}
}
```

This is equivalent to how native `@ai-sdk/anthropic` and `@ai-sdk/amazon-bedrock` providers handle caching internally.

You can also trigger the bedrock cache strategy automatically without `cacheStrategy` by setting `setCacheKey: true` and using a model ID that contains `bedrock/` (e.g. `my-bifrost/bedrock/anthropic.claude-sonnet-4`).

---

### Themes

Set your UI theme in `tui.json`.
Expand Down
Loading