diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts
index 8f6e1556ada3..6267706c3364 100644
--- a/packages/opencode/src/provider/provider.ts
+++ b/packages/opencode/src/provider/provider.ts
@@ -1130,16 +1130,27 @@ const layer: Layer.Layer<
               if (model.id && model.id !== modelID) return modelID
               return existingModel?.name ?? modelID
             })
+            // Resolved once so `api.npm` and the capability defaults below share one value.
+            const resolvedNpm =
+              model.provider?.npm ??
+              provider.npm ??
+              existingModel?.api.npm ??
+              modelsDev[providerID]?.npm ??
+              "@ai-sdk/openai-compatible"
+            // Models on OpenAI-style SDK packages default image input and attachments to
+            // true, since the SDK natively converts file parts to image_url format. An
+            // explicit `attachment` / `modalities` setting or an existing model definition
+            // still takes precedence; every other package keeps the previous `false` default.
+            const defaultImageInput =
+              resolvedNpm === "@ai-sdk/openai-compatible" ||
+              resolvedNpm === "@ai-sdk/openai" ||
+              resolvedNpm === "@ai-sdk/azure" ||
+              resolvedNpm === "@ai-sdk/github-copilot"
             const parsedModel: Model = {
               id: ModelID.make(modelID),
               api: {
                 id: model.id ?? existingModel?.api.id ?? modelID,
-                npm:
-                  model.provider?.npm ??
-                  provider.npm ??
-                  existingModel?.api.npm ??
-                  modelsDev[providerID]?.npm ??
-                  "@ai-sdk/openai-compatible",
+                npm: resolvedNpm,
                 url: model.provider?.api ?? provider?.api ?? existingModel?.api.url ?? modelsDev[providerID]?.api ?? "",
               },
               status: model.status ?? existingModel?.status ?? "active",
@@ -1148,12 +1159,15 @@ const layer: Layer.Layer<
               capabilities: {
                 temperature: model.temperature ?? existingModel?.capabilities.temperature ?? false,
                 reasoning: model.reasoning ?? existingModel?.capabilities.reasoning ?? false,
-                attachment: model.attachment ?? existingModel?.capabilities.attachment ?? false,
+                attachment: model.attachment ?? existingModel?.capabilities.attachment ?? defaultImageInput,
                 toolcall: model.tool_call ?? existingModel?.capabilities.toolcall ?? true,
                 input: {
                   text: model.modalities?.input?.includes("text") ?? existingModel?.capabilities.input.text ?? true,
                   audio: model.modalities?.input?.includes("audio") ?? existingModel?.capabilities.input.audio ?? false,
-                  image: model.modalities?.input?.includes("image") ?? existingModel?.capabilities.input.image ?? false,
+                  image:
+                    model.modalities?.input?.includes("image") ??
+                    existingModel?.capabilities.input.image ??
+                    defaultImageInput,
                   video: model.modalities?.input?.includes("video") ?? existingModel?.capabilities.input.video ?? false,
                   pdf: model.modalities?.input?.includes("pdf") ?? existingModel?.capabilities.input.pdf ?? false,
                 },
diff --git a/packages/opencode/test/provider/provider.test.ts b/packages/opencode/test/provider/provider.test.ts
index 8993020820e3..de17c00682e6 100644
--- a/packages/opencode/test/provider/provider.test.ts
+++ b/packages/opencode/test/provider/provider.test.ts
@@ -922,6 +922,55 @@ test("model modalities default correctly", async () => {
       const model = providers[ProviderID.make("test-provider")].models["test-model"]
       expect(model.capabilities.input.text).toBe(true)
       expect(model.capabilities.output.text).toBe(true)
+      // OpenAI-compatible providers should default image input to true
+      expect(model.capabilities.input.image).toBe(true)
+      expect(model.capabilities.attachment).toBe(true)
+    },
+  })
+})
+
+test("custom openai-compatible provider defaults image input to true", async () => {
+  await using tmp = await tmpdir({
+    init: async (dir) => {
+      await Bun.write(
+        path.join(dir, "opencode.json"),
+        JSON.stringify({
+          $schema: "https://opencode.ai/config.json",
+          provider: {
+            "custom-vision": {
+              name: "Custom Vision Provider",
+              npm: "@ai-sdk/openai-compatible",
+              env: [],
+              models: {
+                "vision-model": {
+                  name: "Vision Model",
+                  tool_call: true,
+                  limit: { context: 128000, output: 4096 },
+                  // No modalities specified - should default image to true
+                },
+              },
+              options: {
+                apiKey: "test-key",
+                baseURL: "https://api.custom.com/v1",
+              },
+            },
+          },
+        }),
+      )
+    },
+  })
+  await Instance.provide({
+    directory: tmp.path,
+    fn: async () => {
+      const providers = await list()
+      const model = providers[ProviderID.make("custom-vision")].models["vision-model"]
+      // Should default to true for OpenAI-compatible providers
+      expect(model.capabilities.input.image).toBe(true)
+      expect(model.capabilities.attachment).toBe(true)
+      // Other modalities should still default to false
+      expect(model.capabilities.input.audio).toBe(false)
+      expect(model.capabilities.input.video).toBe(false)
+      expect(model.capabilities.input.pdf).toBe(false)
     },
   })
 })