From 446b4e09ef638dc4c8b05d240f7732cf61622801 Mon Sep 17 00:00:00 2001 From: Simon Ferquel's Clanker Date: Mon, 11 May 2026 09:00:37 +0000 Subject: [PATCH 1/5] fix: pass fully-qualified provider/model ID to modelcaps.Load modelcaps.Load requires a "provider/model" identifier (e.g. "anthropic/claude-sonnet-4-6") to look up model capabilities in the models.dev database. All provider call sites were passing just the bare model name (e.g. "claude-sonnet-4-6"), so the lookup always missed and attachments (images, PDFs) were silently dropped with StrategyDrop. Fix all affected call sites across anthropic, openai, dmr, bedrock, and gemini providers to pass c.ModelConfig.Provider+"/"+c.ModelConfig.Model. --- pkg/model/provider/anthropic/beta_converter.go | 2 +- pkg/model/provider/anthropic/client.go | 2 +- pkg/model/provider/bedrock/client.go | 2 +- pkg/model/provider/dmr/client.go | 2 +- pkg/model/provider/gemini/client.go | 2 +- pkg/model/provider/openai/client.go | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/model/provider/anthropic/beta_converter.go b/pkg/model/provider/anthropic/beta_converter.go index 63896a163..7bb91f13b 100644 --- a/pkg/model/provider/anthropic/beta_converter.go +++ b/pkg/model/provider/anthropic/beta_converter.go @@ -277,7 +277,7 @@ func (c *Client) convertBetaUserMultiContent(ctx context.Context, parts []chat.M case chat.MessagePartTypeDocument: if part.Document != nil { - stdBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Model) + stdBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) if err != nil { return nil, fmt.Errorf("failed to convert document attachment %q: %w", part.Document.Name, err) } diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go index be1ca1643..596941b3a 100644 --- a/pkg/model/provider/anthropic/client.go +++ b/pkg/model/provider/anthropic/client.go @@ -553,7 +553,7 @@ func (c *Client) convertUserMultiContent(ctx context.Context, parts []chat.Messa case chat.MessagePartTypeDocument: if part.Document != nil { - docBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Model) + docBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) if err != nil { return nil, fmt.Errorf("failed to convert document attachment %q: %w", part.Document.Name, err) } diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go index 82d19bc69..25c24212e 100644 --- a/pkg/model/provider/bedrock/client.go +++ b/pkg/model/provider/bedrock/client.go @@ -236,7 +236,7 @@ func (c *Client) buildConverseStreamInput(ctx context.Context, messages []chat.M enableCaching := c.promptCachingEnabled() // Convert and set messages (excluding system) - input.Messages, input.System = convertMessages(ctx, messages, c.ModelConfig.Model, enableCaching) + input.Messages, input.System = convertMessages(ctx, messages, c.ModelConfig.Provider+"/"+c.ModelConfig.Model, enableCaching) // Compute thinking fields first — its presence drives the inference config. additionalFields := c.buildAdditionalModelRequestFields() diff --git a/pkg/model/provider/dmr/client.go b/pkg/model/provider/dmr/client.go index 2f9887524..30c7b7615 100644 --- a/pkg/model/provider/dmr/client.go +++ b/pkg/model/provider/dmr/client.go @@ -147,7 +147,7 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, opts ...options.Opt // convertMessages converts chat messages to OpenAI format and merges consecutive // system/user messages, which is needed by some local models run by DMR. func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) []openai.ChatCompletionMessageParamUnion { - openaiMessages := oaistream.ConvertMessages(ctx, messages, c.ModelConfig.Model) + openaiMessages := oaistream.ConvertMessages(ctx, messages, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) return oaistream.MergeConsecutiveMessages(openaiMessages) } diff --git a/pkg/model/provider/gemini/client.go b/pkg/model/provider/gemini/client.go index 1c944ee88..6cbb40e64 100644 --- a/pkg/model/provider/gemini/client.go +++ b/pkg/model/provider/gemini/client.go @@ -601,7 +601,7 @@ func (c *Client) CreateChatCompletionStream( } } - contents := convertMessagesToGemini(ctx, messages, c.ModelConfig.Model) + contents := convertMessagesToGemini(ctx, messages, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) // Debug: Log the messages we're sending slog.DebugContext(ctx, "Gemini messages", "count", len(contents)) diff --git a/pkg/model/provider/openai/client.go b/pkg/model/provider/openai/client.go index 28e860491..65596d25e 100644 --- a/pkg/model/provider/openai/client.go +++ b/pkg/model/provider/openai/client.go @@ -181,7 +181,7 @@ func (c *Client) Close() { // convertMessages converts chat.Message to openai.ChatCompletionMessageParamUnion // using the shared oaistream implementation. func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) []openai.ChatCompletionMessageParamUnion { - return oaistream.ConvertMessages(ctx, messages, c.ModelConfig.Model) + return oaistream.ConvertMessages(ctx, messages, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) } // CreateChatCompletionStream creates a streaming chat completion request @@ -613,7 +613,7 @@ func (c *Client) convertMessagesToResponseInput(ctx context.Context, messages [] } case chat.MessagePartTypeDocument: if part.Document != nil { - docParts, err := convertDocumentToResponseInput(ctx, *part.Document, c.ModelConfig.Model) + docParts, err := convertDocumentToResponseInput(ctx, *part.Document, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) if err != nil { slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name) continue From 1dbac3d8084874812dfe372025988c1766844d04 Mon Sep 17 00:00:00 2001 From: Simon Ferquel's Clanker Date: Mon, 11 May 2026 10:24:08 +0000 Subject: [PATCH 2/5] test: add regression tests for qualified provider/model ID fix Covers the bug where callers passed a bare model name (e.g. "claude-sonnet-4-6") to modelcaps.Load instead of a fully-qualified "provider/model" identifier, causing all image/PDF attachments to be silently dropped. Three tests added: - modelcaps.TestLoadFromStore_QualifiedIDRequired: verifies at the modelcaps layer that a bare model name does NOT resolve to vision caps while the qualified ID does. - anthropic.TestConvertDocumentAnthropic_QualifiedIDRequired: end-to-end through convertDocumentWithCaps; image is dropped for bare ID and preserved as a native image block for the qualified ID. - oaistream.TestConvertDocument_QualifiedIDRequired: same for the OpenAI/ oaistream path; image is dropped for bare ID and preserved as an image URL part for the qualified ID. All three inject a fake modelsdev.Store (no network) and use the LoadFromStore helper so the tests are deterministic and fast. --- pkg/attachment/modelcaps/modelcaps_test.go | 45 +++++++++++++++++++ .../provider/anthropic/attachments_test.go | 44 ++++++++++++++++++ .../provider/oaistream/attachments_test.go | 39 ++++++++++++++++ 3 files changed, 128 insertions(+) diff --git a/pkg/attachment/modelcaps/modelcaps_test.go b/pkg/attachment/modelcaps/modelcaps_test.go index a473887a6..fc410c86e 100644 --- a/pkg/attachment/modelcaps/modelcaps_test.go +++ b/pkg/attachment/modelcaps/modelcaps_test.go @@ -13,6 +13,51 @@ func buildStore(providers map[string]modelsdev.Provider) *modelsdev.Store { return modelsdev.NewDatabaseStore(db) } +// TestLoadFromStore_QualifiedIDRequired is the regression test for the bug +// fixed by pass-fully-qualified-provider-model-ID: modelcaps.Load (and +// LoadFromStore) requires a "provider/model" key to find a model in the +// models.dev database. A bare model name without the provider prefix must +// NOT resolve to vision capabilities — it falls back to text-only. +// +// Before the fix, callers passed c.ModelConfig.Model (e.g. "claude-sonnet-4-6") +// instead of c.ModelConfig.Provider+"/"+c.ModelConfig.Model; the lookup always +// missed and all image / PDF attachments were silently dropped. +func TestLoadFromStore_QualifiedIDRequired(t *testing.T) { + store := buildStore(map[string]modelsdev.Provider{ + "anthropic": { + Models: map[string]modelsdev.Model{ + "claude-sonnet-4-6": { + Name: "Claude Sonnet 4.6", + Modalities: modelsdev.Modalities{ + Input: []string{"text", "image", "pdf"}, + Output: []string{"text"}, + }, + }, + }, + }, + }) + + // Bare model name (the original bug): must fall back to conservative text-only caps. + bareID := "claude-sonnet-4-6" + mcBare := modelcaps.LoadFromStore(store, bareID) + if mcBare.Supports("image/jpeg") { + t.Errorf("bare model name %q must NOT resolve to vision caps: image/jpeg should be dropped", bareID) + } + if mcBare.Supports("application/pdf") { + t.Errorf("bare model name %q must NOT resolve to vision caps: application/pdf should be dropped", bareID) + } + + // Fully-qualified ID (the fix): must resolve to vision+pdf caps. + qualifiedID := "anthropic/claude-sonnet-4-6" + mcQualified := modelcaps.LoadFromStore(store, qualifiedID) + if !mcQualified.Supports("image/jpeg") { + t.Errorf("qualified ID %q must resolve to vision caps: image/jpeg should be passed through", qualifiedID) + } + if !mcQualified.Supports("application/pdf") { + t.Errorf("qualified ID %q must resolve to vision caps: application/pdf should be passed through", qualifiedID) + } +} + func TestLoadFromStore_VisionModel(t *testing.T) { store := buildStore(map[string]modelsdev.Provider{ "anthropic": { diff --git a/pkg/model/provider/anthropic/attachments_test.go b/pkg/model/provider/anthropic/attachments_test.go index 6cc8551a6..c20168703 100644 --- a/pkg/model/provider/anthropic/attachments_test.go +++ b/pkg/model/provider/anthropic/attachments_test.go @@ -9,6 +9,7 @@ import ( "github.com/docker/docker-agent/pkg/attachment/modelcaps" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" ) // minJPEG is a minimal JPEG magic-byte header for use in tests. @@ -51,6 +52,49 @@ func TestConvertDocumentAnthropic_StrategyB64_PDF(t *testing.T) { assert.Nil(t, blocks[0].OfText, "expected no text block for PDF") } +// TestConvertDocumentAnthropic_QualifiedIDRequired is the regression test for +// the bug where callers passed a bare model name to convertDocument instead of +// a "provider/model" qualified identifier, causing all image/PDF attachments +// to be silently dropped because modelcaps.Load never found the model. +// +// It uses LoadFromStore (fake in-memory store) to avoid network calls, and +// feeds the resulting caps into convertDocumentWithCaps so we can verify the +// full chain: qualified ID -> vision caps -> image block produced. +func TestConvertDocumentAnthropic_QualifiedIDRequired(t *testing.T) { + store := modelsdev.NewDatabaseStore(&modelsdev.Database{ + Providers: map[string]modelsdev.Provider{ + "anthropic": { + Models: map[string]modelsdev.Model{ + "claude-sonnet-4-6": { + Modalities: modelsdev.Modalities{ + Input: []string{"text", "image", "pdf"}, + }, + }, + }, + }, + }, + }) + + doc := chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + } + + // Bare model name (the original bug): caps lookup misses, image must be dropped. + capsBare := modelcaps.LoadFromStore(store, "claude-sonnet-4-6") + blocksBare, err := convertDocumentWithCaps(t.Context(), doc, capsBare) + require.NoError(t, err) + assert.Nil(t, blocksBare, "bare model name must not resolve caps: image should be dropped") + + // Qualified ID (the fix): caps lookup succeeds, image must be preserved. + capsQualified := modelcaps.LoadFromStore(store, "anthropic/claude-sonnet-4-6") + blocksQualified, err := convertDocumentWithCaps(t.Context(), doc, capsQualified) + require.NoError(t, err) + require.Len(t, blocksQualified, 1, "qualified ID must resolve caps: image should be present") + assert.NotNil(t, blocksQualified[0].OfImage, "expected native image block for qualified model ID") +} + func TestConvertDocumentAnthropic_StrategyTXT(t *testing.T) { doc := chat.Document{ Name: "spec.md", diff --git a/pkg/model/provider/oaistream/attachments_test.go b/pkg/model/provider/oaistream/attachments_test.go index 005a47c54..d73b42467 100644 --- a/pkg/model/provider/oaistream/attachments_test.go +++ b/pkg/model/provider/oaistream/attachments_test.go @@ -10,6 +10,7 @@ import ( "github.com/docker/docker-agent/pkg/attachment/modelcaps" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" ) // minJPEG is a minimal JPEG magic-byte header for use in tests. @@ -53,6 +54,44 @@ func TestConvertDocument_StrategyB64_ImageDropped(t *testing.T) { assert.Nil(t, parts, "image should be dropped for text-only model") } +// TestConvertDocument_QualifiedIDRequired is the regression test for the bug +// where callers passed a bare model name instead of a "provider/model" ID, +// causing modelcaps.Load to miss every model and silently drop image/PDF. +func TestConvertDocument_QualifiedIDRequired(t *testing.T) { + store := modelsdev.NewDatabaseStore(&modelsdev.Database{ + Providers: map[string]modelsdev.Provider{ + "openai": { + Models: map[string]modelsdev.Model{ + "gpt-4o": { + Modalities: modelsdev.Modalities{ + Input: []string{"text", "image"}, + }, + }, + }, + }, + }, + }) + + doc := chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + } + + // Bare model name (the original bug): image must be dropped. + capsBare := modelcaps.LoadFromStore(store, "gpt-4o") + partsBare, err := convertDocumentWithCaps(t.Context(), doc, capsBare) + require.NoError(t, err) + assert.Nil(t, partsBare, "bare model name must not resolve caps: image should be dropped") + + // Qualified ID (the fix): image must be preserved. + capsQualified := modelcaps.LoadFromStore(store, "openai/gpt-4o") + partsQualified, err := convertDocumentWithCaps(t.Context(), doc, capsQualified) + require.NoError(t, err) + require.Len(t, partsQualified, 1, "qualified ID must resolve caps: image should be present") + assert.NotNil(t, partsQualified[0].OfImageURL, "expected image URL part for qualified model ID") +} + func TestConvertDocument_StrategyTXT(t *testing.T) { doc := chat.Document{ Name: "readme.md", From 9e4db9252c6d2f08a2aafdf0b4168114e17c237f Mon Sep 17 00:00:00 2001 From: Simon Ferquel's Clanker Date: Mon, 11 May 2026 10:32:22 +0000 Subject: [PATCH 3/5] test: test qualified model ID via convertDocumentFromStore injection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous regression tests called convertDocumentWithCaps with pre-resolved caps, bypassing the modelID → store lookup path. The reviewer asked to test that the fully-qualified ID is what gets passed to the caps resolver. Add convertDocumentFromStore(ctx, doc, modelID, store) to anthropic and oaistream attachments — the store-injectable path that convertDocument delegates to. The two regression tests now call convertDocumentFromStore directly, which exercises the full: modelID string → LoadFromStore(store, modelID) → ModelCapabilities → blocks chain without hitting the network: - bare "claude-sonnet-4-6" → store miss → text-only caps → image dropped - qualified "anthropic/claude-sonnet-4-6" → store hit → vision caps → image block Same pattern for oaistream (gpt-4o / openai/gpt-4o). --- pkg/model/provider/anthropic/attachments.go | 14 +++++++++++- .../provider/anthropic/attachments_test.go | 22 +++++++++---------- pkg/model/provider/oaistream/attachments.go | 13 ++++++++++- .../provider/oaistream/attachments_test.go | 18 +++++++++------ 4 files changed, 47 insertions(+), 20 deletions(-) diff --git a/pkg/model/provider/anthropic/attachments.go b/pkg/model/provider/anthropic/attachments.go index 773e185c6..f3cbc4bb2 100644 --- a/pkg/model/provider/anthropic/attachments.go +++ b/pkg/model/provider/anthropic/attachments.go @@ -12,6 +12,7 @@ import ( "github.com/docker/docker-agent/pkg/attachment" "github.com/docker/docker-agent/pkg/attachment/modelcaps" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" ) // convertDocument converts a chat.Document to standard Anthropic SDK content blocks @@ -23,7 +24,18 @@ import ( // - text with InlineText → TextBlockParam with TXTEnvelope // - unsupported / no content → nil (logged as warning) func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]anthropic.ContentBlockParamUnion, error) { - mc, _ := modelcaps.Load(modelID) + store, err := modelsdev.NewStore() + if err != nil { + // Fall back to conservative text-only caps when the store is unavailable. + return convertDocumentWithCaps(ctx, doc, modelcaps.ModelCapabilities{}) + } + return convertDocumentFromStore(ctx, doc, modelID, store) +} + +// convertDocumentFromStore is the store-injectable variant of convertDocument, +// used by tests to inject a fake in-memory modelsdev.Store. +func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]anthropic.ContentBlockParamUnion, error) { + mc := modelcaps.LoadFromStore(store, modelID) return convertDocumentWithCaps(ctx, doc, mc) } diff --git a/pkg/model/provider/anthropic/attachments_test.go b/pkg/model/provider/anthropic/attachments_test.go index c20168703..c3fbf8e98 100644 --- a/pkg/model/provider/anthropic/attachments_test.go +++ b/pkg/model/provider/anthropic/attachments_test.go @@ -54,12 +54,12 @@ func TestConvertDocumentAnthropic_StrategyB64_PDF(t *testing.T) { // TestConvertDocumentAnthropic_QualifiedIDRequired is the regression test for // the bug where callers passed a bare model name to convertDocument instead of -// a "provider/model" qualified identifier, causing all image/PDF attachments -// to be silently dropped because modelcaps.Load never found the model. +// a "provider/model" qualified identifier, causing modelcaps to miss the model +// and silently drop all image/PDF attachments. // -// It uses LoadFromStore (fake in-memory store) to avoid network calls, and -// feeds the resulting caps into convertDocumentWithCaps so we can verify the -// full chain: qualified ID -> vision caps -> image block produced. +// It calls convertDocumentFromStore directly with an injected fake store so +// the test exercises the full modelID → LoadFromStore → caps → blocks path +// without hitting the network. func TestConvertDocumentAnthropic_QualifiedIDRequired(t *testing.T) { store := modelsdev.NewDatabaseStore(&modelsdev.Database{ Providers: map[string]modelsdev.Provider{ @@ -81,15 +81,15 @@ func TestConvertDocumentAnthropic_QualifiedIDRequired(t *testing.T) { Source: chat.DocumentSource{InlineData: minJPEG}, } - // Bare model name (the original bug): caps lookup misses, image must be dropped. - capsBare := modelcaps.LoadFromStore(store, "claude-sonnet-4-6") - blocksBare, err := convertDocumentWithCaps(t.Context(), doc, capsBare) + // Bare model name (the original bug): LoadFromStore misses the model, + // capabilities fall back to text-only, image must be dropped. + blocksBare, err := convertDocumentFromStore(t.Context(), doc, "claude-sonnet-4-6", store) require.NoError(t, err) assert.Nil(t, blocksBare, "bare model name must not resolve caps: image should be dropped") - // Qualified ID (the fix): caps lookup succeeds, image must be preserved. - capsQualified := modelcaps.LoadFromStore(store, "anthropic/claude-sonnet-4-6") - blocksQualified, err := convertDocumentWithCaps(t.Context(), doc, capsQualified) + // Qualified ID (the fix): LoadFromStore finds the model, vision caps + // are set, image must be preserved as a native image block. + blocksQualified, err := convertDocumentFromStore(t.Context(), doc, "anthropic/claude-sonnet-4-6", store) require.NoError(t, err) require.Len(t, blocksQualified, 1, "qualified ID must resolve caps: image should be present") assert.NotNil(t, blocksQualified[0].OfImage, "expected native image block for qualified model ID") diff --git a/pkg/model/provider/oaistream/attachments.go b/pkg/model/provider/oaistream/attachments.go index 2a23ce7bb..31eaa8d07 100644 --- a/pkg/model/provider/oaistream/attachments.go +++ b/pkg/model/provider/oaistream/attachments.go @@ -12,6 +12,7 @@ import ( "github.com/docker/docker-agent/pkg/attachment" "github.com/docker/docker-agent/pkg/attachment/modelcaps" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" ) // convertDocument converts a chat.Document to zero or more @@ -25,7 +26,17 @@ import ( // - text MIMEs with InlineText → text part with TXTEnvelope // - unsupported / no content → nil (logged as warning) func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]openai.ChatCompletionContentPartUnionParam, error) { - mc, _ := modelcaps.Load(modelID) + store, err := modelsdev.NewStore() + if err != nil { + return convertDocumentWithCaps(ctx, doc, modelcaps.ModelCapabilities{}) + } + return convertDocumentFromStore(ctx, doc, modelID, store) +} + +// convertDocumentFromStore is the store-injectable variant of convertDocument, +// used by tests to inject a fake in-memory modelsdev.Store. +func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]openai.ChatCompletionContentPartUnionParam, error) { + mc := modelcaps.LoadFromStore(store, modelID) return convertDocumentWithCaps(ctx, doc, mc) } diff --git a/pkg/model/provider/oaistream/attachments_test.go b/pkg/model/provider/oaistream/attachments_test.go index d73b42467..c3f06fad3 100644 --- a/pkg/model/provider/oaistream/attachments_test.go +++ b/pkg/model/provider/oaistream/attachments_test.go @@ -56,7 +56,11 @@ func TestConvertDocument_StrategyB64_ImageDropped(t *testing.T) { // TestConvertDocument_QualifiedIDRequired is the regression test for the bug // where callers passed a bare model name instead of a "provider/model" ID, -// causing modelcaps.Load to miss every model and silently drop image/PDF. +// causing modelcaps to miss the model and silently drop image/PDF attachments. +// +// It calls convertDocumentFromStore directly with an injected fake store so +// the test exercises the full modelID → LoadFromStore → caps → parts path +// without hitting the network. func TestConvertDocument_QualifiedIDRequired(t *testing.T) { store := modelsdev.NewDatabaseStore(&modelsdev.Database{ Providers: map[string]modelsdev.Provider{ @@ -78,15 +82,15 @@ func TestConvertDocument_QualifiedIDRequired(t *testing.T) { Source: chat.DocumentSource{InlineData: minJPEG}, } - // Bare model name (the original bug): image must be dropped. - capsBare := modelcaps.LoadFromStore(store, "gpt-4o") - partsBare, err := convertDocumentWithCaps(t.Context(), doc, capsBare) + // Bare model name (the original bug): LoadFromStore misses the model, + // capabilities fall back to text-only, image must be dropped. + partsBare, err := convertDocumentFromStore(t.Context(), doc, "gpt-4o", store) require.NoError(t, err) assert.Nil(t, partsBare, "bare model name must not resolve caps: image should be dropped") - // Qualified ID (the fix): image must be preserved. - capsQualified := modelcaps.LoadFromStore(store, "openai/gpt-4o") - partsQualified, err := convertDocumentWithCaps(t.Context(), doc, capsQualified) + // Qualified ID (the fix): LoadFromStore finds the model, vision caps + // are set, image must be preserved as a data-URI image part. + partsQualified, err := convertDocumentFromStore(t.Context(), doc, "openai/gpt-4o", store) require.NoError(t, err) require.Len(t, partsQualified, 1, "qualified ID must resolve caps: image should be present") assert.NotNil(t, partsQualified[0].OfImageURL, "expected image URL part for qualified model ID") From bda5311d9032b956fa39a00a29a593d73198ba91 Mon Sep 17 00:00:00 2001 From: Simon Ferquel's Clanker Date: Mon, 11 May 2026 11:09:27 +0000 Subject: [PATCH 4/5] refactor: use c.ID() at all modelcaps call sites; inject store for testability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per reviewer request, replace all c.ModelConfig.Provider+"/"+c.ModelConfig.Model expressions with c.ID() at the modelcaps lookup call sites in every provider. Also wire store injection so regression tests can verify the full path: anthropic/client.go: - Add modelsStore *modelsdev.Store field (nil = use real store) - Add convertDoc() method routing through store when set, using c.ID() - convertUserMultiContent now calls c.convertDoc() anthropic/beta_converter.go: - convertBetaUserMultiContent calls c.convertDoc() openai/client.go: - Add modelsStore field; convertMessages routes through ConvertMessagesFromStore - Responses API inline call uses c.ID() oaistream/messages.go: - Add ConvertMessagesFromStore / ConvertMultiContentFromStore (store-injectable) - Internal convertMessagesWithStore / convertMultiContentWithStore helpers dmr, gemini, bedrock: - Use c.ID() directly Tests updated: - anthropic test constructs Client{modelsStore: store} and calls convertUserMultiContent — image present only if c.ID() is used - oaistream test calls ConvertMultiContentFromStore with qualified vs bare ID — image present only when provider prefix is included --- .../provider/anthropic/attachments_test.go | 51 +++++++++++-------- .../provider/anthropic/beta_converter.go | 2 +- pkg/model/provider/anthropic/client.go | 13 ++++- pkg/model/provider/bedrock/client.go | 2 +- pkg/model/provider/dmr/client.go | 2 +- pkg/model/provider/gemini/client.go | 2 +- .../provider/oaistream/attachments_test.go | 32 ++++++------ pkg/model/provider/oaistream/messages.go | 31 ++++++++++- pkg/model/provider/openai/client.go | 10 +++- 9 files changed, 99 insertions(+), 46 deletions(-) diff --git a/pkg/model/provider/anthropic/attachments_test.go b/pkg/model/provider/anthropic/attachments_test.go index c3fbf8e98..99eeacad6 100644 --- a/pkg/model/provider/anthropic/attachments_test.go +++ b/pkg/model/provider/anthropic/attachments_test.go @@ -9,6 +9,8 @@ import ( "github.com/docker/docker-agent/pkg/attachment/modelcaps" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/config/latest" + "github.com/docker/docker-agent/pkg/model/provider/base" "github.com/docker/docker-agent/pkg/modelsdev" ) @@ -53,13 +55,15 @@ func TestConvertDocumentAnthropic_StrategyB64_PDF(t *testing.T) { } // TestConvertDocumentAnthropic_QualifiedIDRequired is the regression test for -// the bug where callers passed a bare model name to convertDocument instead of -// a "provider/model" qualified identifier, causing modelcaps to miss the model -// and silently drop all image/PDF attachments. +// the bug where convertUserMultiContent passed only c.ModelConfig.Model (bare +// model name) to convertDocument instead of c.ModelConfig.Provider+"/"+c.ModelConfig.Model. +// When the bare name was used, modelcaps.Load always missed the model and all +// image/PDF attachments were silently dropped. // -// It calls convertDocumentFromStore directly with an injected fake store so -// the test exercises the full modelID → LoadFromStore → caps → blocks path -// without hitting the network. +// The test constructs a Client with Provider="anthropic" and Model="claude-sonnet-4-6", +// injects a fake modelsdev.Store, and calls convertUserMultiContent directly. +// The image block must be present in the output — which only happens if the +// fully-qualified "anthropic/claude-sonnet-4-6" was used for the caps lookup. func TestConvertDocumentAnthropic_QualifiedIDRequired(t *testing.T) { store := modelsdev.NewDatabaseStore(&modelsdev.Database{ Providers: map[string]modelsdev.Provider{ @@ -75,24 +79,31 @@ func TestConvertDocumentAnthropic_QualifiedIDRequired(t *testing.T) { }, }) - doc := chat.Document{ - Name: "photo.jpg", - MimeType: "image/jpeg", - Source: chat.DocumentSource{InlineData: minJPEG}, + c := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Provider: "anthropic", + Model: "claude-sonnet-4-6", + }, + }, + modelsStore: store, } - // Bare model name (the original bug): LoadFromStore misses the model, - // capabilities fall back to text-only, image must be dropped. - blocksBare, err := convertDocumentFromStore(t.Context(), doc, "claude-sonnet-4-6", store) - require.NoError(t, err) - assert.Nil(t, blocksBare, "bare model name must not resolve caps: image should be dropped") + parts := []chat.MessagePart{ + { + Type: chat.MessagePartTypeDocument, + Document: &chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + }, + }, + } - // Qualified ID (the fix): LoadFromStore finds the model, vision caps - // are set, image must be preserved as a native image block. - blocksQualified, err := convertDocumentFromStore(t.Context(), doc, "anthropic/claude-sonnet-4-6", store) + blocks, err := c.convertUserMultiContent(t.Context(), parts) require.NoError(t, err) - require.Len(t, blocksQualified, 1, "qualified ID must resolve caps: image should be present") - assert.NotNil(t, blocksQualified[0].OfImage, "expected native image block for qualified model ID") + require.Len(t, blocks, 1, "image must not be dropped when provider+model ID is used for caps lookup") + assert.NotNil(t, blocks[0].OfImage, "expected native image block") } func TestConvertDocumentAnthropic_StrategyTXT(t *testing.T) { diff --git a/pkg/model/provider/anthropic/beta_converter.go b/pkg/model/provider/anthropic/beta_converter.go index 7bb91f13b..252f2f7b3 100644 --- a/pkg/model/provider/anthropic/beta_converter.go +++ b/pkg/model/provider/anthropic/beta_converter.go @@ -277,7 +277,7 @@ func (c *Client) convertBetaUserMultiContent(ctx context.Context, parts []chat.M case chat.MessagePartTypeDocument: if part.Document != nil { - stdBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) + stdBlocks, err := c.convertDoc(ctx, *part.Document) if err != nil { return nil, fmt.Errorf("failed to convert document attachment %q: %w", part.Document.Name, err) } diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go index 596941b3a..2c2fde223 100644 --- a/pkg/model/provider/anthropic/client.go +++ b/pkg/model/provider/anthropic/client.go @@ -23,6 +23,7 @@ import ( "github.com/docker/docker-agent/pkg/model/provider/base" "github.com/docker/docker-agent/pkg/model/provider/options" "github.com/docker/docker-agent/pkg/model/provider/providerutil" + "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/tools" ) @@ -33,6 +34,7 @@ type Client struct { clientFn func(context.Context) (anthropic.Client, error) fileManager *FileManager + modelsStore *modelsdev.Store // nil in production (uses modelsdev.NewStore()); set in tests } // NewClient creates a new Anthropic client from the provided configuration @@ -313,6 +315,15 @@ func (c *Client) CreateChatCompletionStream( return ad, nil } +// convertDoc converts a document attachment using the client's model ID and +// the injected store (if set) or the real modelsdev store. +func (c *Client) convertDoc(ctx context.Context, doc chat.Document) ([]anthropic.ContentBlockParamUnion, error) { + if c.modelsStore != nil { + return convertDocumentFromStore(ctx, doc, c.ID(), c.modelsStore) + } + return convertDocument(ctx, doc, c.ID()) +} + func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ([]anthropic.MessageParam, error) { var anthropicMessages []anthropic.MessageParam // Track whether the last appended assistant message included tool_use blocks @@ -553,7 +564,7 @@ func (c *Client) convertUserMultiContent(ctx context.Context, parts []chat.Messa case chat.MessagePartTypeDocument: if part.Document != nil { - docBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) + docBlocks, err := c.convertDoc(ctx, *part.Document) if err != nil { return nil, fmt.Errorf("failed to convert document attachment %q: %w", part.Document.Name, err) } diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go index 25c24212e..46cc375bf 100644 --- a/pkg/model/provider/bedrock/client.go +++ b/pkg/model/provider/bedrock/client.go @@ -236,7 +236,7 @@ func (c *Client) buildConverseStreamInput(ctx context.Context, messages []chat.M enableCaching := c.promptCachingEnabled() // Convert and set messages (excluding system) - input.Messages, input.System = convertMessages(ctx, messages, c.ModelConfig.Provider+"/"+c.ModelConfig.Model, enableCaching) + input.Messages, input.System = convertMessages(ctx, messages, c.ID(), enableCaching) // Compute thinking fields first — its presence drives the inference config. additionalFields := c.buildAdditionalModelRequestFields() diff --git a/pkg/model/provider/dmr/client.go b/pkg/model/provider/dmr/client.go index 30c7b7615..72c52ca0c 100644 --- a/pkg/model/provider/dmr/client.go +++ b/pkg/model/provider/dmr/client.go @@ -147,7 +147,7 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, opts ...options.Opt // convertMessages converts chat messages to OpenAI format and merges consecutive // system/user messages, which is needed by some local models run by DMR. func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) []openai.ChatCompletionMessageParamUnion { - openaiMessages := oaistream.ConvertMessages(ctx, messages, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) + openaiMessages := oaistream.ConvertMessages(ctx, messages, c.ID()) return oaistream.MergeConsecutiveMessages(openaiMessages) } diff --git a/pkg/model/provider/gemini/client.go b/pkg/model/provider/gemini/client.go index 6cbb40e64..0cdc2576e 100644 --- a/pkg/model/provider/gemini/client.go +++ b/pkg/model/provider/gemini/client.go @@ -601,7 +601,7 @@ func (c *Client) CreateChatCompletionStream( } } - contents := convertMessagesToGemini(ctx, messages, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) + contents := convertMessagesToGemini(ctx, messages, c.ID()) // Debug: Log the messages we're sending slog.DebugContext(ctx, "Gemini messages", "count", len(contents)) diff --git a/pkg/model/provider/oaistream/attachments_test.go b/pkg/model/provider/oaistream/attachments_test.go index c3f06fad3..53af81a57 100644 --- a/pkg/model/provider/oaistream/attachments_test.go +++ b/pkg/model/provider/oaistream/attachments_test.go @@ -58,9 +58,8 @@ func TestConvertDocument_StrategyB64_ImageDropped(t *testing.T) { // where callers passed a bare model name instead of a "provider/model" ID, // causing modelcaps to miss the model and silently drop image/PDF attachments. // -// It calls convertDocumentFromStore directly with an injected fake store so -// the test exercises the full modelID → LoadFromStore → caps → parts path -// without hitting the network. +// It calls ConvertMultiContentFromStore with an injected fake store, exercising +// the same path as the production client (which calls ConvertMessages with c.ID()). func TestConvertDocument_QualifiedIDRequired(t *testing.T) { store := modelsdev.NewDatabaseStore(&modelsdev.Database{ Providers: map[string]modelsdev.Provider{ @@ -76,22 +75,21 @@ func TestConvertDocument_QualifiedIDRequired(t *testing.T) { }, }) - doc := chat.Document{ - Name: "photo.jpg", - MimeType: "image/jpeg", - Source: chat.DocumentSource{InlineData: minJPEG}, - } + msgParts := []chat.MessagePart{{ + Type: chat.MessagePartTypeDocument, + Document: &chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + }, + }} - // Bare model name (the original bug): LoadFromStore misses the model, - // capabilities fall back to text-only, image must be dropped. - partsBare, err := convertDocumentFromStore(t.Context(), doc, "gpt-4o", store) - require.NoError(t, err) - assert.Nil(t, partsBare, "bare model name must not resolve caps: image should be dropped") + // Bare model name (the original bug): image must be dropped. + partsBare := ConvertMultiContentFromStore(t.Context(), msgParts, "gpt-4o", store) + assert.Empty(t, partsBare, "bare model name must not resolve caps: image should be dropped") - // Qualified ID (the fix): LoadFromStore finds the model, vision caps - // are set, image must be preserved as a data-URI image part. - partsQualified, err := convertDocumentFromStore(t.Context(), doc, "openai/gpt-4o", store) - require.NoError(t, err) + // Qualified ID (the fix, matching what c.ID() returns): image must be preserved. + partsQualified := ConvertMultiContentFromStore(t.Context(), msgParts, "openai/gpt-4o", store) require.Len(t, partsQualified, 1, "qualified ID must resolve caps: image should be present") assert.NotNil(t, partsQualified[0].OfImageURL, "expected image URL part for qualified model ID") } diff --git a/pkg/model/provider/oaistream/messages.go b/pkg/model/provider/oaistream/messages.go index 7e06e014f..fcf6f8428 100644 --- a/pkg/model/provider/oaistream/messages.go +++ b/pkg/model/provider/oaistream/messages.go @@ -14,6 +14,7 @@ import ( "github.com/openai/openai-go/v3/packages/param" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" ) // JSONSchema is a helper type that implements json.Marshaler for map[string]any. @@ -30,6 +31,16 @@ func (j JSONSchema) MarshalJSON() ([]byte, error) { // modelID is used for attachment capability lookups; pass an empty string to // skip capability checks (all documents are attempted). func ConvertMultiContent(ctx context.Context, multiContent []chat.MessagePart, modelID string) []openai.ChatCompletionContentPartUnionParam { + return convertMultiContentWithStore(ctx, multiContent, modelID, nil) +} + +// ConvertMultiContentFromStore is the store-injectable variant of ConvertMultiContent, +// used by tests to inject a fake in-memory modelsdev.Store. +func ConvertMultiContentFromStore(ctx context.Context, multiContent []chat.MessagePart, modelID string, store *modelsdev.Store) []openai.ChatCompletionContentPartUnionParam { + return convertMultiContentWithStore(ctx, multiContent, modelID, store) +} + +func convertMultiContentWithStore(ctx context.Context, multiContent []chat.MessagePart, modelID string, store *modelsdev.Store) []openai.ChatCompletionContentPartUnionParam { parts := make([]openai.ChatCompletionContentPartUnionParam, 0, len(multiContent)) for _, part := range multiContent { switch part.Type { @@ -45,7 +56,13 @@ func ConvertMultiContent(ctx context.Context, multiContent []chat.MessagePart, m } case chat.MessagePartTypeDocument: if part.Document != nil { - docParts, err := convertDocument(ctx, *part.Document, modelID) + var docParts []openai.ChatCompletionContentPartUnionParam + var err error + if store != nil { + docParts, err = convertDocumentFromStore(ctx, *part.Document, modelID, store) + } else { + docParts, err = convertDocument(ctx, *part.Document, modelID) + } if err != nil { slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name) continue @@ -62,6 +79,16 @@ func ConvertMultiContent(ctx context.Context, multiContent []chat.MessagePart, m // modelID is forwarded to convertDocument for attachment capability lookups. // This is the base conversion without any provider-specific post-processing. func ConvertMessages(ctx context.Context, messages []chat.Message, modelID string) []openai.ChatCompletionMessageParamUnion { + return convertMessagesWithStore(ctx, messages, modelID, nil) +} + +// ConvertMessagesFromStore is the store-injectable variant of ConvertMessages, +// used by tests to inject a fake in-memory modelsdev.Store. +func ConvertMessagesFromStore(ctx context.Context, messages []chat.Message, modelID string, store *modelsdev.Store) []openai.ChatCompletionMessageParamUnion { + return convertMessagesWithStore(ctx, messages, modelID, store) +} + +func convertMessagesWithStore(ctx context.Context, messages []chat.Message, modelID string, store *modelsdev.Store) []openai.ChatCompletionMessageParamUnion { openaiMessages := make([]openai.ChatCompletionMessageParamUnion, 0, len(messages)) for i := range messages { msg := &messages[i] @@ -94,7 +121,7 @@ func ConvertMessages(ctx context.Context, messages []chat.Message, modelID strin if len(msg.MultiContent) == 0 { openaiMessage = openai.UserMessage(msg.Content) } else { - openaiMessage = openai.UserMessage(ConvertMultiContent(ctx, msg.MultiContent, modelID)) + openaiMessage = openai.UserMessage(convertMultiContentWithStore(ctx, msg.MultiContent, modelID, store)) } case chat.MessageRoleAssistant: diff --git a/pkg/model/provider/openai/client.go b/pkg/model/provider/openai/client.go index 65596d25e..c0e771622 100644 --- a/pkg/model/provider/openai/client.go +++ b/pkg/model/provider/openai/client.go @@ -26,6 +26,7 @@ import ( "github.com/docker/docker-agent/pkg/model/provider/oaistream" "github.com/docker/docker-agent/pkg/model/provider/options" "github.com/docker/docker-agent/pkg/modelinfo" + "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/rag/prompts" "github.com/docker/docker-agent/pkg/rag/types" "github.com/docker/docker-agent/pkg/tools" @@ -41,6 +42,8 @@ type Client struct { // wsPool is initialized in NewClient when transport=websocket is configured. // It maintains a persistent WebSocket connection across requests. wsPool *wsPool + + modelsStore *modelsdev.Store // nil in production (uses modelsdev.NewStore()); set in tests } // NewClient creates a new OpenAI client from the provided configuration @@ -181,7 +184,10 @@ func (c *Client) Close() { // convertMessages converts chat.Message to openai.ChatCompletionMessageParamUnion // using the shared oaistream implementation. func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) []openai.ChatCompletionMessageParamUnion { - return oaistream.ConvertMessages(ctx, messages, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) + if c.modelsStore != nil { + return oaistream.ConvertMessagesFromStore(ctx, messages, c.ID(), c.modelsStore) + } + return oaistream.ConvertMessages(ctx, messages, c.ID()) } // CreateChatCompletionStream creates a streaming chat completion request @@ -613,7 +619,7 @@ func (c *Client) convertMessagesToResponseInput(ctx context.Context, messages [] } case chat.MessagePartTypeDocument: if part.Document != nil { - docParts, err := convertDocumentToResponseInput(ctx, *part.Document, c.ModelConfig.Provider+"/"+c.ModelConfig.Model) + docParts, err := convertDocumentToResponseInput(ctx, *part.Document, c.ID()) if err != nil { slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name) continue From a490b33efc9ad40af391d58cf16db2d1f46326f7 Mon Sep 17 00:00:00 2001 From: Simon Ferquel's Clanker Date: Mon, 11 May 2026 11:35:30 +0000 Subject: [PATCH 5/5] refactor: remove bare convertDocument variants; always init modelsStore at construction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per reviewer request, eliminate the dual-path (bare store + injected store) in favour of a single path that always goes through a *modelsdev.Store. Changes: - anthropic/attachments.go: remove convertDocument(); keep only convertDocumentFromStore(ctx, doc, modelID, store) - oaistream/attachments.go: same - openai/attachments.go: remove convertDocumentToResponseInput(); keep only convertDocumentToResponseInputFromStore(ctx, doc, modelID, store) - bedrock/attachments.go: same pattern - gemini/attachments.go: same pattern All provider clients now initialise modelsStore in NewClient: - anthropic, openai, dmr, bedrock, gemini each call modelsdev.NewStore() at construction; on error they fall back to an empty store (text-only caps) rather than a nil store - convertDoc / convertMessages / convertMessagesToGemini / convertMessages (bedrock) all pass c.modelsStore through — no nil checks oaistream/messages.go: - Remove ConvertMessages / ConvertMultiContent (bare); keep only ConvertMessagesFromStore / ConvertMultiContentFromStore - convertMultiContentWithStore always passes store to convertDocumentFromStore All test files updated to use FromStore / WithCaps variants and import modelsdev where needed. --- pkg/model/provider/anthropic/attachments.go | 15 +------ .../provider/anthropic/attachments_test.go | 6 +-- pkg/model/provider/anthropic/client.go | 18 +++++---- pkg/model/provider/bedrock/attachments.go | 8 ++-- .../provider/bedrock/attachments_test.go | 6 +-- pkg/model/provider/bedrock/client.go | 12 +++++- pkg/model/provider/bedrock/client_test.go | 27 +++++++------ pkg/model/provider/bedrock/convert.go | 9 +++-- pkg/model/provider/dmr/client.go | 23 +++++++---- pkg/model/provider/gemini/attachments.go | 8 ++-- pkg/model/provider/gemini/attachments_test.go | 6 +-- pkg/model/provider/gemini/client.go | 23 +++++++---- pkg/model/provider/gemini/client_test.go | 3 +- pkg/model/provider/oaistream/attachments.go | 17 ++------ .../provider/oaistream/attachments_test.go | 6 +-- pkg/model/provider/oaistream/messages.go | 40 +++++-------------- pkg/model/provider/oaistream/messages_test.go | 5 ++- pkg/model/provider/openai/attachments.go | 12 +++--- pkg/model/provider/openai/attachments_test.go | 6 +-- pkg/model/provider/openai/client.go | 18 +++++---- 20 files changed, 134 insertions(+), 134 deletions(-) diff --git a/pkg/model/provider/anthropic/attachments.go b/pkg/model/provider/anthropic/attachments.go index f3cbc4bb2..62cb81ddd 100644 --- a/pkg/model/provider/anthropic/attachments.go +++ b/pkg/model/provider/anthropic/attachments.go @@ -15,25 +15,14 @@ import ( "github.com/docker/docker-agent/pkg/modelsdev" ) -// convertDocument converts a chat.Document to standard Anthropic SDK content blocks -// (not the Beta API). +// convertDocumentFromStore converts a chat.Document to standard Anthropic SDK content blocks +// using an explicit modelsdev.Store for capability lookup. // // Routing: // - image/* with InlineData → ImageBlockParam (base64 source) // - application/pdf with InlineData → DocumentBlockParam (base64) // - text with InlineText → TextBlockParam with TXTEnvelope // - unsupported / no content → nil (logged as warning) -func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]anthropic.ContentBlockParamUnion, error) { - store, err := modelsdev.NewStore() - if err != nil { - // Fall back to conservative text-only caps when the store is unavailable. - return convertDocumentWithCaps(ctx, doc, modelcaps.ModelCapabilities{}) - } - return convertDocumentFromStore(ctx, doc, modelID, store) -} - -// convertDocumentFromStore is the store-injectable variant of convertDocument, -// used by tests to inject a fake in-memory modelsdev.Store. func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]anthropic.ContentBlockParamUnion, error) { mc := modelcaps.LoadFromStore(store, modelID) return convertDocumentWithCaps(ctx, doc, mc) diff --git a/pkg/model/provider/anthropic/attachments_test.go b/pkg/model/provider/anthropic/attachments_test.go index 99eeacad6..889089063 100644 --- a/pkg/model/provider/anthropic/attachments_test.go +++ b/pkg/model/provider/anthropic/attachments_test.go @@ -113,7 +113,7 @@ func TestConvertDocumentAnthropic_StrategyTXT(t *testing.T) { Source: chat.DocumentSource{InlineText: "## Specification"}, } - blocks, err := convertDocument(t.Context(), doc, "") + blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.Len(t, blocks, 1) require.NotNil(t, blocks[0].OfText) @@ -129,7 +129,7 @@ func TestConvertDocumentAnthropic_StrategyTXT_Envelope(t *testing.T) { Source: chat.DocumentSource{InlineText: "some notes"}, } - blocks, err := convertDocument(t.Context(), doc, "") + blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.Len(t, blocks, 1) require.NotNil(t, blocks[0].OfText) @@ -144,7 +144,7 @@ func TestConvertDocumentAnthropic_Drop_NoContent(t *testing.T) { Source: chat.DocumentSource{}, } - blocks, err := convertDocument(t.Context(), doc, "") + blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) assert.Nil(t, blocks, "should be dropped when no inline content") } diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go index 2c2fde223..b609b6a06 100644 --- a/pkg/model/provider/anthropic/client.go +++ b/pkg/model/provider/anthropic/client.go @@ -34,7 +34,7 @@ type Client struct { clientFn func(context.Context) (anthropic.Client, error) fileManager *FileManager - modelsStore *modelsdev.Store // nil in production (uses modelsdev.NewStore()); set in tests + modelsStore *modelsdev.Store // initialised in NewClient; overrideable in tests } // NewClient creates a new Anthropic client from the provided configuration @@ -69,6 +69,13 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro }, } + store, err := modelsdev.NewStore() + if err != nil { + slog.WarnContext(ctx, "anthropic: failed to load models.dev store, attachments will use conservative caps", "error", err) + store = modelsdev.NewDatabaseStore(&modelsdev.Database{}) // empty: conservative text-only + } + anthropicClient.modelsStore = store + if gateway := globalOptions.Gateway(); gateway == "" { authOpts, err := buildDirectAuthOptions(ctx, cfg, env) if err != nil { @@ -315,13 +322,10 @@ func (c *Client) CreateChatCompletionStream( return ad, nil } -// convertDoc converts a document attachment using the client's model ID and -// the injected store (if set) or the real modelsdev store. +// convertDoc converts a document attachment using the client's model ID +// and the store initialized at construction time. func (c *Client) convertDoc(ctx context.Context, doc chat.Document) ([]anthropic.ContentBlockParamUnion, error) { - if c.modelsStore != nil { - return convertDocumentFromStore(ctx, doc, c.ID(), c.modelsStore) - } - return convertDocument(ctx, doc, c.ID()) + return convertDocumentFromStore(ctx, doc, c.ID(), c.modelsStore) } func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ([]anthropic.MessageParam, error) { diff --git a/pkg/model/provider/bedrock/attachments.go b/pkg/model/provider/bedrock/attachments.go index 022235cda..6f37583b3 100644 --- a/pkg/model/provider/bedrock/attachments.go +++ b/pkg/model/provider/bedrock/attachments.go @@ -13,6 +13,7 @@ import ( "github.com/docker/docker-agent/pkg/attachment" "github.com/docker/docker-agent/pkg/attachment/modelcaps" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" ) // imageFormatFromMIME maps a MIME type to a Bedrock ImageFormat. @@ -32,15 +33,16 @@ func imageFormatFromMIME(mimeType string) (types.ImageFormat, bool) { } } -// convertDocument converts a chat.Document to zero or more Bedrock ContentBlocks. +// convertDocumentFromStore converts a chat.Document to zero or more Bedrock ContentBlocks +// using the provided modelsdev.Store for capability lookup. // // Routing: // - image/* with InlineData → ContentBlockMemberImage // - application/pdf with InlineData → ContentBlockMemberDocument (PDF) // - text/* with InlineText → ContentBlockMemberText with TXTEnvelope // - unsupported / no content → nil (logged as warning) -func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]types.ContentBlock, error) { - mc, _ := modelcaps.Load(modelID) +func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]types.ContentBlock, error) { + mc := modelcaps.LoadFromStore(store, modelID) return convertDocumentWithCaps(ctx, doc, mc) } diff --git a/pkg/model/provider/bedrock/attachments_test.go b/pkg/model/provider/bedrock/attachments_test.go index 976a201a9..3dc766db7 100644 --- a/pkg/model/provider/bedrock/attachments_test.go +++ b/pkg/model/provider/bedrock/attachments_test.go @@ -79,7 +79,7 @@ func TestConvertDocumentBedrock_StrategyTXT(t *testing.T) { Source: chat.DocumentSource{InlineText: "## Notes"}, } - blocks, err := convertDocument(t.Context(), doc, "") + blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.Len(t, blocks, 1) textBlock, ok := blocks[0].(*types.ContentBlockMemberText) @@ -96,7 +96,7 @@ func TestConvertDocumentBedrock_StrategyTXT_Envelope(t *testing.T) { Source: chat.DocumentSource{InlineText: "a,b"}, } - blocks, err := convertDocument(t.Context(), doc, "") + blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.Len(t, blocks, 1) textBlock, ok := blocks[0].(*types.ContentBlockMemberText) @@ -111,7 +111,7 @@ func TestConvertDocumentBedrock_Drop_NoContent(t *testing.T) { Source: chat.DocumentSource{}, } - blocks, err := convertDocument(t.Context(), doc, "") + blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) assert.Nil(t, blocks, "should be nil when no inline content") } diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go index 46cc375bf..e1ea5d1f3 100644 --- a/pkg/model/provider/bedrock/client.go +++ b/pkg/model/provider/bedrock/client.go @@ -30,7 +30,8 @@ type Client struct { base.Config bedrockClient *bedrockruntime.Client - cachingSupported bool // Cached at init time for efficiency + cachingSupported bool // Cached at init time for efficiency + modelsStore *modelsdev.Store // initialised in NewClient } // bearerTokenTransport adds Authorization header with bearer token to requests @@ -116,6 +117,12 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro // Uses models.dev cache pricing as proxy for capability detection. cachingSupported := detectCachingSupport(ctx, cfg.Model) + attachStore, err := modelsdev.NewStore() + if err != nil { + slog.WarnContext(ctx, "bedrock: failed to load models.dev store, attachments will use conservative caps", "error", err) + attachStore = modelsdev.NewDatabaseStore(&modelsdev.Database{}) + } + slog.DebugContext(ctx, "Bedrock client created successfully", "model", cfg.Model, "region", awsCfg.Region, @@ -129,6 +136,7 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro }, bedrockClient: bedrockClient, cachingSupported: cachingSupported, + modelsStore: attachStore, }, nil } @@ -236,7 +244,7 @@ func (c *Client) buildConverseStreamInput(ctx context.Context, messages []chat.M enableCaching := c.promptCachingEnabled() // Convert and set messages (excluding system) - input.Messages, input.System = convertMessages(ctx, messages, c.ID(), enableCaching) + input.Messages, input.System = convertMessages(ctx, messages, c.ID(), c.modelsStore, enableCaching) // Compute thinking fields first — its presence drives the inference config. additionalFields := c.buildAdditionalModelRequestFields() diff --git a/pkg/model/provider/bedrock/client_test.go b/pkg/model/provider/bedrock/client_test.go index 1584f7f61..793f5c2f9 100644 --- a/pkg/model/provider/bedrock/client_test.go +++ b/pkg/model/provider/bedrock/client_test.go @@ -14,6 +14,7 @@ import ( "github.com/docker/docker-agent/pkg/config/latest" "github.com/docker/docker-agent/pkg/environment" "github.com/docker/docker-agent/pkg/model/provider/base" + "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/tools" ) @@ -25,7 +26,7 @@ func TestConvertMessages_UserText(t *testing.T) { Content: "Hello, world!", }} - bedrockMsgs, system := convertMessages(t.Context(), msgs, "", false) + bedrockMsgs, system := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), false) require.Len(t, bedrockMsgs, 1) assert.Empty(t, system) @@ -45,7 +46,7 @@ func TestConvertMessages_SystemExtraction(t *testing.T) { {Role: chat.MessageRoleUser, Content: "Hi"}, } - bedrockMsgs, system := convertMessages(t.Context(), msgs, "", false) + bedrockMsgs, system := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), false) require.Len(t, bedrockMsgs, 1) // Only user message require.Len(t, system, 1) // System extracted @@ -70,7 +71,7 @@ func TestConvertMessages_AssistantWithToolCalls(t *testing.T) { }}, }} - bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", false) + bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), false) require.Len(t, bedrockMsgs, 1) require.Len(t, bedrockMsgs[0].Content, 1) @@ -91,7 +92,7 @@ func TestConvertMessages_ToolResult(t *testing.T) { Content: "Weather is sunny", }} - bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", false) + bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), false) require.Len(t, bedrockMsgs, 1) assert.Equal(t, types.ConversationRoleUser, bedrockMsgs[0].Role) @@ -115,7 +116,7 @@ func TestConvertMessages_EmptyContent(t *testing.T) { {Role: chat.MessageRoleUser, Content: " "}, } - bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", false) + bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), false) // Both messages now produce user turns with empty or whitespace content blocks. assert.Len(t, bedrockMsgs, 2) } @@ -181,7 +182,7 @@ func TestConvertMessages_MultiContent(t *testing.T) { }, }} - bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", false) + bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), false) require.Len(t, bedrockMsgs, 1) require.Len(t, bedrockMsgs[0].Content, 2) @@ -205,7 +206,7 @@ func TestConvertMessages_ConsecutiveToolResults(t *testing.T) { {Role: chat.MessageRoleUser, Content: "Continue"}, } - bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", false) + bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), false) // Expect: user, assistant, user (grouped tool results), user require.Len(t, bedrockMsgs, 4) @@ -1136,7 +1137,7 @@ func TestConvertMessages_WithCaching(t *testing.T) { {Role: chat.MessageRoleUser, Content: "How are you?"}, } - bedrockMsgs, system := convertMessages(t.Context(), msgs, "", true) + bedrockMsgs, system := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), true) // System should have text block + cache point require.Len(t, system, 2) @@ -1167,7 +1168,7 @@ func TestConvertMessages_WithoutCaching(t *testing.T) { {Role: chat.MessageRoleUser, Content: "Hello"}, } - bedrockMsgs, system := convertMessages(t.Context(), msgs, "", false) + bedrockMsgs, system := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), false) // System should only have text block, no cache point require.Len(t, system, 1) @@ -1258,7 +1259,7 @@ func TestConvertMessages_EmptyWithCaching(t *testing.T) { t.Parallel() // Empty message list should not panic with caching enabled - bedrockMsgs, system := convertMessages(t.Context(), []chat.Message{}, "", true) + bedrockMsgs, system := convertMessages(t.Context(), []chat.Message{}, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), true) assert.Empty(t, bedrockMsgs) assert.Empty(t, system) @@ -1271,7 +1272,7 @@ func TestConvertMessages_SingleMessageWithCaching(t *testing.T) { {Role: chat.MessageRoleUser, Content: "Hello"}, } - bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", true) + bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), true) require.Len(t, bedrockMsgs, 1) // Single message should get a cache point appended @@ -1291,7 +1292,7 @@ func TestConvertMessages_MultiContentWithCaching(t *testing.T) { }, }} - bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", true) + bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), true) require.Len(t, bedrockMsgs, 1) // 2 text blocks + cache point = 3 content blocks @@ -1314,7 +1315,7 @@ func TestConvertMessages_ToolResultWithCaching(t *testing.T) { {Role: chat.MessageRoleTool, ToolCallID: "tool-1", Content: "Result"}, } - bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", true) + bedrockMsgs, _ := convertMessages(t.Context(), msgs, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}), true) // Expect: user, assistant, user (tool result) require.Len(t, bedrockMsgs, 3) diff --git a/pkg/model/provider/bedrock/convert.go b/pkg/model/provider/bedrock/convert.go index 428246978..d6ab7b988 100644 --- a/pkg/model/provider/bedrock/convert.go +++ b/pkg/model/provider/bedrock/convert.go @@ -12,13 +12,14 @@ import ( "github.com/aws/aws-sdk-go-v2/service/bedrockruntime/types" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/tools" ) // convertMessages handles Bedrock's Converse API constraints: // - Tool results must immediately follow the assistant message with tool_use // - Multiple consecutive tool results must be grouped into a single user message -func convertMessages(ctx context.Context, messages []chat.Message, modelID string, enableCaching bool) ([]types.Message, []types.SystemContentBlock) { +func convertMessages(ctx context.Context, messages []chat.Message, modelID string, store *modelsdev.Store, enableCaching bool) ([]types.Message, []types.SystemContentBlock) { var bedrockMessages []types.Message var systemBlocks []types.SystemContentBlock @@ -43,7 +44,7 @@ func convertMessages(ctx context.Context, messages []chat.Message, modelID strin } case chat.MessageRoleUser: - contentBlocks := convertUserContent(ctx, msg, modelID) + contentBlocks := convertUserContent(ctx, msg, modelID, store) if len(contentBlocks) > 0 { bedrockMessages = append(bedrockMessages, types.Message{ Role: types.ConversationRoleUser, @@ -120,7 +121,7 @@ func applyCachePointsToMessages(messages []types.Message) { } } -func convertUserContent(ctx context.Context, msg *chat.Message, modelID string) []types.ContentBlock { +func convertUserContent(ctx context.Context, msg *chat.Message, modelID string, store *modelsdev.Store) []types.ContentBlock { var blocks []types.ContentBlock if len(msg.MultiContent) > 0 { @@ -139,7 +140,7 @@ func convertUserContent(ctx context.Context, msg *chat.Message, modelID string) } case chat.MessagePartTypeDocument: if part.Document != nil { - docBlocks, err := convertDocument(ctx, *part.Document, modelID) + docBlocks, err := convertDocumentFromStore(ctx, *part.Document, modelID, store) if err != nil { slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name) continue diff --git a/pkg/model/provider/dmr/client.go b/pkg/model/provider/dmr/client.go index 72c52ca0c..f145e067d 100644 --- a/pkg/model/provider/dmr/client.go +++ b/pkg/model/provider/dmr/client.go @@ -21,6 +21,7 @@ import ( "github.com/docker/docker-agent/pkg/model/provider/base" "github.com/docker/docker-agent/pkg/model/provider/oaistream" "github.com/docker/docker-agent/pkg/model/provider/options" + "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/tools" ) @@ -52,9 +53,10 @@ const ( type Client struct { base.Config - client openai.Client - httpClient *http.Client - engine string + client openai.Client + httpClient *http.Client + engine string + modelsStore *modelsdev.Store // initialised in NewClient } // NewClient creates a new DMR client from the provided configuration @@ -132,22 +134,29 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, opts ...options.Opt slog.DebugContext(ctx, "DMR client created successfully", "model", cfg.Model, "base_url", baseURL) + store, err := modelsdev.NewStore() + if err != nil { + slog.WarnContext(ctx, "dmr: failed to load models.dev store, attachments will use conservative caps", "error", err) + store = modelsdev.NewDatabaseStore(&modelsdev.Database{}) + } + return &Client{ Config: base.Config{ ModelConfig: *cfg, ModelOptions: globalOptions, BaseURL: baseURL, }, - client: openai.NewClient(clientOptions...), - httpClient: httpClient, - engine: engine, + client: openai.NewClient(clientOptions...), + httpClient: httpClient, + engine: engine, + modelsStore: store, }, nil } // convertMessages converts chat messages to OpenAI format and merges consecutive // system/user messages, which is needed by some local models run by DMR. func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) []openai.ChatCompletionMessageParamUnion { - openaiMessages := oaistream.ConvertMessages(ctx, messages, c.ID()) + openaiMessages := oaistream.ConvertMessagesFromStore(ctx, messages, c.ID(), c.modelsStore) return oaistream.MergeConsecutiveMessages(openaiMessages) } diff --git a/pkg/model/provider/gemini/attachments.go b/pkg/model/provider/gemini/attachments.go index 9d5b53d00..ce3c5fb6b 100644 --- a/pkg/model/provider/gemini/attachments.go +++ b/pkg/model/provider/gemini/attachments.go @@ -10,16 +10,18 @@ import ( "github.com/docker/docker-agent/pkg/attachment" "github.com/docker/docker-agent/pkg/attachment/modelcaps" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" ) -// convertDocument converts a chat.Document to a Gemini genai.Part. +// convertDocumentFromStore converts a chat.Document to a Gemini genai.Part +// using the provided modelsdev.Store for capability lookup. // // Routing: // - image/* or binary with InlineData → genai.Blob part // - text MIMEs with InlineText → genai.Text part with TXTEnvelope // - unsupported / no content → nil (logged as warning) -func convertDocument(ctx context.Context, doc chat.Document, modelID string) (*genai.Part, error) { - mc, _ := modelcaps.Load(modelID) +func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) (*genai.Part, error) { + mc := modelcaps.LoadFromStore(store, modelID) return convertDocumentWithCaps(ctx, doc, mc) } diff --git a/pkg/model/provider/gemini/attachments_test.go b/pkg/model/provider/gemini/attachments_test.go index 634859984..2d705b1ee 100644 --- a/pkg/model/provider/gemini/attachments_test.go +++ b/pkg/model/provider/gemini/attachments_test.go @@ -55,7 +55,7 @@ func TestConvertDocumentGemini_StrategyTXT(t *testing.T) { Source: chat.DocumentSource{InlineText: "# Read Me"}, } - part, err := convertDocument(t.Context(), doc, "") + part, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.NotNil(t, part) assert.Contains(t, part.Text, "readme-md") @@ -70,7 +70,7 @@ func TestConvertDocumentGemini_StrategyTXT_Envelope(t *testing.T) { Source: chat.DocumentSource{InlineText: "col1,col2"}, } - part, err := convertDocument(t.Context(), doc, "") + part, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.NotNil(t, part) assert.True(t, strings.HasPrefix(part.Text, " 0 { - parts := convertMultiContent(ctx, msg.MultiContent, msg.ThoughtSignature, modelID) + parts := convertMultiContent(ctx, msg.MultiContent, msg.ThoughtSignature, modelID, store) if len(parts) > 0 { contents = append(contents, genai.NewContentFromParts(parts, role)) } @@ -288,7 +297,7 @@ func newTextPartWithSignature(text string, signature []byte) *genai.Part { } // convertMultiContent converts multi-part content to Gemini parts -func convertMultiContent(ctx context.Context, multiContent []chat.MessagePart, thoughtSignature []byte, modelID string) []*genai.Part { +func convertMultiContent(ctx context.Context, multiContent []chat.MessagePart, thoughtSignature []byte, modelID string, store *modelsdev.Store) []*genai.Part { parts := make([]*genai.Part, 0, len(multiContent)) for _, part := range multiContent { switch part.Type { @@ -301,7 +310,7 @@ func convertMultiContent(ctx context.Context, multiContent []chat.MessagePart, t } case chat.MessagePartTypeDocument: if part.Document != nil { - docPart, err := convertDocument(ctx, *part.Document, modelID) + docPart, err := convertDocumentFromStore(ctx, *part.Document, modelID, store) if err != nil { slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name) continue @@ -601,7 +610,7 @@ func (c *Client) CreateChatCompletionStream( } } - contents := convertMessagesToGemini(ctx, messages, c.ID()) + contents := convertMessagesToGemini(ctx, messages, c.ID(), c.modelsStore) // Debug: Log the messages we're sending slog.DebugContext(ctx, "Gemini messages", "count", len(contents)) diff --git a/pkg/model/provider/gemini/client_test.go b/pkg/model/provider/gemini/client_test.go index 8aeb7b205..a8e83e1c5 100644 --- a/pkg/model/provider/gemini/client_test.go +++ b/pkg/model/provider/gemini/client_test.go @@ -10,6 +10,7 @@ import ( "github.com/docker/docker-agent/pkg/chat" "github.com/docker/docker-agent/pkg/config/latest" "github.com/docker/docker-agent/pkg/model/provider/base" + "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/tools" ) @@ -365,7 +366,7 @@ func TestConvertMessagesToGemini_ThoughtSignature(t *testing.T) { contents := convertMessagesToGemini(t.Context(), []chat.Message{ {Role: chat.MessageRoleUser, Content: "go"}, tt.message, - }, "") + }, "", modelsdev.NewDatabaseStore(&modelsdev.Database{})) require.Len(t, contents, 2) assistant := contents[1] diff --git a/pkg/model/provider/oaistream/attachments.go b/pkg/model/provider/oaistream/attachments.go index 31eaa8d07..aa2c573b6 100644 --- a/pkg/model/provider/oaistream/attachments.go +++ b/pkg/model/provider/oaistream/attachments.go @@ -15,26 +15,15 @@ import ( "github.com/docker/docker-agent/pkg/modelsdev" ) -// convertDocument converts a chat.Document to zero or more +// convertDocumentFromStore converts a chat.Document to zero or more // ChatCompletionContentPartUnionParam values using the OpenAI Chat Completions -// format. It is also used by all oaistream-based providers (Mistral, xAI, -// Ollama, Nebius, MiniMax, GitHub Copilot, Azure, Requesty). +// format. It uses the provided modelsdev.Store for capability lookups. // // Routing: // - image/* with InlineData → data-URI image part -// - other binary MIMEs with InlineData → text part with TXTEnvelope fallback +// - other binary MIMEs with InlineData → drop (no native document block on Chat Completions) // - text MIMEs with InlineText → text part with TXTEnvelope // - unsupported / no content → nil (logged as warning) -func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]openai.ChatCompletionContentPartUnionParam, error) { - store, err := modelsdev.NewStore() - if err != nil { - return convertDocumentWithCaps(ctx, doc, modelcaps.ModelCapabilities{}) - } - return convertDocumentFromStore(ctx, doc, modelID, store) -} - -// convertDocumentFromStore is the store-injectable variant of convertDocument, -// used by tests to inject a fake in-memory modelsdev.Store. func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]openai.ChatCompletionContentPartUnionParam, error) { mc := modelcaps.LoadFromStore(store, modelID) return convertDocumentWithCaps(ctx, doc, mc) diff --git a/pkg/model/provider/oaistream/attachments_test.go b/pkg/model/provider/oaistream/attachments_test.go index 53af81a57..d3047ee6c 100644 --- a/pkg/model/provider/oaistream/attachments_test.go +++ b/pkg/model/provider/oaistream/attachments_test.go @@ -101,7 +101,7 @@ func TestConvertDocument_StrategyTXT(t *testing.T) { Source: chat.DocumentSource{InlineText: "# Hello World"}, } - parts, err := convertDocument(t.Context(), doc, "") + parts, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.Len(t, parts, 1) require.NotNil(t, parts[0].OfText) @@ -117,7 +117,7 @@ func TestConvertDocument_StrategyTXT_Envelope(t *testing.T) { Source: chat.DocumentSource{InlineText: "a,b,c\n1,2,3"}, } - parts, err := convertDocument(t.Context(), doc, "") + parts, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.Len(t, parts, 1) require.NotNil(t, parts[0].OfText) @@ -132,7 +132,7 @@ func TestConvertDocument_Drop_NoContent(t *testing.T) { Source: chat.DocumentSource{}, } - parts, err := convertDocument(t.Context(), doc, "") + parts, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) assert.Nil(t, parts, "should be dropped when no inline content") } diff --git a/pkg/model/provider/oaistream/messages.go b/pkg/model/provider/oaistream/messages.go index fcf6f8428..d7e8ae301 100644 --- a/pkg/model/provider/oaistream/messages.go +++ b/pkg/model/provider/oaistream/messages.go @@ -26,20 +26,18 @@ func (j JSONSchema) MarshalJSON() ([]byte, error) { return json.Marshal(map[string]any(j)) } -// ConvertMultiContent converts chat.MessagePart slices to OpenAI content parts. -// ctx is forwarded to convertDocument for logging and future cancellation support. -// modelID is used for attachment capability lookups; pass an empty string to -// skip capability checks (all documents are attempted). -func ConvertMultiContent(ctx context.Context, multiContent []chat.MessagePart, modelID string) []openai.ChatCompletionContentPartUnionParam { - return convertMultiContentWithStore(ctx, multiContent, modelID, nil) -} - -// ConvertMultiContentFromStore is the store-injectable variant of ConvertMultiContent, -// used by tests to inject a fake in-memory modelsdev.Store. +// ConvertMultiContentFromStore converts chat.MessagePart slices to OpenAI content +// parts using the provided modelsdev.Store for capability lookups. func ConvertMultiContentFromStore(ctx context.Context, multiContent []chat.MessagePart, modelID string, store *modelsdev.Store) []openai.ChatCompletionContentPartUnionParam { return convertMultiContentWithStore(ctx, multiContent, modelID, store) } +// ConvertMessagesFromStore converts chat.Message slices to OpenAI message params +// using the provided modelsdev.Store for capability lookups. +func ConvertMessagesFromStore(ctx context.Context, messages []chat.Message, modelID string, store *modelsdev.Store) []openai.ChatCompletionMessageParamUnion { + return convertMessagesWithStore(ctx, messages, modelID, store) +} + func convertMultiContentWithStore(ctx context.Context, multiContent []chat.MessagePart, modelID string, store *modelsdev.Store) []openai.ChatCompletionContentPartUnionParam { parts := make([]openai.ChatCompletionContentPartUnionParam, 0, len(multiContent)) for _, part := range multiContent { @@ -56,13 +54,7 @@ func convertMultiContentWithStore(ctx context.Context, multiContent []chat.Messa } case chat.MessagePartTypeDocument: if part.Document != nil { - var docParts []openai.ChatCompletionContentPartUnionParam - var err error - if store != nil { - docParts, err = convertDocumentFromStore(ctx, *part.Document, modelID, store) - } else { - docParts, err = convertDocument(ctx, *part.Document, modelID) - } + docParts, err := convertDocumentFromStore(ctx, *part.Document, modelID, store) if err != nil { slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name) continue @@ -74,20 +66,6 @@ func convertMultiContentWithStore(ctx context.Context, multiContent []chat.Messa return parts } -// ConvertMessages converts chat.Message slices to OpenAI message params. -// ctx is forwarded to convertDocument for logging and cancellation support. -// modelID is forwarded to convertDocument for attachment capability lookups. -// This is the base conversion without any provider-specific post-processing. -func ConvertMessages(ctx context.Context, messages []chat.Message, modelID string) []openai.ChatCompletionMessageParamUnion { - return convertMessagesWithStore(ctx, messages, modelID, nil) -} - -// ConvertMessagesFromStore is the store-injectable variant of ConvertMessages, -// used by tests to inject a fake in-memory modelsdev.Store. -func ConvertMessagesFromStore(ctx context.Context, messages []chat.Message, modelID string, store *modelsdev.Store) []openai.ChatCompletionMessageParamUnion { - return convertMessagesWithStore(ctx, messages, modelID, store) -} - func convertMessagesWithStore(ctx context.Context, messages []chat.Message, modelID string, store *modelsdev.Store) []openai.ChatCompletionMessageParamUnion { openaiMessages := make([]openai.ChatCompletionMessageParamUnion, 0, len(messages)) for i := range messages { diff --git a/pkg/model/provider/oaistream/messages_test.go b/pkg/model/provider/oaistream/messages_test.go index 3f2cba58c..cb80c4ec0 100644 --- a/pkg/model/provider/oaistream/messages_test.go +++ b/pkg/model/provider/oaistream/messages_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/require" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/tools" ) @@ -62,7 +63,7 @@ func TestConvertMultiContent(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - result := ConvertMultiContent(t.Context(), tt.multiContent, "") + result := ConvertMultiContentFromStore(t.Context(), tt.multiContent, "", modelsdev.NewDatabaseStore(&modelsdev.Database{})) assert.Len(t, result, tt.wantCount) }) } @@ -137,7 +138,7 @@ func TestConvertMessages(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - result := ConvertMessages(t.Context(), tt.messages, "") + result := ConvertMessagesFromStore(t.Context(), tt.messages, "", modelsdev.NewDatabaseStore(&modelsdev.Database{})) assert.Len(t, result, tt.want) }) } diff --git a/pkg/model/provider/openai/attachments.go b/pkg/model/provider/openai/attachments.go index 0c13d5be0..77060e766 100644 --- a/pkg/model/provider/openai/attachments.go +++ b/pkg/model/provider/openai/attachments.go @@ -13,18 +13,20 @@ import ( "github.com/docker/docker-agent/pkg/attachment" "github.com/docker/docker-agent/pkg/attachment/modelcaps" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/modelsdev" ) -// convertDocumentToResponseInput converts a chat.Document to zero or more -// ResponseInputContentUnionParam values for the OpenAI Responses API. +// convertDocumentToResponseInputFromStore converts a chat.Document to zero or +// more ResponseInputContentUnionParam values for the OpenAI Responses API, +// using the provided modelsdev.Store for capability lookup. // // Routing: // - image/* with InlineData → OfInputImage with a data URI -// - other binary with InlineData → OfInputText with TXTEnvelope fallback +// - application/pdf with InlineData → OfInputFile (base64) // - text MIMEs with InlineText → OfInputText with TXTEnvelope // - unsupported / no content → nil (logged as warning) -func convertDocumentToResponseInput(ctx context.Context, doc chat.Document, modelID string) ([]responses.ResponseInputContentUnionParam, error) { - mc, _ := modelcaps.Load(modelID) +func convertDocumentToResponseInputFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]responses.ResponseInputContentUnionParam, error) { + mc := modelcaps.LoadFromStore(store, modelID) return convertDocumentToResponseInputWithCaps(ctx, doc, mc) } diff --git a/pkg/model/provider/openai/attachments_test.go b/pkg/model/provider/openai/attachments_test.go index 6ff9e7c42..e9985fcb1 100644 --- a/pkg/model/provider/openai/attachments_test.go +++ b/pkg/model/provider/openai/attachments_test.go @@ -60,7 +60,7 @@ func TestConvertDocumentResponseInput_StrategyTXT(t *testing.T) { Source: chat.DocumentSource{InlineText: "## API Spec"}, } - parts, err := convertDocumentToResponseInput(t.Context(), doc, "") + parts, err := convertDocumentToResponseInputWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.Len(t, parts, 1) require.NotNil(t, parts[0].OfInputText) @@ -77,7 +77,7 @@ func TestConvertDocumentResponseInput_StrategyTXT_Envelope(t *testing.T) { Source: chat.DocumentSource{InlineText: "x,y"}, } - parts, err := convertDocumentToResponseInput(t.Context(), doc, "") + parts, err := convertDocumentToResponseInputWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) require.Len(t, parts, 1) require.NotNil(t, parts[0].OfInputText) @@ -92,7 +92,7 @@ func TestConvertDocumentResponseInput_Drop_NoContent(t *testing.T) { Source: chat.DocumentSource{}, } - parts, err := convertDocumentToResponseInput(t.Context(), doc, "") + parts, err := convertDocumentToResponseInputWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{}) require.NoError(t, err) assert.Nil(t, parts, "should be nil when no inline content") } diff --git a/pkg/model/provider/openai/client.go b/pkg/model/provider/openai/client.go index c0e771622..0d278efbe 100644 --- a/pkg/model/provider/openai/client.go +++ b/pkg/model/provider/openai/client.go @@ -43,7 +43,7 @@ type Client struct { // It maintains a persistent WebSocket connection across requests. wsPool *wsPool - modelsStore *modelsdev.Store // nil in production (uses modelsdev.NewStore()); set in tests + modelsStore *modelsdev.Store // initialised in NewClient } // NewClient creates a new OpenAI client from the provided configuration @@ -153,13 +153,20 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro slog.DebugContext(ctx, "OpenAI client created successfully", "model", cfg.Model) + store, err := modelsdev.NewStore() + if err != nil { + slog.WarnContext(ctx, "openai: failed to load models.dev store, attachments will use conservative caps", "error", err) + store = modelsdev.NewDatabaseStore(&modelsdev.Database{}) + } + client := &Client{ Config: base.Config{ ModelConfig: *cfg, ModelOptions: globalOptions, Env: env, }, - clientFn: clientFn, + clientFn: clientFn, + modelsStore: store, } // Pre-create the WebSocket pool when the transport is configured. @@ -184,10 +191,7 @@ func (c *Client) Close() { // convertMessages converts chat.Message to openai.ChatCompletionMessageParamUnion // using the shared oaistream implementation. func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) []openai.ChatCompletionMessageParamUnion { - if c.modelsStore != nil { - return oaistream.ConvertMessagesFromStore(ctx, messages, c.ID(), c.modelsStore) - } - return oaistream.ConvertMessages(ctx, messages, c.ID()) + return oaistream.ConvertMessagesFromStore(ctx, messages, c.ID(), c.modelsStore) } // CreateChatCompletionStream creates a streaming chat completion request @@ -619,7 +623,7 @@ func (c *Client) convertMessagesToResponseInput(ctx context.Context, messages [] } case chat.MessagePartTypeDocument: if part.Document != nil { - docParts, err := convertDocumentToResponseInput(ctx, *part.Document, c.ID()) + docParts, err := convertDocumentToResponseInputFromStore(ctx, *part.Document, c.ID(), c.modelsStore) if err != nil { slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name) continue