Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions pkg/attachment/modelcaps/modelcaps_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,51 @@ func buildStore(providers map[string]modelsdev.Provider) *modelsdev.Store {
return modelsdev.NewDatabaseStore(db)
}

// TestLoadFromStore_QualifiedIDRequired is the regression test for the bug
// fixed by pass-fully-qualified-provider-model-ID: modelcaps.Load (and
// LoadFromStore) requires a "provider/model" key to find a model in the
// models.dev database. A bare model name without the provider prefix must
// NOT resolve to vision capabilities — it falls back to text-only.
//
// Before the fix, callers passed c.ModelConfig.Model (e.g. "claude-sonnet-4-6")
// instead of c.ModelConfig.Provider+"/"+c.ModelConfig.Model; the lookup always
// missed and all image / PDF attachments were silently dropped.
func TestLoadFromStore_QualifiedIDRequired(t *testing.T) {
store := buildStore(map[string]modelsdev.Provider{
"anthropic": {
Models: map[string]modelsdev.Model{
"claude-sonnet-4-6": {
Name: "Claude Sonnet 4.6",
Modalities: modelsdev.Modalities{
Input: []string{"text", "image", "pdf"},
Output: []string{"text"},
},
},
},
},
})

// Bare model name (the original bug): must fall back to conservative text-only caps.
bareID := "claude-sonnet-4-6"
mcBare := modelcaps.LoadFromStore(store, bareID)
if mcBare.Supports("image/jpeg") {
t.Errorf("bare model name %q must NOT resolve to vision caps: image/jpeg should be dropped", bareID)
}
if mcBare.Supports("application/pdf") {
t.Errorf("bare model name %q must NOT resolve to vision caps: application/pdf should be dropped", bareID)
}

// Fully-qualified ID (the fix): must resolve to vision+pdf caps.
qualifiedID := "anthropic/claude-sonnet-4-6"
mcQualified := modelcaps.LoadFromStore(store, qualifiedID)
if !mcQualified.Supports("image/jpeg") {
t.Errorf("qualified ID %q must resolve to vision caps: image/jpeg should be passed through", qualifiedID)
}
if !mcQualified.Supports("application/pdf") {
t.Errorf("qualified ID %q must resolve to vision caps: application/pdf should be passed through", qualifiedID)
}
}

func TestLoadFromStore_VisionModel(t *testing.T) {
store := buildStore(map[string]modelsdev.Provider{
"anthropic": {
Expand Down
9 changes: 5 additions & 4 deletions pkg/model/provider/anthropic/attachments.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@ import (
"github.com/docker/docker-agent/pkg/attachment"
"github.com/docker/docker-agent/pkg/attachment/modelcaps"
"github.com/docker/docker-agent/pkg/chat"
"github.com/docker/docker-agent/pkg/modelsdev"
)

// convertDocument converts a chat.Document to standard Anthropic SDK content blocks
// (not the Beta API).
// convertDocumentFromStore converts a chat.Document to standard Anthropic SDK content blocks
// using an explicit modelsdev.Store for capability lookup.
//
// Routing:
// - image/* with InlineData → ImageBlockParam (base64 source)
// - application/pdf with InlineData → DocumentBlockParam (base64)
// - text with InlineText → TextBlockParam with TXTEnvelope
// - unsupported / no content → nil (logged as warning)
func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]anthropic.ContentBlockParamUnion, error) {
mc, _ := modelcaps.Load(modelID)
func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]anthropic.ContentBlockParamUnion, error) {
mc := modelcaps.LoadFromStore(store, modelID)
return convertDocumentWithCaps(ctx, doc, mc)
}

Expand Down
61 changes: 58 additions & 3 deletions pkg/model/provider/anthropic/attachments_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ import (

"github.com/docker/docker-agent/pkg/attachment/modelcaps"
"github.com/docker/docker-agent/pkg/chat"
"github.com/docker/docker-agent/pkg/config/latest"
"github.com/docker/docker-agent/pkg/model/provider/base"
"github.com/docker/docker-agent/pkg/modelsdev"
)

// minJPEG is a minimal JPEG magic-byte header for use in tests.
Expand Down Expand Up @@ -51,14 +54,66 @@ func TestConvertDocumentAnthropic_StrategyB64_PDF(t *testing.T) {
assert.Nil(t, blocks[0].OfText, "expected no text block for PDF")
}

// TestConvertDocumentAnthropic_QualifiedIDRequired is the regression test for
// the bug where convertUserMultiContent passed only c.ModelConfig.Model (bare
// model name) to convertDocument instead of c.ModelConfig.Provider+"/"+c.ModelConfig.Model.
// When the bare name was used, modelcaps.Load always missed the model and all
// image/PDF attachments were silently dropped.
//
// The test constructs a Client with Provider="anthropic" and Model="claude-sonnet-4-6",
// injects a fake modelsdev.Store, and calls convertUserMultiContent directly.
// The image block must be present in the output — which only happens if the
// fully-qualified "anthropic/claude-sonnet-4-6" was used for the caps lookup.
func TestConvertDocumentAnthropic_QualifiedIDRequired(t *testing.T) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is not really what we want to test: we want to test that the fully qualified ID is passed when resolving the models caps

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in commit 9e4db92. Added convertDocumentFromStore(ctx, doc, modelID, store) to anthropic/attachments.go as the store-injectable path; convertDocument delegates to it. The test now calls convertDocumentFromStore directly, exercising the full modelID → LoadFromStore → caps → blocks chain: bare "claude-sonnet-4-6" misses the store (text-only, image dropped), qualified "anthropic/claude-sonnet-4-6" hits it (vision caps, image block produced).

store := modelsdev.NewDatabaseStore(&modelsdev.Database{
Providers: map[string]modelsdev.Provider{
"anthropic": {
Models: map[string]modelsdev.Model{
"claude-sonnet-4-6": {
Modalities: modelsdev.Modalities{
Input: []string{"text", "image", "pdf"},
},
},
},
},
},
})

c := &Client{
Config: base.Config{
ModelConfig: latest.ModelConfig{
Provider: "anthropic",
Model: "claude-sonnet-4-6",
},
},
modelsStore: store,
}

parts := []chat.MessagePart{
{
Type: chat.MessagePartTypeDocument,
Document: &chat.Document{
Name: "photo.jpg",
MimeType: "image/jpeg",
Source: chat.DocumentSource{InlineData: minJPEG},
},
},
}

blocks, err := c.convertUserMultiContent(t.Context(), parts)
require.NoError(t, err)
require.Len(t, blocks, 1, "image must not be dropped when provider+model ID is used for caps lookup")
assert.NotNil(t, blocks[0].OfImage, "expected native image block")
}

func TestConvertDocumentAnthropic_StrategyTXT(t *testing.T) {
doc := chat.Document{
Name: "spec.md",
MimeType: "text/markdown",
Source: chat.DocumentSource{InlineText: "## Specification"},
}

blocks, err := convertDocument(t.Context(), doc, "")
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
require.NoError(t, err)
require.Len(t, blocks, 1)
require.NotNil(t, blocks[0].OfText)
Expand All @@ -74,7 +129,7 @@ func TestConvertDocumentAnthropic_StrategyTXT_Envelope(t *testing.T) {
Source: chat.DocumentSource{InlineText: "some notes"},
}

blocks, err := convertDocument(t.Context(), doc, "")
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
require.NoError(t, err)
require.Len(t, blocks, 1)
require.NotNil(t, blocks[0].OfText)
Expand All @@ -89,7 +144,7 @@ func TestConvertDocumentAnthropic_Drop_NoContent(t *testing.T) {
Source: chat.DocumentSource{},
}

blocks, err := convertDocument(t.Context(), doc, "")
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
require.NoError(t, err)
assert.Nil(t, blocks, "should be dropped when no inline content")
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/model/provider/anthropic/beta_converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func (c *Client) convertBetaUserMultiContent(ctx context.Context, parts []chat.M

case chat.MessagePartTypeDocument:
if part.Document != nil {
stdBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Model)
stdBlocks, err := c.convertDoc(ctx, *part.Document)
if err != nil {
return nil, fmt.Errorf("failed to convert document attachment %q: %w", part.Document.Name, err)
}
Expand Down
17 changes: 16 additions & 1 deletion pkg/model/provider/anthropic/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/docker/docker-agent/pkg/model/provider/base"
"github.com/docker/docker-agent/pkg/model/provider/options"
"github.com/docker/docker-agent/pkg/model/provider/providerutil"
"github.com/docker/docker-agent/pkg/modelsdev"
"github.com/docker/docker-agent/pkg/tools"
)

Expand All @@ -33,6 +34,7 @@ type Client struct {

clientFn func(context.Context) (anthropic.Client, error)
fileManager *FileManager
modelsStore *modelsdev.Store // initialised in NewClient; overrideable in tests
}

// NewClient creates a new Anthropic client from the provided configuration
Expand Down Expand Up @@ -67,6 +69,13 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
},
}

store, err := modelsdev.NewStore()
if err != nil {
slog.WarnContext(ctx, "anthropic: failed to load models.dev store, attachments will use conservative caps", "error", err)
store = modelsdev.NewDatabaseStore(&modelsdev.Database{}) // empty: conservative text-only
}
anthropicClient.modelsStore = store

if gateway := globalOptions.Gateway(); gateway == "" {
authOpts, err := buildDirectAuthOptions(ctx, cfg, env)
if err != nil {
Expand Down Expand Up @@ -313,6 +322,12 @@ func (c *Client) CreateChatCompletionStream(
return ad, nil
}

// convertDoc converts a document attachment using the client's model ID
// and the store initialized at construction time.
func (c *Client) convertDoc(ctx context.Context, doc chat.Document) ([]anthropic.ContentBlockParamUnion, error) {
return convertDocumentFromStore(ctx, doc, c.ID(), c.modelsStore)
}

func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ([]anthropic.MessageParam, error) {
var anthropicMessages []anthropic.MessageParam
// Track whether the last appended assistant message included tool_use blocks
Expand Down Expand Up @@ -553,7 +568,7 @@ func (c *Client) convertUserMultiContent(ctx context.Context, parts []chat.Messa

case chat.MessagePartTypeDocument:
if part.Document != nil {
docBlocks, err := convertDocument(ctx, *part.Document, c.ModelConfig.Model)
docBlocks, err := c.convertDoc(ctx, *part.Document)
if err != nil {
return nil, fmt.Errorf("failed to convert document attachment %q: %w", part.Document.Name, err)
}
Expand Down
8 changes: 5 additions & 3 deletions pkg/model/provider/bedrock/attachments.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/docker/docker-agent/pkg/attachment"
"github.com/docker/docker-agent/pkg/attachment/modelcaps"
"github.com/docker/docker-agent/pkg/chat"
"github.com/docker/docker-agent/pkg/modelsdev"
)

// imageFormatFromMIME maps a MIME type to a Bedrock ImageFormat.
Expand All @@ -32,15 +33,16 @@ func imageFormatFromMIME(mimeType string) (types.ImageFormat, bool) {
}
}

// convertDocument converts a chat.Document to zero or more Bedrock ContentBlocks.
// convertDocumentFromStore converts a chat.Document to zero or more Bedrock ContentBlocks
// using the provided modelsdev.Store for capability lookup.
//
// Routing:
// - image/* with InlineData → ContentBlockMemberImage
// - application/pdf with InlineData → ContentBlockMemberDocument (PDF)
// - text/* with InlineText → ContentBlockMemberText with TXTEnvelope
// - unsupported / no content → nil (logged as warning)
func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]types.ContentBlock, error) {
mc, _ := modelcaps.Load(modelID)
func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]types.ContentBlock, error) {
mc := modelcaps.LoadFromStore(store, modelID)
return convertDocumentWithCaps(ctx, doc, mc)
}

Expand Down
6 changes: 3 additions & 3 deletions pkg/model/provider/bedrock/attachments_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func TestConvertDocumentBedrock_StrategyTXT(t *testing.T) {
Source: chat.DocumentSource{InlineText: "## Notes"},
}

blocks, err := convertDocument(t.Context(), doc, "")
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
require.NoError(t, err)
require.Len(t, blocks, 1)
textBlock, ok := blocks[0].(*types.ContentBlockMemberText)
Expand All @@ -96,7 +96,7 @@ func TestConvertDocumentBedrock_StrategyTXT_Envelope(t *testing.T) {
Source: chat.DocumentSource{InlineText: "a,b"},
}

blocks, err := convertDocument(t.Context(), doc, "")
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
require.NoError(t, err)
require.Len(t, blocks, 1)
textBlock, ok := blocks[0].(*types.ContentBlockMemberText)
Expand All @@ -111,7 +111,7 @@ func TestConvertDocumentBedrock_Drop_NoContent(t *testing.T) {
Source: chat.DocumentSource{},
}

blocks, err := convertDocument(t.Context(), doc, "")
blocks, err := convertDocumentWithCaps(t.Context(), doc, modelcaps.ModelCapabilities{})
require.NoError(t, err)
assert.Nil(t, blocks, "should be nil when no inline content")
}
Expand Down
12 changes: 10 additions & 2 deletions pkg/model/provider/bedrock/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ type Client struct {
base.Config

bedrockClient *bedrockruntime.Client
cachingSupported bool // Cached at init time for efficiency
cachingSupported bool // Cached at init time for efficiency
modelsStore *modelsdev.Store // initialised in NewClient
}

// bearerTokenTransport adds Authorization header with bearer token to requests
Expand Down Expand Up @@ -116,6 +117,12 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
// Uses models.dev cache pricing as proxy for capability detection.
cachingSupported := detectCachingSupport(ctx, cfg.Model)

attachStore, err := modelsdev.NewStore()
if err != nil {
slog.WarnContext(ctx, "bedrock: failed to load models.dev store, attachments will use conservative caps", "error", err)
attachStore = modelsdev.NewDatabaseStore(&modelsdev.Database{})
}

slog.DebugContext(ctx, "Bedrock client created successfully",
"model", cfg.Model,
"region", awsCfg.Region,
Expand All @@ -129,6 +136,7 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
},
bedrockClient: bedrockClient,
cachingSupported: cachingSupported,
modelsStore: attachStore,
}, nil
}

Expand Down Expand Up @@ -236,7 +244,7 @@ func (c *Client) buildConverseStreamInput(ctx context.Context, messages []chat.M
enableCaching := c.promptCachingEnabled()

// Convert and set messages (excluding system)
input.Messages, input.System = convertMessages(ctx, messages, c.ModelConfig.Model, enableCaching)
input.Messages, input.System = convertMessages(ctx, messages, c.ID(), c.modelsStore, enableCaching)

// Compute thinking fields first — its presence drives the inference config.
additionalFields := c.buildAdditionalModelRequestFields()
Expand Down
Loading
Loading