From 2b403c830cbc6a5a90b8015feb1fb4867d7b2da9 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Mon, 11 May 2026 16:21:48 +0200
Subject: [PATCH] refactor: centralize modelsdev.Store creation via
 RuntimeConfig
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rebase PR #2746 on top of origin/main, resolving conflicts with PR #2738.

Key changes:
- Add RuntimeConfig.ModelsDevStore() lazy accessor with sync.Once caching
- Add options.WithModelsDevStore to thread the store through provider options
- Merge modelcaps.Load and LoadFromStore into a single Load(store, modelID)
  that returns only ModelCapabilities (the always-nil error result is dropped)
- Remove per-client modelsStore fields from all provider clients (anthropic,
  openai, gemini, bedrock, dmr) — they now get the store via ModelOptions
- Convert modelsdev.NewStore from sync.OnceValues singleton to a plain
  function since RuntimeConfig handles the caching
- Wire RuntimeConfig.ModelsDevStore through teamloader, cmd/root/models,
  and rag/strategy/embedding
- Update detectCachingSupport (bedrock) to accept store parameter
- Rename oaistream ConvertMessagesFromStore/ConvertMultiContentFromStore
  to ConvertMessages/ConvertMultiContent
- Keep c.ID() (qualified provider/model) from PR #2738 at all call sites
---
 cmd/root/models.go                            |  3 +-
 pkg/attachment/modelcaps/modelcaps.go         | 51 +++++--------------
 pkg/attachment/modelcaps/modelcaps_test.go    | 26 +++++-----
 pkg/config/runtime.go                         | 26 +++++++++-
 pkg/model/provider/anthropic/attachments.go   |  6 +--
 .../provider/anthropic/attachments_test.go    |  6 ++-
 pkg/model/provider/anthropic/client.go        | 11 +---
 pkg/model/provider/bedrock/attachments.go     |  6 +--
 pkg/model/provider/bedrock/client.go          | 20 ++------
 pkg/model/provider/bedrock/client_test.go     | 15 ++++--
 pkg/model/provider/bedrock/convert.go         |  2 +-
 pkg/model/provider/dmr/client.go              | 23 +++------
 pkg/model/provider/gemini/attachments.go      |  6 +--
 pkg/model/provider/gemini/client.go           | 16 ++----
 pkg/model/provider/oaistream/attachments.go   |  6 +--
 .../provider/oaistream/attachments_test.go    |  6 +--
 pkg/model/provider/oaistream/messages.go      | 10 ++--
 pkg/model/provider/oaistream/messages_test.go |  4 +-
 pkg/model/provider/openai/attachments.go      |  6 +--
 pkg/model/provider/openai/client.go           | 16 ++----
 pkg/model/provider/options/options.go         | 15 ++++++
 pkg/modelsdev/store.go                        | 16 +++---
 pkg/rag/builder.go                            |  3 ++
 pkg/rag/strategy/embedding.go                 | 13 +++--
 pkg/rag/strategy/strategy.go                  |  2 +
 pkg/teamloader/registry.go                    |  1 +
 pkg/teamloader/teamloader.go                  | 13 +++--
 27 files changed, 159 insertions(+), 169 deletions(-)

diff --git a/cmd/root/models.go b/cmd/root/models.go
index 050616778..4c3f028f9 100644
--- a/cmd/root/models.go
+++ b/cmd/root/models.go
@@ -14,7 +14,6 @@ import (
 	"github.com/docker/docker-agent/pkg/config"
 	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/model/provider"
-	"github.com/docker/docker-agent/pkg/modelsdev"
 	"github.com/docker/docker-agent/pkg/telemetry"
 )
 
@@ -161,7 +160,7 @@ func (f *modelsListFlags) collectModels(ctx context.Context, availableProviders
 	}
 
 	// Fetch catalog and add all text-capable models.
-	store, err := modelsdev.NewStore()
+	store, err := f.runConfig.ModelsDevStore()
 	if err != nil {
 		return rows
 	}
diff --git a/pkg/attachment/modelcaps/modelcaps.go b/pkg/attachment/modelcaps/modelcaps.go
index 105756207..6cd6e7b4c 100644
--- a/pkg/attachment/modelcaps/modelcaps.go
+++ b/pkg/attachment/modelcaps/modelcaps.go
@@ -83,31 +83,27 @@ func (mc ModelCapabilities) Supports(mimeType string) bool {
 const loadTimeout = 10 * time.Second
 
 // Load fetches (or returns from cache) the capability record for the given
-// model ID.  The model ID should be in "provider/model" format as used by
-// models.dev (e.g. "anthropic/claude-3-5-sonnet-20241022").
+// model ID using the provided store. The model ID should be in
+// "provider/model" format as used by models.dev
+// (e.g. "anthropic/claude-3-5-sonnet-20241022").
 //
-// When the model is not found in the models.dev database, Load returns a
-// conservative capability set that only allows text MIME types.  The returned
-// error is always nil; capability detection failures are silent and safe.
-func Load(modelID string) (ModelCapabilities, error) {
+// When the store is nil or the lookup fails (model not found, timeout), Load
+// returns a conservative capability set that only allows text MIME types.
+func Load(store *modelsdev.Store, modelID string) ModelCapabilities {
+	if store == nil {
+		return ModelCapabilities{modelFound: false}
+	}
+
 	ctx, cancel := context.WithTimeout(context.Background(), loadTimeout)
 	defer cancel()
 
-	store, err := modelsdev.NewStore()
-	if err != nil {
-		slog.WarnContext(ctx, "modelcaps: failed to load models.dev store, using conservative caps",
-			"error", err, "model", modelID)
-		return ModelCapabilities{modelFound: false}, nil
-	}
-
 	model, err := store.GetModel(ctx, modelID)
 	if err != nil {
 		if ctx.Err() != nil {
 			slog.WarnContext(ctx, "modelcaps: models.dev lookup timed out, using conservative caps",
 				"model", modelID, "timeout", loadTimeout)
 		}
-		// Model not found or context cancelled — conservative: text-only.
-		return ModelCapabilities{modelFound: false}, nil
+		return ModelCapabilities{modelFound: false}
 	}
 
 	mc := ModelCapabilities{modelFound: true}
@@ -119,7 +115,7 @@ func Load(modelID string) (ModelCapabilities, error) {
 			mc.supportsPDF = true
 		}
 	}
-	return mc, nil
+	return mc
 }
 
 // CapsWith constructs a ModelCapabilities value directly from booleans. This is
@@ -132,26 +128,3 @@ func CapsWith(supportsImage, supportsPDF bool) ModelCapabilities {
 		modelFound:    true,
 	}
 }
-
-// LoadFromStore is like Load but accepts an explicit *modelsdev.Store, making
-// it convenient for tests that inject a pre-populated in-memory store.
-func LoadFromStore(store *modelsdev.Store, modelID string) ModelCapabilities {
-	ctx, cancel := context.WithTimeout(context.Background(), loadTimeout)
-	defer cancel()
-
-	model, err := store.GetModel(ctx, modelID)
-	if err != nil {
-		return ModelCapabilities{modelFound: false}
-	}
-
-	mc := ModelCapabilities{modelFound: true}
-	for _, input := range model.Modalities.Input {
-		switch strings.ToLower(input) {
-		case "image":
-			mc.supportsImage = true
-		case "pdf":
-			mc.supportsPDF = true
-		}
-	}
-	return mc
-}
diff --git a/pkg/attachment/modelcaps/modelcaps_test.go b/pkg/attachment/modelcaps/modelcaps_test.go
index fc410c86e..f21c44c35 100644
--- a/pkg/attachment/modelcaps/modelcaps_test.go
+++ b/pkg/attachment/modelcaps/modelcaps_test.go
@@ -13,16 +13,16 @@ func buildStore(providers map[string]modelsdev.Provider) *modelsdev.Store {
 	return modelsdev.NewDatabaseStore(db)
 }
 
-// TestLoadFromStore_QualifiedIDRequired is the regression test for the bug
+// TestLoad_QualifiedIDRequired is the regression test for the bug
 // fixed by pass-fully-qualified-provider-model-ID: modelcaps.Load (and
-// LoadFromStore) requires a "provider/model" key to find a model in the
+// its callers) requires a "provider/model" key to find a model in the
 // models.dev database.  A bare model name without the provider prefix must
 // NOT resolve to vision capabilities — it falls back to text-only.
 //
 // Before the fix, callers passed c.ModelConfig.Model (e.g. "claude-sonnet-4-6")
 // instead of c.ModelConfig.Provider+"/"+c.ModelConfig.Model; the lookup always
 // missed and all image / PDF attachments were silently dropped.
-func TestLoadFromStore_QualifiedIDRequired(t *testing.T) {
+func TestLoad_QualifiedIDRequired(t *testing.T) {
 	store := buildStore(map[string]modelsdev.Provider{
 		"anthropic": {
 			Models: map[string]modelsdev.Model{
@@ -39,7 +39,7 @@ func TestLoadFromStore_QualifiedIDRequired(t *testing.T) {
 
 	// Bare model name (the original bug): must fall back to conservative text-only caps.
 	bareID := "claude-sonnet-4-6"
-	mcBare := modelcaps.LoadFromStore(store, bareID)
+	mcBare := modelcaps.Load(store, bareID)
 	if mcBare.Supports("image/jpeg") {
 		t.Errorf("bare model name %q must NOT resolve to vision caps: image/jpeg should be dropped", bareID)
 	}
@@ -49,7 +49,7 @@ func TestLoadFromStore_QualifiedIDRequired(t *testing.T) {
 
 	// Fully-qualified ID (the fix): must resolve to vision+pdf caps.
 	qualifiedID := "anthropic/claude-sonnet-4-6"
-	mcQualified := modelcaps.LoadFromStore(store, qualifiedID)
+	mcQualified := modelcaps.Load(store, qualifiedID)
 	if !mcQualified.Supports("image/jpeg") {
 		t.Errorf("qualified ID %q must resolve to vision caps: image/jpeg should be passed through", qualifiedID)
 	}
@@ -58,7 +58,7 @@ func TestLoadFromStore_QualifiedIDRequired(t *testing.T) {
 	}
 }
 
-func TestLoadFromStore_VisionModel(t *testing.T) {
+func TestLoad_VisionModel(t *testing.T) {
 	store := buildStore(map[string]modelsdev.Provider{
 		"anthropic": {
 			Models: map[string]modelsdev.Model{
@@ -73,7 +73,7 @@ func TestLoadFromStore_VisionModel(t *testing.T) {
 		},
 	})
 
-	mc := modelcaps.LoadFromStore(store, "anthropic/claude-3-5-sonnet")
+	mc := modelcaps.Load(store, "anthropic/claude-3-5-sonnet")
 
 	if !mc.Supports("image/jpeg") {
 		t.Error("expected image/jpeg to be supported for vision model")
@@ -89,7 +89,7 @@ func TestLoadFromStore_VisionModel(t *testing.T) {
 	}
 }
 
-func TestLoadFromStore_TextOnlyModel(t *testing.T) {
+func TestLoad_TextOnlyModel(t *testing.T) {
 	store := buildStore(map[string]modelsdev.Provider{
 		"openai": {
 			Models: map[string]modelsdev.Model{
@@ -104,7 +104,7 @@ func TestLoadFromStore_TextOnlyModel(t *testing.T) {
 		},
 	})
 
-	mc := modelcaps.LoadFromStore(store, "openai/gpt-3.5-turbo")
+	mc := modelcaps.Load(store, "openai/gpt-3.5-turbo")
 
 	if mc.Supports("image/jpeg") {
 		t.Error("expected image/jpeg NOT to be supported for text-only model")
@@ -121,10 +121,10 @@ func TestLoadFromStore_TextOnlyModel(t *testing.T) {
 	}
 }
 
-func TestLoadFromStore_ModelNotFound(t *testing.T) {
+func TestLoad_ModelNotFound(t *testing.T) {
 	store := buildStore(map[string]modelsdev.Provider{})
 
-	mc := modelcaps.LoadFromStore(store, "unknown/nonexistent-model")
+	mc := modelcaps.Load(store, "unknown/nonexistent-model")
 
 	// Conservative fallback: only text is allowed
 	if mc.Supports("image/jpeg") {
@@ -138,7 +138,7 @@ func TestLoadFromStore_ModelNotFound(t *testing.T) {
 	}
 }
 
-func TestLoadFromStore_OfficeDocsNotAllowed(t *testing.T) {
+func TestLoad_OfficeDocsNotAllowed(t *testing.T) {
 	// Office document MIMEs (DOCX, XLSX, etc.) are ZIP-based binaries and
 	// cannot be naively TXT-enveloped. models.dev has no "office" or
 	// "document" modality, so they must return false for all models.
@@ -156,7 +156,7 @@ func TestLoadFromStore_OfficeDocsNotAllowed(t *testing.T) {
 		},
 	})
 
-	mc := modelcaps.LoadFromStore(store, "openai/gpt-4o")
+	mc := modelcaps.Load(store, "openai/gpt-4o")
 
 	for _, officeMIME := range []string{
 		"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
diff --git a/pkg/config/runtime.go b/pkg/config/runtime.go
index 0cb3d244b..14c7cd7f7 100644
--- a/pkg/config/runtime.go
+++ b/pkg/config/runtime.go
@@ -9,6 +9,7 @@ import (
 
 	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/environment"
+	"github.com/docker/docker-agent/pkg/modelsdev"
 )
 
 type RuntimeConfig struct {
@@ -17,6 +18,11 @@ type RuntimeConfig struct {
 	EnvProviderForTests environment.Provider
 	envProvider         environment.Provider
 	envProviderLock     sync.Mutex
+
+	ModelsDevStoreOverride *modelsdev.Store
+	modelsDevStore         *modelsdev.Store
+	modelsDevStoreErr      error
+	modelsDevStoreOnce     sync.Once
 }
 
 type Config struct {
@@ -41,9 +47,14 @@ type Config struct {
 }
 
 func (runConfig *RuntimeConfig) Clone() *RuntimeConfig {
+	store, storeErr := runConfig.ModelsDevStore()
 	clone := &RuntimeConfig{
-		Config: runConfig.Config,
+		Config:                 runConfig.Config,
+		ModelsDevStoreOverride: runConfig.ModelsDevStoreOverride,
+		modelsDevStore:         store,
+		modelsDevStoreErr:      storeErr,
 	}
+	clone.modelsDevStoreOnce.Do(func() {}) // mark as resolved
 	clone.EnvFiles = slices.Clone(runConfig.EnvFiles)
 	clone.Models = maps.Clone(runConfig.Models)
 	clone.Providers = maps.Clone(runConfig.Providers)
@@ -57,6 +68,19 @@ func (runConfig *RuntimeConfig) Clone() *RuntimeConfig {
 	return clone
 }
 
+// ModelsDevStore returns the lazily-initialized models.dev store.
+// The store is created on first access and shared across clones.
+// If ModelsDevStoreOverride is set, it is returned directly.
+func (runConfig *RuntimeConfig) ModelsDevStore() (*modelsdev.Store, error) {
+	if runConfig.ModelsDevStoreOverride != nil {
+		return runConfig.ModelsDevStoreOverride, nil
+	}
+	runConfig.modelsDevStoreOnce.Do(func() {
+		runConfig.modelsDevStore, runConfig.modelsDevStoreErr = modelsdev.NewStore()
+	})
+	return runConfig.modelsDevStore, runConfig.modelsDevStoreErr
+}
+
 func (runConfig *RuntimeConfig) EnvProvider() environment.Provider {
 	if runConfig.EnvProviderForTests != nil {
 		return runConfig.EnvProviderForTests
diff --git a/pkg/model/provider/anthropic/attachments.go b/pkg/model/provider/anthropic/attachments.go
index 62cb81ddd..db14fd683 100644
--- a/pkg/model/provider/anthropic/attachments.go
+++ b/pkg/model/provider/anthropic/attachments.go
@@ -15,7 +15,7 @@ import (
 	"github.com/docker/docker-agent/pkg/modelsdev"
 )
 
-// convertDocumentFromStore converts a chat.Document to standard Anthropic SDK content blocks
+// convertDocument converts a chat.Document to standard Anthropic SDK content blocks
 // using an explicit modelsdev.Store for capability lookup.
 //
 // Routing:
@@ -23,8 +23,8 @@ import (
 //   - application/pdf with InlineData → DocumentBlockParam (base64)
 //   - text with InlineText → TextBlockParam with TXTEnvelope
 //   - unsupported / no content → nil (logged as warning)
-func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]anthropic.ContentBlockParamUnion, error) {
-	mc := modelcaps.LoadFromStore(store, modelID)
+func convertDocument(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]anthropic.ContentBlockParamUnion, error) {
+	mc := modelcaps.Load(store, modelID)
 	return convertDocumentWithCaps(ctx, doc, mc)
 }
 
diff --git a/pkg/model/provider/anthropic/attachments_test.go b/pkg/model/provider/anthropic/attachments_test.go
index 889089063..b788a53c3 100644
--- a/pkg/model/provider/anthropic/attachments_test.go
+++ b/pkg/model/provider/anthropic/attachments_test.go
@@ -11,6 +11,7 @@ import (
 	"github.com/docker/docker-agent/pkg/chat"
 	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/model/provider/base"
+	"github.com/docker/docker-agent/pkg/model/provider/options"
 	"github.com/docker/docker-agent/pkg/modelsdev"
 )
 
@@ -79,14 +80,17 @@ func TestConvertDocumentAnthropic_QualifiedIDRequired(t *testing.T) {
 		},
 	})
 
+	var modelOpts options.ModelOptions
+	options.WithModelsDevStore(store)(&modelOpts)
+
 	c := &Client{
 		Config: base.Config{
 			ModelConfig: latest.ModelConfig{
 				Provider: "anthropic",
 				Model:    "claude-sonnet-4-6",
 			},
+			ModelOptions: modelOpts,
 		},
-		modelsStore: store,
 	}
 
 	parts := []chat.MessagePart{
diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go
index b609b6a06..14a85b938 100644
--- a/pkg/model/provider/anthropic/client.go
+++ b/pkg/model/provider/anthropic/client.go
@@ -23,7 +23,6 @@ import (
 	"github.com/docker/docker-agent/pkg/model/provider/base"
 	"github.com/docker/docker-agent/pkg/model/provider/options"
 	"github.com/docker/docker-agent/pkg/model/provider/providerutil"
-	"github.com/docker/docker-agent/pkg/modelsdev"
 	"github.com/docker/docker-agent/pkg/tools"
 )
 
@@ -34,7 +33,6 @@ type Client struct {
 
 	clientFn    func(context.Context) (anthropic.Client, error)
 	fileManager *FileManager
-	modelsStore *modelsdev.Store // initialised in NewClient; overrideable in tests
 }
 
 // NewClient creates a new Anthropic client from the provided configuration
@@ -69,13 +67,6 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
 		},
 	}
 
-	store, err := modelsdev.NewStore()
-	if err != nil {
-		slog.WarnContext(ctx, "anthropic: failed to load models.dev store, attachments will use conservative caps", "error", err)
-		store = modelsdev.NewDatabaseStore(&modelsdev.Database{}) // empty: conservative text-only
-	}
-	anthropicClient.modelsStore = store
-
 	if gateway := globalOptions.Gateway(); gateway == "" {
 		authOpts, err := buildDirectAuthOptions(ctx, cfg, env)
 		if err != nil {
@@ -325,7 +316,7 @@ func (c *Client) CreateChatCompletionStream(
 // convertDoc converts a document attachment using the client's model ID
 // and the store initialized at construction time.
 func (c *Client) convertDoc(ctx context.Context, doc chat.Document) ([]anthropic.ContentBlockParamUnion, error) {
-	return convertDocumentFromStore(ctx, doc, c.ID(), c.modelsStore)
+	return convertDocument(ctx, doc, c.ID(), c.ModelOptions.ModelsDevStore())
 }
 
 func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ([]anthropic.MessageParam, error) {
diff --git a/pkg/model/provider/bedrock/attachments.go b/pkg/model/provider/bedrock/attachments.go
index 6f37583b3..6b40034da 100644
--- a/pkg/model/provider/bedrock/attachments.go
+++ b/pkg/model/provider/bedrock/attachments.go
@@ -33,7 +33,7 @@ func imageFormatFromMIME(mimeType string) (types.ImageFormat, bool) {
 	}
 }
 
-// convertDocumentFromStore converts a chat.Document to zero or more Bedrock ContentBlocks
+// convertDocument converts a chat.Document to zero or more Bedrock ContentBlocks
 // using the provided modelsdev.Store for capability lookup.
 //
 // Routing:
@@ -41,8 +41,8 @@ func imageFormatFromMIME(mimeType string) (types.ImageFormat, bool) {
 //   - application/pdf with InlineData → ContentBlockMemberDocument (PDF)
 //   - text/* with InlineText → ContentBlockMemberText with TXTEnvelope
 //   - unsupported / no content → nil (logged as warning)
-func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]types.ContentBlock, error) {
-	mc := modelcaps.LoadFromStore(store, modelID)
+func convertDocument(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]types.ContentBlock, error) {
+	mc := modelcaps.Load(store, modelID)
 	return convertDocumentWithCaps(ctx, doc, mc)
 }
 
diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go
index e1ea5d1f3..0643768e5 100644
--- a/pkg/model/provider/bedrock/client.go
+++ b/pkg/model/provider/bedrock/client.go
@@ -30,8 +30,7 @@ type Client struct {
 	base.Config
 
 	bedrockClient    *bedrockruntime.Client
-	cachingSupported bool             // Cached at init time for efficiency
-	modelsStore      *modelsdev.Store // initialised in NewClient
+	cachingSupported bool // Cached at init time for efficiency
 }
 
 // bearerTokenTransport adds Authorization header with bearer token to requests
@@ -115,13 +114,7 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
 
 	// Detect prompt caching capability at init time for efficiency.
 	// Uses models.dev cache pricing as proxy for capability detection.
-	cachingSupported := detectCachingSupport(ctx, cfg.Model)
-
-	attachStore, err := modelsdev.NewStore()
-	if err != nil {
-		slog.WarnContext(ctx, "bedrock: failed to load models.dev store, attachments will use conservative caps", "error", err)
-		attachStore = modelsdev.NewDatabaseStore(&modelsdev.Database{})
-	}
+	cachingSupported := detectCachingSupport(ctx, cfg.Model, globalOptions.ModelsDevStore())
 
 	slog.DebugContext(ctx, "Bedrock client created successfully",
 		"model", cfg.Model,
@@ -136,17 +129,14 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
 		},
 		bedrockClient:    bedrockClient,
 		cachingSupported: cachingSupported,
-		modelsStore:      attachStore,
 	}, nil
 }
 
 // detectCachingSupport checks if a model supports prompt caching using models.dev data.
 // Models with non-zero CacheRead/CacheWrite costs support prompt caching.
 // Returns false on lookup failure (safe default for unsupported models).
-func detectCachingSupport(ctx context.Context, model string) bool {
-	store, err := modelsdev.NewStore()
-	if err != nil {
-		slog.DebugContext(ctx, "Bedrock models store unavailable, prompt caching disabled", "error", err)
+func detectCachingSupport(ctx context.Context, model string, store *modelsdev.Store) bool {
+	if store == nil {
 		return false
 	}
 
@@ -244,7 +234,7 @@ func (c *Client) buildConverseStreamInput(ctx context.Context, messages []chat.M
 	enableCaching := c.promptCachingEnabled()
 
 	// Convert and set messages (excluding system)
-	input.Messages, input.System = convertMessages(ctx, messages, c.ID(), c.modelsStore, enableCaching)
+	input.Messages, input.System = convertMessages(ctx, messages, c.ID(), c.ModelOptions.ModelsDevStore(), enableCaching)
 
 	// Compute thinking fields first — its presence drives the inference config.
 	additionalFields := c.buildAdditionalModelRequestFields()
diff --git a/pkg/model/provider/bedrock/client_test.go b/pkg/model/provider/bedrock/client_test.go
index 793f5c2f9..9d0e09e5d 100644
--- a/pkg/model/provider/bedrock/client_test.go
+++ b/pkg/model/provider/bedrock/client_test.go
@@ -1234,24 +1234,33 @@ func TestPromptCachingEnabled_TypeMismatch(t *testing.T) {
 func TestDetectCachingSupport_SupportedModel(t *testing.T) {
 	t.Parallel()
 
+	store, err := modelsdev.NewStore()
+	require.NoError(t, err)
+
 	// Uses real models.dev lookup to verify Claude models support caching
-	supported := detectCachingSupport(t.Context(), "anthropic.claude-opus-4-7")
+	supported := detectCachingSupport(t.Context(), "anthropic.claude-opus-4-7", store)
 	assert.True(t, supported)
 }
 
 func TestDetectCachingSupport_UnsupportedModel(t *testing.T) {
 	t.Parallel()
 
+	store, err := modelsdev.NewStore()
+	require.NoError(t, err)
+
 	// Llama doesn't have cache pricing in models.dev
-	supported := detectCachingSupport(t.Context(), "meta.llama3-8b-instruct-v1:0")
+	supported := detectCachingSupport(t.Context(), "meta.llama3-8b-instruct-v1:0", store)
 	assert.False(t, supported)
 }
 
 func TestDetectCachingSupport_UnknownModel(t *testing.T) {
 	t.Parallel()
 
+	store, err := modelsdev.NewStore()
+	require.NoError(t, err)
+
 	// Unknown model should gracefully return false, not panic
-	supported := detectCachingSupport(t.Context(), "nonexistent.model.that.does.not.exist:v1")
+	supported := detectCachingSupport(t.Context(), "nonexistent.model.that.does.not.exist:v1", store)
 	assert.False(t, supported)
 }
 
diff --git a/pkg/model/provider/bedrock/convert.go b/pkg/model/provider/bedrock/convert.go
index d6ab7b988..73c302ee1 100644
--- a/pkg/model/provider/bedrock/convert.go
+++ b/pkg/model/provider/bedrock/convert.go
@@ -140,7 +140,7 @@ func convertUserContent(ctx context.Context, msg *chat.Message, modelID string,
 				}
 			case chat.MessagePartTypeDocument:
 				if part.Document != nil {
-					docBlocks, err := convertDocumentFromStore(ctx, *part.Document, modelID, store)
+					docBlocks, err := convertDocument(ctx, *part.Document, modelID, store)
 					if err != nil {
 						slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name)
 						continue
diff --git a/pkg/model/provider/dmr/client.go b/pkg/model/provider/dmr/client.go
index f145e067d..78af92622 100644
--- a/pkg/model/provider/dmr/client.go
+++ b/pkg/model/provider/dmr/client.go
@@ -21,7 +21,6 @@ import (
 	"github.com/docker/docker-agent/pkg/model/provider/base"
 	"github.com/docker/docker-agent/pkg/model/provider/oaistream"
 	"github.com/docker/docker-agent/pkg/model/provider/options"
-	"github.com/docker/docker-agent/pkg/modelsdev"
 	"github.com/docker/docker-agent/pkg/tools"
 )
 
@@ -53,10 +52,9 @@ const (
 type Client struct {
 	base.Config
 
-	client      openai.Client
-	httpClient  *http.Client
-	engine      string
-	modelsStore *modelsdev.Store // initialised in NewClient
+	client     openai.Client
+	httpClient *http.Client
+	engine     string
 }
 
 // NewClient creates a new DMR client from the provided configuration
@@ -134,29 +132,22 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, opts ...options.Opt
 
 	slog.DebugContext(ctx, "DMR client created successfully", "model", cfg.Model, "base_url", baseURL)
 
-	store, err := modelsdev.NewStore()
-	if err != nil {
-		slog.WarnContext(ctx, "dmr: failed to load models.dev store, attachments will use conservative caps", "error", err)
-		store = modelsdev.NewDatabaseStore(&modelsdev.Database{})
-	}
-
 	return &Client{
 		Config: base.Config{
 			ModelConfig:  *cfg,
 			ModelOptions: globalOptions,
 			BaseURL:      baseURL,
 		},
-		client:      openai.NewClient(clientOptions...),
-		httpClient:  httpClient,
-		engine:      engine,
-		modelsStore: store,
+		client:     openai.NewClient(clientOptions...),
+		httpClient: httpClient,
+		engine:     engine,
 	}, nil
 }
 
 // convertMessages converts chat messages to OpenAI format and merges consecutive
 // system/user messages, which is needed by some local models run by DMR.
 func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) []openai.ChatCompletionMessageParamUnion {
-	openaiMessages := oaistream.ConvertMessagesFromStore(ctx, messages, c.ID(), c.modelsStore)
+	openaiMessages := oaistream.ConvertMessages(ctx, messages, c.ID(), c.ModelOptions.ModelsDevStore())
 	return oaistream.MergeConsecutiveMessages(openaiMessages)
 }
 
diff --git a/pkg/model/provider/gemini/attachments.go b/pkg/model/provider/gemini/attachments.go
index ce3c5fb6b..a906127dd 100644
--- a/pkg/model/provider/gemini/attachments.go
+++ b/pkg/model/provider/gemini/attachments.go
@@ -13,15 +13,15 @@ import (
 	"github.com/docker/docker-agent/pkg/modelsdev"
 )
 
-// convertDocumentFromStore converts a chat.Document to a Gemini genai.Part
+// convertDocument converts a chat.Document to a Gemini genai.Part
 // using the provided modelsdev.Store for capability lookup.
 //
 // Routing:
 //   - image/* or binary with InlineData → genai.Blob part
 //   - text MIMEs with InlineText → genai.Text part with TXTEnvelope
 //   - unsupported / no content → nil (logged as warning)
-func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) (*genai.Part, error) {
-	mc := modelcaps.LoadFromStore(store, modelID)
+func convertDocument(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) (*genai.Part, error) {
+	mc := modelcaps.Load(store, modelID)
 	return convertDocumentWithCaps(ctx, doc, mc)
 }
 
diff --git a/pkg/model/provider/gemini/client.go b/pkg/model/provider/gemini/client.go
index c07213202..5df5241d9 100644
--- a/pkg/model/provider/gemini/client.go
+++ b/pkg/model/provider/gemini/client.go
@@ -34,8 +34,7 @@ import (
 type Client struct {
 	base.Config
 
-	clientFn    func(context.Context) (*genai.Client, error)
-	modelsStore *modelsdev.Store // initialised in NewClient
+	clientFn func(context.Context) (*genai.Client, error)
 }
 
 // NewClient creates a new Gemini client from the provided configuration
@@ -169,20 +168,13 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
 
 	slog.DebugContext(ctx, "Gemini client created successfully", "model", cfg.Model)
 
-	store, err := modelsdev.NewStore()
-	if err != nil {
-		slog.WarnContext(ctx, "gemini: failed to load models.dev store, attachments will use conservative caps", "error", err)
-		store = modelsdev.NewDatabaseStore(&modelsdev.Database{})
-	}
-
 	return &Client{
 		Config: base.Config{
 			ModelConfig:  *cfg,
 			ModelOptions: globalOptions,
 			Env:          env,
 		},
-		clientFn:    clientFn,
-		modelsStore: store,
+		clientFn: clientFn,
 	}, nil
 }
 
@@ -310,7 +302,7 @@ func convertMultiContent(ctx context.Context, multiContent []chat.MessagePart, t
 			}
 		case chat.MessagePartTypeDocument:
 			if part.Document != nil {
-				docPart, err := convertDocumentFromStore(ctx, *part.Document, modelID, store)
+				docPart, err := convertDocument(ctx, *part.Document, modelID, store)
 				if err != nil {
 					slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name)
 					continue
@@ -610,7 +602,7 @@ func (c *Client) CreateChatCompletionStream(
 		}
 	}
 
-	contents := convertMessagesToGemini(ctx, messages, c.ID(), c.modelsStore)
+	contents := convertMessagesToGemini(ctx, messages, c.ID(), c.ModelOptions.ModelsDevStore())
 
 	// Debug: Log the messages we're sending
 	slog.DebugContext(ctx, "Gemini messages", "count", len(contents))
diff --git a/pkg/model/provider/oaistream/attachments.go b/pkg/model/provider/oaistream/attachments.go
index aa2c573b6..25ff6e44d 100644
--- a/pkg/model/provider/oaistream/attachments.go
+++ b/pkg/model/provider/oaistream/attachments.go
@@ -15,7 +15,7 @@ import (
 	"github.com/docker/docker-agent/pkg/modelsdev"
 )
 
-// convertDocumentFromStore converts a chat.Document to zero or more
+// convertDocument converts a chat.Document to zero or more
 // ChatCompletionContentPartUnionParam values using the OpenAI Chat Completions
 // format. It uses the provided modelsdev.Store for capability lookups.
 //
@@ -24,8 +24,8 @@ import (
 //   - other binary MIMEs with InlineData → drop (no native document block on Chat Completions)
 //   - text MIMEs with InlineText → text part with TXTEnvelope
 //   - unsupported / no content → nil (logged as warning)
-func convertDocumentFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]openai.ChatCompletionContentPartUnionParam, error) {
-	mc := modelcaps.LoadFromStore(store, modelID)
+func convertDocument(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]openai.ChatCompletionContentPartUnionParam, error) {
+	mc := modelcaps.Load(store, modelID)
 	return convertDocumentWithCaps(ctx, doc, mc)
 }
 
diff --git a/pkg/model/provider/oaistream/attachments_test.go b/pkg/model/provider/oaistream/attachments_test.go
index d3047ee6c..f55a892eb 100644
--- a/pkg/model/provider/oaistream/attachments_test.go
+++ b/pkg/model/provider/oaistream/attachments_test.go
@@ -58,7 +58,7 @@ func TestConvertDocument_StrategyB64_ImageDropped(t *testing.T) {
 // where callers passed a bare model name instead of a "provider/model" ID,
 // causing modelcaps to miss the model and silently drop image/PDF attachments.
 //
-// It calls ConvertMultiContentFromStore with an injected fake store, exercising
+// It calls ConvertMultiContent with an injected fake store, exercising
 // the same path as the production client (which calls ConvertMessages with c.ID()).
 func TestConvertDocument_QualifiedIDRequired(t *testing.T) {
 	store := modelsdev.NewDatabaseStore(&modelsdev.Database{
@@ -85,11 +85,11 @@ func TestConvertDocument_QualifiedIDRequired(t *testing.T) {
 	}}
 
 	// Bare model name (the original bug): image must be dropped.
-	partsBare := ConvertMultiContentFromStore(t.Context(), msgParts, "gpt-4o", store)
+	partsBare := ConvertMultiContent(t.Context(), msgParts, "gpt-4o", store)
 	assert.Empty(t, partsBare, "bare model name must not resolve caps: image should be dropped")
 
 	// Qualified ID (the fix, matching what c.ID() returns): image must be preserved.
-	partsQualified := ConvertMultiContentFromStore(t.Context(), msgParts, "openai/gpt-4o", store)
+	partsQualified := ConvertMultiContent(t.Context(), msgParts, "openai/gpt-4o", store)
 	require.Len(t, partsQualified, 1, "qualified ID must resolve caps: image should be present")
 	assert.NotNil(t, partsQualified[0].OfImageURL, "expected image URL part for qualified model ID")
 }
diff --git a/pkg/model/provider/oaistream/messages.go b/pkg/model/provider/oaistream/messages.go
index d7e8ae301..105ec4e15 100644
--- a/pkg/model/provider/oaistream/messages.go
+++ b/pkg/model/provider/oaistream/messages.go
@@ -26,15 +26,15 @@ func (j JSONSchema) MarshalJSON() ([]byte, error) {
 	return json.Marshal(map[string]any(j))
 }
 
-// ConvertMultiContentFromStore converts chat.MessagePart slices to OpenAI content
+// ConvertMultiContent converts chat.MessagePart slices to OpenAI content
 // parts using the provided modelsdev.Store for capability lookups.
-func ConvertMultiContentFromStore(ctx context.Context, multiContent []chat.MessagePart, modelID string, store *modelsdev.Store) []openai.ChatCompletionContentPartUnionParam {
+func ConvertMultiContent(ctx context.Context, multiContent []chat.MessagePart, modelID string, store *modelsdev.Store) []openai.ChatCompletionContentPartUnionParam {
 	return convertMultiContentWithStore(ctx, multiContent, modelID, store)
 }
 
-// ConvertMessagesFromStore converts chat.Message slices to OpenAI message params
+// ConvertMessages converts chat.Message slices to OpenAI message params
 // using the provided modelsdev.Store for capability lookups.
-func ConvertMessagesFromStore(ctx context.Context, messages []chat.Message, modelID string, store *modelsdev.Store) []openai.ChatCompletionMessageParamUnion {
+func ConvertMessages(ctx context.Context, messages []chat.Message, modelID string, store *modelsdev.Store) []openai.ChatCompletionMessageParamUnion {
 	return convertMessagesWithStore(ctx, messages, modelID, store)
 }
 
@@ -54,7 +54,7 @@ func convertMultiContentWithStore(ctx context.Context, multiContent []chat.Messa
 			}
 		case chat.MessagePartTypeDocument:
 			if part.Document != nil {
-				docParts, err := convertDocumentFromStore(ctx, *part.Document, modelID, store)
+				docParts, err := convertDocument(ctx, *part.Document, modelID, store)
 				if err != nil {
 					slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name)
 					continue
diff --git a/pkg/model/provider/oaistream/messages_test.go b/pkg/model/provider/oaistream/messages_test.go
index cb80c4ec0..876aa96df 100644
--- a/pkg/model/provider/oaistream/messages_test.go
+++ b/pkg/model/provider/oaistream/messages_test.go
@@ -63,7 +63,7 @@ func TestConvertMultiContent(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			t.Parallel()
-			result := ConvertMultiContentFromStore(t.Context(), tt.multiContent, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}))
+			result := ConvertMultiContent(t.Context(), tt.multiContent, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}))
 			assert.Len(t, result, tt.wantCount)
 		})
 	}
@@ -138,7 +138,7 @@ func TestConvertMessages(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			t.Parallel()
-			result := ConvertMessagesFromStore(t.Context(), tt.messages, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}))
+			result := ConvertMessages(t.Context(), tt.messages, "", modelsdev.NewDatabaseStore(&modelsdev.Database{}))
 			assert.Len(t, result, tt.want)
 		})
 	}
diff --git a/pkg/model/provider/openai/attachments.go b/pkg/model/provider/openai/attachments.go
index 77060e766..a6ae79f55 100644
--- a/pkg/model/provider/openai/attachments.go
+++ b/pkg/model/provider/openai/attachments.go
@@ -16,7 +16,7 @@ import (
 	"github.com/docker/docker-agent/pkg/modelsdev"
 )
 
-// convertDocumentToResponseInputFromStore converts a chat.Document to zero or
+// convertDocumentToResponseInput converts a chat.Document to zero or
 // more ResponseInputContentUnionParam values for the OpenAI Responses API,
 // using the provided modelsdev.Store for capability lookup.
 //
@@ -25,8 +25,8 @@ import (
 //   - application/pdf with InlineData → OfInputFile (base64)
 //   - text MIMEs with InlineText → OfInputText with TXTEnvelope
 //   - unsupported / no content → nil (logged as warning)
-func convertDocumentToResponseInputFromStore(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]responses.ResponseInputContentUnionParam, error) {
-	mc := modelcaps.LoadFromStore(store, modelID)
+func convertDocumentToResponseInput(ctx context.Context, doc chat.Document, modelID string, store *modelsdev.Store) ([]responses.ResponseInputContentUnionParam, error) {
+	mc := modelcaps.Load(store, modelID)
 	return convertDocumentToResponseInputWithCaps(ctx, doc, mc)
 }
 
diff --git a/pkg/model/provider/openai/client.go b/pkg/model/provider/openai/client.go
index 0d278efbe..f3eb5d038 100644
--- a/pkg/model/provider/openai/client.go
+++ b/pkg/model/provider/openai/client.go
@@ -26,7 +26,6 @@ import (
 	"github.com/docker/docker-agent/pkg/model/provider/oaistream"
 	"github.com/docker/docker-agent/pkg/model/provider/options"
 	"github.com/docker/docker-agent/pkg/modelinfo"
-	"github.com/docker/docker-agent/pkg/modelsdev"
 	"github.com/docker/docker-agent/pkg/rag/prompts"
 	"github.com/docker/docker-agent/pkg/rag/types"
 	"github.com/docker/docker-agent/pkg/tools"
@@ -42,8 +41,6 @@ type Client struct {
 	// wsPool is initialized in NewClient when transport=websocket is configured.
 	// It maintains a persistent WebSocket connection across requests.
 	wsPool *wsPool
-
-	modelsStore *modelsdev.Store // initialised in NewClient
 }
 
 // NewClient creates a new OpenAI client from the provided configuration
@@ -153,20 +150,13 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro
 
 	slog.DebugContext(ctx, "OpenAI client created successfully", "model", cfg.Model)
 
-	store, err := modelsdev.NewStore()
-	if err != nil {
-		slog.WarnContext(ctx, "openai: failed to load models.dev store, attachments will use conservative caps", "error", err)
-		store = modelsdev.NewDatabaseStore(&modelsdev.Database{})
-	}
-
 	client := &Client{
 		Config: base.Config{
 			ModelConfig:  *cfg,
 			ModelOptions: globalOptions,
 			Env:          env,
 		},
-		clientFn:    clientFn,
-		modelsStore: store,
+		clientFn: clientFn,
 	}
 
 	// Pre-create the WebSocket pool when the transport is configured.
@@ -191,7 +181,7 @@ func (c *Client) Close() {
 // convertMessages converts chat.Message to openai.ChatCompletionMessageParamUnion
 // using the shared oaistream implementation.
 func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) []openai.ChatCompletionMessageParamUnion {
-	return oaistream.ConvertMessagesFromStore(ctx, messages, c.ID(), c.modelsStore)
+	return oaistream.ConvertMessages(ctx, messages, c.ID(), c.ModelOptions.ModelsDevStore())
 }
 
 // CreateChatCompletionStream creates a streaming chat completion request
@@ -623,7 +613,7 @@ func (c *Client) convertMessagesToResponseInput(ctx context.Context, messages []
 						}
 					case chat.MessagePartTypeDocument:
 						if part.Document != nil {
-							docParts, err := convertDocumentToResponseInputFromStore(ctx, *part.Document, c.ID(), c.modelsStore)
+							docParts, err := convertDocumentToResponseInput(ctx, *part.Document, c.ID(), c.ModelOptions.ModelsDevStore())
 							if err != nil {
 								slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name)
 								continue
diff --git a/pkg/model/provider/options/options.go b/pkg/model/provider/options/options.go
index 8ac2866bc..329e2b276 100644
--- a/pkg/model/provider/options/options.go
+++ b/pkg/model/provider/options/options.go
@@ -2,6 +2,7 @@ package options
 
 import (
 	"github.com/docker/docker-agent/pkg/config/latest"
+	"github.com/docker/docker-agent/pkg/modelsdev"
 )
 
 type ModelOptions struct {
@@ -11,6 +12,7 @@ type ModelOptions struct {
 	noThinking       bool
 	maxTokens        int64
 	providers        map[string]latest.ProviderConfig
+	modelsDevStore   *modelsdev.Store
 }
 
 func (c *ModelOptions) Gateway() string {
@@ -37,6 +39,10 @@ func (c *ModelOptions) Providers() map[string]latest.ProviderConfig {
 	return c.providers
 }
 
+func (c *ModelOptions) ModelsDevStore() *modelsdev.Store {
+	return c.modelsDevStore
+}
+
 type Opt func(*ModelOptions)
 
 func WithGateway(gateway string) Opt {
@@ -75,6 +81,12 @@ func WithProviders(providers map[string]latest.ProviderConfig) Opt {
 	}
 }
 
+func WithModelsDevStore(store *modelsdev.Store) Opt {
+	return func(cfg *ModelOptions) {
+		cfg.modelsDevStore = store
+	}
+}
+
 // FromModelOptions converts a concrete ModelOptions value into a slice of
 // Opt configuration functions. Later Opts override earlier ones when applied.
 func FromModelOptions(m ModelOptions) []Opt {
@@ -97,5 +109,8 @@ func FromModelOptions(m ModelOptions) []Opt {
 	if len(m.providers) > 0 {
 		out = append(out, WithProviders(m.providers))
 	}
+	if m.modelsDevStore != nil {
+		out = append(out, WithModelsDevStore(m.modelsDevStore))
+	}
 	return out
 }
diff --git a/pkg/modelsdev/store.go b/pkg/modelsdev/store.go
index 936063d69..d2981db1d 100644
--- a/pkg/modelsdev/store.go
+++ b/pkg/modelsdev/store.go
@@ -26,21 +26,19 @@ const (
 // Store manages access to the models.dev data.
 // All methods are safe for concurrent use.
 //
-// Use NewStore to obtain the process-wide singleton instance.
 // The database is loaded on first access via GetDatabase and
-// shared across all callers, avoiding redundant disk/network I/O.
+// then cached in memory for the lifetime of the Store.
 type Store struct {
 	cacheFile string
 	mu        sync.Mutex
 	db        *Database
 }
 
-// NewStore returns the process-wide singleton Store.
-//
-// The database is loaded lazily on the first call to GetDatabase and
-// then cached in memory so that every caller shares one copy.
-// The first call creates the cache directory if it does not exist.
-var NewStore = sync.OnceValues(func() (*Store, error) {
+// NewStore creates a new Store backed by the on-disk cache under ~/.cagent.
+// Callers should create one Store and share it rather than calling NewStore
+// repeatedly. RuntimeConfig.ModelsDevStore() is the standard way to obtain
+// a shared instance.
+func NewStore() (*Store, error) {
 	homeDir, err := os.UserHomeDir()
 	if err != nil {
 		return nil, fmt.Errorf("failed to get user home directory: %w", err)
@@ -54,7 +52,7 @@ var NewStore = sync.OnceValues(func() (*Store, error) {
 	return &Store{
 		cacheFile: filepath.Join(cacheDir, CacheFileName),
 	}, nil
-})
+}
 
 // NewDatabaseStore creates a Store pre-populated with the given database.
 // The returned store serves data entirely from memory and never fetches
diff --git a/pkg/rag/builder.go b/pkg/rag/builder.go
index c1919ca41..2553bf9a3 100644
--- a/pkg/rag/builder.go
+++ b/pkg/rag/builder.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"log/slog"
 
+	"github.com/docker/docker-agent/pkg/config"
 	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/environment"
 	"github.com/docker/docker-agent/pkg/model/provider"
@@ -22,6 +23,7 @@ type ManagersBuildConfig struct {
 	Env           environment.Provider
 	Models        map[string]latest.ModelConfig    // Model configurations from config
 	Providers     map[string]latest.ProviderConfig // Custom provider configurations from config
+	RuntimeConfig *config.RuntimeConfig
 }
 
 // NewProvider creates a model provider using the build config's environment,
@@ -58,6 +60,7 @@ func NewManager(
 		Env:           buildCfg.Env,
 		ModelsGateway: buildCfg.ModelsGateway,
 		RespectVCS:    ragCfg.GetRespectVCS(),
+		RuntimeConfig: buildCfg.RuntimeConfig,
 	}
 
 	strategyConfigs, strategyEvents, err := buildStrategyConfigs(ctx, *ragCfg, strategyBuildCtx, ragName)
diff --git a/pkg/rag/strategy/embedding.go b/pkg/rag/strategy/embedding.go
index 74d1b8942..f9f133e58 100644
--- a/pkg/rag/strategy/embedding.go
+++ b/pkg/rag/strategy/embedding.go
@@ -54,11 +54,14 @@ func CreateEmbeddingProvider(ctx context.Context, modelName string, buildCtx Bui
 		modelID = modelCfg.Provider + "/" + modelCfg.Model
 	}
 
-	// Create models.dev store for pricing
-	modelsStore, err := modelsdev.NewStore()
-	if err != nil {
-		slog.DebugContext(ctx, "Failed to create models.dev store for RAG pricing; cost tracking disabled",
-			"error", err)
+	var modelsStore *modelsdev.Store
+	if buildCtx.RuntimeConfig != nil {
+		var err error
+		modelsStore, err = buildCtx.RuntimeConfig.ModelsDevStore()
+		if err != nil {
+			slog.DebugContext(ctx, "Failed to create models.dev store for RAG pricing; cost tracking disabled",
+				"error", err)
+		}
 	}
 
 	return &EmbeddingConfig{
diff --git a/pkg/rag/strategy/strategy.go b/pkg/rag/strategy/strategy.go
index eb1b6e74d..c21194128 100644
--- a/pkg/rag/strategy/strategy.go
+++ b/pkg/rag/strategy/strategy.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 
+	"github.com/docker/docker-agent/pkg/config"
 	"github.com/docker/docker-agent/pkg/config/latest"
 	"github.com/docker/docker-agent/pkg/environment"
 	"github.com/docker/docker-agent/pkg/model/provider"
@@ -21,6 +22,7 @@ type BuildContext struct {
 	Env           environment.Provider
 	ModelsGateway string
 	RespectVCS    bool // Whether to respect VCS ignore files (e.g., .gitignore) when collecting files
+	RuntimeConfig *config.RuntimeConfig
 }
 
 // NewProvider creates a model provider using the build context's environment,
diff --git a/pkg/teamloader/registry.go b/pkg/teamloader/registry.go
index fdee48fc4..593d44612 100644
--- a/pkg/teamloader/registry.go
+++ b/pkg/teamloader/registry.go
@@ -523,6 +523,7 @@ func createRAGTool(ctx context.Context, toolset latest.Toolset, parentDir string
 		Env:           runConfig.EnvProvider(),
 		Models:        runConfig.Models,
 		Providers:     runConfig.Providers,
+		RuntimeConfig: runConfig,
 	})
 	if err != nil {
 		return nil, fmt.Errorf("failed to create RAG manager: %w", err)
diff --git a/pkg/teamloader/teamloader.go b/pkg/teamloader/teamloader.go
index 34191e3e2..acddc7791 100644
--- a/pkg/teamloader/teamloader.go
+++ b/pkg/teamloader/teamloader.go
@@ -20,7 +20,6 @@ import (
 	"github.com/docker/docker-agent/pkg/model/provider"
 	"github.com/docker/docker-agent/pkg/model/provider/dmr"
 	"github.com/docker/docker-agent/pkg/model/provider/options"
-	"github.com/docker/docker-agent/pkg/modelsdev"
 	"github.com/docker/docker-agent/pkg/permissions"
 	"github.com/docker/docker-agent/pkg/skills"
 	"github.com/docker/docker-agent/pkg/team"
@@ -107,7 +106,7 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c
 	// Resolve model aliases (e.g., "claude-sonnet-4-5" -> "claude-sonnet-4-5-20250929")
 	// This ensures the API uses the pinned model version. The original name is preserved
 	// in DisplayModel so the sidebar and other UI elements show the user-configured name.
-	modelsStore, err := modelsdev.NewStore()
+	modelsStore, err := runConfig.ModelsDevStore()
 	if err != nil {
 		slog.DebugContext(ctx, "Failed to create modelsdev store for alias resolution", "error", err)
 	} else {
@@ -287,7 +286,7 @@ func getModelsForAgent(ctx context.Context, cfg *latest.Config, a *latest.AgentC
 	var models []provider.Provider
 
-	// Obtain the singleton store once, outside the loop.
-	modelsStore, modelsStoreErr := modelsdev.NewStore()
+	// Obtain the shared store from runConfig once, outside the loop.
+	modelsStore, modelsStoreErr := runConfig.ModelsDevStore()
 
 	for name := range strings.SplitSeq(a.Model, ",") {
 		modelCfg, exists := cfg.Models[name]
@@ -321,6 +320,9 @@ func getModelsForAgent(ctx context.Context, cfg *latest.Config, a *latest.AgentC
 		if maxTokens != nil {
 			opts = append(opts, options.WithMaxTokens(*maxTokens))
 		}
+		if modelsStoreErr == nil {
+			opts = append(opts, options.WithModelsDevStore(modelsStore))
+		}
 
 		// Pass the full models map for routing rules to resolve model references
 		model, err := provider.NewWithModels(ctx,
@@ -348,7 +350,7 @@ func getFallbackModelsForAgent(ctx context.Context, cfg *latest.Config, a *lates
 	var fallbackModels []provider.Provider
 
-	// Obtain the singleton store once, outside the loop.
-	modelsStore, modelsStoreErr := modelsdev.NewStore()
+	// Obtain the shared store from runConfig once, outside the loop.
+	modelsStore, modelsStoreErr := runConfig.ModelsDevStore()
 
 	for _, name := range a.GetFallbackModels() {
 		modelCfg, exists := cfg.Models[name]
@@ -381,6 +383,9 @@ func getFallbackModelsForAgent(ctx context.Context, cfg *latest.Config, a *lates
 		if maxTokens != nil {
 			opts = append(opts, options.WithMaxTokens(*maxTokens))
 		}
+		if modelsStoreErr == nil {
+			opts = append(opts, options.WithModelsDevStore(modelsStore))
+		}
 
 		// Pass the full models map for routing rules to resolve model references
 		model, err := provider.NewWithModels(ctx,