From a6c70449888dfed0fe279b516993c785e77eb74c Mon Sep 17 00:00:00 2001
From: Trung Nguyen <trung.nguyen@docker.com>
Date: Tue, 19 May 2026 10:27:09 +0200
Subject: [PATCH] feat(modelerrors): classify overflow errors by kind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split the single ContextOverflowError bucket into three OverflowKinds so
the runtime and clients can react differently to each:

  * tokens — conversation exceeds the model's context window.
            Compaction can usually help.
  * wire   — request body exceeds the provider's wire-level limit
            (e.g. HTTP 413). The latest turn alone is over the cap;
            compaction-as-retry cannot help.
  * media  — image, PDF, or similar attachment too large.

Classification runs two tiers, in order:

  1. Structured signals (high confidence), checked in this order:
     body.error.type = "request_too_large", body.error.code =
     "context_length_exceeded", then HTTP 413.
  2. Provider-prose substring patterns as fallback, covering the
     observed wording across Anthropic, OpenAI, Bedrock, Gemini,
     Mistral, Groq, Vertex, OpenRouter, Ollama, Kimi, MiniMax, z.ai,
     and others. Best-effort; provider wording is not contractual.

FormatError now returns three distinct, actionable, provider-agnostic
messages instead of one. The runtime emits new ErrorCodes
"request_too_large" and "media_too_large" alongside the existing
"context_exceeded" so external consumers can render the right hint.

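Existing behaviour is largely preserved: IsContextOverflowError still
matches the previous patterns, except that the broad "reduce the length"
substring is narrowed to "reduce the length of the messages", and it now
also matches HTTP 413, the media patterns, and the newly added provider
phrases. Retry classification is unchanged, and auto-compaction still
fires on overflow. NewContextOverflowError now auto-fills Kind from the
underlying error so existing wrap sites get the correct shape for free.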
---
 pkg/modelerrors/modelerrors.go      | 223 +++++++++++++++++++++------
 pkg/modelerrors/modelerrors_test.go | 231 ++++++++++++++++++++++++++++
 pkg/runtime/event.go                |  10 +-
 pkg/runtime/loop_steps.go           |  11 +-
 4 files changed, 430 insertions(+), 45 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index dfa78b1f4..081d5d9c2 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -76,19 +76,51 @@ const (
 	DefaultCooldown = 1 * time.Minute
 )
 
+// OverflowKind classifies the cause of a context overflow, so the runtime
+// and UI can react differently to each shape.
+//
+//   - [OverflowKindTokens]: the accumulated conversation exceeds the model's
+//     context window. Token-count rejection. Compaction can usually help.
+//   - [OverflowKindWire]: the request body exceeds the provider's wire-level
+//     limit (e.g. Anthropic's 32 MB cap, gateway 413s). Compaction usually
+//     CANNOT help because the offending single turn still has to be sent.
+//   - [OverflowKindMedia]: an image, PDF, or similar attachment in the
+//     conversation exceeds the provider's media constraints (size, page
+//     count, dimensions).
+type OverflowKind string
+
+const (
+	OverflowKindTokens OverflowKind = "tokens"
+	OverflowKindWire   OverflowKind = "wire"
+	OverflowKindMedia  OverflowKind = "media"
+)
+
 // ContextOverflowError wraps an underlying error to indicate that the failure
-// was caused by the conversation context exceeding the model's context window.
+// was caused by the conversation context exceeding some provider-side limit.
 // This is used to trigger auto-compaction in the runtime loop instead of
 // surfacing raw HTTP errors to the user.
+//
+// Kind classifies the specific shape of the overflow. Use
+// [NewContextOverflowError] to have it set automatically by classification,
+// or build the struct directly to force a Kind. An empty Kind (legacy wraps)
+// is resolved by [OverflowKindOf], falling back to [OverflowKindTokens].
 type ContextOverflowError struct {
 	Underlying error
+	Kind       OverflowKind
 }
 
 // NewContextOverflowError creates a ContextOverflowError wrapping the given
-// underlying error. Use this constructor rather than building the struct
-// directly so that future field additions don't break callers.
+// underlying error. The Kind is inferred from the underlying error via
+// [classifyOverflow]; if classification yields no result, Kind defaults to
+// [OverflowKindTokens] (the historical behaviour). Use this constructor
+// rather than building the struct directly so future field additions don't
+// break callers.
 func NewContextOverflowError(underlying error) *ContextOverflowError {
-	return &ContextOverflowError{Underlying: underlying}
+	kind := classifyOverflow(underlying)
+	if kind == "" {
+		kind = OverflowKindTokens
+	}
+	return &ContextOverflowError{Underlying: underlying, Kind: kind}
 }
 
 func (e *ContextOverflowError) Error() string {
@@ -102,60 +134,155 @@ func (e *ContextOverflowError) Unwrap() error {
 	return e.Underlying
 }
 
-// contextOverflowPatterns contains error message substrings that indicate the
-// prompt/context exceeds the model's context window. These patterns are checked
-// case-insensitively against error messages from various providers.
-var contextOverflowPatterns = []string{
-	"prompt is too long",
-	"maximum context length",
-	"context length exceeded",
-	"context_length_exceeded",
-	"max_tokens must be greater than",
+// tokenOverflowPatterns matches token-count rejections from various providers.
+// Best-effort substring match (case-insensitive) against the error message.
+// Provider error wording is not contractual and drifts over time; this list
+// is heuristics derived from observed errors. Adding a provider only requires
+// appending a phrase.
+var tokenOverflowPatterns = []string{
+	"prompt is too long",                // Anthropic, Vertex (with Anthropic body)
+	"prompt too long",                   // Ollama ("prompt too long; exceeded ...")
+	"maximum context length",            // OpenAI, OpenRouter, DeepSeek, vLLM
+	"context length exceeded",           // OpenAI legacy
+	"context_length_exceeded",           // OpenAI structured code
+	"input is too long",                 // Bedrock
+	"input token count",                 // Gemini ("...exceeds the maximum")
+	"exceeds the context window",        // OpenAI Responses API
+	"reduce the length of the messages", // Groq
+	"exceeded model token limit",        // Kimi, Moonshot
+	"context window exceeds limit",      // MiniMax
+	"model_context_window_exceeded",     // z.ai
+	"max_tokens must be greater than",   // Anthropic edge case: thinking-budget cascade
 	"maximum number of tokens",
 	"content length exceeds",
-	"request too large",
-	"payload too large",
-	"input is too long",
 	"exceeds the model's max token",
 	"token limit",
 	"reduce your prompt",
-	"reduce the length",
 }
 
-// IsContextOverflowError checks whether the error indicates the conversation
-// context has exceeded the model's context window. It inspects both structured
-// SDK error types and raw error message patterns.
-//
-// Recognised patterns include:
-//   - Anthropic 400 "prompt is too long: N tokens > M maximum"
-//   - Anthropic 400 "max_tokens must be greater than thinking.budget_tokens"
-//     (emitted when the prompt is so large that max_tokens can't accommodate
-//     the thinking budget — a proxy for context overflow)
-//   - OpenAI 400 "maximum context length" / "context_length_exceeded"
-//   - Anthropic 500 that is actually a context overflow (heuristic: the error
-//     message is opaque but the conversation was already near the limit)
-//
-// This function intentionally does NOT match generic 500 errors; callers
-// that want to treat an opaque 500 as overflow must check separately with
-// additional context (e.g., session token counts).
+// wireOverflowPatterns matches wire-level rejections — the whole request body
+// is too big to send regardless of context window. These trigger different
+// recovery than token overflows (compaction-as-retry won't help when the
+// latest turn alone is over the wire cap).
+var wireOverflowPatterns = []string{
+	"request_too_large",        // Anthropic structured error.type
+	"request too large",        // Anthropic prose
+	"payload too large",        // HTTP 413 status text
+	"request entity too large", // RFC 7231 status text
+}
+
+// mediaOverflowPatterns matches media-specific rejections (image too big, PDF
+// too many pages, etc.). Distinguished from token/wire because recovery
+// strategies differ — stripping media from history can help here.
+var mediaOverflowPatterns = []string{
+	"image exceeds",           // Anthropic
+	"image dimensions exceed", // Anthropic many-image
+	"pdf pages",               // "maximum of N PDF pages" — Anthropic
+}
+
+// IsContextOverflowError reports whether err indicates the conversation
+// exceeded a provider-side limit (token window, wire size, or media size).
+// Use [OverflowKindOf] to distinguish the three shapes.
 func IsContextOverflowError(err error) bool {
 	if err == nil {
 		return false
 	}
-
-	// Already wrapped
 	if _, ok := errors.AsType[*ContextOverflowError](err); ok {
 		return true
 	}
+	return classifyOverflow(err) != ""
+}
 
-	errMsg := strings.ToLower(err.Error())
-	for _, pattern := range contextOverflowPatterns {
-		if strings.Contains(errMsg, pattern) {
-			return true
+// OverflowKindOf returns the [OverflowKind] of err, or "" if it isn't an
+// overflow error. A wrapping [*ContextOverflowError] with a non-empty Kind
+// yields that Kind; one with an empty Kind is classified from its Underlying
+// and defaults to [OverflowKindTokens]. Any other error is classified as-is.
+func OverflowKindOf(err error) OverflowKind {
+	if err == nil {
+		return ""
+	}
+	if coe, ok := errors.AsType[*ContextOverflowError](err); ok {
+		if coe.Kind != "" {
+			return coe.Kind
 		}
+		// Legacy wrap with no Kind — try classifying the underlying.
+		if coe.Underlying != nil {
+			if k := classifyOverflow(coe.Underlying); k != "" {
+				return k
+			}
+		}
+		return OverflowKindTokens
 	}
+	return classifyOverflow(err)
+}
 
-	return false
+// classifyOverflow inspects err for overflow signals and returns the matching
+// [OverflowKind], or "" if err is not an overflow error.
+//
+// The classifier runs two tiers, in order:
+//
+//	Tier 1 — structured signals (high confidence):
+//	  * body.error.type == "request_too_large"     → OverflowKindWire
+//	  * body.error.code == "context_length_exceeded" → OverflowKindTokens
+//	  * HTTP status 413                            → OverflowKindWire
+//
+//	Tier 2 — substring patterns (best-effort fallback):
+//	  * mediaOverflowPatterns → OverflowKindMedia
+//	  * wireOverflowPatterns  → OverflowKindWire
+//	  * tokenOverflowPatterns → OverflowKindTokens
+//
+// Tier 1 wins when both fire. Within Tier 2, media is checked first because
+// it is the most specific; wire is checked before tokens so that a message
+// matching both lists (e.g. "request too large" alongside "token limit")
+// is classified as wire rather than tokens.
+func classifyOverflow(err error) OverflowKind {
+	if err == nil {
+		return ""
+	}
+	// Already-wrapped errors carry their Kind; respect it.
+	if coe, ok := errors.AsType[*ContextOverflowError](err); ok && coe.Kind != "" {
+		return coe.Kind
+	}
+
+	raw := err.Error()
+
+	// Tier 1: structured body fields.
+	if body := firstJSONObject(raw); body != nil {
+		var parsed providerErrorBody
+		if json.Unmarshal(body, &parsed) == nil && parsed.Error != nil {
+			if parsed.Error.Type == "request_too_large" {
+				return OverflowKindWire
+			}
+			if code := scalarString(parsed.Error.Code); code == "context_length_exceeded" {
+				return OverflowKindTokens
+			}
+		}
+	}
+
+	// Tier 1: HTTP status code (413 → wire).
+	if se, ok := errors.AsType[*StatusError](err); ok && se.StatusCode == http.StatusRequestEntityTooLarge {
+		return OverflowKindWire
+	}
+
+	// Tier 2: substring fallback. Media first (most specific), then wire,
+	// then tokens.
+	msg := strings.ToLower(raw)
+	for _, p := range mediaOverflowPatterns {
+		if strings.Contains(msg, p) {
+			return OverflowKindMedia
+		}
+	}
+	for _, p := range wireOverflowPatterns {
+		if strings.Contains(msg, p) {
+			return OverflowKindWire
+		}
+	}
+	for _, p := range tokenOverflowPatterns {
+		if strings.Contains(msg, p) {
+			return OverflowKindTokens
+		}
+	}
+	return ""
 }
 
 // statusCodeRegex matches HTTP status codes in error messages (e.g., "429", "500", ": 429 ")
@@ -437,16 +564,26 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time
 }
 
 // FormatError returns a user-friendly error message for model errors.
-// Context overflow gets a dedicated actionable message; other errors fall
+// Overflow errors get a kind-specific actionable message; other errors fall
 // through to err.Error(). For HTTP errors that text comes from *StatusError,
 // which itself extracts structured provider details (see parseProviderError).
+//
+// The messages are provider-agnostic by design: docker-agent supports many
+// LLM providers and the cap that triggered the rejection is a deployment
+// detail of the provider, not something the user can act on by name.
 func FormatError(err error) string {
 	if err == nil {
 		return ""
 	}
 
-	// Context overflow gets a dedicated, actionable message.
-	if _, ok := errors.AsType[*ContextOverflowError](err); ok {
+	switch OverflowKindOf(err) {
+	case OverflowKindWire:
+		return "Your message is too large for the AI provider. " +
+			"Try a smaller paste, attach the file separately, or split the content."
+	case OverflowKindMedia:
+		return "An image or file in this conversation is too large for the AI provider. " +
+			"Try a smaller file or remove it from context."
+	case OverflowKindTokens:
 		return "The conversation has exceeded the model's context window and automatic compaction is not enabled. " +
 			"Try running /compact to reduce the conversation size, or start a new session."
 	}
diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go
index 287f4503b..eebfa8af3 100644
--- a/pkg/modelerrors/modelerrors_test.go
+++ b/pkg/modelerrors/modelerrors_test.go
@@ -159,6 +159,237 @@ func TestIsContextOverflowError(t *testing.T) {
 	}
 }
 
+func TestClassifyOverflow(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name string
+		err  error
+		want OverflowKind
+	}{
+		// ── Tier 1: structured ──
+		{
+			name: "anthropic 413 with request_too_large body",
+			err: &StatusError{StatusCode: 413, Err: errors.New(
+				`POST "https://api.anthropic.com/v1/messages": 413 Payload Too Large {"type":"error","error":{"type":"request_too_large","message":"Request exceeds 32MB limit"}}`)},
+			want: OverflowKindWire,
+		},
+		{
+			name: "openai context_length_exceeded structured code",
+			err: errors.New(
+				`POST "https://api.openai.com/v1/chat/completions": 400 Bad Request {"error":{"message":"maximum context length is 128000 tokens","type":"invalid_request_error","code":"context_length_exceeded"}}`),
+			want: OverflowKindTokens,
+		},
+		{
+			name: "bare 413 with empty body still classifies as wire",
+			err:  &StatusError{StatusCode: 413, Err: errors.New(`413 Payload Too Large`)},
+			want: OverflowKindWire,
+		},
+		{
+			name: "vertex 413 with prompt-too-long body — wire wins via 413",
+			err: &StatusError{StatusCode: 413, Err: errors.New(
+				`413 Payload Too Large {"error":{"message":"Prompt is too long"}}`)},
+			want: OverflowKindWire,
+		},
+
+		// ── Tier 2: prose patterns by provider ──
+		{
+			name: "anthropic 400 prompt too long",
+			err: errors.New(
+				`POST "https://api.anthropic.com/v1/messages": 400 Bad Request {"type":"error","error":{"type":"invalid_request_error","message":"prompt is too long: 137500 tokens > 135000 maximum"}}`),
+			want: OverflowKindTokens,
+		},
+		{
+			name: "gemini input token count exceeds maximum",
+			err: errors.New(
+				`googleapi: Error 400: input token count 200000 exceeds the maximum of 128000`),
+			want: OverflowKindTokens,
+		},
+		{
+			name: "bedrock input is too long",
+			err:  errors.New(`ValidationException: input is too long for requested model`),
+			want: OverflowKindTokens,
+		},
+		{
+			name: "groq reduce the length",
+			err:  errors.New(`please reduce the length of the messages or completion`),
+			want: OverflowKindTokens,
+		},
+		{
+			name: "mistral via prose",
+			err:  errors.New(`prompt is too long for model with 32768 maximum context length`),
+			want: OverflowKindTokens,
+		},
+		{
+			name: "openai responses API",
+			err:  errors.New(`This conversation exceeds the context window for this model`),
+			want: OverflowKindTokens,
+		},
+		{
+			name: "ollama prose",
+			err:  errors.New(`prompt too long; exceeded max context length`),
+			want: OverflowKindTokens,
+		},
+		{
+			name: "z.ai non-standard finish reason as error text",
+			err:  errors.New(`finish_reason: model_context_window_exceeded`),
+			want: OverflowKindTokens,
+		},
+		{
+			name: "anthropic thinking-budget cascade (proxy for overflow)",
+			err:  errors.New(`max_tokens must be greater than thinking.budget_tokens`),
+			want: OverflowKindTokens,
+		},
+
+		// ── Tier 2: wire patterns ──
+		{
+			name: "anthropic prose request too large",
+			err:  errors.New(`request too large`),
+			want: OverflowKindWire,
+		},
+
+		// ── Tier 2: media patterns ──
+		{
+			name: "anthropic image exceeds size",
+			err: errors.New(
+				`400 Bad Request {"error":{"message":"image exceeds 5 MB maximum: 5316852 bytes > 5242880 bytes"}}`),
+			want: OverflowKindMedia,
+		},
+		{
+			name: "anthropic many-image dimensions",
+			err:  errors.New(`image dimensions exceed many-image request limit (2000px)`),
+			want: OverflowKindMedia,
+		},
+		{
+			name: "anthropic PDF pages limit",
+			err:  errors.New(`request must have a maximum of 100 PDF pages`),
+			want: OverflowKindMedia,
+		},
+
+		// ── Non-overflow errors ──
+		{
+			name: "rate limit is not overflow",
+			err:  &StatusError{StatusCode: 429, Err: errors.New(`rate_limit_error`)},
+			want: "",
+		},
+		{
+			name: "500 server error is not overflow",
+			err:  &StatusError{StatusCode: 500, Err: errors.New(`internal server error`)},
+			want: "",
+		},
+		{
+			name: "auth error is not overflow",
+			err:  errors.New(`401 unauthorized: invalid api key`),
+			want: "",
+		},
+		{
+			name: "nil",
+			err:  nil,
+			want: "",
+		},
+
+		// ── Wrapped errors ──
+		{
+			name: "already wrapped with Kind preserves Kind",
+			err:  &ContextOverflowError{Underlying: errors.New("anything"), Kind: OverflowKindWire},
+			want: OverflowKindWire,
+		},
+		{
+			name: "errors.As reaches wrapped Kind",
+			err: fmt.Errorf("all models failed: %w",
+				&ContextOverflowError{Underlying: errors.New("anything"), Kind: OverflowKindMedia}),
+			want: OverflowKindMedia,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			got := classifyOverflow(tt.err)
+			assert.Equal(t, tt.want, got, "classifyOverflow(%v)", tt.err)
+		})
+	}
+}
+
+func TestOverflowKindOf(t *testing.T) {
+	t.Parallel()
+
+	t.Run("returns stored Kind on wrapped error", func(t *testing.T) {
+		t.Parallel()
+		err := &ContextOverflowError{Underlying: errors.New("anything"), Kind: OverflowKindWire}
+		assert.Equal(t, OverflowKindWire, OverflowKindOf(err))
+	})
+
+	t.Run("classifies underlying when wrap has no Kind", func(t *testing.T) {
+		t.Parallel()
+		// Legacy wrap: Kind left empty, Underlying carries the signal.
+		err := &ContextOverflowError{Underlying: errors.New("prompt is too long")}
+		assert.Equal(t, OverflowKindTokens, OverflowKindOf(err))
+	})
+
+	t.Run("falls back to tokens on legacy wrap with no signal", func(t *testing.T) {
+		t.Parallel()
+		err := &ContextOverflowError{Underlying: errors.New("opaque")}
+		assert.Equal(t, OverflowKindTokens, OverflowKindOf(err))
+	})
+
+	t.Run("returns empty on non-overflow error", func(t *testing.T) {
+		t.Parallel()
+		assert.Equal(t, OverflowKind(""), OverflowKindOf(errors.New("rate limited")))
+		assert.Equal(t, OverflowKind(""), OverflowKindOf(nil))
+	})
+
+	t.Run("NewContextOverflowError sets Kind from underlying", func(t *testing.T) {
+		t.Parallel()
+		// Anthropic 413 with structured body → wire
+		under := &StatusError{StatusCode: 413, Err: errors.New(
+			`413 Payload Too Large {"type":"error","error":{"type":"request_too_large","message":"too big"}}`)}
+		wrapped := NewContextOverflowError(under)
+		assert.Equal(t, OverflowKindWire, wrapped.Kind)
+
+		// Token-overflow prose → tokens
+		wrapped = NewContextOverflowError(errors.New("prompt is too long"))
+		assert.Equal(t, OverflowKindTokens, wrapped.Kind)
+
+		// Image rejection → media
+		wrapped = NewContextOverflowError(errors.New("image exceeds 5 MB maximum"))
+		assert.Equal(t, OverflowKindMedia, wrapped.Kind)
+
+		// Unclassifiable underlying → tokens (safe historical default)
+		wrapped = NewContextOverflowError(errors.New("opaque"))
+		assert.Equal(t, OverflowKindTokens, wrapped.Kind)
+	})
+}
+
+func TestFormatError_OverflowKinds(t *testing.T) {
+	t.Parallel()
+
+	t.Run("wire overflow surfaces request-too-large message", func(t *testing.T) {
+		t.Parallel()
+		err := &StatusError{StatusCode: 413, Err: errors.New(`Payload Too Large`)}
+		msg := FormatError(err)
+		assert.Contains(t, msg, "too large")
+		assert.NotContains(t, msg, "/compact")
+		assert.NotContains(t, msg, "context window")
+	})
+
+	t.Run("media overflow surfaces image-too-large message", func(t *testing.T) {
+		t.Parallel()
+		err := errors.New(`image exceeds 5 MB maximum: 5316852 bytes > 5242880 bytes`)
+		msg := FormatError(err)
+		assert.Contains(t, msg, "image or file")
+		assert.NotContains(t, msg, "/compact")
+	})
+
+	t.Run("token overflow keeps the /compact hint", func(t *testing.T) {
+		t.Parallel()
+		err := errors.New(`prompt is too long: 200000 tokens > 128000 maximum`)
+		msg := FormatError(err)
+		assert.Contains(t, msg, "context window")
+		assert.Contains(t, msg, "/compact")
+	})
+}
+
 func TestContextOverflowError(t *testing.T) {
 	t.Parallel()
 
diff --git a/pkg/runtime/event.go b/pkg/runtime/event.go
index 6211c40ad..8acd5cc63 100644
--- a/pkg/runtime/event.go
+++ b/pkg/runtime/event.go
@@ -200,10 +200,18 @@ func AgentChoiceReasoning(agentName, sessionID, content string) Event {
 
 // ErrorCode constants classify errors so external consumers (boards,
 // dashboards) can react programmatically without parsing free-form messages.
+//
+// The three overflow codes ([ErrorCodeContextExceeded],
+// [ErrorCodeRequestTooLarge], [ErrorCodeMediaTooLarge]) mirror
+// [modelerrors.OverflowKind] and let clients render distinct, actionable
+// messages for each shape (token-count overflow, wire-level body cap,
+// media-size rejection) instead of one generic "context window exceeded".
 const (
 	ErrorCodeModelError      = "model_error"
 	ErrorCodeRateLimited     = "rate_limited"
-	ErrorCodeContextExceeded = "context_exceeded"
+	ErrorCodeContextExceeded = "context_exceeded"  // OverflowKindTokens
+	ErrorCodeRequestTooLarge = "request_too_large" // OverflowKindWire
+	ErrorCodeMediaTooLarge   = "media_too_large"   // OverflowKindMedia
 	ErrorCodeToolFailed      = "tool_failed"
 	ErrorCodeHookBlocked     = "hook_blocked"
 	ErrorCodeLoopDetected    = "loop_detected"
diff --git a/pkg/runtime/loop_steps.go b/pkg/runtime/loop_steps.go
index f14148512..7ab158a8b 100644
--- a/pkg/runtime/loop_steps.go
+++ b/pkg/runtime/loop_steps.go
@@ -193,8 +193,17 @@ func (r *LocalRuntime) handleStreamError(
 // structured error events. The classification mirrors [modelerrors]
 // but reduces the granularity to a small set of codes that external
 // consumers can act on.
+//
+// Overflow errors are split by [modelerrors.OverflowKind] so the client
+// can render kind-specific messages (e.g. "request too large" vs "context
+// window exceeded") instead of one generic string.
 func classifyErrorCode(err error) string {
-	if modelerrors.IsContextOverflowError(err) {
+	switch modelerrors.OverflowKindOf(err) {
+	case modelerrors.OverflowKindWire:
+		return ErrorCodeRequestTooLarge
+	case modelerrors.OverflowKindMedia:
+		return ErrorCodeMediaTooLarge
+	case modelerrors.OverflowKindTokens:
 		return ErrorCodeContextExceeded
 	}
 	_, rateLimited, _ := modelerrors.ClassifyModelError(err)