From a6c70449888dfed0fe279b516993c785e77eb74c Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 19 May 2026 10:27:09 +0200 Subject: [PATCH] feat(modelerrors): classify overflow errors by kind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the single ContextOverflowError bucket into three OverflowKinds so the runtime and clients can react differently to each: * tokens — conversation exceeds the model's context window. Compaction can usually help. * wire — request body exceeds the provider's wire-level limit (e.g. HTTP 413). The latest turn alone is over the cap; compaction-as-retry cannot help. * media — image, PDF, or similar attachment too large. Classification runs two tiers, in order: 1. Structured signals (high confidence): HTTP 413, body.error.type = "request_too_large", body.error.code = "context_length_exceeded". 2. Provider-prose substring patterns as fallback, covering the observed wording across Anthropic, OpenAI, Bedrock, Gemini, Mistral, Groq, Vertex, OpenRouter, Ollama, Kimi, MiniMax, z.ai, and others. Best-effort; provider wording is not contractual. FormatError now returns three distinct, actionable, provider-agnostic messages instead of one. The runtime emits new ErrorCodes "request_too_large" and "media_too_large" alongside the existing "context_exceeded" so external consumers can render the right hint. Behaviour is preserved for previously recognised errors, with one exception: IsContextOverflowError keeps every earlier substring pattern except the generic "reduce the length", which is narrowed to Groq's "reduce the length of the messages", and it additionally matches the new provider patterns listed above. Retry classification is unchanged, and auto-compaction still fires on overflow. NewContextOverflowError now auto-fills Kind from the underlying error so existing wrap sites get the correct shape for free. 
--- pkg/modelerrors/modelerrors.go | 223 +++++++++++++++++++++------ pkg/modelerrors/modelerrors_test.go | 231 ++++++++++++++++++++++++++++ pkg/runtime/event.go | 10 +- pkg/runtime/loop_steps.go | 11 +- 4 files changed, 430 insertions(+), 45 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index dfa78b1f4..081d5d9c2 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -76,19 +76,51 @@ const ( DefaultCooldown = 1 * time.Minute ) +// OverflowKind classifies the cause of a context overflow, so the runtime +// and UI can react differently to each shape. +// +// - [OverflowKindTokens]: the accumulated conversation exceeds the model's +// context window. Token-count rejection. Compaction can usually help. +// - [OverflowKindWire]: the request body exceeds the provider's wire-level +// limit (e.g. Anthropic's 32 MB cap, gateway 413s). Compaction usually +// CANNOT help because the offending single turn still has to be sent. +// - [OverflowKindMedia]: an image, PDF, or similar attachment in the +// conversation exceeds the provider's media constraints (size, page +// count, dimensions). +type OverflowKind string + +const ( + OverflowKindTokens OverflowKind = "tokens" + OverflowKindWire OverflowKind = "wire" + OverflowKindMedia OverflowKind = "media" +) + // ContextOverflowError wraps an underlying error to indicate that the failure -// was caused by the conversation context exceeding the model's context window. +// was caused by the conversation context exceeding some provider-side limit. // This is used to trigger auto-compaction in the runtime loop instead of // surfacing raw HTTP errors to the user. +// +// Kind classifies the specific shape of the overflow ([OverflowKindTokens] by +// default for backwards compatibility). Use [NewContextOverflowError] to have +// it set automatically by classification, or build the struct directly to +// force a Kind. 
type ContextOverflowError struct { Underlying error + Kind OverflowKind } // NewContextOverflowError creates a ContextOverflowError wrapping the given -// underlying error. Use this constructor rather than building the struct -// directly so that future field additions don't break callers. +// underlying error. The Kind is inferred from the underlying error via +// [classifyOverflow]; if classification yields no result, Kind defaults to +// [OverflowKindTokens] (the historical behaviour). Use this constructor +// rather than building the struct directly so future field additions don't +// break callers. func NewContextOverflowError(underlying error) *ContextOverflowError { - return &ContextOverflowError{Underlying: underlying} + kind := classifyOverflow(underlying) + if kind == "" { + kind = OverflowKindTokens + } + return &ContextOverflowError{Underlying: underlying, Kind: kind} } func (e *ContextOverflowError) Error() string { @@ -102,60 +134,155 @@ func (e *ContextOverflowError) Unwrap() error { return e.Underlying } -// contextOverflowPatterns contains error message substrings that indicate the -// prompt/context exceeds the model's context window. These patterns are checked -// case-insensitively against error messages from various providers. -var contextOverflowPatterns = []string{ - "prompt is too long", - "maximum context length", - "context length exceeded", - "context_length_exceeded", - "max_tokens must be greater than", +// tokenOverflowPatterns matches token-count rejections from various providers. +// Best-effort substring match (case-insensitive) against the error message. +// Provider error wording is not contractual and drifts over time; this list +// is heuristics derived from observed errors. Adding a provider only requires +// appending a phrase. 
+var tokenOverflowPatterns = []string{ + "prompt is too long", // Anthropic, Vertex (with Anthropic body) + "prompt too long", // Ollama ("prompt too long; exceeded ...") + "maximum context length", // OpenAI, OpenRouter, DeepSeek, vLLM + "context length exceeded", // OpenAI legacy + "context_length_exceeded", // OpenAI structured code + "input is too long", // Bedrock + "input token count", // Gemini ("...exceeds the maximum") + "exceeds the context window", // OpenAI Responses API + "reduce the length of the messages", // Groq + "exceeded model token limit", // Kimi, Moonshot + "context window exceeds limit", // MiniMax + "model_context_window_exceeded", // z.ai + "max_tokens must be greater than", // Anthropic edge case: thinking-budget cascade "maximum number of tokens", "content length exceeds", - "request too large", - "payload too large", - "input is too long", "exceeds the model's max token", "token limit", "reduce your prompt", - "reduce the length", } -// IsContextOverflowError checks whether the error indicates the conversation -// context has exceeded the model's context window. It inspects both structured -// SDK error types and raw error message patterns. -// -// Recognised patterns include: -// - Anthropic 400 "prompt is too long: N tokens > M maximum" -// - Anthropic 400 "max_tokens must be greater than thinking.budget_tokens" -// (emitted when the prompt is so large that max_tokens can't accommodate -// the thinking budget — a proxy for context overflow) -// - OpenAI 400 "maximum context length" / "context_length_exceeded" -// - Anthropic 500 that is actually a context overflow (heuristic: the error -// message is opaque but the conversation was already near the limit) -// -// This function intentionally does NOT match generic 500 errors; callers -// that want to treat an opaque 500 as overflow must check separately with -// additional context (e.g., session token counts). 
+// wireOverflowPatterns matches wire-level rejections — the whole request body +// is too big to send regardless of context window. These trigger different +// recovery than token overflows (compaction-as-retry won't help when the +// latest turn alone is over the wire cap). +var wireOverflowPatterns = []string{ + "request_too_large", // Anthropic structured error.type + "request too large", // Anthropic prose + "payload too large", // HTTP 413 status text + "request entity too large", // RFC 7231 status text +} + +// mediaOverflowPatterns matches media-specific rejections (image too big, PDF +// too many pages, etc.). Distinguished from token/wire because recovery +// strategies differ — stripping media from history can help here. +var mediaOverflowPatterns = []string{ + "image exceeds", // Anthropic + "image dimensions exceed", // Anthropic many-image + "pdf pages", // "maximum of N PDF pages" — Anthropic +} + +// IsContextOverflowError reports whether err indicates the conversation +// exceeded a provider-side limit (token window, wire size, or media size). +// Use [OverflowKindOf] to distinguish the three shapes. func IsContextOverflowError(err error) bool { if err == nil { return false } - - // Already wrapped if _, ok := errors.AsType[*ContextOverflowError](err); ok { return true } + return classifyOverflow(err) != "" +} - errMsg := strings.ToLower(err.Error()) - for _, pattern := range contextOverflowPatterns { - if strings.Contains(errMsg, pattern) { - return true +// OverflowKindOf returns the [OverflowKind] of err, or "" if it isn't an +// overflow error. If err is already wrapped in a [*ContextOverflowError] +// with a non-empty Kind, that Kind is returned; otherwise classification +// runs on the unwrapped error. 
+func OverflowKindOf(err error) OverflowKind { + if err == nil { + return "" + } + if coe, ok := errors.AsType[*ContextOverflowError](err); ok { + if coe.Kind != "" { + return coe.Kind } + // Legacy wrap with no Kind — try classifying the underlying. + if coe.Underlying != nil { + if k := classifyOverflow(coe.Underlying); k != "" { + return k + } + } + return OverflowKindTokens } + return classifyOverflow(err) +} - return false +// classifyOverflow inspects err for overflow signals and returns the matching +// [OverflowKind], or "" if err is not an overflow error. +// +// The classifier runs two tiers, in order: +// +// Tier 1 — structured signals (high confidence): +// * body.error.type == "request_too_large" → OverflowKindWire +// * body.error.code == "context_length_exceeded" → OverflowKindTokens +// * HTTP status 413 → OverflowKindWire +// +// Tier 2 — substring patterns (best-effort fallback): +// * mediaOverflowPatterns → OverflowKindMedia +// * wireOverflowPatterns → OverflowKindWire +// * tokenOverflowPatterns → OverflowKindTokens +// +// Tier 1 wins when both fire. Within Tier 2, media is checked first because +// it is the most specific; wire before tokens because some wire signals +// ("request too large") textually overlap with token-overflow phrasing in a +// way that benefits from the wire match coming first. +func classifyOverflow(err error) OverflowKind { + if err == nil { + return "" + } + // Already-wrapped errors carry their Kind; respect it. + if coe, ok := errors.AsType[*ContextOverflowError](err); ok && coe.Kind != "" { + return coe.Kind + } + + raw := err.Error() + + // Tier 1: structured body fields. 
+ if body := firstJSONObject(raw); body != nil { + var parsed providerErrorBody + if json.Unmarshal(body, &parsed) == nil && parsed.Error != nil { + if parsed.Error.Type == "request_too_large" { + return OverflowKindWire + } + if code := scalarString(parsed.Error.Code); code == "context_length_exceeded" { + return OverflowKindTokens + } + } + } + + // Tier 1: HTTP status code (413 → wire). + if se, ok := errors.AsType[*StatusError](err); ok && se.StatusCode == http.StatusRequestEntityTooLarge { + return OverflowKindWire + } + + // Tier 2: substring fallback. Media first (most specific), then wire, + // then tokens. + msg := strings.ToLower(raw) + for _, p := range mediaOverflowPatterns { + if strings.Contains(msg, p) { + return OverflowKindMedia + } + } + for _, p := range wireOverflowPatterns { + if strings.Contains(msg, p) { + return OverflowKindWire + } + } + for _, p := range tokenOverflowPatterns { + if strings.Contains(msg, p) { + return OverflowKindTokens + } + } + return "" } // statusCodeRegex matches HTTP status codes in error messages (e.g., "429", "500", ": 429 ") @@ -437,16 +564,26 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time } // FormatError returns a user-friendly error message for model errors. -// Context overflow gets a dedicated actionable message; other errors fall +// Overflow errors get a kind-specific actionable message; other errors fall // through to err.Error(). For HTTP errors that text comes from *StatusError, // which itself extracts structured provider details (see parseProviderError). +// +// The messages are provider-agnostic by design: docker-agent supports many +// LLM providers and the cap that triggered the rejection is a deployment +// detail of the provider, not something the user can act on by name. func FormatError(err error) string { if err == nil { return "" } - // Context overflow gets a dedicated, actionable message. 
- if _, ok := errors.AsType[*ContextOverflowError](err); ok { + switch OverflowKindOf(err) { + case OverflowKindWire: + return "Your message is too large for the AI provider. " + + "Try a smaller paste, attach the file separately, or split the content." + case OverflowKindMedia: + return "An image or file in this conversation is too large for the AI provider. " + + "Try a smaller file or remove it from context." + case OverflowKindTokens: return "The conversation has exceeded the model's context window and automatic compaction is not enabled. " + "Try running /compact to reduce the conversation size, or start a new session." } diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go index 287f4503b..eebfa8af3 100644 --- a/pkg/modelerrors/modelerrors_test.go +++ b/pkg/modelerrors/modelerrors_test.go @@ -159,6 +159,237 @@ func TestIsContextOverflowError(t *testing.T) { } } +func TestClassifyOverflow(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + err error + want OverflowKind + }{ + // ── Tier 1: structured ── + { + name: "anthropic 413 with request_too_large body", + err: &StatusError{StatusCode: 413, Err: errors.New( + `POST "https://api.anthropic.com/v1/messages": 413 Payload Too Large {"type":"error","error":{"type":"request_too_large","message":"Request exceeds 32MB limit"}}`)}, + want: OverflowKindWire, + }, + { + name: "openai context_length_exceeded structured code", + err: errors.New( + `POST "https://api.openai.com/v1/chat/completions": 400 Bad Request {"error":{"message":"maximum context length is 128000 tokens","type":"invalid_request_error","code":"context_length_exceeded"}}`), + want: OverflowKindTokens, + }, + { + name: "bare 413 with empty body still classifies as wire", + err: &StatusError{StatusCode: 413, Err: errors.New(`413 Payload Too Large`)}, + want: OverflowKindWire, + }, + { + name: "vertex 413 with prompt-too-long body — wire wins via 413", + err: &StatusError{StatusCode: 413, Err: 
errors.New( + `413 Payload Too Large {"error":{"message":"Prompt is too long"}}`)}, + want: OverflowKindWire, + }, + + // ── Tier 2: prose patterns by provider ── + { + name: "anthropic 400 prompt too long", + err: errors.New( + `POST "https://api.anthropic.com/v1/messages": 400 Bad Request {"type":"error","error":{"type":"invalid_request_error","message":"prompt is too long: 137500 tokens > 135000 maximum"}}`), + want: OverflowKindTokens, + }, + { + name: "gemini input token count exceeds maximum", + err: errors.New( + `googleapi: Error 400: input token count 200000 exceeds the maximum of 128000`), + want: OverflowKindTokens, + }, + { + name: "bedrock input is too long", + err: errors.New(`ValidationException: input is too long for requested model`), + want: OverflowKindTokens, + }, + { + name: "groq reduce the length", + err: errors.New(`please reduce the length of the messages or completion`), + want: OverflowKindTokens, + }, + { + name: "mistral via prose", + err: errors.New(`prompt is too long for model with 32768 maximum context length`), + want: OverflowKindTokens, + }, + { + name: "openai responses API", + err: errors.New(`This conversation exceeds the context window for this model`), + want: OverflowKindTokens, + }, + { + name: "ollama prose", + err: errors.New(`prompt too long; exceeded max context length`), + want: OverflowKindTokens, + }, + { + name: "z.ai non-standard finish reason as error text", + err: errors.New(`finish_reason: model_context_window_exceeded`), + want: OverflowKindTokens, + }, + { + name: "anthropic thinking-budget cascade (proxy for overflow)", + err: errors.New(`max_tokens must be greater than thinking.budget_tokens`), + want: OverflowKindTokens, + }, + + // ── Tier 2: wire patterns ── + { + name: "anthropic prose request too large", + err: errors.New(`request too large`), + want: OverflowKindWire, + }, + + // ── Tier 2: media patterns ── + { + name: "anthropic image exceeds size", + err: errors.New( + `400 Bad Request 
{"error":{"message":"image exceeds 5 MB maximum: 5316852 bytes > 5242880 bytes"}}`), + want: OverflowKindMedia, + }, + { + name: "anthropic many-image dimensions", + err: errors.New(`image dimensions exceed many-image request limit (2000px)`), + want: OverflowKindMedia, + }, + { + name: "anthropic PDF pages limit", + err: errors.New(`request must have a maximum of 100 PDF pages`), + want: OverflowKindMedia, + }, + + // ── Non-overflow errors ── + { + name: "rate limit is not overflow", + err: &StatusError{StatusCode: 429, Err: errors.New(`rate_limit_error`)}, + want: "", + }, + { + name: "500 server error is not overflow", + err: &StatusError{StatusCode: 500, Err: errors.New(`internal server error`)}, + want: "", + }, + { + name: "auth error is not overflow", + err: errors.New(`401 unauthorized: invalid api key`), + want: "", + }, + { + name: "nil", + err: nil, + want: "", + }, + + // ── Wrapped errors ── + { + name: "already wrapped with Kind preserves Kind", + err: &ContextOverflowError{Underlying: errors.New("anything"), Kind: OverflowKindWire}, + want: OverflowKindWire, + }, + { + name: "errors.As reaches wrapped Kind", + err: fmt.Errorf("all models failed: %w", + &ContextOverflowError{Underlying: errors.New("anything"), Kind: OverflowKindMedia}), + want: OverflowKindMedia, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := classifyOverflow(tt.err) + assert.Equal(t, tt.want, got, "classifyOverflow(%v)", tt.err) + }) + } +} + +func TestOverflowKindOf(t *testing.T) { + t.Parallel() + + t.Run("returns stored Kind on wrapped error", func(t *testing.T) { + t.Parallel() + err := &ContextOverflowError{Underlying: errors.New("anything"), Kind: OverflowKindWire} + assert.Equal(t, OverflowKindWire, OverflowKindOf(err)) + }) + + t.Run("classifies underlying when wrap has no Kind", func(t *testing.T) { + t.Parallel() + // Legacy wrap: Kind left empty, Underlying carries the signal. 
+ err := &ContextOverflowError{Underlying: errors.New("prompt is too long")} + assert.Equal(t, OverflowKindTokens, OverflowKindOf(err)) + }) + + t.Run("falls back to tokens on legacy wrap with no signal", func(t *testing.T) { + t.Parallel() + err := &ContextOverflowError{Underlying: errors.New("opaque")} + assert.Equal(t, OverflowKindTokens, OverflowKindOf(err)) + }) + + t.Run("returns empty on non-overflow error", func(t *testing.T) { + t.Parallel() + assert.Equal(t, OverflowKind(""), OverflowKindOf(errors.New("rate limited"))) + assert.Equal(t, OverflowKind(""), OverflowKindOf(nil)) + }) + + t.Run("NewContextOverflowError sets Kind from underlying", func(t *testing.T) { + t.Parallel() + // Anthropic 413 with structured body → wire + under := &StatusError{StatusCode: 413, Err: errors.New( + `413 Payload Too Large {"type":"error","error":{"type":"request_too_large","message":"too big"}}`)} + wrapped := NewContextOverflowError(under) + assert.Equal(t, OverflowKindWire, wrapped.Kind) + + // Token-overflow prose → tokens + wrapped = NewContextOverflowError(errors.New("prompt is too long")) + assert.Equal(t, OverflowKindTokens, wrapped.Kind) + + // Image rejection → media + wrapped = NewContextOverflowError(errors.New("image exceeds 5 MB maximum")) + assert.Equal(t, OverflowKindMedia, wrapped.Kind) + + // Unclassifiable underlying → tokens (safe historical default) + wrapped = NewContextOverflowError(errors.New("opaque")) + assert.Equal(t, OverflowKindTokens, wrapped.Kind) + }) +} + +func TestFormatError_OverflowKinds(t *testing.T) { + t.Parallel() + + t.Run("wire overflow surfaces request-too-large message", func(t *testing.T) { + t.Parallel() + err := &StatusError{StatusCode: 413, Err: errors.New(`Payload Too Large`)} + msg := FormatError(err) + assert.Contains(t, msg, "too large") + assert.NotContains(t, msg, "/compact") + assert.NotContains(t, msg, "context window") + }) + + t.Run("media overflow surfaces image-too-large message", func(t *testing.T) { + 
t.Parallel() + err := errors.New(`image exceeds 5 MB maximum: 5316852 bytes > 5242880 bytes`) + msg := FormatError(err) + assert.Contains(t, msg, "image or file") + assert.NotContains(t, msg, "/compact") + }) + + t.Run("token overflow keeps the /compact hint", func(t *testing.T) { + t.Parallel() + err := errors.New(`prompt is too long: 200000 tokens > 128000 maximum`) + msg := FormatError(err) + assert.Contains(t, msg, "context window") + assert.Contains(t, msg, "/compact") + }) +} + func TestContextOverflowError(t *testing.T) { t.Parallel() diff --git a/pkg/runtime/event.go b/pkg/runtime/event.go index 6211c40ad..8acd5cc63 100644 --- a/pkg/runtime/event.go +++ b/pkg/runtime/event.go @@ -200,10 +200,18 @@ func AgentChoiceReasoning(agentName, sessionID, content string) Event { // ErrorCode constants classify errors so external consumers (boards, // dashboards) can react programmatically without parsing free-form messages. +// +// The three overflow codes ([ErrorCodeContextExceeded], +// [ErrorCodeRequestTooLarge], [ErrorCodeMediaTooLarge]) mirror +// [modelerrors.OverflowKind] and let clients render distinct, actionable +// messages for each shape (token-count overflow, wire-level body cap, +// media-size rejection) instead of one generic "context window exceeded". const ( ErrorCodeModelError = "model_error" ErrorCodeRateLimited = "rate_limited" - ErrorCodeContextExceeded = "context_exceeded" + ErrorCodeContextExceeded = "context_exceeded" // OverflowKindTokens + ErrorCodeRequestTooLarge = "request_too_large" // OverflowKindWire + ErrorCodeMediaTooLarge = "media_too_large" // OverflowKindMedia ErrorCodeToolFailed = "tool_failed" ErrorCodeHookBlocked = "hook_blocked" ErrorCodeLoopDetected = "loop_detected" diff --git a/pkg/runtime/loop_steps.go b/pkg/runtime/loop_steps.go index f14148512..7ab158a8b 100644 --- a/pkg/runtime/loop_steps.go +++ b/pkg/runtime/loop_steps.go @@ -193,8 +193,17 @@ func (r *LocalRuntime) handleStreamError( // structured error events. 
The classification mirrors [modelerrors] // but reduces the granularity to a small set of codes that external // consumers can act on. +// +// Overflow errors are split by [modelerrors.OverflowKind] so the client +// can render kind-specific messages (e.g. "request too large" vs "context +// window exceeded") instead of one generic string. func classifyErrorCode(err error) string { - if modelerrors.IsContextOverflowError(err) { + switch modelerrors.OverflowKindOf(err) { + case modelerrors.OverflowKindWire: + return ErrorCodeRequestTooLarge + case modelerrors.OverflowKindMedia: + return ErrorCodeMediaTooLarge + case modelerrors.OverflowKindTokens: return ErrorCodeContextExceeded } _, rateLimited, _ := modelerrors.ClassifyModelError(err)