diff --git a/pkg/runtime/loop_steps.go b/pkg/runtime/loop_steps.go index 7ab158a8b..740d50f27 100644 --- a/pkg/runtime/loop_steps.go +++ b/pkg/runtime/loop_steps.go @@ -161,22 +161,42 @@ func (r *LocalRuntime) handleStreamError( // request instead of surfacing raw errors. We allow at most // r.maxOverflowCompactions consecutive attempts to avoid an infinite // loop when compaction cannot reduce the context enough. + // + // Compaction only helps for token-count overflow ([OverflowKindTokens]): + // summarising older turns reduces the input token count. + // + // For wire-level overflow ([OverflowKindWire]) the request body itself + // is over the provider's cap; the latest turn alone is too large and + // would still have to be sent during compaction. For media overflow + // ([OverflowKindMedia]) we have no media-stripping path today, so a + // retry would resend the same oversized attachment. In both cases the + // recovery attempt always fails, so we skip it and surface the error + // directly — the user can act on it (smaller paste, smaller file) + // faster, without burning a second provider call. if _, ok := errors.AsType[*modelerrors.ContextOverflowError](err); ok && r.sessionCompaction && *overflowCompactions < r.maxOverflowCompactions { - *overflowCompactions++ - slog.WarnContext(ctx, "Context window overflow detected, attempting auto-compaction", + kind := modelerrors.OverflowKindOf(err) + if kind == modelerrors.OverflowKindTokens { + *overflowCompactions++ + slog.WarnContext(ctx, "Context window overflow detected, attempting auto-compaction", + "agent", a.Name(), + "session_id", sess.ID, + "input_tokens", sess.InputTokens, + "output_tokens", sess.OutputTokens, + "context_limit", contextLimit, + "attempt", *overflowCompactions, + ) + events.Emit(Warning( + "The conversation has exceeded the model's context window. Automatically compacting the conversation history...", + a.Name(), + )) + r.compactWithReason(ctx, sess, "", compactionReasonOverflow, events) + return streamErrorRetry + } + slog.InfoContext(ctx, "Skipping auto-compaction for non-token overflow", "agent", a.Name(), "session_id", sess.ID, - "input_tokens", sess.InputTokens, - "output_tokens", sess.OutputTokens, - "context_limit", contextLimit, - "attempt", *overflowCompactions, + "overflow_kind", string(kind), ) - events.Emit(Warning( - "The conversation has exceeded the model's context window. Automatically compacting the conversation history...", - a.Name(), - )) - r.compactWithReason(ctx, sess, "", compactionReasonOverflow, events) - return streamErrorRetry } streamSpan.RecordError(err) diff --git a/pkg/runtime/loop_steps_test.go b/pkg/runtime/loop_steps_test.go index 8c60a2c25..f2115acb6 100644 --- a/pkg/runtime/loop_steps_test.go +++ b/pkg/runtime/loop_steps_test.go @@ -292,3 +292,75 @@ func TestHandleStreamError_GenericError_FatalAndEmitsError(t *testing.T) { } assert.True(t, sawError, "generic error should emit ErrorEvent") } + +// TestHandleStreamError_WireOverflowSkipsCompaction verifies that wire-level +// overflow does not trigger auto-compaction. Compaction would resend the same +// oversized request that just got rejected, so it is guaranteed to fail; we +// surface the error directly instead. +func TestHandleStreamError_WireOverflowSkipsCompaction(t *testing.T) { + t.Parallel() + + rt, a := newTestRuntime(t) + sess := session.New() + events := make(chan Event, 16) + _, sp := noop.NewTracerProvider().Tracer("t").Start(t.Context(), "x") + + overflow := &modelerrors.ContextOverflowError{ + Underlying: errors.New("HTTP 413: Payload Too Large"), + Kind: modelerrors.OverflowKindWire, + } + overflowCount := 0 + + outcome := rt.handleStreamError(t.Context(), sess, a, overflow, 1000, &overflowCount, sp, NewChannelSink(events)) + + assert.Equal(t, streamErrorFatal, outcome, "wire overflow must not trigger compaction retry") + assert.Equal(t, 0, overflowCount, "wire overflow must not bump the compaction counter") + + got := drainEvents(events) + var sawError bool + var sawWarning bool + var errCode string + for _, ev := range got { + switch e := ev.(type) { + case *ErrorEvent: + sawError = true + errCode = e.Code + case *WarningEvent: + sawWarning = true + } + } + assert.True(t, sawError, "wire overflow should emit an ErrorEvent") + assert.False(t, sawWarning, "wire overflow should not emit the compaction warning") + assert.Equal(t, ErrorCodeRequestTooLarge, errCode, "ErrorEvent.Code should distinguish wire overflow") +} + +// TestHandleStreamError_MediaOverflowSkipsCompaction verifies the same skip +// behaviour for media-size rejections. Without media-stripping during +// compaction, the offending attachment would be resent and fail again. +func TestHandleStreamError_MediaOverflowSkipsCompaction(t *testing.T) { + t.Parallel() + + rt, a := newTestRuntime(t) + sess := session.New() + events := make(chan Event, 16) + _, sp := noop.NewTracerProvider().Tracer("t").Start(t.Context(), "x") + + overflow := &modelerrors.ContextOverflowError{ + Underlying: errors.New("image exceeds 5 MB maximum"), + Kind: modelerrors.OverflowKindMedia, + } + overflowCount := 0 + + outcome := rt.handleStreamError(t.Context(), sess, a, overflow, 1000, &overflowCount, sp, NewChannelSink(events)) + + assert.Equal(t, streamErrorFatal, outcome, "media overflow must not trigger compaction retry") + assert.Equal(t, 0, overflowCount, "media overflow must not bump the compaction counter") + + var errCode string + for _, ev := range drainEvents(events) { + if e, ok := ev.(*ErrorEvent); ok { + errCode = e.Code + } + } + assert.Equal(t, ErrorCodeMediaTooLarge, errCode, "ErrorEvent.Code should distinguish media overflow") +}