From dffb7bbcb2f1bd3faaea688686784ba0b7df0c69 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 19 May 2026 10:53:20 +0200 Subject: [PATCH] refactor(runtime): skip auto-compaction for non-token overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-compaction is only useful when the rejection is a token-count overflow — summarising older turns reduces the input token count. For wire-level overflow ([OverflowKindWire]) the request body itself exceeds the provider's cap, and the latest turn alone is over the limit; the compaction call would have to send the same oversized history and would also be rejected. For media overflow ([OverflowKindMedia]) we have no media-stripping during compaction today, so a retry would resend the same attachment and fail again. In both cases the recovery attempt always fails, then we surface the error anyway, while having spent an extra provider call and several seconds of wall-clock latency. This change skips compaction for those two kinds and surfaces the error directly. The token-overflow path is unchanged. --- pkg/runtime/loop_steps.go | 44 +++++++++++++++------ pkg/runtime/loop_steps_test.go | 72 ++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 12 deletions(-) diff --git a/pkg/runtime/loop_steps.go b/pkg/runtime/loop_steps.go index 7ab158a8b..740d50f27 100644 --- a/pkg/runtime/loop_steps.go +++ b/pkg/runtime/loop_steps.go @@ -161,22 +161,42 @@ func (r *LocalRuntime) handleStreamError( // request instead of surfacing raw errors. We allow at most // r.maxOverflowCompactions consecutive attempts to avoid an infinite // loop when compaction cannot reduce the context enough. + // + // Compaction only helps for token-count overflow ([OverflowKindTokens]): + // summarising older turns reduces the input token count. + // + // For wire-level overflow ([OverflowKindWire]) the request body itself + // is over the provider's cap; the latest turn alone is too large and + // would still have to be sent during compaction. For media overflow + // ([OverflowKindMedia]) we have no media-stripping path today, so a + // retry would resend the same oversized attachment. In both cases the + // recovery attempt always fails, so we skip it and surface the error + // directly — the user can act on it (smaller paste, smaller file) + // faster, without burning a second provider call. if _, ok := errors.AsType[*modelerrors.ContextOverflowError](err); ok && r.sessionCompaction && *overflowCompactions < r.maxOverflowCompactions { - *overflowCompactions++ - slog.WarnContext(ctx, "Context window overflow detected, attempting auto-compaction", + kind := modelerrors.OverflowKindOf(err) + if kind == modelerrors.OverflowKindTokens { + *overflowCompactions++ + slog.WarnContext(ctx, "Context window overflow detected, attempting auto-compaction", + "agent", a.Name(), + "session_id", sess.ID, + "input_tokens", sess.InputTokens, + "output_tokens", sess.OutputTokens, + "context_limit", contextLimit, + "attempt", *overflowCompactions, + ) + events.Emit(Warning( + "The conversation has exceeded the model's context window. Automatically compacting the conversation history...", + a.Name(), + )) + r.compactWithReason(ctx, sess, "", compactionReasonOverflow, events) + return streamErrorRetry + } + slog.InfoContext(ctx, "Skipping auto-compaction for non-token overflow", "agent", a.Name(), "session_id", sess.ID, - "input_tokens", sess.InputTokens, - "output_tokens", sess.OutputTokens, - "context_limit", contextLimit, - "attempt", *overflowCompactions, + "overflow_kind", string(kind), ) - events.Emit(Warning( - "The conversation has exceeded the model's context window. Automatically compacting the conversation history...", - a.Name(), - )) - r.compactWithReason(ctx, sess, "", compactionReasonOverflow, events) - return streamErrorRetry } streamSpan.RecordError(err) diff --git a/pkg/runtime/loop_steps_test.go b/pkg/runtime/loop_steps_test.go index 8c60a2c25..f2115acb6 100644 --- a/pkg/runtime/loop_steps_test.go +++ b/pkg/runtime/loop_steps_test.go @@ -292,3 +292,75 @@ func TestHandleStreamError_GenericError_FatalAndEmitsError(t *testing.T) { } assert.True(t, sawError, "generic error should emit ErrorEvent") } + +// TestHandleStreamError_WireOverflowSkipsCompaction verifies that wire-level +// overflow does not trigger auto-compaction. Compaction would resend the same +// oversized request that just got rejected, so it is guaranteed to fail; we +// surface the error directly instead. +func TestHandleStreamError_WireOverflowSkipsCompaction(t *testing.T) { + t.Parallel() + + rt, a := newTestRuntime(t) + sess := session.New() + events := make(chan Event, 16) + _, sp := noop.NewTracerProvider().Tracer("t").Start(t.Context(), "x") + + overflow := &modelerrors.ContextOverflowError{ + Underlying: errors.New("HTTP 413: Payload Too Large"), + Kind: modelerrors.OverflowKindWire, + } + overflowCount := 0 + + outcome := rt.handleStreamError(t.Context(), sess, a, overflow, 1000, &overflowCount, sp, NewChannelSink(events)) + + assert.Equal(t, streamErrorFatal, outcome, "wire overflow must not trigger compaction retry") + assert.Equal(t, 0, overflowCount, "wire overflow must not bump the compaction counter") + + got := drainEvents(events) + var sawError bool + var sawWarning bool + var errCode string + for _, ev := range got { + switch e := ev.(type) { + case *ErrorEvent: + sawError = true + errCode = e.Code + case *WarningEvent: + sawWarning = true + } + } + assert.True(t, sawError, "wire overflow should emit an ErrorEvent") + assert.False(t, sawWarning, "wire overflow should not emit the compaction warning") + assert.Equal(t, ErrorCodeRequestTooLarge, errCode, "ErrorEvent.Code should distinguish wire overflow") +} + +// TestHandleStreamError_MediaOverflowSkipsCompaction verifies the same skip +// behaviour for media-size rejections. Without media-stripping during +// compaction, the offending attachment would be resent and fail again. +func TestHandleStreamError_MediaOverflowSkipsCompaction(t *testing.T) { + t.Parallel() + + rt, a := newTestRuntime(t) + sess := session.New() + events := make(chan Event, 16) + _, sp := noop.NewTracerProvider().Tracer("t").Start(t.Context(), "x") + + overflow := &modelerrors.ContextOverflowError{ + Underlying: errors.New("image exceeds 5 MB maximum"), + Kind: modelerrors.OverflowKindMedia, + } + overflowCount := 0 + + outcome := rt.handleStreamError(t.Context(), sess, a, overflow, 1000, &overflowCount, sp, NewChannelSink(events)) + + assert.Equal(t, streamErrorFatal, outcome, "media overflow must not trigger compaction retry") + assert.Equal(t, 0, overflowCount, "media overflow must not bump the compaction counter") + + var errCode string + for _, ev := range drainEvents(events) { + if e, ok := ev.(*ErrorEvent); ok { + errCode = e.Code + } + } + assert.Equal(t, ErrorCodeMediaTooLarge, errCode, "ErrorEvent.Code should distinguish media overflow") +}