Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 32 additions & 12 deletions pkg/runtime/loop_steps.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,22 +161,42 @@ func (r *LocalRuntime) handleStreamError(
// request instead of surfacing raw errors. We allow at most
// r.maxOverflowCompactions consecutive attempts to avoid an infinite
// loop when compaction cannot reduce the context enough.
//
// Compaction only helps for token-count overflow ([OverflowKindTokens]):
// summarising older turns reduces the input token count.
//
// For wire-level overflow ([OverflowKindWire]) the request body itself
// is over the provider's cap; the latest turn alone is too large and
// would still have to be sent during compaction. For media overflow
// ([OverflowKindMedia]) we have no media-stripping path today, so a
// retry would resend the same oversized attachment. In both cases the
// recovery attempt always fails, so we skip it and surface the error
// directly — the user can act on it (smaller paste, smaller file)
// faster, without burning a second provider call.
if _, ok := errors.AsType[*modelerrors.ContextOverflowError](err); ok && r.sessionCompaction && *overflowCompactions < r.maxOverflowCompactions {
*overflowCompactions++
slog.WarnContext(ctx, "Context window overflow detected, attempting auto-compaction",
kind := modelerrors.OverflowKindOf(err)
if kind == modelerrors.OverflowKindTokens {
*overflowCompactions++
slog.WarnContext(ctx, "Context window overflow detected, attempting auto-compaction",
"agent", a.Name(),
"session_id", sess.ID,
"input_tokens", sess.InputTokens,
"output_tokens", sess.OutputTokens,
"context_limit", contextLimit,
"attempt", *overflowCompactions,
)
events.Emit(Warning(
"The conversation has exceeded the model's context window. Automatically compacting the conversation history...",
a.Name(),
))
r.compactWithReason(ctx, sess, "", compactionReasonOverflow, events)
return streamErrorRetry
}
slog.InfoContext(ctx, "Skipping auto-compaction for non-token overflow",
"agent", a.Name(),
"session_id", sess.ID,
"input_tokens", sess.InputTokens,
"output_tokens", sess.OutputTokens,
"context_limit", contextLimit,
"attempt", *overflowCompactions,
"overflow_kind", string(kind),
)
events.Emit(Warning(
"The conversation has exceeded the model's context window. Automatically compacting the conversation history...",
a.Name(),
))
r.compactWithReason(ctx, sess, "", compactionReasonOverflow, events)
return streamErrorRetry
}

streamSpan.RecordError(err)
Expand Down
72 changes: 72 additions & 0 deletions pkg/runtime/loop_steps_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,75 @@ func TestHandleStreamError_GenericError_FatalAndEmitsError(t *testing.T) {
}
assert.True(t, sawError, "generic error should emit ErrorEvent")
}

// TestHandleStreamError_WireOverflowSkipsCompaction verifies that wire-level
// overflow does not trigger auto-compaction. Compaction would resend the same
// oversized request that just got rejected, so it is guaranteed to fail; we
// surface the error directly instead.
func TestHandleStreamError_WireOverflowSkipsCompaction(t *testing.T) {
t.Parallel()

rt, a := newTestRuntime(t)
sess := session.New()
events := make(chan Event, 16)
_, sp := noop.NewTracerProvider().Tracer("t").Start(t.Context(), "x")

overflow := &modelerrors.ContextOverflowError{
Underlying: errors.New("HTTP 413: Payload Too Large"),
Kind: modelerrors.OverflowKindWire,
}
overflowCount := 0

outcome := rt.handleStreamError(t.Context(), sess, a, overflow, 1000, &overflowCount, sp, NewChannelSink(events))

assert.Equal(t, streamErrorFatal, outcome, "wire overflow must not trigger compaction retry")
assert.Equal(t, 0, overflowCount, "wire overflow must not bump the compaction counter")

got := drainEvents(events)
var sawError bool
var sawWarning bool
var errCode string
for _, ev := range got {
switch e := ev.(type) {
case *ErrorEvent:
sawError = true
errCode = e.Code
case *WarningEvent:
sawWarning = true
}
}
assert.True(t, sawError, "wire overflow should emit an ErrorEvent")
assert.False(t, sawWarning, "wire overflow should not emit the compaction warning")
assert.Equal(t, ErrorCodeRequestTooLarge, errCode, "ErrorEvent.Code should distinguish wire overflow")
}

// TestHandleStreamError_MediaOverflowSkipsCompaction verifies the same skip
// behaviour for media-size rejections. Without media-stripping during
// compaction, the offending attachment would be resent and fail again.
func TestHandleStreamError_MediaOverflowSkipsCompaction(t *testing.T) {
t.Parallel()

rt, a := newTestRuntime(t)
sess := session.New()
events := make(chan Event, 16)
_, sp := noop.NewTracerProvider().Tracer("t").Start(t.Context(), "x")

overflow := &modelerrors.ContextOverflowError{
Underlying: errors.New("image exceeds 5 MB maximum"),
Kind: modelerrors.OverflowKindMedia,
}
overflowCount := 0

outcome := rt.handleStreamError(t.Context(), sess, a, overflow, 1000, &overflowCount, sp, NewChannelSink(events))

assert.Equal(t, streamErrorFatal, outcome, "media overflow must not trigger compaction retry")
assert.Equal(t, 0, overflowCount, "media overflow must not bump the compaction counter")

var errCode string
for _, ev := range drainEvents(events) {
if e, ok := ev.(*ErrorEvent); ok {
errCode = e.Code
}
}
assert.Equal(t, ErrorCodeMediaTooLarge, errCode, "ErrorEvent.Code should distinguish media overflow")
}
Loading