Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 40 additions & 6 deletions intercept/messages/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,8 @@ func (*interceptionBase) withAWSBedrockOptions(ctx context.Context, cfg *aibconf

// augmentRequestForBedrock will change the model used for the request since AWS Bedrock doesn't support
// Anthropic's model names. It also converts adaptive thinking to enabled with a budget for models that
// don't support adaptive thinking natively, or enabled thinking to adaptive for models that only support
// adaptive (Opus 4.7+).
func (i *interceptionBase) augmentRequestForBedrock() {
if i.bedrockCfg == nil {
return
Expand All @@ -348,7 +349,21 @@ func (i *interceptionBase) augmentRequestForBedrock() {
}
i.reqPayload = updated

if !bedrockModelSupportsAdaptiveThinking(model) {
switch {
case bedrockModelRequiresAdaptiveThinking(model):
// Symmetric conversion for adaptive-only models (Opus 4.7+): rewrite
// thinking.type "enabled" with budget_tokens to the "adaptive" shape,
// since Bedrock returns 400 for these models when the legacy shape is
// used. Claude Code falls back to the legacy shape when it cannot
// read the upstream model's capability metadata (which is the case
// when AI Bridge is in the path).
updated, err = i.reqPayload.convertEnabledThinkingForBedrock()
if err != nil {
i.logger.Warn(context.Background(), "failed to convert enabled thinking for Bedrock", slog.Error(err))
return
}
i.reqPayload = updated
case !bedrockModelSupportsAdaptiveThinking(model):
updated, err = i.reqPayload.convertAdaptiveThinkingForBedrock()
if err != nil {
i.logger.Warn(context.Background(), "failed to convert adaptive thinking for Bedrock", slog.Error(err))
Expand All @@ -363,8 +378,15 @@ func (i *interceptionBase) augmentRequestForBedrock() {
filterBedrockBetaFlags(i.clientHeaders, model)
}

// Strip body fields that Bedrock does not accept.
updated, err = i.reqPayload.removeUnsupportedBedrockFields(i.clientHeaders)
// Strip body fields that Bedrock does not accept. Adaptive-only models
// (Opus 4.7+) support output_config natively without a beta flag, so
// keep it for those models even when the effort-2025-11-24 flag is
// absent from the request.
var exemptFields []string
if bedrockModelRequiresAdaptiveThinking(model) {
exemptFields = append(exemptFields, messagesReqPathOutputConfig)
}
updated, err = i.reqPayload.removeUnsupportedBedrockFields(i.clientHeaders, exemptFields...)
if err != nil {
i.logger.Warn(context.Background(), "failed to remove unsupported fields for Bedrock", slog.Error(err))
return
Expand All @@ -373,11 +395,23 @@ func (i *interceptionBase) augmentRequestForBedrock() {
}

// bedrockModelSupportsAdaptiveThinking returns true if the given Bedrock model ID
// supports the "adaptive" thinking type natively (i.e. Claude 4.6 models, and
// adaptive-only models such as Opus 4.7+).
//
// Matching is by substring, so region-prefixed IDs ("us.anthropic...") and
// inference-profile ARNs that embed the model ID are recognized as well.
//
// See https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-adaptive-thinking.html
func bedrockModelSupportsAdaptiveThinking(model string) bool {
	return strings.Contains(model, "anthropic.claude-opus-4-6") ||
		strings.Contains(model, "anthropic.claude-sonnet-4-6") ||
		// Adaptive-only models necessarily support adaptive thinking.
		bedrockModelRequiresAdaptiveThinking(model)
}

// bedrockModelRequiresAdaptiveThinking returns true if the given Bedrock model
// ID *only* supports the "adaptive" thinking type and rejects the legacy
// "enabled" + budget_tokens shape with a 400. Claude Opus 4.7 was the first
// model in this category.
//
// Matching is by substring, so region-prefixed IDs and inference-profile ARNs
// that embed the model ID are recognized as well.
//
// See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-anthropic-claude-opus-4-7.html
func bedrockModelRequiresAdaptiveThinking(model string) bool {
	return strings.Contains(model, "anthropic.claude-opus-4-7")
}

// filterBedrockBetaFlags removes unsupported beta flags from the Anthropic-Beta
Expand Down
65 changes: 64 additions & 1 deletion intercept/messages/base_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,8 @@ func TestAugmentRequestForBedrock_AdaptiveThinking(t *testing.T) {
clientBetaFlags string

expectThinkingType string
expectBudgetTokens int64 // 0 means budget_tokens should not be present
expectBudgetTokens int64 // 0 means budget_tokens should not be present
expectEffort string // expected output_config.effort; "" means must not be present
expectRemovedFields []string
expectKeptFields []string
expectBetaValues []string // expected separate Anthropic-Beta header values
Expand Down Expand Up @@ -746,6 +747,7 @@ func TestAugmentRequestForBedrock_AdaptiveThinking(t *testing.T) {
bedrockModel: "anthropic.claude-opus-4-5-20250929-v1:0",
clientBetaFlags: "effort-2025-11-24,interleaved-thinking-2025-05-14",
requestBody: `{"max_tokens":10000,"output_config":{"effort":"high"}}`,
expectEffort: "high",
expectKeptFields: []string{"output_config"},
expectBetaValues: []string{"effort-2025-11-24", "interleaved-thinking-2025-05-14"},
},
Expand Down Expand Up @@ -793,6 +795,60 @@ func TestAugmentRequestForBedrock_AdaptiveThinking(t *testing.T) {
requestBody: `{"max_tokens":10000,"output_config":{"effort":"high"},"metadata":{"user_id":"u123"},"service_tier":"auto","container":"ctr_abc","inference_geo":"us","context_management":{"type":"auto"}}`,
expectRemovedFields: []string{"output_config", "metadata", "service_tier", "container", "inference_geo", "context_management"},
},

// Adaptive-only models (Opus 4.7+) — see issue #280.
// Symmetric counterpart of the adaptive -> enabled conversion in #225.
{
name: "opus_4_7_model_with_enabled_thinking_is_converted_to_adaptive",
bedrockModel: "us.anthropic.claude-opus-4-7",
requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":5000}}`,
expectThinkingType: "adaptive",
expectEffort: "medium", // 5000/10000 = 0.5 -> medium
expectKeptFields: []string{"output_config"},
},
{
name: "opus_4_7_model_with_enabled_thinking_low_budget_is_converted_with_low_effort",
bedrockModel: "us.anthropic.claude-opus-4-7",
requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000}}`,
expectThinkingType: "adaptive",
expectEffort: "low",
expectKeptFields: []string{"output_config"},
},
{
name: "opus_4_7_model_with_adaptive_thinking_is_unchanged",
bedrockModel: "us.anthropic.claude-opus-4-7",
requestBody: `{"max_tokens":10000,"thinking":{"type":"adaptive"}}`,
expectThinkingType: "adaptive",
},
{
name: "opus_4_7_model_without_thinking_field_is_unchanged",
bedrockModel: "us.anthropic.claude-opus-4-7",
requestBody: `{"max_tokens":10000}`,
},
{
name: "opus_4_7_model_preserves_explicit_output_config_effort",
bedrockModel: "us.anthropic.claude-opus-4-7",
requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000},"output_config":{"effort":"max"}}`,
expectThinkingType: "adaptive",
expectEffort: "max", // ratio would say low, but explicit max wins
expectKeptFields: []string{"output_config"},
},
{
name: "opus_4_7_model_keeps_output_config_without_effort_beta_flag",
bedrockModel: "us.anthropic.claude-opus-4-7",
requestBody: `{"max_tokens":10000,"thinking":{"type":"adaptive"},"output_config":{"effort":"high"}}`,
expectThinkingType: "adaptive",
expectEffort: "high",
expectKeptFields: []string{"output_config"},
},
{
name: "arn_style_opus_4_7_application_inference_profile_is_treated_as_adaptive_only",
bedrockModel: "arn:aws:bedrock:us-east-1:123:application-inference-profile/global.anthropic.claude-opus-4-7",
requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":8000}}`,
expectThinkingType: "adaptive",
expectEffort: "high", // 8000/10000 = 0.8 -> high
expectKeptFields: []string{"output_config"},
},
}

for _, tc := range tests {
Expand Down Expand Up @@ -845,6 +901,13 @@ func TestAugmentRequestForBedrock_AdaptiveThinking(t *testing.T) {
require.True(t, gjson.GetBytes(i.reqPayload, field).Exists(), "%s should be kept", field)
}

effort := gjson.GetBytes(i.reqPayload, "output_config.effort")
if tc.expectEffort == "" {
require.False(t, effort.Exists(), "output_config.effort should not be set")
} else {
require.Equal(t, tc.expectEffort, effort.String())
}

got := clientHeaders.Values("Anthropic-Beta")
require.Equal(t, tc.expectBetaValues, got)
})
Expand Down
68 changes: 67 additions & 1 deletion intercept/messages/reqpayload.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ var (
// If the beta flag is present in the (already-filtered) Anthropic-Beta header,
// the field is kept; otherwise it is stripped. Model-specific beta flags must
// be removed from the header before this check (see filterBedrockBetaFlags).
// Adaptive-only models (Opus 4.7+) are exempt for output_config since they
// support it natively without a beta flag, see
// bedrockModelRequiresAdaptiveThinking.
bedrockBetaGatedFields = map[string]string{
// output_config requires the effort beta (Opus 4.5 only).
messagesReqPathOutputConfig: "effort-2025-11-24",
Expand Down Expand Up @@ -372,12 +375,72 @@ func (p RequestPayload) convertAdaptiveThinkingForBedrock() (RequestPayload, err
})
}

// convertEnabledThinkingForBedrock rewrites a request whose thinking.type is
// "enabled" into the "adaptive" shape needed by Bedrock models that only
// support adaptive thinking (Opus 4.7+). Payloads with any other thinking.type,
// or with no thinking field at all, are returned untouched.
//
// The thinking object is replaced wholesale with {"type": "adaptive"}, which
// drops budget_tokens. If the request carries no output_config.effort, one is
// derived from the budget_tokens / max_tokens ratio:
//
//	ratio < 0.35   -> "low"
//	ratio < 0.65   -> "medium"
//	ratio < 0.875  -> "high"
//	otherwise      -> "max"
//
// The cut-offs are the midpoints between the anchor ratios of the reverse
// conversion, convertAdaptiveThinkingForBedrock (low=0.2, medium=0.5, high=0.8,
// max=0.95); the intent is that a payload passed through both conversions ends
// up on the effort level it started with. When budget_tokens or max_tokens is
// missing or not positive, the effort defaults to "high". An effort explicitly
// set by the caller is never overwritten.
//
// See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-anthropic-claude-opus-4-7.html
// and https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-adaptive-thinking.html
func (p RequestPayload) convertEnabledThinkingForBedrock() (RequestPayload, error) {
	if gjson.GetBytes(p, messagesReqPathThinkingType).String() != constEnabled {
		return p, nil
	}

	// Start from the fallback and refine it only when both token counts are usable.
	effort := "high"
	budget := gjson.GetBytes(p, messagesReqPathThinkingBudgetTokens).Int()
	limit := gjson.GetBytes(p, messagesReqPathMaxTokens).Int()
	if budget > 0 && limit > 0 {
		share := float64(budget) / float64(limit)

		// Anything at or above the last cut-off is "max"; otherwise the first
		// band whose upper bound exceeds the ratio wins.
		effort = "max"
		for _, band := range []struct {
			below float64
			level string
		}{
			{0.35, "low"},     // midpoint of low (0.2) and medium (0.5)
			{0.65, "medium"},  // midpoint of medium (0.5) and high (0.8)
			{0.875, "high"},   // midpoint of high (0.8) and max (0.95)
		} {
			if share < band.below {
				effort = band.level
				break
			}
		}
	}

	converted, err := p.set(messagesReqPathThinking, map[string]string{"type": constAdaptive})
	if err != nil {
		return p, xerrors.Errorf("set thinking: %w", err)
	}

	// Only inject the derived effort when the caller did not state one, so an
	// explicit choice is respected.
	if existing := gjson.GetBytes(converted, messagesReqPathOutputConfigEffort).String(); existing == "" {
		return converted.set(messagesReqPathOutputConfigEffort, effort)
	}
	return converted, nil
}

// removeUnsupportedBedrockFields strips top-level fields that Bedrock does not
// support from the payload. Fields that are gated behind a beta flag are only
// removed when the corresponding flag is absent from the Anthropic-Beta header.
// Model-specific beta flags must already be filtered from the header before
// calling this method (see filterBedrockBetaFlags).
func (p RequestPayload) removeUnsupportedBedrockFields(headers http.Header) (RequestPayload, error) {
//
// Fields exempted by exemptFields are always kept regardless of beta flag
// state. Adaptive-only Bedrock models (Opus 4.7+) require output_config
// without a beta flag, so callers pass the field through this set to bypass
// the effort-2025-11-24 gate.
func (p RequestPayload) removeUnsupportedBedrockFields(headers http.Header, exemptFields ...string) (RequestPayload, error) {
var payloadMap map[string]any
if err := json.Unmarshal(p, &payloadMap); err != nil {
return p, xerrors.Errorf("failed to unmarshal request payload when removing unsupported Bedrock fields: %w", err)
Expand All @@ -391,6 +454,9 @@ func (p RequestPayload) removeUnsupportedBedrockFields(headers http.Header) (Req
// Strip beta-gated fields only when their beta flag is missing.
betaValues := headers.Values("Anthropic-Beta")
for field, requiredFlag := range bedrockBetaGatedFields {
if slices.Contains(exemptFields, field) {
continue
}
if !slices.Contains(betaValues, requiredFlag) {
delete(payloadMap, field)
}
Expand Down
103 changes: 103 additions & 0 deletions intercept/messages/reqpayload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,109 @@ func TestRequestPayloadConvertAdaptiveThinkingForBedrock(t *testing.T) {
}
}

// TestRequestPayloadConvertEnabledThinkingForBedrock exercises
// convertEnabledThinkingForBedrock in isolation: no-op inputs must come back
// without a thinking rewrite or an injected effort, and "enabled" inputs must
// become "adaptive" with budget_tokens removed and the expected
// output_config.effort.
func TestRequestPayloadConvertEnabledThinkingForBedrock(t *testing.T) {
	t.Parallel()

	testCases := []struct {
		name string

		requestBody string

		expectedThinkingType string // "" means the thinking field must be absent
		expectedEffort       string // "" means output_config.effort must be absent
	}{
		{
			name:        "no_thinking_field_is_no_op",
			requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"messages":[]}`,
		},
		{
			name:                 "adaptive_thinking_is_no_op",
			requestBody:          `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"adaptive"},"messages":[]}`,
			expectedThinkingType: "adaptive",
		},
		{
			name:                 "disabled_thinking_is_no_op",
			requestBody:          `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"disabled"},"messages":[]}`,
			expectedThinkingType: "disabled",
		},
		{
			name:                 "enabled_with_low_ratio_maps_to_low_effort",
			requestBody:          `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000},"messages":[]}`,
			expectedThinkingType: "adaptive",
			expectedEffort:       "low",
		},
		{
			name:                 "enabled_with_medium_ratio_maps_to_medium_effort",
			requestBody:          `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":5000},"messages":[]}`,
			expectedThinkingType: "adaptive",
			expectedEffort:       "medium",
		},
		{
			name:                 "enabled_with_high_ratio_maps_to_high_effort",
			requestBody:          `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":8000},"messages":[]}`,
			expectedThinkingType: "adaptive",
			expectedEffort:       "high",
		},
		{
			name:                 "enabled_with_max_ratio_maps_to_max_effort",
			requestBody:          `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":9500},"messages":[]}`,
			expectedThinkingType: "adaptive",
			expectedEffort:       "max",
		},
		{
			name:                 "enabled_at_low_medium_boundary_maps_to_medium",
			requestBody:          `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":3500},"messages":[]}`,
			expectedThinkingType: "adaptive",
			expectedEffort:       "medium", // 0.35 boundary: lands in medium
		},
		{
			name:                 "enabled_without_budget_falls_back_to_high_effort",
			requestBody:          `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled"},"messages":[]}`,
			expectedThinkingType: "adaptive",
			expectedEffort:       "high",
		},
		{
			name:                 "enabled_without_max_tokens_falls_back_to_high_effort",
			requestBody:          `{"model":"claude-opus-4-7","thinking":{"type":"enabled","budget_tokens":5000},"messages":[]}`,
			expectedThinkingType: "adaptive",
			expectedEffort:       "high",
		},
		{
			name:                 "enabled_preserves_explicit_effort",
			requestBody:          `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000},"output_config":{"effort":"max"},"messages":[]}`,
			expectedThinkingType: "adaptive",
			expectedEffort:       "max", // ratio would say "low", but explicit "max" wins
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			payload := mustMessagesPayload(t, tc.requestBody)
			updatedPayload, err := payload.convertEnabledThinkingForBedrock()
			require.NoError(t, err)

			// The thinking field must be present exactly when a type is expected.
			thinking := gjson.GetBytes(updatedPayload, messagesReqPathThinking)
			require.Equal(t, tc.expectedThinkingType != "", thinking.Exists(), "unexpected presence of the thinking field")
			require.Equal(t, tc.expectedThinkingType, gjson.GetBytes(updatedPayload, messagesReqPathThinkingType).String())

			// budget_tokens must always be absent after a successful conversion.
			budgetTokens := gjson.GetBytes(updatedPayload, messagesReqPathThinkingBudgetTokens)
			if tc.expectedThinkingType == "adaptive" {
				require.False(t, budgetTokens.Exists(), "budget_tokens should be removed after conversion")
			}

			// Effort is only set when an "enabled" payload was converted (or the
			// caller supplied one); no-op paths must not inject it.
			effort := gjson.GetBytes(updatedPayload, messagesReqPathOutputConfigEffort)
			if tc.expectedEffort == "" {
				require.False(t, effort.Exists(), "output_config.effort should not be set")
				return
			}
			require.Equal(t, tc.expectedEffort, effort.String())
		})
	}
}

func TestRequestPayloadDisableParallelToolCalls(t *testing.T) {
t.Parallel()

Expand Down
Loading