From d12b78076aefb25f45c83d577a7445d2c789e8bb Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 14 May 2026 12:34:59 +0000 Subject: [PATCH] fix(intercept/messages): convert enabled thinking to adaptive for Bedrock Opus 4.7+ Claude Opus 4.7 (and future adaptive-only Bedrock models) reject the legacy thinking.type "enabled" + budget_tokens shape with a 400. Claude Code falls back to that shape when it cannot read the upstream model's capability metadata, which is exactly the case when AI Bridge sits between the client and Bedrock. This is the symmetric counterpart to the adaptive -> enabled conversion added in #225 for older Bedrock models. The new conversion is gated on a bedrockModelRequiresAdaptiveThinking helper that matches Opus 4.7 model IDs (and ARN-style application inference profile names). The effort level is derived from the original budget_tokens / max_tokens ratio using the midpoints of the forward mapping's anchor ratios, so a payload that round-trips through both conversions lands on the same effort level it started with. An explicit output_config.effort already present in the request is preserved. Adaptive-only models support output_config natively (no beta flag), so the field-strip pass is updated to exempt output_config for those models. Fixes #280 --- intercept/messages/base.go | 46 ++++++++++-- intercept/messages/base_test.go | 65 +++++++++++++++- intercept/messages/reqpayload.go | 68 ++++++++++++++++- intercept/messages/reqpayload_test.go | 103 ++++++++++++++++++++++++++ 4 files changed, 274 insertions(+), 8 deletions(-) diff --git a/intercept/messages/base.go b/intercept/messages/base.go index 7fd190df..52852220 100644 --- a/intercept/messages/base.go +++ b/intercept/messages/base.go @@ -334,7 +334,8 @@ func (*interceptionBase) withAWSBedrockOptions(ctx context.Context, cfg *aibconf // augmentRequestForBedrock will change the model used for the request since AWS Bedrock doesn't support // Anthropics' model names. It also converts adaptive thinking to enabled with a budget for models that -// don't support adaptive thinking natively. +// don't support adaptive thinking natively, or enabled thinking to adaptive for models that only support +// adaptive (Opus 4.7+). func (i *interceptionBase) augmentRequestForBedrock() { if i.bedrockCfg == nil { return @@ -348,7 +349,21 @@ func (i *interceptionBase) augmentRequestForBedrock() { } i.reqPayload = updated - if !bedrockModelSupportsAdaptiveThinking(model) { + switch { + case bedrockModelRequiresAdaptiveThinking(model): + // Symmetric conversion for adaptive-only models (Opus 4.7+): rewrite + // thinking.type "enabled" with budget_tokens to the "adaptive" shape, + // since Bedrock returns 400 for these models when the legacy shape is + // used. Claude Code falls back to the legacy shape when it cannot + // read the upstream model's capability metadata (which is the case + // when AI Bridge is in the path). + updated, err = i.reqPayload.convertEnabledThinkingForBedrock() + if err != nil { + i.logger.Warn(context.Background(), "failed to convert enabled thinking for Bedrock", slog.Error(err)) + return + } + i.reqPayload = updated + case !bedrockModelSupportsAdaptiveThinking(model): updated, err = i.reqPayload.convertAdaptiveThinkingForBedrock() if err != nil { i.logger.Warn(context.Background(), "failed to convert adaptive thinking for Bedrock", slog.Error(err)) @@ -363,8 +378,15 @@ func (i *interceptionBase) augmentRequestForBedrock() { filterBedrockBetaFlags(i.clientHeaders, model) } - // Strip body fields that Bedrock does not accept. - updated, err = i.reqPayload.removeUnsupportedBedrockFields(i.clientHeaders) + // Strip body fields that Bedrock does not accept. Adaptive-only models + // (Opus 4.7+) support output_config natively without a beta flag, so + // keep it for those models even when the effort-2025-11-24 flag is + // absent from the request. + var exemptFields []string + if bedrockModelRequiresAdaptiveThinking(model) { + exemptFields = append(exemptFields, messagesReqPathOutputConfig) + } + updated, err = i.reqPayload.removeUnsupportedBedrockFields(i.clientHeaders, exemptFields...) if err != nil { i.logger.Warn(context.Background(), "failed to remove unsupported fields for Bedrock", slog.Error(err)) return @@ -373,11 +395,23 @@ func (i *interceptionBase) augmentRequestForBedrock() { } // bedrockModelSupportsAdaptiveThinking returns true if the given Bedrock model ID -// supports the "adaptive" thinking type natively (i.e. Claude 4.6 models). +// supports the "adaptive" thinking type natively (i.e. Claude 4.6 models, and +// adaptive-only models such as Opus 4.7+). // See https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-adaptive-thinking.html func bedrockModelSupportsAdaptiveThinking(model string) bool { return strings.Contains(model, "anthropic.claude-opus-4-6") || - strings.Contains(model, "anthropic.claude-sonnet-4-6") + strings.Contains(model, "anthropic.claude-sonnet-4-6") || + bedrockModelRequiresAdaptiveThinking(model) +} + +// bedrockModelRequiresAdaptiveThinking returns true if the given Bedrock model +// ID *only* supports the "adaptive" thinking type and rejects the legacy +// "enabled" + budget_tokens shape with a 400. Claude Opus 4.7 was the first +// model in this category. +// +// See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-anthropic-claude-opus-4-7.html +func bedrockModelRequiresAdaptiveThinking(model string) bool { + return strings.Contains(model, "anthropic.claude-opus-4-7") } // filterBedrockBetaFlags removes unsupported beta flags from the Anthropic-Beta diff --git a/intercept/messages/base_test.go b/intercept/messages/base_test.go index 2c78adc0..a33c0058 100644 --- a/intercept/messages/base_test.go +++ b/intercept/messages/base_test.go @@ -691,7 +691,8 @@ func TestAugmentRequestForBedrock_AdaptiveThinking(t *testing.T) { clientBetaFlags string expectThinkingType string - expectBudgetTokens int64 // 0 means budget_tokens should not be present + expectBudgetTokens int64 // 0 means budget_tokens should not be present + expectEffort string // expected output_config.effort; "" means must not be present expectRemovedFields []string expectKeptFields []string expectBetaValues []string // expected separate Anthropic-Beta header values @@ -746,6 +747,7 @@ func TestAugmentRequestForBedrock_AdaptiveThinking(t *testing.T) { bedrockModel: "anthropic.claude-opus-4-5-20250929-v1:0", clientBetaFlags: "effort-2025-11-24,interleaved-thinking-2025-05-14", requestBody: `{"max_tokens":10000,"output_config":{"effort":"high"}}`, + expectEffort: "high", expectKeptFields: []string{"output_config"}, expectBetaValues: []string{"effort-2025-11-24", "interleaved-thinking-2025-05-14"}, }, @@ -793,6 +795,60 @@ func TestAugmentRequestForBedrock_AdaptiveThinking(t *testing.T) { requestBody: `{"max_tokens":10000,"output_config":{"effort":"high"},"metadata":{"user_id":"u123"},"service_tier":"auto","container":"ctr_abc","inference_geo":"us","context_management":{"type":"auto"}}`, expectRemovedFields: []string{"output_config", "metadata", "service_tier", "container", "inference_geo", "context_management"}, }, + + // Adaptive-only models (Opus 4.7+) — see issue #280. + // Symmetric counterpart of the adaptive -> enabled conversion in #225. + { + name: "opus_4_7_model_with_enabled_thinking_is_converted_to_adaptive", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":5000}}`, + expectThinkingType: "adaptive", + expectEffort: "medium", // 5000/10000 = 0.5 -> medium + expectKeptFields: []string{"output_config"}, + }, + { + name: "opus_4_7_model_with_enabled_thinking_low_budget_is_converted_with_low_effort", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000}}`, + expectThinkingType: "adaptive", + expectEffort: "low", + expectKeptFields: []string{"output_config"}, + }, + { + name: "opus_4_7_model_with_adaptive_thinking_is_unchanged", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"adaptive"}}`, + expectThinkingType: "adaptive", + }, + { + name: "opus_4_7_model_without_thinking_field_is_unchanged", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000}`, + }, + { + name: "opus_4_7_model_preserves_explicit_output_config_effort", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000},"output_config":{"effort":"max"}}`, + expectThinkingType: "adaptive", + expectEffort: "max", // ratio would say low, but explicit max wins + expectKeptFields: []string{"output_config"}, + }, + { + name: "opus_4_7_model_keeps_output_config_without_effort_beta_flag", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"adaptive"},"output_config":{"effort":"high"}}`, + expectThinkingType: "adaptive", + expectEffort: "high", + expectKeptFields: []string{"output_config"}, + }, + { + name: "arn_style_opus_4_7_application_inference_profile_is_treated_as_adaptive_only", + bedrockModel: "arn:aws:bedrock:us-east-1:123:application-inference-profile/global.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":8000}}`, + expectThinkingType: "adaptive", + expectEffort: "high", // 8000/10000 = 0.8 -> high + expectKeptFields: []string{"output_config"}, + }, } for _, tc := range tests { @@ -845,6 +901,13 @@ func TestAugmentRequestForBedrock_AdaptiveThinking(t *testing.T) { require.True(t, gjson.GetBytes(i.reqPayload, field).Exists(), "%s should be kept", field) } + effort := gjson.GetBytes(i.reqPayload, "output_config.effort") + if tc.expectEffort == "" { + require.False(t, effort.Exists(), "output_config.effort should not be set") + } else { + require.Equal(t, tc.expectEffort, effort.String()) + } + got := clientHeaders.Values("Anthropic-Beta") require.Equal(t, tc.expectBetaValues, got) }) diff --git a/intercept/messages/reqpayload.go b/intercept/messages/reqpayload.go index dfe52fc8..eedea918 100644 --- a/intercept/messages/reqpayload.go +++ b/intercept/messages/reqpayload.go @@ -74,6 +74,9 @@ var ( // If the beta flag is present in the (already-filtered) Anthropic-Beta header, // the field is kept; otherwise it is stripped. Model-specific beta flags must // be removed from the header before this check (see filterBedrockBetaFlags). + // Adaptive-only models (Opus 4.7+) are exempt for output_config since they + // support it natively without a beta flag, see + // bedrockModelRequiresAdaptiveThinking. bedrockBetaGatedFields = map[string]string{ // output_config requires the effort beta (Opus 4.5 only). messagesReqPathOutputConfig: "effort-2025-11-24", @@ -372,12 +375,72 @@ func (p RequestPayload) convertAdaptiveThinkingForBedrock() (RequestPayload, err }) } +// convertEnabledThinkingForBedrock converts thinking.type "enabled" with a +// budget_tokens budget to the "adaptive" thinking type plus an output_config.effort +// derived from the budget_tokens / max_tokens ratio. The conversion is needed for +// Bedrock models that only support the "adaptive" thinking.type (Opus 4.7+). +// +// See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-anthropic-claude-opus-4-7.html +// and https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-adaptive-thinking.html +// +// This is the symmetric counterpart to convertAdaptiveThinkingForBedrock; the +// ratio thresholds are the midpoints between the forward mapping's anchor +// ratios (low=0.2, medium=0.5, high=0.8, max=0.95), so a payload that +// round-trips through both conversions lands on the same effort level it +// started with. +// +// An explicit output_config.effort already present in the request is preserved. +func (p RequestPayload) convertEnabledThinkingForBedrock() (RequestPayload, error) { + thinkingType := gjson.GetBytes(p, messagesReqPathThinkingType) + if thinkingType.String() != constEnabled { + return p, nil + } + + // Derive effort from budget_tokens / max_tokens. If either is missing or + // unusable, fall back to "high" so the resulting request matches the + // default effort assumption in convertAdaptiveThinkingForBedrock. + derivedEffort := "high" + budgetTokens := gjson.GetBytes(p, messagesReqPathThinkingBudgetTokens).Int() + maxTokens := gjson.GetBytes(p, messagesReqPathMaxTokens).Int() + if budgetTokens > 0 && maxTokens > 0 { + ratio := float64(budgetTokens) / float64(maxTokens) + switch { + case ratio < 0.35: // midpoint of low (0.2) and medium (0.5) + derivedEffort = "low" + case ratio < 0.65: // midpoint of medium (0.5) and high (0.8) + derivedEffort = "medium" + case ratio < 0.875: // midpoint of high (0.8) and max (0.95) + derivedEffort = "high" + default: + derivedEffort = "max" + } + } + + updated, err := p.set(messagesReqPathThinking, map[string]string{"type": constAdaptive}) + if err != nil { + return p, xerrors.Errorf("set thinking: %w", err) + } + + // Preserve an explicit output_config.effort if the caller set one. Only + // inject the derived value when the field is absent, so we don't override + // intent. + if gjson.GetBytes(updated, messagesReqPathOutputConfigEffort).String() != "" { + return updated, nil + } + return updated.set(messagesReqPathOutputConfigEffort, derivedEffort) +} + // removeUnsupportedBedrockFields strips top-level fields that Bedrock does not // support from the payload. Fields that are gated behind a beta flag are only // removed when the corresponding flag is absent from the Anthropic-Beta header. // Model-specific beta flags must already be filtered from the header before // calling this method (see filterBedrockBetaFlags). -func (p RequestPayload) removeUnsupportedBedrockFields(headers http.Header) (RequestPayload, error) { +// +// Fields exempted by exemptFields are always kept regardless of beta flag +// state. Adaptive-only Bedrock models (Opus 4.7+) require output_config +// without a beta flag, so callers pass the field through this set to bypass +// the effort-2025-11-24 gate. +func (p RequestPayload) removeUnsupportedBedrockFields(headers http.Header, exemptFields ...string) (RequestPayload, error) { var payloadMap map[string]any if err := json.Unmarshal(p, &payloadMap); err != nil { return p, xerrors.Errorf("failed to unmarshal request payload when removing unsupported Bedrock fields: %w", err) @@ -391,6 +454,9 @@ func (p RequestPayload) removeUnsupportedBedrockFields(headers http.Header) (Req // Strip beta-gated fields only when their beta flag is missing. betaValues := headers.Values("Anthropic-Beta") for field, requiredFlag := range bedrockBetaGatedFields { + if slices.Contains(exemptFields, field) { + continue + } if !slices.Contains(betaValues, requiredFlag) { delete(payloadMap, field) } diff --git a/intercept/messages/reqpayload_test.go b/intercept/messages/reqpayload_test.go index a5de61f8..fe3c535a 100644 --- a/intercept/messages/reqpayload_test.go +++ b/intercept/messages/reqpayload_test.go @@ -361,6 +361,109 @@ func TestRequestPayloadConvertAdaptiveThinkingForBedrock(t *testing.T) { } } +func TestRequestPayloadConvertEnabledThinkingForBedrock(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + + requestBody string + + expectedThinkingType string + expectedEffort string + }{ + { + name: "no_thinking_field_is_no_op", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"messages":[]}`, + }, + { + name: "adaptive_thinking_is_no_op", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"adaptive"},"messages":[]}`, + expectedThinkingType: "adaptive", + }, + { + name: "disabled_thinking_is_no_op", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"disabled"},"messages":[]}`, + expectedThinkingType: "disabled", + }, + { + name: "enabled_with_low_ratio_maps_to_low_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "low", + }, + { + name: "enabled_with_medium_ratio_maps_to_medium_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":5000},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "medium", + }, + { + name: "enabled_with_high_ratio_maps_to_high_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":8000},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "high", + }, + { + name: "enabled_with_max_ratio_maps_to_max_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":9500},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "max", + }, + { + name: "enabled_at_low_medium_boundary_maps_to_medium", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":3500},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "medium", // 0.35 boundary: lands in medium + }, + { + name: "enabled_without_budget_falls_back_to_high_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled"},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "high", + }, + { + name: "enabled_without_max_tokens_falls_back_to_high_effort", + requestBody: `{"model":"claude-opus-4-7","thinking":{"type":"enabled","budget_tokens":5000},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "high", + }, + { + name: "enabled_preserves_explicit_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000},"output_config":{"effort":"max"},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "max", // ratio would say "low", but explicit "max" wins + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + payload := mustMessagesPayload(t, tc.requestBody) + updatedPayload, err := payload.convertEnabledThinkingForBedrock() + require.NoError(t, err) + + thinking := gjson.GetBytes(updatedPayload, messagesReqPathThinking) + require.NotEqual(t, tc.expectedThinkingType == "", thinking.Exists(), "thinking should not be set") + require.Equal(t, tc.expectedThinkingType, gjson.GetBytes(updatedPayload, messagesReqPathThinkingType).String()) + + // budget_tokens must always be absent after a successful conversion. + budgetTokens := gjson.GetBytes(updatedPayload, messagesReqPathThinkingBudgetTokens) + if tc.expectedThinkingType == "adaptive" { + require.False(t, budgetTokens.Exists(), "budget_tokens should be removed after conversion") + } + + effort := gjson.GetBytes(updatedPayload, messagesReqPathOutputConfigEffort) + if tc.expectedEffort == "" { + // Effort is only set when we converted an "enabled" payload. + return + } + require.Equal(t, tc.expectedEffort, effort.String()) + }) + } +} + func TestRequestPayloadDisableParallelToolCalls(t *testing.T) { t.Parallel()