diff --git a/intercept/messages/base.go b/intercept/messages/base.go index 7fd190d..5285222 100644 --- a/intercept/messages/base.go +++ b/intercept/messages/base.go @@ -334,7 +334,8 @@ func (*interceptionBase) withAWSBedrockOptions(ctx context.Context, cfg *aibconf // augmentRequestForBedrock will change the model used for the request since AWS Bedrock doesn't support // Anthropics' model names. It also converts adaptive thinking to enabled with a budget for models that -// don't support adaptive thinking natively. +// don't support adaptive thinking natively, or enabled thinking to adaptive for models that only support +// adaptive (Opus 4.7+). func (i *interceptionBase) augmentRequestForBedrock() { if i.bedrockCfg == nil { return @@ -348,7 +349,21 @@ func (i *interceptionBase) augmentRequestForBedrock() { } i.reqPayload = updated - if !bedrockModelSupportsAdaptiveThinking(model) { + switch { + case bedrockModelRequiresAdaptiveThinking(model): + // Symmetric conversion for adaptive-only models (Opus 4.7+): rewrite + // thinking.type "enabled" with budget_tokens to the "adaptive" shape, + // since Bedrock returns 400 for these models when the legacy shape is + // used. Claude Code falls back to the legacy shape when it cannot + // read the upstream model's capability metadata (which is the case + // when AI Bridge is in the path). + updated, err = i.reqPayload.convertEnabledThinkingForBedrock() + if err != nil { + i.logger.Warn(context.Background(), "failed to convert enabled thinking for Bedrock", slog.Error(err)) + return + } + i.reqPayload = updated + case !bedrockModelSupportsAdaptiveThinking(model): updated, err = i.reqPayload.convertAdaptiveThinkingForBedrock() if err != nil { i.logger.Warn(context.Background(), "failed to convert adaptive thinking for Bedrock", slog.Error(err)) @@ -363,8 +378,15 @@ func (i *interceptionBase) augmentRequestForBedrock() { filterBedrockBetaFlags(i.clientHeaders, model) } - // Strip body fields that Bedrock does not accept. 
// bedrockModelSupportsAdaptiveThinking reports whether the given Bedrock model
// ID accepts the "adaptive" thinking type natively. That is the case for the
// Claude 4.6 Opus and Sonnet families, and for every adaptive-only model
// recognised by bedrockModelRequiresAdaptiveThinking.
//
// Matching is by substring so that region-prefixed IDs and ARN-style inference
// profile IDs that embed the model family name are recognised as well.
//
// See https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-adaptive-thinking.html
func bedrockModelSupportsAdaptiveThinking(model string) bool {
	// Adaptive-only models trivially support adaptive thinking.
	if bedrockModelRequiresAdaptiveThinking(model) {
		return true
	}

	for _, family := range [...]string{
		"anthropic.claude-opus-4-6",
		"anthropic.claude-sonnet-4-6",
	} {
		if strings.Contains(model, family) {
			return true
		}
	}
	return false
}

// bedrockModelRequiresAdaptiveThinking reports whether the given Bedrock model
// ID accepts *only* the "adaptive" thinking type, i.e. it rejects the legacy
// "enabled" + budget_tokens shape with a 400. Claude Opus 4.7 was the first
// model in this category.
//
// See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-anthropic-claude-opus-4-7.html
func bedrockModelRequiresAdaptiveThinking(model string) bool {
	const adaptiveOnlyFamily = "anthropic.claude-opus-4-7"

	return strings.Contains(model, adaptiveOnlyFamily)
}
+ { + name: "opus_4_7_model_with_enabled_thinking_is_converted_to_adaptive", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":5000}}`, + expectThinkingType: "adaptive", + expectEffort: "medium", // 5000/10000 = 0.5 -> medium + expectKeptFields: []string{"output_config"}, + }, + { + name: "opus_4_7_model_with_enabled_thinking_low_budget_is_converted_with_low_effort", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000}}`, + expectThinkingType: "adaptive", + expectEffort: "low", + expectKeptFields: []string{"output_config"}, + }, + { + name: "opus_4_7_model_with_adaptive_thinking_is_unchanged", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"adaptive"}}`, + expectThinkingType: "adaptive", + }, + { + name: "opus_4_7_model_without_thinking_field_is_unchanged", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000}`, + }, + { + name: "opus_4_7_model_preserves_explicit_output_config_effort", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000},"output_config":{"effort":"max"}}`, + expectThinkingType: "adaptive", + expectEffort: "max", // ratio would say low, but explicit max wins + expectKeptFields: []string{"output_config"}, + }, + { + name: "opus_4_7_model_keeps_output_config_without_effort_beta_flag", + bedrockModel: "us.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"adaptive"},"output_config":{"effort":"high"}}`, + expectThinkingType: "adaptive", + expectEffort: "high", + expectKeptFields: []string{"output_config"}, + }, + { + name: "arn_style_opus_4_7_application_inference_profile_is_treated_as_adaptive_only", + bedrockModel: 
"arn:aws:bedrock:us-east-1:123:application-inference-profile/global.anthropic.claude-opus-4-7", + requestBody: `{"max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":8000}}`, + expectThinkingType: "adaptive", + expectEffort: "high", // 8000/10000 = 0.8 -> high + expectKeptFields: []string{"output_config"}, + }, } for _, tc := range tests { @@ -845,6 +901,13 @@ func TestAugmentRequestForBedrock_AdaptiveThinking(t *testing.T) { require.True(t, gjson.GetBytes(i.reqPayload, field).Exists(), "%s should be kept", field) } + effort := gjson.GetBytes(i.reqPayload, "output_config.effort") + if tc.expectEffort == "" { + require.False(t, effort.Exists(), "output_config.effort should not be set") + } else { + require.Equal(t, tc.expectEffort, effort.String()) + } + got := clientHeaders.Values("Anthropic-Beta") require.Equal(t, tc.expectBetaValues, got) }) diff --git a/intercept/messages/reqpayload.go b/intercept/messages/reqpayload.go index dfe52fc..eedea91 100644 --- a/intercept/messages/reqpayload.go +++ b/intercept/messages/reqpayload.go @@ -74,6 +74,9 @@ var ( // If the beta flag is present in the (already-filtered) Anthropic-Beta header, // the field is kept; otherwise it is stripped. Model-specific beta flags must // be removed from the header before this check (see filterBedrockBetaFlags). + // Adaptive-only models (Opus 4.7+) are exempt for output_config since they + // support it natively without a beta flag, see + // bedrockModelRequiresAdaptiveThinking. bedrockBetaGatedFields = map[string]string{ // output_config requires the effort beta (Opus 4.5 only). messagesReqPathOutputConfig: "effort-2025-11-24", @@ -372,12 +375,72 @@ func (p RequestPayload) convertAdaptiveThinkingForBedrock() (RequestPayload, err }) } +// convertEnabledThinkingForBedrock converts thinking.type "enabled" with a +// budget_tokens budget to the "adaptive" thinking type plus an output_config.effort +// derived from the budget_tokens / max_tokens ratio. 
The conversion is needed for +// Bedrock models that only support the "adaptive" thinking.type (Opus 4.7+). +// +// See https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-anthropic-claude-opus-4-7.html +// and https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-adaptive-thinking.html +// +// This is the symmetric counterpart to convertAdaptiveThinkingForBedrock; the +// ratio thresholds are the midpoints between the forward mapping's anchor +// ratios (low=0.2, medium=0.5, high=0.8, max=0.95), so a payload that +// round-trips through both conversions lands on the same effort level it +// started with. +// +// An explicit output_config.effort already present in the request is preserved. +func (p RequestPayload) convertEnabledThinkingForBedrock() (RequestPayload, error) { + thinkingType := gjson.GetBytes(p, messagesReqPathThinkingType) + if thinkingType.String() != constEnabled { + return p, nil + } + + // Derive effort from budget_tokens / max_tokens. If either is missing or + // unusable, fall back to "high" so the resulting request matches the + // default effort assumption in convertAdaptiveThinkingForBedrock. + derivedEffort := "high" + budgetTokens := gjson.GetBytes(p, messagesReqPathThinkingBudgetTokens).Int() + maxTokens := gjson.GetBytes(p, messagesReqPathMaxTokens).Int() + if budgetTokens > 0 && maxTokens > 0 { + ratio := float64(budgetTokens) / float64(maxTokens) + switch { + case ratio < 0.35: // midpoint of low (0.2) and medium (0.5) + derivedEffort = "low" + case ratio < 0.65: // midpoint of medium (0.5) and high (0.8) + derivedEffort = "medium" + case ratio < 0.875: // midpoint of high (0.8) and max (0.95) + derivedEffort = "high" + default: + derivedEffort = "max" + } + } + + updated, err := p.set(messagesReqPathThinking, map[string]string{"type": constAdaptive}) + if err != nil { + return p, xerrors.Errorf("set thinking: %w", err) + } + + // Preserve an explicit output_config.effort if the caller set one. 
Only + // inject the derived value when the field is absent, so we don't override + // intent. + if gjson.GetBytes(updated, messagesReqPathOutputConfigEffort).String() != "" { + return updated, nil + } + return updated.set(messagesReqPathOutputConfigEffort, derivedEffort) +} + // removeUnsupportedBedrockFields strips top-level fields that Bedrock does not // support from the payload. Fields that are gated behind a beta flag are only // removed when the corresponding flag is absent from the Anthropic-Beta header. // Model-specific beta flags must already be filtered from the header before // calling this method (see filterBedrockBetaFlags). -func (p RequestPayload) removeUnsupportedBedrockFields(headers http.Header) (RequestPayload, error) { +// +// Fields exempted by exemptFields are always kept regardless of beta flag +// state. Adaptive-only Bedrock models (Opus 4.7+) require output_config +// without a beta flag, so callers pass the field through this set to bypass +// the effort-2025-11-24 gate. +func (p RequestPayload) removeUnsupportedBedrockFields(headers http.Header, exemptFields ...string) (RequestPayload, error) { var payloadMap map[string]any if err := json.Unmarshal(p, &payloadMap); err != nil { return p, xerrors.Errorf("failed to unmarshal request payload when removing unsupported Bedrock fields: %w", err) @@ -391,6 +454,9 @@ func (p RequestPayload) removeUnsupportedBedrockFields(headers http.Header) (Req // Strip beta-gated fields only when their beta flag is missing. 
betaValues := headers.Values("Anthropic-Beta") for field, requiredFlag := range bedrockBetaGatedFields { + if slices.Contains(exemptFields, field) { + continue + } if !slices.Contains(betaValues, requiredFlag) { delete(payloadMap, field) } diff --git a/intercept/messages/reqpayload_test.go b/intercept/messages/reqpayload_test.go index a5de61f..fe3c535 100644 --- a/intercept/messages/reqpayload_test.go +++ b/intercept/messages/reqpayload_test.go @@ -361,6 +361,109 @@ func TestRequestPayloadConvertAdaptiveThinkingForBedrock(t *testing.T) { } } +func TestRequestPayloadConvertEnabledThinkingForBedrock(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + + requestBody string + + expectedThinkingType string + expectedEffort string + }{ + { + name: "no_thinking_field_is_no_op", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"messages":[]}`, + }, + { + name: "adaptive_thinking_is_no_op", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"adaptive"},"messages":[]}`, + expectedThinkingType: "adaptive", + }, + { + name: "disabled_thinking_is_no_op", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"disabled"},"messages":[]}`, + expectedThinkingType: "disabled", + }, + { + name: "enabled_with_low_ratio_maps_to_low_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "low", + }, + { + name: "enabled_with_medium_ratio_maps_to_medium_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":5000},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "medium", + }, + { + name: "enabled_with_high_ratio_maps_to_high_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":8000},"messages":[]}`, + 
expectedThinkingType: "adaptive", + expectedEffort: "high", + }, + { + name: "enabled_with_max_ratio_maps_to_max_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":9500},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "max", + }, + { + name: "enabled_at_low_medium_boundary_maps_to_medium", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":3500},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "medium", // 0.35 boundary: lands in medium + }, + { + name: "enabled_without_budget_falls_back_to_high_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled"},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "high", + }, + { + name: "enabled_without_max_tokens_falls_back_to_high_effort", + requestBody: `{"model":"claude-opus-4-7","thinking":{"type":"enabled","budget_tokens":5000},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "high", + }, + { + name: "enabled_preserves_explicit_effort", + requestBody: `{"model":"claude-opus-4-7","max_tokens":10000,"thinking":{"type":"enabled","budget_tokens":2000},"output_config":{"effort":"max"},"messages":[]}`, + expectedThinkingType: "adaptive", + expectedEffort: "max", // ratio would say "low", but explicit "max" wins + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + payload := mustMessagesPayload(t, tc.requestBody) + updatedPayload, err := payload.convertEnabledThinkingForBedrock() + require.NoError(t, err) + + thinking := gjson.GetBytes(updatedPayload, messagesReqPathThinking) + require.NotEqual(t, tc.expectedThinkingType == "", thinking.Exists(), "thinking should not be set") + require.Equal(t, tc.expectedThinkingType, gjson.GetBytes(updatedPayload, messagesReqPathThinkingType).String()) + + // budget_tokens must always be absent 
after a successful conversion. + budgetTokens := gjson.GetBytes(updatedPayload, messagesReqPathThinkingBudgetTokens) + if tc.expectedThinkingType == "adaptive" { + require.False(t, budgetTokens.Exists(), "budget_tokens should be removed after conversion") + } + + effort := gjson.GetBytes(updatedPayload, messagesReqPathOutputConfigEffort) + if tc.expectedEffort == "" { + // Effort is only set when we converted an "enabled" payload. + return + } + require.Equal(t, tc.expectedEffort, effort.String()) + }) + } +} + func TestRequestPayloadDisableParallelToolCalls(t *testing.T) { t.Parallel()