diff --git a/internal/cmd/flags.go b/internal/cmd/flags.go index 896e453f9..7dc65e017 100644 --- a/internal/cmd/flags.go +++ b/internal/cmd/flags.go @@ -26,8 +26,8 @@ // // This keeps the env-var name co-located with the flag declaration. // -// Exception: getDefaultDIFCMode() in flags_difc.go is kept as a named helper -// because it contains validation logic beyond a simple env lookup. +// Exception: difc.DefaultEnforcementMode() is kept as a named helper because +// it contains validation logic beyond a simple env lookup. // // When adding a new flag with an environment variable override: // 1. Use envutil.GetEnv* directly in the RegisterFlag call. diff --git a/internal/cmd/flags_difc.go b/internal/cmd/flags_difc.go index 27a9f1612..5521af9b6 100644 --- a/internal/cmd/flags_difc.go +++ b/internal/cmd/flags_difc.go @@ -3,14 +3,11 @@ package cmd // DIFC (Decentralized Information Flow Control) related flags import ( - "fmt" "os" - "strings" "github.com/github/gh-aw-mcpg/internal/config" "github.com/github/gh-aw-mcpg/internal/difc" "github.com/github/gh-aw-mcpg/internal/envutil" - "github.com/github/gh-aw-mcpg/internal/strutil" "github.com/spf13/cobra" ) @@ -30,7 +27,7 @@ const containerGuardWasmPath = "/guards/github/00-github-guard.wasm" func init() { RegisterFlag(func(cmd *cobra.Command) { - cmd.Flags().StringVar(&difcMode, "guards-mode", getDefaultDIFCMode(), "Guards enforcement mode: strict (deny violations), filter (remove denied tools), or propagate (auto-adjust agent labels on reads)") + cmd.Flags().StringVar(&difcMode, "guards-mode", difc.DefaultEnforcementMode(), "Guards enforcement mode: strict (deny violations), filter (remove denied tools), or propagate (auto-adjust agent labels on reads)") cmd.Flags().StringVar(&difcSinkServerIDs, "guards-sink-server-ids", envutil.GetEnvString("MCP_GATEWAY_GUARDS_SINK_SERVER_IDS", ""), "Comma-separated server IDs whose RPC JSONL logs should include agent secrecy/integrity tag snapshots") cmd.Flags().StringVar(&guardPolicyJSON, "guard-policy-json", envutil.GetEnvString(config.EnvGuardPolicyJSON, ""), "Guard policy JSON (e.g. {\"allow-only\":{\"repos\":\"public\",\"min-integrity\":\"none\"}})") cmd.Flags().BoolVar(&allowOnlyPublic, "allowonly-scope-public", envutil.GetEnvBool(config.EnvAllowOnlyScopePublic, false), "Use public AllowOnly scope") @@ -74,50 +71,3 @@ func resolveGuardPolicyOverride(cmd *cobra.Command) (*config.GuardPolicy, string allowOnlyMinInt, ) } - -// getDefaultDIFCMode returns the default guards mode, checking MCP_GATEWAY_GUARDS_MODE -// environment variable first, then falling back to the hardcoded default (strict) -func getDefaultDIFCMode() string { - if envMode := os.Getenv("MCP_GATEWAY_GUARDS_MODE"); envMode != "" { - mode := strings.ToLower(envMode) - if _, err := difc.ParseEnforcementMode(mode); err == nil { - debugLog.Printf("Guards mode set from MCP_GATEWAY_GUARDS_MODE: %s", mode) - return mode - } - debugLog.Printf("MCP_GATEWAY_GUARDS_MODE value %q is invalid, falling back to default: %s", envMode, difc.ModeStrict) - } - return difc.ModeStrict -} - -// validateDIFCModeFlag validates the value of the --guards-mode CLI flag. -func validateDIFCModeFlag(mode string) error { - if _, err := difc.ParseEnforcementMode(mode); err != nil { - return fmt.Errorf("invalid --guards-mode flag: %w", err) - } - return nil -} - -func parseDIFCSinkServerIDs(input string) ([]string, error) { - if strings.TrimSpace(input) == "" { - return nil, nil - } - - debugLog.Printf("Parsing DIFC sink server IDs: input=%q", input) - - parts := strings.Split(input, ",") - validated := make([]string, 0, len(parts)) - for _, part := range parts { - value := strings.TrimSpace(part) - if value == "" { - continue - } - if strings.ContainsAny(value, " \t\n\r") { - return nil, fmt.Errorf("invalid guards sink server ID %q: whitespace is not allowed", value) - } - validated = append(validated, value) - } - - result := strutil.DeduplicateStrings(validated, false) - debugLog.Printf("Parsed %d unique DIFC sink server IDs: %v", len(result), result) - return result, nil -} diff --git a/internal/cmd/flags_difc_test.go b/internal/cmd/flags_difc_test.go index 45fe42c30..bb2b20a3e 100644 --- a/internal/cmd/flags_difc_test.go +++ b/internal/cmd/flags_difc_test.go @@ -69,58 +69,6 @@ func TestValidateDIFCMode(t *testing.T) { } } -func TestGetDefaultDIFCMode(t *testing.T) { - tests := []struct { - name string - envValue string - want string - }{ - { - name: "no env var returns strict", - envValue: "", - want: "strict", - }, - { - name: "env var strict", - envValue: "strict", - want: "strict", - }, - { - name: "env var filter", - envValue: "filter", - want: "filter", - }, - { - name: "env var propagate", - envValue: "propagate", - want: "propagate", - }, - { - name: "env var FILTER uppercase", - envValue: "FILTER", - want: "filter", - }, - { - name: "env var invalid falls back to strict", - envValue: "invalid", - want: "strict", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.envValue != "" { - t.Setenv("MCP_GATEWAY_GUARDS_MODE", tt.envValue) - } else { - t.Setenv("MCP_GATEWAY_GUARDS_MODE", "") - } - - got := getDefaultDIFCMode() - assert.Equal(t, tt.want, got) - }) - } -} - func TestValidDIFCModes(t *testing.T) { require := require.New(t) @@ -136,32 +84,6 @@ func TestValidDIFCModes(t *testing.T) { require.Len(difc.ValidModes, 3, "should only have 3 valid modes") } -func TestValidateDIFCModeFlag(t *testing.T) { - tests := []struct { - name string - mode string - wantErr bool - }{ - {name: "strict valid", mode: "strict", wantErr: false}, - {name: "filter valid", mode: "filter", wantErr: false}, - {name: "propagate valid", mode: "propagate", wantErr: false}, - {name: "empty defaults to strict", mode: "", wantErr: false}, - {name: "invalid mode", mode: "bogus", wantErr: true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := validateDIFCModeFlag(tt.mode) - if tt.wantErr { - require.Error(t, err, "expected error for mode %q", tt.mode) - assert.ErrorContains(t, err, "invalid --guards-mode flag") - } else { - assert.NoError(t, err, "unexpected error for mode %q", tt.mode) - } - }) - } -} - func TestParseDIFCSinkServerIDs(t *testing.T) { tests := []struct { name string @@ -218,7 +140,7 @@ func TestParseDIFCSinkServerIDs(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result, err := parseDIFCSinkServerIDs(tt.input) + result, err := difc.ParseSinkServerIDs(tt.input) if tt.wantErr { require.Error(t, err) return diff --git a/internal/cmd/proxy.go b/internal/cmd/proxy.go index 01c1dfb7a..982b7d766 100644 --- a/internal/cmd/proxy.go +++ b/internal/cmd/proxy.go @@ -146,8 +146,8 @@ func runProxy(cmd *cobra.Command, args []string) error { logProxyCmd.Printf("Starting proxy: listen=%s, guard=%s, mode=%s, tls=%v", proxyListen, proxyGuardWasm, proxyDIFCMode, proxyTLS) - if err := validateDIFCModeFlag(proxyDIFCMode); err != nil { - return err + if _, err := difc.ParseEnforcementMode(proxyDIFCMode); err != nil { + return fmt.Errorf("invalid --guards-mode flag: %w", err) } // Initialize loggers diff --git a/internal/cmd/root.go b/internal/cmd/root.go index 1e34cd469..52f82cd3f 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -209,8 +209,8 @@ func run(cmd *cobra.Command, args []string) error { } // Validate guards mode before applying - if err := validateDIFCModeFlag(difcMode); err != nil { - return err + if _, err := difc.ParseEnforcementMode(difcMode); err != nil { + return fmt.Errorf("invalid --guards-mode flag: %w", err) } // Apply command-line flags to config @@ -237,7 +237,7 @@ func run(cmd *cobra.Command, args []string) error { logger.StartupInfo("MCP_GATEWAY_GUARDS_SINK_SERVER_IDS=%q", envSinkServerIDs) } - resolvedSinkServerIDs, err := parseDIFCSinkServerIDs(difcSinkServerIDs) + resolvedSinkServerIDs, err := difc.ParseSinkServerIDs(difcSinkServerIDs) if err != nil { return fmt.Errorf("invalid --guards-sink-server-ids value: %w", err) } diff --git a/internal/config/guard_policy_integrity_levels.go b/internal/config/guard_policy_integrity_levels.go new file mode 100644 index 000000000..6f180377f --- /dev/null +++ b/internal/config/guard_policy_integrity_levels.go @@ -0,0 +1,8 @@ +package config + +var allowedGuardPolicyIntegrityLevels = []string{ + IntegrityNone, + IntegrityUnapproved, + IntegrityApproved, + IntegrityMerged, +} diff --git a/internal/guard/policy_helpers.go b/internal/config/guard_policy_marshal.go similarity index 61% rename from internal/guard/policy_helpers.go rename to internal/config/guard_policy_marshal.go index f76615b76..22f80fd70 100644 --- a/internal/guard/policy_helpers.go +++ b/internal/config/guard_policy_marshal.go @@ -1,14 +1,14 @@ -package guard +package config import ( "encoding/json" "fmt" ) -// PolicyToMap converts a policy value to a generic map through a JSON roundtrip. -// It returns an error if the value cannot be serialized or does not decode to a -// JSON object. -func PolicyToMap(policy interface{}) (map[string]interface{}, error) { +// GuardPolicyToMap converts a policy value to a generic map through a JSON +// roundtrip. It returns an error if the value cannot be serialized or does not +// decode to a JSON object. +func GuardPolicyToMap(policy interface{}) (map[string]interface{}, error) { if policy == nil { return nil, fmt.Errorf("policy is required") } diff --git a/internal/guard/policy_helpers_test.go b/internal/config/guard_policy_marshal_test.go similarity index 82% rename from internal/guard/policy_helpers_test.go rename to internal/config/guard_policy_marshal_test.go index 81e316f17..63482b6f1 100644 --- a/internal/guard/policy_helpers_test.go +++ b/internal/config/guard_policy_marshal_test.go @@ -1,4 +1,4 @@ -package guard +package config import ( "math" @@ -8,7 +8,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestPolicyToMap(t *testing.T) { +func TestGuardPolicyToMap(t *testing.T) { t.Run("returns deep copy for map input", func(t *testing.T) { policy := map[string]interface{}{ "allow-only": map[string]interface{}{ @@ -17,7 +17,7 @@ func TestPolicyToMap(t *testing.T) { }, } - payload, err := PolicyToMap(policy) + payload, err := GuardPolicyToMap(policy) require.NoError(t, err) require.NotNil(t, payload) @@ -31,19 +31,19 @@ func TestPolicyToMap(t *testing.T) { }) t.Run("nil policy returns error", func(t *testing.T) { - _, err := PolicyToMap(nil) + _, err := GuardPolicyToMap(nil) require.Error(t, err) assert.ErrorContains(t, err, "policy is required") }) t.Run("non-object policy returns error", func(t *testing.T) { - _, err := PolicyToMap([]string{"not-an-object"}) + _, err := GuardPolicyToMap([]string{"not-an-object"}) require.Error(t, err) assert.ErrorContains(t, err, "policy must decode to a JSON object") }) t.Run("unmarshalable policy returns error", func(t *testing.T) { - _, err := PolicyToMap(math.NaN()) + _, err := GuardPolicyToMap(math.NaN()) require.Error(t, err) assert.ErrorContains(t, err, "failed to serialize policy") }) diff --git a/internal/config/guard_policy_parse.go b/internal/config/guard_policy_parse.go index 30d3a4fc0..059e7380f 100644 --- a/internal/config/guard_policy_parse.go +++ b/internal/config/guard_policy_parse.go @@ -6,7 +6,6 @@ import ( "strings" "github.com/github/gh-aw-mcpg/internal/envutil" - "github.com/github/gh-aw-mcpg/internal/guard" ) // Environment variable names for guard policy configuration. @@ -153,7 +152,7 @@ func BuildAllowOnlyPolicy(public bool, owner, repo, minIntegrity string) (*Guard return nil, fmt.Errorf("min-integrity is required") } if !hasIntegrity { - return nil, fmt.Errorf("min-integrity must be one of: %s", strings.Join(guard.AllowedIntegrityLevels, ", ")) + return nil, fmt.Errorf("min-integrity must be one of: %s", strings.Join(allowedGuardPolicyIntegrityLevels, ", ")) } var repos interface{} diff --git a/internal/config/guard_policy_parse_test.go b/internal/config/guard_policy_parse_test.go index e02377e4e..061224b50 100644 --- a/internal/config/guard_policy_parse_test.go +++ b/internal/config/guard_policy_parse_test.go @@ -5,7 +5,6 @@ import ( "strings" "testing" - "github.com/github/gh-aw-mcpg/internal/guard" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -560,7 +559,7 @@ func TestBuildAllowOnlyPolicy_InvalidIntegrityErrorListsCanonicalValues(t *testi got, err := BuildAllowOnlyPolicy(true, "", "", "superstrict") require.Nil(t, got) require.EqualError(t, err, - fmt.Sprintf("min-integrity must be one of: %s", strings.Join(guard.AllowedIntegrityLevels, ", "))) + fmt.Sprintf("min-integrity must be one of: %s", strings.Join(allowedGuardPolicyIntegrityLevels, ", "))) } // TestParsePolicyMap_LegacyMinIntegrityTakesPrecedence verifies that diff --git a/internal/config/guard_policy_validation.go b/internal/config/guard_policy_validation.go index 525cef236..9be8f390f 100644 --- a/internal/config/guard_policy_validation.go +++ b/internal/config/guard_policy_validation.go @@ -4,8 +4,6 @@ import ( "fmt" "sort" "strings" - - "github.com/github/gh-aw-mcpg/internal/guard" ) // ValidateGuardPolicy validates AllowOnly or WriteSink policy input. @@ -99,7 +97,7 @@ func NormalizeGuardPolicy(policy *GuardPolicy) (*NormalizedGuardPolicy, error) { integrity := strings.ToLower(strings.TrimSpace(policy.AllowOnly.MinIntegrity)) if _, ok := validMinIntegrityValues[integrity]; !ok { - return nil, fmt.Errorf("allow-only.min-integrity must be one of: %s", strings.Join(guard.AllowedIntegrityLevels, ", ")) + return nil, fmt.Errorf("allow-only.min-integrity must be one of: %s", strings.Join(allowedGuardPolicyIntegrityLevels, ", ")) } normalized := &NormalizedGuardPolicy{MinIntegrity: integrity} @@ -143,7 +141,7 @@ func NormalizeGuardPolicy(policy *GuardPolicy) (*NormalizedGuardPolicy, error) { // uses Rust-side default of "none" when endorsement/disapproval is evaluated). if v := strings.ToLower(strings.TrimSpace(policy.AllowOnly.DisapprovalIntegrity)); v != "" { if _, ok := validMinIntegrityValues[v]; !ok { - return nil, fmt.Errorf("allow-only.disapproval-integrity must be one of: %s", strings.Join(guard.AllowedIntegrityLevels, ", ")) + return nil, fmt.Errorf("allow-only.disapproval-integrity must be one of: %s", strings.Join(allowedGuardPolicyIntegrityLevels, ", ")) } normalized.DisapprovalIntegrity = v } @@ -152,7 +150,7 @@ func NormalizeGuardPolicy(policy *GuardPolicy) (*NormalizedGuardPolicy, error) { // uses Rust-side default of "approved" when evaluating reactor eligibility). if v := strings.ToLower(strings.TrimSpace(policy.AllowOnly.EndorserMinIntegrity)); v != "" { if _, ok := validMinIntegrityValues[v]; !ok { - return nil, fmt.Errorf("allow-only.endorser-min-integrity must be one of: %s", strings.Join(guard.AllowedIntegrityLevels, ", ")) + return nil, fmt.Errorf("allow-only.endorser-min-integrity must be one of: %s", strings.Join(allowedGuardPolicyIntegrityLevels, ", ")) } normalized.EndorserMinIntegrity = v } diff --git a/internal/difc/evaluator.go b/internal/difc/evaluator.go index 4e6f09555..ec005be37 100644 --- a/internal/difc/evaluator.go +++ b/internal/difc/evaluator.go @@ -2,6 +2,7 @@ package difc import ( "fmt" + "os" "strings" "github.com/github/gh-aw-mcpg/internal/logger" @@ -89,6 +90,20 @@ func ParseEnforcementMode(s string) (EnforcementMode, error) { } } +// DefaultEnforcementMode returns the default guards mode, checking +// MCP_GATEWAY_GUARDS_MODE first and falling back to strict. +func DefaultEnforcementMode() string { + if envMode := os.Getenv("MCP_GATEWAY_GUARDS_MODE"); envMode != "" { + mode := strings.ToLower(envMode) + if _, err := ParseEnforcementMode(mode); err == nil { + logEvaluator.Printf("Guards mode set from MCP_GATEWAY_GUARDS_MODE: %s", mode) + return mode + } + logEvaluator.Printf("MCP_GATEWAY_GUARDS_MODE value %q is invalid, falling back to default: %s", envMode, ModeStrict) + } + return ModeStrict +} + // DIFCComponents holds the set of DIFC objects needed by a server or proxy. type DIFCComponents struct { Mode EnforcementMode diff --git a/internal/difc/evaluator_test.go b/internal/difc/evaluator_test.go index 6c563655d..c03e21fdd 100644 --- a/internal/difc/evaluator_test.go +++ b/internal/difc/evaluator_test.go @@ -776,6 +776,28 @@ func TestParseEnforcementMode(t *testing.T) { } } +func TestDefaultEnforcementMode(t *testing.T) { + tests := []struct { + name string + envValue string + want string + }{ + {name: "no env var returns strict", envValue: "", want: "strict"}, + {name: "env var strict", envValue: "strict", want: "strict"}, + {name: "env var filter", envValue: "filter", want: "filter"}, + {name: "env var propagate", envValue: "propagate", want: "propagate"}, + {name: "env var FILTER uppercase", envValue: "FILTER", want: "filter"}, + {name: "env var invalid falls back to strict", envValue: "invalid", want: "strict"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Setenv("MCP_GATEWAY_GUARDS_MODE", tt.envValue) + assert.Equal(t, tt.want, DefaultEnforcementMode()) + }) + } +} + // TestEvaluationResult_RequiresPropagation tests the RequiresPropagation method func TestEvaluationResult_RequiresPropagation(t *testing.T) { tests := []struct { diff --git a/internal/difc/sink_server_ids.go b/internal/difc/sink_server_ids.go index 96744852d..808147345 100644 --- a/internal/difc/sink_server_ids.go +++ b/internal/difc/sink_server_ids.go @@ -1,6 +1,8 @@ package difc import ( + "fmt" + "strings" "sync" "github.com/github/gh-aw-mcpg/internal/logger" @@ -58,3 +60,29 @@ func IsSinkServerID(serverID string) bool { } return false } + +// ParseSinkServerIDs parses and validates comma-separated sink server IDs. +func ParseSinkServerIDs(input string) ([]string, error) { + if strings.TrimSpace(input) == "" { + return nil, nil + } + + logSink.Printf("Parsing DIFC sink server IDs: input=%q", input) + + parts := strings.Split(input, ",") + validated := make([]string, 0, len(parts)) + for _, part := range parts { + value := strings.TrimSpace(part) + if value == "" { + continue + } + if strings.ContainsAny(value, " \t\n\r") { + return nil, fmt.Errorf("invalid guards sink server ID %q: whitespace is not allowed", value) + } + validated = append(validated, value) + } + + result := strutil.DeduplicateStrings(validated, false) + logSink.Printf("Parsed %d unique DIFC sink server IDs: %v", len(result), result) + return result, nil +} diff --git a/internal/difc/sink_server_ids_test.go b/internal/difc/sink_server_ids_test.go index 465ae9e5a..1344f56ba 100644 --- a/internal/difc/sink_server_ids_test.go +++ b/internal/difc/sink_server_ids_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestSetSinkServerIDs(t *testing.T) { @@ -113,3 +114,34 @@ func TestIsSinkServerID(t *testing.T) { wg.Wait() }) } + +func TestParseSinkServerIDs(t *testing.T) { + tests := []struct { + name string + input string + expect []string + wantErr bool + }{ + {name: "empty input", input: "", expect: nil}, + {name: "single server id", input: "safeoutputs", expect: []string{"safeoutputs"}}, + {name: "multiple server ids", input: "safeoutputs,github", expect: []string{"safeoutputs", "github"}}, + {name: "trims whitespace around separators", input: " safeoutputs , github ", expect: []string{"safeoutputs", "github"}}, + {name: "deduplicates server ids", input: "safeoutputs,github,safeoutputs", expect: []string{"safeoutputs", "github"}}, + {name: "consecutive commas skip empty parts", input: "safeoutputs,,github", expect: []string{"safeoutputs", "github"}}, + {name: "trailing comma skips empty part", input: "safeoutputs,github,", expect: []string{"safeoutputs", "github"}}, + {name: "rejects embedded whitespace", input: "safe outputs", wantErr: true}, + {name: "rejects embedded tab", input: "safe\toutputs", wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := ParseSinkServerIDs(tt.input) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.expect, result) + }) + } +} diff --git a/internal/guard/wasm_payload.go b/internal/guard/wasm_payload.go index ffd00c328..8f6c1130b 100644 --- a/internal/guard/wasm_payload.go +++ b/internal/guard/wasm_payload.go @@ -4,6 +4,8 @@ import ( "encoding/json" "fmt" "strings" + + "github.com/github/gh-aw-mcpg/internal/config" ) // normalizePolicyPayload coerces a policy value to a map[string]interface{}. @@ -56,7 +58,7 @@ func buildStrictLabelAgentPayload(policy interface{}) (map[string]interface{}, e } } - payload, err := PolicyToMap(policy) + payload, err := config.GuardPolicyToMap(policy) if err != nil { return nil, fmt.Errorf("failed to decode label_agent policy payload: %w", err) } @@ -191,7 +193,7 @@ func BuildLabelAgentPayload(policy interface{}, trustedBots []string, trustedUse // Convert the policy to a generic map so we can inject the trusted-bots and // trusted-users keys alongside the allow-only policy without altering the // policy itself. - payload, err := PolicyToMap(policy) + payload, err := config.GuardPolicyToMap(policy) if err != nil { // If we can't convert the policy, return it as-is; buildStrictLabelAgentPayload // will surface the error later. diff --git a/internal/logger/log_cleanup.go b/internal/logger/log_cleanup.go new file mode 100644 index 000000000..d11090f51 --- /dev/null +++ b/internal/logger/log_cleanup.go @@ -0,0 +1,49 @@ +package logger + +import ( + "regexp" + "strings" + + "github.com/github/gh-aw-mcpg/internal/strutil" +) + +// Pre-compiled regexes for performance (avoid recompiling in hot paths). +var ( + // Timestamp patterns for log cleanup + // Pattern 1: ISO 8601 with T or space separator (e.g., "2024-01-01T12:00:00.123Z " or "2024-01-01 12:00:00 "). + timestampPattern1 = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)?\s*`) + // Pattern 2: Bracketed date-time (e.g., "[2024-01-01 12:00:00] "). + timestampPattern2 = regexp.MustCompile(`^\[\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\]\s*`) + // Pattern 3: Bracketed time only (e.g., "[12:00:00] "). + timestampPattern3 = regexp.MustCompile(`^\[\d{2}:\d{2}:\d{2}\]\s+`) + // Pattern 4: Time only with optional milliseconds (e.g., "12:00:00.123 "). + timestampPattern4 = regexp.MustCompile(`^\d{2}:\d{2}:\d{2}(\.\d+)?\s+`) + + // Log level pattern for message cleanup (case-insensitive). + logLevelPattern = regexp.MustCompile(`(?i)^\[?(ERROR|WARNING|WARN|INFO|DEBUG)\]?\s*[:-]?\s*`) +) + +// ExtractErrorMessage extracts a clean error message from a log line. +// It removes timestamps, log level prefixes, and other common noise. +// If the message is longer than 200 characters, it will be truncated. +func ExtractErrorMessage(line string) string { + // Remove common timestamp patterns using pre-compiled regexes + cleanedLine := line + cleanedLine = timestampPattern1.ReplaceAllString(cleanedLine, "") + cleanedLine = timestampPattern2.ReplaceAllString(cleanedLine, "") + cleanedLine = timestampPattern3.ReplaceAllString(cleanedLine, "") + cleanedLine = timestampPattern4.ReplaceAllString(cleanedLine, "") + + // Remove common log level prefixes using pre-compiled regex + cleanedLine = logLevelPattern.ReplaceAllString(cleanedLine, "") + + // Trim whitespace + cleanedLine = strings.TrimSpace(cleanedLine) + + // If the line is too long (>200 chars), truncate it + if len(cleanedLine) > 200 { + cleanedLine = strutil.Truncate(cleanedLine, 197) + } + + return cleanedLine +} diff --git a/internal/logger/rpc_helpers.go b/internal/logger/rpc_helpers.go index 686bbcd8a..dad71a885 100644 --- a/internal/logger/rpc_helpers.go +++ b/internal/logger/rpc_helpers.go @@ -6,7 +6,6 @@ // // - truncateAndSanitize: Combines secret sanitization with length truncation // - extractEssentialFields: Extracts key JSON-RPC fields for compact logging -// - ExtractErrorMessage: Extracts clean error messages from log lines // // These helpers are used by the RPC logging system to safely and efficiently // process message payloads before logging them. @@ -14,29 +13,11 @@ package logger import ( "encoding/json" - "regexp" - "strings" "github.com/github/gh-aw-mcpg/internal/logger/sanitize" "github.com/github/gh-aw-mcpg/internal/strutil" ) -// Pre-compiled regexes for performance (avoid recompiling in hot paths). -var ( - // Timestamp patterns for log cleanup - // Pattern 1: ISO 8601 with T or space separator (e.g., "2024-01-01T12:00:00.123Z " or "2024-01-01 12:00:00 "). - timestampPattern1 = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)?\s*`) - // Pattern 2: Bracketed date-time (e.g., "[2024-01-01 12:00:00] "). - timestampPattern2 = regexp.MustCompile(`^\[\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\]\s*`) - // Pattern 3: Bracketed time only (e.g., "[12:00:00] "). - timestampPattern3 = regexp.MustCompile(`^\[\d{2}:\d{2}:\d{2}\]\s+`) - // Pattern 4: Time only with optional milliseconds (e.g., "12:00:00.123 "). - timestampPattern4 = regexp.MustCompile(`^\d{2}:\d{2}:\d{2}(\.\d+)?\s+`) - - // Log level pattern for message cleanup (case-insensitive). - logLevelPattern = regexp.MustCompile(`(?i)^\[?(ERROR|WARNING|WARN|INFO|DEBUG)\]?\s*[:-]?\s*`) -) - // truncateAndSanitize truncates the payload to max length and sanitizes secrets func truncateAndSanitize(payload string, maxLength int) string { // First sanitize secrets @@ -86,28 +67,3 @@ func extractEssentialFields(payload []byte) map[string]interface{} { return essential } - -// ExtractErrorMessage extracts a clean error message from a log line. -// It removes timestamps, log level prefixes, and other common noise. -// If the message is longer than 200 characters, it will be truncated. -func ExtractErrorMessage(line string) string { - // Remove common timestamp patterns using pre-compiled regexes - cleanedLine := line - cleanedLine = timestampPattern1.ReplaceAllString(cleanedLine, "") - cleanedLine = timestampPattern2.ReplaceAllString(cleanedLine, "") - cleanedLine = timestampPattern3.ReplaceAllString(cleanedLine, "") - cleanedLine = timestampPattern4.ReplaceAllString(cleanedLine, "") - - // Remove common log level prefixes using pre-compiled regex - cleanedLine = logLevelPattern.ReplaceAllString(cleanedLine, "") - - // Trim whitespace - cleanedLine = strings.TrimSpace(cleanedLine) - - // If the line is too long (>200 chars), truncate it - if len(cleanedLine) > 200 { - cleanedLine = strutil.Truncate(cleanedLine, 197) - } - - return cleanedLine -}