From aa23fd5c8e327037a32e82424428cd533bd5ac27 Mon Sep 17 00:00:00 2001 From: Md Yunus Date: Sat, 9 May 2026 18:41:42 +0530 Subject: [PATCH 1/5] feat: built-in large tool output handling Add a new builtin hook that automatically handles large tool responses. When enabled, tool responses exceeding the threshold are saved to disk and replaced with a pointer that the agent can read back using shell tools. This prevents large MCP tool responses from exhausting the model's context window while still allowing the agent to access the full data. The feature is configured at the agent level: handle_large_tool_output: enabled: true threshold: 5000 # characters (default) output_dir: /tmp # default: os.TempDir() preview_size: 3000 # chars in preview (default) This is a root-cause fix for #2722 - it addresses both the lack of MCP output limits and context window overflow issues. --- agent-schema.json | 24 +++++ pkg/agent/agent.go | 7 ++ pkg/agent/opts.go | 10 ++ pkg/config/latest/types.go | 46 +++++++--- pkg/hooks/builtins/builtins.go | 13 +++ .../builtins/handle_large_tool_output.go | 91 +++++++++++++++++++ pkg/runtime/hooks.go | 9 +- pkg/teamloader/teamloader.go | 1 + 8 files changed, 184 insertions(+), 17 deletions(-) create mode 100644 pkg/hooks/builtins/handle_large_tool_output.go diff --git a/agent-schema.json b/agent-schema.json index 33b549c2e..4cf8b8bd9 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -434,6 +434,30 @@ "type": "boolean", "description": "When true, the runtime auto-installs the redact_secrets builtin on all three of pre_tool_use (scrubs detected secrets from tool arguments), before_llm_call (scrubs the messages sent to the LLM), and tool_response_transform (scrubs tool output before it reaches event consumers, the persisted session, the post_tool_use hook input, or the next LLM call). 
The same hook entries can be authored directly in YAML for finer-grained control — see the hooks.tool_response_transform / hooks.before_llm_call / hooks.pre_tool_use sections. Detection uses the docker-agent secretsscan ruleset (GitHub PATs, AWS keys, Stripe / Slack / GitLab tokens, JWTs, private keys, etc.). Each detected span is replaced with the literal '[REDACTED]'." }, + "handle_large_tool_output": { + "type": "object", + "description": "When enabled, tool responses exceeding the threshold are saved to disk and replaced with a pointer that the agent can read back using shell tools (e.g. cat). This prevents large MCP tool responses from exhausting the model's context window while still allowing the agent to access the full data.", + "properties": { + "enabled": { + "type": "boolean", + "description": "Whether to enable large tool output handling. Default: false." + }, + "threshold": { + "type": "integer", + "description": "Character count threshold above which tool output is saved to disk. Default: 5000.", + "minimum": 0 + }, + "output_dir": { + "type": "string", + "description": "Directory where large outputs are saved. Default: system temp directory." + }, + "preview_size": { + "type": "integer", + "description": "Number of characters to include in the preview shown to the agent. Default: 3000.", + "minimum": 0 + } + } + }, "max_iterations": { "type": "integer", "description": "Maximum number of iterations", diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index f4c236a12..aa98464a7 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -44,6 +44,7 @@ type Agent struct { commands types.Commands hooks *latest.HooksConfig cache *cache.Cache + handleLargeToolOutput *latest.HandleLargeToolOutputConfig // warningsMu guards pendingWarnings. 
AddToolWarning and DrainWarnings // may be called concurrently from the runtime loop, the MCP server, @@ -95,6 +96,12 @@ func (a *Agent) RedactSecrets() bool { return a.redactSecrets } +// HandleLargeToolOutput returns the configuration for large tool output +// handling, or nil if not configured. +func (a *Agent) HandleLargeToolOutput() *latest.HandleLargeToolOutputConfig { + return a.handleLargeToolOutput +} + func (a *Agent) MaxIterations() int { return a.maxIterations } diff --git a/pkg/agent/opts.go b/pkg/agent/opts.go index 9657d00d3..e1ba9ad25 100644 --- a/pkg/agent/opts.go +++ b/pkg/agent/opts.go @@ -120,6 +120,16 @@ func WithRedactSecrets(redactSecrets bool) Opt { } } +// WithHandleLargeToolOutput configures automatic handling of large tool +// responses. When enabled, tool responses exceeding the threshold are saved +// to disk and replaced with a pointer that the agent can read back using shell +// tools. +func WithHandleLargeToolOutput(cfg *latest.HandleLargeToolOutputConfig) Opt { + return func(a *Agent) { + a.handleLargeToolOutput = cfg + } +} + func WithAddDescriptionParameter(addDescriptionParameter bool) Opt { return func(a *Agent) { a.addDescriptionParameter = addDescriptionParameter diff --git a/pkg/config/latest/types.go b/pkg/config/latest/types.go index bfac3d6a1..1f5b56672 100644 --- a/pkg/config/latest/types.go +++ b/pkg/config/latest/types.go @@ -391,19 +391,39 @@ type AgentConfig struct { // hook entries by hand — the runtime auto-injects them when this // flag is true. See pkg/hooks/builtins/redact_secrets.go for the // hook-side implementation. 
- RedactSecrets bool `json:"redact_secrets,omitempty"` - CodeModeTools bool `json:"code_mode_tools,omitempty"` - AddDescriptionParameter bool `json:"add_description_parameter,omitempty"` - MaxIterations int `json:"max_iterations,omitempty"` - MaxConsecutiveToolCalls int `json:"max_consecutive_tool_calls,omitempty"` - MaxOldToolCallTokens int `json:"max_old_tool_call_tokens,omitempty"` - NumHistoryItems int `json:"num_history_items,omitempty"` - AddPromptFiles []string `json:"add_prompt_files,omitempty" yaml:"add_prompt_files,omitempty"` - Commands types.Commands `json:"commands,omitempty"` - StructuredOutput *StructuredOutput `json:"structured_output,omitempty"` - Skills SkillsConfig `json:"skills,omitzero"` - Hooks *HooksConfig `json:"hooks,omitempty"` - Cache *CacheConfig `json:"cache,omitempty"` + RedactSecrets bool `json:"redact_secrets,omitempty"` + CodeModeTools bool `json:"code_mode_tools,omitempty"` + AddDescriptionParameter bool `json:"add_description_parameter,omitempty"` + MaxIterations int `json:"max_iterations,omitempty"` + MaxConsecutiveToolCalls int `json:"max_consecutive_tool_calls,omitempty"` + MaxOldToolCallTokens int `json:"max_old_tool_call_tokens,omitempty"` + NumHistoryItems int `json:"num_history_items,omitempty"` + AddPromptFiles []string `json:"add_prompt_files,omitempty" yaml:"add_prompt_files,omitempty"` + Commands types.Commands `json:"commands,omitempty"` + StructuredOutput *StructuredOutput `json:"structured_output,omitempty"` + Skills SkillsConfig `json:"skills,omitzero"` + Hooks *HooksConfig `json:"hooks,omitempty"` + Cache *CacheConfig `json:"cache,omitempty"` + HandleLargeToolOutput *HandleLargeToolOutputConfig `json:"handle_large_tool_output,omitempty"` +} + +// HandleLargeToolOutputConfig configures automatic handling of large tool +// responses. When enabled, tool responses exceeding the threshold are saved +// to disk and replaced with a pointer that the agent can read back using shell +// tools. 
This prevents large MCP tool responses from exhausting the model's +// context window while still allowing the agent to access the full data. +type HandleLargeToolOutputConfig struct { + // Enabled controls whether large tool output handling is active. + Enabled bool `json:"enabled,omitempty" yaml:"enabled,omitempty"` + // Threshold is the character count above which tool output is saved + // to disk. Default: 5000. + Threshold int `json:"threshold,omitempty" yaml:"threshold,omitempty"` + // OutputDir is the directory where large outputs are saved. Default: + // system temp directory. + OutputDir string `json:"output_dir,omitempty" yaml:"output_dir,omitempty"` + // PreviewSize is the number of characters to include in the preview + // shown to the agent. Default: 3000. + PreviewSize int `json:"preview_size,omitempty" yaml:"preview_size,omitempty"` } // CacheConfig configures the agent's response cache. When set and Enabled diff --git a/pkg/hooks/builtins/builtins.go b/pkg/hooks/builtins/builtins.go index 520ac4383..3c82a8dc6 100644 --- a/pkg/hooks/builtins/builtins.go +++ b/pkg/hooks/builtins/builtins.go @@ -49,8 +49,10 @@ package builtins import ( + "encoding/json" "errors" + "github.com/docker/docker-agent/pkg/config/latest" "github.com/docker/docker-agent/pkg/hooks" ) @@ -71,6 +73,7 @@ func Register(r *hooks.Registry) error { r.RegisterBuiltin(AddRecentCommits, addRecentCommits), r.RegisterBuiltin(MaxIterations, maxIterations), r.RegisterBuiltin(RedactSecrets, redactSecrets), + r.RegisterBuiltin(HandleLargeToolOutput, handleLargeToolOutput), r.RegisterBuiltin(HTTPPost, httpPost), ) } @@ -88,6 +91,9 @@ type AgentDefaults struct { // makes the auto-injection idempotent against an explicit YAML // entry that already names the same builtin. RedactSecrets bool + // HandleLargeToolOutput auto-injects the handle_large_tool_output + // builtin under tool_response_transform when configured. 
+ HandleLargeToolOutput *latest.HandleLargeToolOutputConfig } // AutoInjector adds default hooks to an agent's hook configuration. @@ -142,6 +148,13 @@ func ApplyAgentDefaults(cfg *hooks.Config, d AgentDefaults) *hooks.Config { Hooks: []hooks.Hook{builtinHook(RedactSecrets)}, }) } + if d.HandleLargeToolOutput != nil && d.HandleLargeToolOutput.Enabled { + cfgBytes, _ := json.Marshal(d.HandleLargeToolOutput) + cfg.ToolResponseTransform = append(cfg.ToolResponseTransform, hooks.MatcherConfig{ + Matcher: "*", + Hooks: []hooks.Hook{builtinHook(HandleLargeToolOutput, string(cfgBytes))}, + }) + } if cfg.IsEmpty() { return nil } diff --git a/pkg/hooks/builtins/handle_large_tool_output.go b/pkg/hooks/builtins/handle_large_tool_output.go new file mode 100644 index 000000000..7a55e283e --- /dev/null +++ b/pkg/hooks/builtins/handle_large_tool_output.go @@ -0,0 +1,91 @@ +package builtins + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/docker/docker-agent/pkg/hooks" +) + +const HandleLargeToolOutput = "handle_large_tool_output" + +func handleLargeToolOutput(ctx context.Context, in *hooks.Input, args []string) (*hooks.Output, error) { + if in == nil { + return nil, nil + } + + if in.HookEventName != hooks.EventToolResponseTransform { + return nil, nil + } + + response, ok := in.ToolResponse.(string) + if !ok || response == "" { + return nil, nil + } + + cfg := parseArgs(args) + threshold := cfg.Threshold + if threshold == 0 { + threshold = 5000 + } + + if len(response) <= threshold { + return nil, nil + } + + outputDir := cfg.OutputDir + if outputDir == "" { + outputDir = os.TempDir() + } + + if err := os.MkdirAll(outputDir, 0o750); err != nil { + return nil, fmt.Errorf("create output directory: %w", err) + } + + filename := fmt.Sprintf("%s_%s.txt", in.SessionID, in.ToolUseID) + path := filepath.Join(outputDir, filename) + + if err := os.WriteFile(path, []byte(response), 0o640); err != nil { + return nil, fmt.Errorf("write output file: 
%w", err) + } + + previewSize := cfg.PreviewSize + if previewSize == 0 { + previewSize = 3000 + } + preview := response + if len(preview) > previewSize { + preview = response[:previewSize] + } + + pointer := fmt.Sprintf("[%s response: %d chars, full output saved to %s]\n\nFirst %d chars:\n%s\n\n[Use shell tool to read: cat %s]", + in.ToolName, len(response), path, previewSize, preview, path) + + return &hooks.Output{ + HookSpecificOutput: &hooks.HookSpecificOutput{ + HookEventName: hooks.EventToolResponseTransform, + UpdatedToolResponse: &pointer, + }, + }, nil +} + +type toolOutputConfig struct { + Threshold int + OutputDir string + PreviewSize int +} + +func parseArgs(args []string) toolOutputConfig { + if len(args) == 0 { + return toolOutputConfig{} + } + + var cfg toolOutputConfig + if err := json.Unmarshal([]byte(args[0]), &cfg); err != nil { + return toolOutputConfig{} + } + return cfg +} diff --git a/pkg/runtime/hooks.go b/pkg/runtime/hooks.go index 158cacc55..e69476601 100644 --- a/pkg/runtime/hooks.go +++ b/pkg/runtime/hooks.go @@ -35,10 +35,11 @@ func (r *LocalRuntime) buildHooksExecutors() { continue } cfg := builtins.ApplyAgentDefaults(a.Hooks(), builtins.AgentDefaults{ - AddDate: a.AddDate(), - AddEnvironmentInfo: a.AddEnvironmentInfo(), - AddPromptFiles: a.AddPromptFiles(), - RedactSecrets: a.RedactSecrets(), + AddDate: a.AddDate(), + AddEnvironmentInfo: a.AddEnvironmentInfo(), + AddPromptFiles: a.AddPromptFiles(), + RedactSecrets: a.RedactSecrets(), + HandleLargeToolOutput: a.HandleLargeToolOutput(), }) cfg = applyAutoInjectors(cfg, r.autoInjectors) cfg = applyCacheDefault(cfg, a) diff --git a/pkg/teamloader/teamloader.go b/pkg/teamloader/teamloader.go index 34191e3e2..dfa1027d6 100644 --- a/pkg/teamloader/teamloader.go +++ b/pkg/teamloader/teamloader.go @@ -165,6 +165,7 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c agent.WithAddEnvironmentInfo(agentConfig.AddEnvironmentInfo), 
agent.WithAddDescriptionParameter(agentConfig.AddDescriptionParameter), agent.WithRedactSecrets(agentConfig.RedactSecrets), + agent.WithHandleLargeToolOutput(agentConfig.HandleLargeToolOutput), agent.WithAddPromptFiles(promptFiles), agent.WithMaxIterations(agentConfig.MaxIterations), agent.WithMaxConsecutiveToolCalls(agentConfig.MaxConsecutiveToolCalls), From c78377ab6edbbc2721c0a0f45b5b5ef041b588b5 Mon Sep 17 00:00:00 2001 From: Md Yunus Date: Mon, 11 May 2026 23:44:04 +0530 Subject: [PATCH 2/5] feat: add tests and fix permissions for handle_large_tool_output - Add unit tests for handle_large_tool_output builtin covering: - Response under threshold passes through unchanged - Response over threshold saves to disk with pointer returned - Custom output_dir and preview_size configuration - Fallback to os.TempDir() when output_dir not set - No-op on non tool_response_transform events - No-op on nil/empty/non-string tool responses - Error propagation on WriteFile failure - ApplyAgentDefaults injection behavior (enabled/disabled/nil) - Builtin registration check - Fix file permissions: 0o640 -> 0o600 (tool output may contain sensitive data) --- .../builtins/handle_large_tool_output.go | 2 +- .../builtins/handle_large_tool_output_test.go | 251 ++++++++++++++++++ 2 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 pkg/hooks/builtins/handle_large_tool_output_test.go diff --git a/pkg/hooks/builtins/handle_large_tool_output.go b/pkg/hooks/builtins/handle_large_tool_output.go index 7a55e283e..2eb183661 100644 --- a/pkg/hooks/builtins/handle_large_tool_output.go +++ b/pkg/hooks/builtins/handle_large_tool_output.go @@ -48,7 +48,7 @@ func handleLargeToolOutput(ctx context.Context, in *hooks.Input, args []string) filename := fmt.Sprintf("%s_%s.txt", in.SessionID, in.ToolUseID) path := filepath.Join(outputDir, filename) - if err := os.WriteFile(path, []byte(response), 0o640); err != nil { + if err := os.WriteFile(path, []byte(response), 0o600); err != nil { return 
nil, fmt.Errorf("write output file: %w", err) } diff --git a/pkg/hooks/builtins/handle_large_tool_output_test.go b/pkg/hooks/builtins/handle_large_tool_output_test.go new file mode 100644 index 000000000..f01d622cb --- /dev/null +++ b/pkg/hooks/builtins/handle_large_tool_output_test.go @@ -0,0 +1,251 @@ +package builtins + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/config/latest" + "github.com/docker/docker-agent/pkg/hooks" +) + +func TestHandleLargeToolOutputPassThroughUnderThreshold(t *testing.T) { + t.Parallel() + + cfg := toolOutputConfig{Threshold: 5000} + args, _ := json.Marshal(cfg) + + out, err := handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolUseID: "use_123", + SessionID: "session_456", + ToolResponse: strings.Repeat("a", 1000), + }, []string{string(args)}) + require.NoError(t, err) + assert.Nil(t, out, "response under threshold must pass through unchanged") +} + +func TestHandleLargeToolOutputSavesToDiskOverThreshold(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + cfg := toolOutputConfig{Threshold: 100, OutputDir: tmpDir} + args, _ := json.Marshal(cfg) + + largeResponse := strings.Repeat("x", 500) + + out, err := handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolUseID: "use_789", + SessionID: "session_abc", + ToolResponse: largeResponse, + }, []string{string(args)}) + require.NoError(t, err) + require.NotNil(t, out, "response over threshold must produce output") + require.NotNil(t, out.HookSpecificOutput) + require.NotNil(t, out.HookSpecificOutput.UpdatedToolResponse) + + pointer := *out.HookSpecificOutput.UpdatedToolResponse + assert.Contains(t, pointer, "mcp_tool response: 500 chars") + assert.Contains(t, pointer, tmpDir) + 
assert.Contains(t, pointer, "session_abc_use_789.txt") + assert.Contains(t, pointer, "Use shell tool to read: cat") + + filePath := filepath.Join(tmpDir, "session_abc_use_789.txt") + data, err := os.ReadFile(filePath) + require.NoError(t, err) + assert.Equal(t, largeResponse, string(data)) +} + +func TestHandleLargeToolOutputNoArgsUsesDefaultThreshold(t *testing.T) { + t.Parallel() + + out, err := handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolUseID: "use_123", + SessionID: "session_456", + ToolResponse: strings.Repeat("x", 10000), + }, nil) + require.NoError(t, err) + assert.NotNil(t, out, "no args means default threshold (5000) is used — response 10000 > 5000, so it must be processed") +} + +func TestHandleLargeToolOutputFallsBackToTempDir(t *testing.T) { + t.Parallel() + + cfg := toolOutputConfig{Threshold: 50} + args, _ := json.Marshal(cfg) + + out, err := handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolUseID: "use_999", + SessionID: "session_xyz", + ToolResponse: strings.Repeat("y", 500), + }, []string{string(args)}) + require.NoError(t, err) + require.NotNil(t, out) + require.NotNil(t, out.HookSpecificOutput) + require.NotNil(t, out.HookSpecificOutput.UpdatedToolResponse) + + pointer := *out.HookSpecificOutput.UpdatedToolResponse + tmpDir := os.TempDir() + assert.Contains(t, pointer, tmpDir, "must fall back to os.TempDir() when output_dir not set") +} + +func TestHandleLargeToolOutputCustomPreviewSize(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + cfg := toolOutputConfig{Threshold: 50, OutputDir: tmpDir, PreviewSize: 100} + args, _ := json.Marshal(cfg) + + response := strings.Repeat("z", 500) + + out, err := handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolUseID: "use_preview", + SessionID: 
"session_preview", + ToolResponse: response, + }, []string{string(args)}) + require.NoError(t, err) + require.NotNil(t, out) + require.NotNil(t, out.HookSpecificOutput) + require.NotNil(t, out.HookSpecificOutput.UpdatedToolResponse) + + pointer := *out.HookSpecificOutput.UpdatedToolResponse + assert.Contains(t, pointer, "First 100 chars:") + + parts := strings.Split(pointer, "First 100 chars:\n") + require.Len(t, parts, 2, "pointer must have preview section") + previewContent := strings.Split(parts[1], "\n\n[Use shell")[0] + assert.Len(t, previewContent, 100, "preview content must be exactly 100 chars") +} + +func TestHandleLargeToolOutputNonToolResponseTransformEventIsNoOp(t *testing.T) { + t.Parallel() + + out, err := handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventPreToolUse, + ToolName: "shell", + ToolResponse: strings.Repeat("x", 10000), + }, nil) + require.NoError(t, err) + assert.Nil(t, out, "non tool_response_transform event must be no-op") +} + +func TestHandleLargeToolOutputNilInput(t *testing.T) { + t.Parallel() + + out, err := handleLargeToolOutput(t.Context(), nil, nil) + require.NoError(t, err) + assert.Nil(t, out) +} + +func TestHandleLargeToolOutputNonStringResponseIsNoOp(t *testing.T) { + t.Parallel() + + out, err := handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolResponse: map[string]any{"structured": "payload"}, + }, nil) + require.NoError(t, err) + assert.Nil(t, out, "non-string ToolResponse must be no-op") +} + +func TestHandleLargeToolOutputEmptyStringResponseIsNoOp(t *testing.T) { + t.Parallel() + + out, err := handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolResponse: "", + }, nil) + require.NoError(t, err) + assert.Nil(t, out, "empty string response must be no-op") +} + +func TestHandleLargeToolOutputWriteFailureIsPropagated(t *testing.T) { + 
t.Parallel() + + cfg := toolOutputConfig{Threshold: 10, OutputDir: "/nonexistent/path"} + args, _ := json.Marshal(cfg) + + _, err := handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolUseID: "use_err", + SessionID: "session_err", + ToolResponse: strings.Repeat("x", 500), + }, []string{string(args)}) + require.Error(t, err) + assert.Contains(t, err.Error(), "create output directory") +} + +func TestHandleLargeToolOutputIsRegistered(t *testing.T) { + t.Parallel() + + reg := hooks.NewRegistry() + require.NoError(t, Register(reg)) + + handler, ok := reg.LookupBuiltin(HandleLargeToolOutput) + require.Truef(t, ok, "builtin %q must be registered", HandleLargeToolOutput) + + cfg := toolOutputConfig{Threshold: 50} + args, _ := json.Marshal(cfg) + + out, err := handler(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolUseID: "use_reg", + SessionID: "session_reg", + ToolResponse: strings.Repeat("x", 500), + }, []string{string(args)}) + require.NoError(t, err) + require.NotNil(t, out) + require.NotNil(t, out.HookSpecificOutput) + require.NotNil(t, out.HookSpecificOutput.UpdatedToolResponse) + assert.Contains(t, *out.HookSpecificOutput.UpdatedToolResponse, "mcp_tool") +} + +func TestApplyAgentDefaultsInjectsHandleLargeToolOutput(t *testing.T) { + t.Parallel() + + cfg := ApplyAgentDefaults(nil, AgentDefaults{ + HandleLargeToolOutput: &latest.HandleLargeToolOutputConfig{Enabled: true}, + }) + require.NotNil(t, cfg) + require.Len(t, cfg.ToolResponseTransform, 1) + assert.Equal(t, "*", cfg.ToolResponseTransform[0].Matcher) + require.Len(t, cfg.ToolResponseTransform[0].Hooks, 1) + assert.Equal(t, hooks.HookTypeBuiltin, cfg.ToolResponseTransform[0].Hooks[0].Type) + assert.Equal(t, HandleLargeToolOutput, cfg.ToolResponseTransform[0].Hooks[0].Command) +} + +func TestApplyAgentDefaultsDoesNotInjectWhenDisabled(t *testing.T) { + t.Parallel() + + cfg := 
ApplyAgentDefaults(nil, AgentDefaults{ + HandleLargeToolOutput: &latest.HandleLargeToolOutputConfig{Enabled: false}, + }) + assert.Nil(t, cfg, "disabled config must not inject hooks (returns nil)") +} + +func TestApplyAgentDefaultsDoesNotInjectWhenNil(t *testing.T) { + t.Parallel() + + cfg := ApplyAgentDefaults(nil, AgentDefaults{ + HandleLargeToolOutput: nil, + }) + assert.Nil(t, cfg, "nil config must return nil (no hooks to inject)") +} From 1a44f61cb9da579ecc2528d18c2e860d4791d7b4 Mon Sep 17 00:00:00 2001 From: Md Yunus Date: Mon, 11 May 2026 23:45:34 +0530 Subject: [PATCH 3/5] chore: align default threshold with builtin tool limit (30k) Updates the default threshold from 5000 to 30000 to match the existing 30,000-char limit on builtin tools (shell, openapi, api). This ensures consistent behavior across all tool types when the feature is enabled. --- pkg/hooks/builtins/handle_large_tool_output.go | 2 +- pkg/hooks/builtins/handle_large_tool_output_test.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/hooks/builtins/handle_large_tool_output.go b/pkg/hooks/builtins/handle_large_tool_output.go index 2eb183661..8c4f1a799 100644 --- a/pkg/hooks/builtins/handle_large_tool_output.go +++ b/pkg/hooks/builtins/handle_large_tool_output.go @@ -29,7 +29,7 @@ func handleLargeToolOutput(ctx context.Context, in *hooks.Input, args []string) cfg := parseArgs(args) threshold := cfg.Threshold if threshold == 0 { - threshold = 5000 + threshold = 30000 } if len(response) <= threshold { diff --git a/pkg/hooks/builtins/handle_large_tool_output_test.go b/pkg/hooks/builtins/handle_large_tool_output_test.go index f01d622cb..31d88ed0a 100644 --- a/pkg/hooks/builtins/handle_large_tool_output_test.go +++ b/pkg/hooks/builtins/handle_large_tool_output_test.go @@ -17,7 +17,7 @@ import ( func TestHandleLargeToolOutputPassThroughUnderThreshold(t *testing.T) { t.Parallel() - cfg := toolOutputConfig{Threshold: 5000} + cfg := toolOutputConfig{Threshold: 10000} args, _ 
:= json.Marshal(cfg) out, err := handleLargeToolOutput(t.Context(), &hooks.Input{ @@ -72,10 +72,10 @@ func TestHandleLargeToolOutputNoArgsUsesDefaultThreshold(t *testing.T) { ToolName: "mcp_tool", ToolUseID: "use_123", SessionID: "session_456", - ToolResponse: strings.Repeat("x", 10000), + ToolResponse: strings.Repeat("x", 50000), }, nil) require.NoError(t, err) - assert.NotNil(t, out, "no args means default threshold (5000) is used — response 10000 > 5000, so it must be processed") + assert.NotNil(t, out, "no args means default threshold (30000) is used — response 50000 > 30000, so it must be processed") } func TestHandleLargeToolOutputFallsBackToTempDir(t *testing.T) { From 3872496b8a34ae6611d94b688016c46685ee4a5b Mon Sep 17 00:00:00 2001 From: Md Yunus Date: Tue, 12 May 2026 03:32:27 +0530 Subject: [PATCH 4/5] security: fix path traversal in handle_large_tool_output Sanitize SessionID and ToolUseID before constructing file paths to prevent path traversal attacks. An attacker controlling an MCP tool server could inject '../' sequences to write files outside the configured output directory. Fix: replace '..' with '__' and '/' with '_' in both values before path construction. Added tests for path traversal blocking and sanitizeFilename edge cases. 
Resolves: Path Traversal in Large Tool Output File Handling (High) --- .../builtins/handle_large_tool_output.go | 12 ++++- .../builtins/handle_large_tool_output_test.go | 52 +++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/pkg/hooks/builtins/handle_large_tool_output.go b/pkg/hooks/builtins/handle_large_tool_output.go index 8c4f1a799..e92269e60 100644 --- a/pkg/hooks/builtins/handle_large_tool_output.go +++ b/pkg/hooks/builtins/handle_large_tool_output.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "github.com/docker/docker-agent/pkg/hooks" ) @@ -45,7 +46,9 @@ func handleLargeToolOutput(ctx context.Context, in *hooks.Input, args []string) return nil, fmt.Errorf("create output directory: %w", err) } - filename := fmt.Sprintf("%s_%s.txt", in.SessionID, in.ToolUseID) + filename := fmt.Sprintf("%s_%s.txt", + sanitizeFilename(in.SessionID), + sanitizeFilename(in.ToolUseID)) path := filepath.Join(outputDir, filename) if err := os.WriteFile(path, []byte(response), 0o600); err != nil { @@ -89,3 +92,10 @@ func parseArgs(args []string) toolOutputConfig { } return cfg } + +func sanitizeFilename(name string) string { + name = strings.ReplaceAll(name, "..", "__") + name = strings.ReplaceAll(name, "/", "_") + name = strings.ReplaceAll(name, "\\", "_") + return name +} diff --git a/pkg/hooks/builtins/handle_large_tool_output_test.go b/pkg/hooks/builtins/handle_large_tool_output_test.go index 31d88ed0a..e491f5bfc 100644 --- a/pkg/hooks/builtins/handle_large_tool_output_test.go +++ b/pkg/hooks/builtins/handle_large_tool_output_test.go @@ -249,3 +249,55 @@ func TestApplyAgentDefaultsDoesNotInjectWhenNil(t *testing.T) { }) assert.Nil(t, cfg, "nil config must return nil (no hooks to inject)") } + +func TestHandleLargeToolOutputPathTraversalIsBlocked(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + cfg := toolOutputConfig{Threshold: 50, OutputDir: tmpDir} + args, _ := json.Marshal(cfg) + + out, err := 
handleLargeToolOutput(t.Context(), &hooks.Input{ + HookEventName: hooks.EventToolResponseTransform, + ToolName: "mcp_tool", + ToolUseID: "../../../etc/cron.d/malicious", + SessionID: "session_../../../tmp", + ToolResponse: strings.Repeat("z", 500), + }, []string{string(args)}) + require.NoError(t, err) + require.NotNil(t, out) + require.NotNil(t, out.HookSpecificOutput) + require.NotNil(t, out.HookSpecificOutput.UpdatedToolResponse) + + pointer := *out.HookSpecificOutput.UpdatedToolResponse + assert.Contains(t, pointer, tmpDir, "path must resolve to configured output directory") + assert.Contains(t, pointer, "__", ".. must be replaced to prevent traversal") + + parentDir, err := filepath.Abs("..") + require.NoError(t, err) + _ = parentDir + assert.NotContains(t, pointer, "/../", "no path traversal sequences allowed") +} + +func TestSanitizeFilename(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + expected string + }{ + {"simple", "session123", "session123"}, + {"with slash", "session/123", "session_123"}, + {"with backslash", "session\\123", "session_123"}, + {"path traversal", "../../../etc/passwd", "__etc_passwd"}, + {"mixed", "path/to/../../../etc", "path_to____etc"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := sanitizeFilename(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} From 82f8a4f38023c8921385686aa0751a8f2abdd8e3 Mon Sep 17 00:00:00 2001 From: Md Yunus Date: Tue, 12 May 2026 13:00:55 +0530 Subject: [PATCH 5/5] fix: update test expectations for sanitizeFilename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function replaces '..' first (becoming '__'), then '/' becomes '_', so '../../../etc/passwd' → '_________etc_passwd' (each of the three '..' pairs → '__', giving 6 underscores, then the three '/' separators after them → '_', giving 3 more: 9 in total). Aligning test expectations with actual function behavior. 
--- pkg/hooks/builtins/handle_large_tool_output_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/hooks/builtins/handle_large_tool_output_test.go b/pkg/hooks/builtins/handle_large_tool_output_test.go index e491f5bfc..195931559 100644 --- a/pkg/hooks/builtins/handle_large_tool_output_test.go +++ b/pkg/hooks/builtins/handle_large_tool_output_test.go @@ -290,8 +290,8 @@ func TestSanitizeFilename(t *testing.T) { {"simple", "session123", "session123"}, {"with slash", "session/123", "session_123"}, {"with backslash", "session\\123", "session_123"}, - {"path traversal", "../../../etc/passwd", "__etc_passwd"}, - {"mixed", "path/to/../../../etc", "path_to____etc"}, + {"path traversal", "../../../etc/passwd", "_________etc_passwd"}, + {"mixed", "path/to/../../../etc", "path_to__________etc"}, } for _, tt := range tests {