From 64201422810c4097aed78fad0400aee8587a518a Mon Sep 17 00:00:00 2001 From: Djordje Lukic Date: Wed, 20 May 2026 00:40:47 +0200 Subject: [PATCH] feat: add external coding harness agents Add support for harness-backed agents that delegate coding tasks to external coding CLIs through github.com/rumpl/harness. Harness agents can now be configured with a `harness` block in agent YAML and used as sub-agents by an orchestrator. The runtime detects harness-backed agents, builds a prompt from the current session, streams text/reasoning output back into docker-agent, records final assistant responses, and forwards usage when reported by the harness. Supported harness types: - claude-code - codex - pi - opencode Also update config validation and schema support so harness agents do not need docker-agent model providers, add example configurations, and handle external harness tool call lifecycle events including partial tool calls and tool results. Signed-off-by: Djordje Lukic --- agent-schema.json | 58 ++- examples/README.md | 2 + .../coding_harness_background_agents.yaml | 41 ++ examples/coding_harnesses.yaml | 20 + go.mod | 1 + go.sum | 2 + pkg/agent/agent.go | 13 + pkg/agent/opts.go | 11 + pkg/codingharness/provider.go | 71 +++ pkg/codingharness/provider_test.go | 32 ++ pkg/config/config_test.go | 43 ++ pkg/config/examples_test.go | 4 +- pkg/config/gather.go | 5 +- pkg/config/latest/types.go | 18 + pkg/config/latest/validate.go | 37 ++ pkg/config/overrides.go | 4 + pkg/config/schema_test.go | 1 + pkg/runtime/agent_delegation.go | 4 +- pkg/runtime/harness.go | 407 ++++++++++++++++++ pkg/runtime/harness_test.go | 190 ++++++++ pkg/runtime/loop.go | 5 + pkg/runtime/runtime.go | 11 +- pkg/teamloader/teamloader.go | 56 +-- pkg/teamloader/teamloader_test.go | 24 ++ 24 files changed, 1027 insertions(+), 33 deletions(-) create mode 100644 examples/coding_harness_background_agents.yaml create mode 100644 examples/coding_harnesses.yaml create mode 100644 
pkg/codingharness/provider.go create mode 100644 pkg/codingharness/provider_test.go create mode 100644 pkg/runtime/harness.go create mode 100644 pkg/runtime/harness_test.go diff --git a/agent-schema.json b/agent-schema.json index 9ed93b491..6c8c3a0ab 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://github.com/docker/docker-agent/blob/main/agent-schema.json", "title": "Docker Agent Configuration", - "description": "Configuration schema for Docker Agent v8", + "description": "Configuration schema for Docker Agent v9", "type": "object", "properties": { "version": { @@ -17,7 +17,8 @@ "5", "6", "7", - "8" + "8", + "9" ], "examples": [ "0", @@ -28,7 +29,8 @@ "5", "6", "7", - "8" + "8", + "9" ] }, "providers": { @@ -530,6 +532,10 @@ "type": "string", "description": "Instructions for the agent" }, + "harness": { + "$ref": "#/definitions/HarnessConfig", + "description": "External coding harness to run this agent with instead of a docker-agent model provider" + }, "code_mode_tools": { "type": "boolean", "description": "Enable Code Mode for tools" @@ -749,6 +755,52 @@ }, "additionalProperties": false }, + "HarnessConfig": { + "type": "object", + "description": "Configuration for running an agent through an external coding-agent CLI via github.com/rumpl/harness", + "properties": { + "type": { + "type": "string", + "description": "External coding harness provider to use", + "enum": [ + "claude-code", + "codex", + "pi", + "opencode" + ], + "examples": [ + "claude-code", + "codex" + ] + }, + "model": { + "type": "string", + "description": "Optional model name passed to the external harness. When omitted, the external CLI uses its own default model." 
+ }, + "effort": { + "type": "string", + "description": "Claude Code effort level forwarded as --effort", + "enum": [ + "low", + "medium", + "high", + "max" + ] + }, + "agent": { + "type": "string", + "description": "opencode agent name forwarded as --agent" + }, + "thinking": { + "type": "boolean", + "description": "Enable opencode thinking output" + } + }, + "required": [ + "type" + ], + "additionalProperties": false + }, "FallbackConfig": { "type": "object", "description": "Configuration for fallback model behavior when the primary model fails", diff --git a/examples/README.md b/examples/README.md index 57acbf170..e3a640b64 100644 --- a/examples/README.md +++ b/examples/README.md @@ -159,6 +159,8 @@ remote MCP endpoints. | [`writer.yaml`](writer.yaml) | Story writing supervisor with specialized sub-agents. | | [`finance.yaml`](finance.yaml) | Financial research orchestrating analysts. | | [`background_agents.yaml`](background_agents.yaml) | Parallel research delegated to background sub-agents. | +| [`coding_harnesses.yaml`](coding_harnesses.yaml) | Orchestrator delegating coding tasks to external harness-backed sub-agents. | +| [`coding_harness_background_agents.yaml`](coding_harness_background_agents.yaml) | Orchestrator running external coding harnesses concurrently via background agents. | | [`dev-team.yaml`](dev-team.yaml) | Product-manager-led team (designer + engineer) with shared memory. | | [`multi-code.yaml`](multi-code.yaml) | Tech-lead routing tasks to a frontend and a Go expert. | | [`coder.yaml`](coder.yaml) | Coding agent with planner, implementer, and librarian sub-agents. 
| diff --git a/examples/coding_harness_background_agents.yaml b/examples/coding_harness_background_agents.yaml new file mode 100644 index 000000000..10b602f78 --- /dev/null +++ b/examples/coding_harness_background_agents.yaml @@ -0,0 +1,41 @@ +agents: + root: + model: anthropic/claude-sonnet-4-5 + description: Orchestrator that runs external coding harnesses in parallel + instruction: | + You coordinate coding work across external coding harnesses. + + When the user asks for work that can be split into independent tasks: + 1. Start multiple specialists with `run_background_agent` before waiting + for any one task to finish. + 2. Use `list_background_agents` to monitor running work. + 3. Use `view_background_agent` to inspect each task's output. + 4. Synthesize the results into a concise final answer with changed files, + tests run, and any follow-up work. + + Use claude-coder for broad refactors and multi-file changes. Use + codex-coder for focused implementation, test generation, or isolated fixes. + sub_agents: + - claude-coder + - codex-coder + toolsets: + - type: think + - type: background_agents + + claude-coder: + description: Claude Code running as an external coding harness + instruction: | + You are a senior software engineer. Complete the assigned coding task in + the repository. Keep changes focused, run relevant tests when possible, + and summarize files changed, validation performed, and any blockers. + harness: + type: claude-code + + codex-coder: + description: Codex running as an external coding harness + instruction: | + You are a pragmatic software engineer. Complete the assigned coding task + in the repository. Prefer small, well-tested changes and report the exact + validation commands you ran. 
+ harness: + type: codex diff --git a/examples/coding_harnesses.yaml b/examples/coding_harnesses.yaml new file mode 100644 index 000000000..9ed6482ea --- /dev/null +++ b/examples/coding_harnesses.yaml @@ -0,0 +1,20 @@ +agents: + root: + model: anthropic/claude-sonnet-4-5 + description: Orchestrator + instruction: Route coding tasks to the appropriate specialist. + sub_agents: + - claude-coder + - codex-coder + + claude-coder: + description: Claude Code for complex refactors + instruction: You are a senior software engineer. + harness: + type: claude-code + + codex-coder: + description: Codex for code generation + instruction: You are a software engineer. + harness: + type: codex diff --git a/go.mod b/go.mod index 2e5512c0e..407c66bc1 100644 --- a/go.mod +++ b/go.mod @@ -53,6 +53,7 @@ require ( github.com/openai/openai-go/v3 v3.36.0 github.com/pb33f/libopenapi v0.36.4 github.com/rivo/uniseg v0.4.7 + github.com/rumpl/harness v0.0.0-20260519225334-1d956be4fff1 github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 github.com/spf13/cobra v1.10.2 github.com/stretchr/testify v1.11.1 diff --git a/go.sum b/go.sum index 6fd5d370d..a67665d05 100644 --- a/go.sum +++ b/go.sum @@ -454,6 +454,8 @@ github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/rumpl/harness v0.0.0-20260519225334-1d956be4fff1 h1:yyMEKlqFxa0ujeH4hrBD0rYj5TRD/h98zspUSbFaCl8= +github.com/rumpl/harness v0.0.0-20260519225334-1d956be4fff1/go.mod h1:D0KcsF5BBYJDBeIQYXMNZpGYFgGMeQ4uOKKX81SwUv0= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sebdah/goldie/v2 v2.8.0 h1:dZb9wR8q5++oplmEiJT+U/5KyotVD+HNGCAc5gNr8rc= github.com/sebdah/goldie/v2 
v2.8.0/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index d50a6c577..c98102e1d 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -42,6 +42,7 @@ type Agent struct { addPromptFiles []string tools []tools.Tool commands types.Commands + harness *latest.HarnessConfig hooks *latest.HooksConfig cache *cache.Cache @@ -160,6 +161,9 @@ func (a *Agent) Model(ctx context.Context) provider.Provider { selected = (*overrides)[rand.Intn(len(*overrides))] poolSize = len(*overrides) } else { + if len(a.models) == 0 { + return nil + } selected = a.models[rand.Intn(len(a.models))] poolSize = len(a.models) } @@ -268,6 +272,15 @@ func (a *Agent) Commands() types.Commands { return a.commands } +// Harness returns the external coding harness configuration for this agent. +func (a *Agent) Harness() *latest.HarnessConfig { + return a.harness +} + +func (a *Agent) HasHarness() bool { + return a.harness != nil +} + // Hooks returns the hooks configuration for this agent. 
func (a *Agent) Hooks() *latest.HooksConfig { return a.hooks diff --git a/pkg/agent/opts.go b/pkg/agent/opts.go index 9657d00d3..0b7e19232 100644 --- a/pkg/agent/opts.go +++ b/pkg/agent/opts.go @@ -170,6 +170,17 @@ func WithCommands(commands types.Commands) Opt { } } +func WithHarness(harness *latest.HarnessConfig) Opt { + return func(a *Agent) { + if harness == nil { + a.harness = nil + return + } + cfg := *harness + a.harness = &cfg + } +} + func WithLoadTimeWarnings(warnings []string) Opt { return func(a *Agent) { for _, w := range warnings { diff --git a/pkg/codingharness/provider.go b/pkg/codingharness/provider.go new file mode 100644 index 000000000..58f434df7 --- /dev/null +++ b/pkg/codingharness/provider.go @@ -0,0 +1,71 @@ +package codingharness + +import ( + "errors" + "fmt" + "strings" + + baseharness "github.com/rumpl/harness" + "github.com/rumpl/harness/claudecode" + "github.com/rumpl/harness/codex" + "github.com/rumpl/harness/opencode" + "github.com/rumpl/harness/pi" + + "github.com/docker/docker-agent/pkg/config/latest" +) + +const ( + TypeClaudeCode = "claude-code" + TypeCodex = "codex" + TypePi = "pi" + TypeOpenCode = "opencode" +) + +func NewProvider(cfg *latest.HarnessConfig) (baseharness.Provider, error) { + if cfg == nil { + return nil, errors.New("harness config is nil") + } + + switch cfg.Type { + case TypeClaudeCode: + return newClaudeCodeProvider(cfg), nil + case TypeCodex: + return codex.New(cfg.Model), nil + case TypePi: + return pi.New(cfg.Model), nil + case TypeOpenCode: + return newOpenCodeProvider(cfg), nil + default: + return nil, fmt.Errorf("unsupported harness type %q", cfg.Type) + } +} + +func Label(cfg *latest.HarnessConfig) string { + if cfg == nil { + return "" + } + model := strings.TrimSpace(cfg.Model) + if model == "" { + return cfg.Type + } + return cfg.Type + "/" + model +} + +func newClaudeCodeProvider(cfg *latest.HarnessConfig) baseharness.Provider { + var opts []claudecode.Option + if cfg.Effort != "" { + opts = 
append(opts, claudecode.WithEffort(claudecode.Effort(cfg.Effort))) + } + return claudecode.New(cfg.Model, opts...) +} + +func newOpenCodeProvider(cfg *latest.HarnessConfig) baseharness.Provider { + var opts []opencode.Option + if cfg.Agent != "" { + opts = append(opts, opencode.WithAgent(cfg.Agent)) + } + if cfg.Thinking { + opts = append(opts, opencode.WithThinking()) + } + return opencode.New(cfg.Model, opts...) +} diff --git a/pkg/codingharness/provider_test.go b/pkg/codingharness/provider_test.go new file mode 100644 index 000000000..33a3cca69 --- /dev/null +++ b/pkg/codingharness/provider_test.go @@ -0,0 +1,32 @@ +package codingharness + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/config/latest" +) + +func TestNewProviderOmitsModelFlagWhenModelEmpty(t *testing.T) { + p, err := NewProvider(&latest.HarnessConfig{Type: TypeCodex}) + require.NoError(t, err) + + cmd := p.PrintCommand("do it") + require.Contains(t, cmd, "codex exec --json") + require.NotContains(t, cmd, " -m ") +} + +func TestNewProviderUsesConfiguredModel(t *testing.T) { + p, err := NewProvider(&latest.HarnessConfig{Type: TypeClaudeCode, Model: "claude-sonnet-4-5", Effort: "high"}) + require.NoError(t, err) + + cmd := p.PrintCommand("do it") + require.Contains(t, cmd, "--model 'claude-sonnet-4-5'") + require.Contains(t, cmd, "--effort high") +} + +func TestLabel(t *testing.T) { + require.Equal(t, "codex", Label(&latest.HarnessConfig{Type: TypeCodex})) + require.Equal(t, "codex/gpt-5", Label(&latest.HarnessConfig{Type: TypeCodex, Model: "gpt-5"})) +} diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index d80d80490..87606d48a 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -466,6 +466,49 @@ func TestApplyModelOverrides(t *testing.T) { } } +func TestValidateConfig_HarnessAgentsSkipModelValidation(t *testing.T) { + t.Parallel() + + cfg := &latest.Config{ + Agents: []latest.AgentConfig{ + {Name: 
"root", Model: "openai/gpt-4o", SubAgents: []string{"coder"}}, + {Name: "coder", Harness: &latest.HarnessConfig{Type: "codex"}}, + }, + } + + require.NoError(t, validateConfig(cfg)) + _, exists := cfg.Models[""] + assert.False(t, exists) +} + +func TestValidateConfig_HarnessValidation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + harness *latest.HarnessConfig + wantErr string + }{ + {name: "valid claude code", harness: &latest.HarnessConfig{Type: "claude-code", Effort: "high"}}, + {name: "missing type", harness: &latest.HarnessConfig{}, wantErr: "harness.type is required"}, + {name: "bad type", harness: &latest.HarnessConfig{Type: "vim"}, wantErr: "unsupported harness.type"}, + {name: "bad effort", harness: &latest.HarnessConfig{Type: "claude-code", Effort: "ultra"}, wantErr: "harness.effort must be one of"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + cfg := &latest.Config{Agents: []latest.AgentConfig{{Name: "root", Harness: tt.harness}}} + err := cfg.Validate() + if tt.wantErr != "" { + require.ErrorContains(t, err, tt.wantErr) + } else { + require.NoError(t, err) + } + }) + } +} + func TestValidateConfig_ExternalSubAgentReferences(t *testing.T) { t.Parallel() diff --git a/pkg/config/examples_test.go b/pkg/config/examples_test.go index a31798a30..a297f65a5 100644 --- a/pkg/config/examples_test.go +++ b/pkg/config/examples_test.go @@ -53,7 +53,9 @@ func TestParseExamples(t *testing.T) { require.NotEmpty(t, cfg.Agents.First().Description, "Description should not be empty in %s", file) for _, agent := range cfg.Agents { - require.NotEmpty(t, agent.Model) + if agent.Harness == nil { + require.NotEmpty(t, agent.Model) + } require.NotEmpty(t, agent.Instruction, "Instruction should not be empty in %s", file) } diff --git a/pkg/config/gather.go b/pkg/config/gather.go index dcca91db6..a10a43a14 100644 --- a/pkg/config/gather.go +++ b/pkg/config/gather.go @@ -53,8 +53,11 @@ func gatherMissingEnvVars(ctx 
context.Context, cfg *latest.Config, modelsGateway func GatherEnvVarsForModels(cfg *latest.Config) []string { requiredEnv := map[string]bool{} - // Inspect only the models that are actually used by agents + // Inspect only the models that are actually used by docker-agent model-backed agents. for _, agent := range cfg.Agents { + if agent.Harness != nil { + continue + } modelNames := strings.SplitSeq(agent.Model, ",") for modelName := range modelNames { modelName = strings.TrimSpace(modelName) diff --git a/pkg/config/latest/types.go b/pkg/config/latest/types.go index 0eb22cb4e..1dc1a2737 100644 --- a/pkg/config/latest/types.go +++ b/pkg/config/latest/types.go @@ -368,6 +368,23 @@ func (d Duration) MarshalJSON() ([]byte, error) { return json.Marshal(d.String()) } +// HarnessConfig configures an agent that delegates execution to an external +// coding-agent CLI through github.com/rumpl/harness instead of using a +// docker-agent model provider. +type HarnessConfig struct { + // Type identifies the external harness provider: claude-code, codex, pi, or opencode. + Type string `json:"type,omitempty"` + // Model is passed to harnesses that accept a model flag. When omitted, + // docker-agent lets the external CLI use its own default model. + Model string `json:"model,omitempty"` + // Effort is forwarded to Claude Code's --effort flag. + Effort string `json:"effort,omitempty"` + // Agent is forwarded to opencode's --agent flag. + Agent string `json:"agent,omitempty"` + // Thinking enables opencode's --thinking flag. 
+ Thinking bool `json:"thinking,omitempty"` +} + // AgentConfig represents a single agent configuration type AgentConfig struct { Name string @@ -377,6 +394,7 @@ type AgentConfig struct { WelcomeMessage string `json:"welcome_message,omitempty"` Toolsets []Toolset `json:"toolsets,omitempty"` Instruction string `json:"instruction,omitempty"` + Harness *HarnessConfig `json:"harness,omitempty"` SubAgents []string `json:"sub_agents,omitempty"` Handoffs []string `json:"handoffs,omitempty"` diff --git a/pkg/config/latest/validate.go b/pkg/config/latest/validate.go index 744d15d86..71db737b1 100644 --- a/pkg/config/latest/validate.go +++ b/pkg/config/latest/validate.go @@ -37,6 +37,9 @@ func (t *Config) Validate() error { if err := agent.validateFallback(); err != nil { return err } + if err := agent.validateHarness(); err != nil { + return err + } for j := range agent.Toolsets { if err := agent.Toolsets[j].validate(); err != nil { @@ -70,6 +73,40 @@ func (a *AgentConfig) validateFallback() error { return nil } +func (a *AgentConfig) validateHarness() error { + if a.Harness == nil { + return nil + } + + h := a.Harness + switch h.Type { + case "claude-code", "codex", "pi", "opencode": + case "": + return errors.New("harness.type is required") + default: + return fmt.Errorf("unsupported harness.type %q (must be one of: claude-code, codex, pi, opencode)", h.Type) + } + + if h.Effort != "" { + if h.Type != "claude-code" { + return errors.New("harness.effort can only be used with harness.type 'claude-code'") + } + switch h.Effort { + case "low", "medium", "high", "max": + default: + return errors.New("harness.effort must be one of: low, medium, high, max") + } + } + if h.Agent != "" && h.Type != "opencode" { + return errors.New("harness.agent can only be used with harness.type 'opencode'") + } + if h.Thinking && h.Type != "opencode" { + return errors.New("harness.thinking can only be used with harness.type 'opencode'") + } + + return nil +} + func (t *Toolset) validate() error 
{ // Attributes used on the wrong toolset type. if len(t.Shell) > 0 && t.Type != "script" { diff --git a/pkg/config/overrides.go b/pkg/config/overrides.go index 842027ccd..735e124d8 100644 --- a/pkg/config/overrides.go +++ b/pkg/config/overrides.go @@ -84,6 +84,10 @@ func ensureModelsExist(cfg *latest.Config) error { // Expand alloy model compositions in agent model references and ensure resulting // referenced models exist. for _, agent := range cfg.Agents { + if agent.Harness != nil { + continue + } + expandedModel, err := expandAlloyModelRef(cfg, agent.Model) if err != nil { return fmt.Errorf("agent '%s': %w", agent.Name, err) diff --git a/pkg/config/schema_test.go b/pkg/config/schema_test.go index 44f8e95b1..2c3d6c981 100644 --- a/pkg/config/schema_test.go +++ b/pkg/config/schema_test.go @@ -87,6 +87,7 @@ func TestSchemaMatchesGoTypes(t *testing.T) { definitionMap := map[string]reflect.Type{ "AgentConfig": reflect.TypeFor[latest.AgentConfig](), "FallbackConfig": reflect.TypeFor[latest.FallbackConfig](), + "HarnessConfig": reflect.TypeFor[latest.HarnessConfig](), "ModelConfig": reflect.TypeFor[latest.ModelConfig](), "Metadata": reflect.TypeFor[latest.Metadata](), "ProviderConfig": reflect.TypeFor[latest.ProviderConfig](), diff --git a/pkg/runtime/agent_delegation.go b/pkg/runtime/agent_delegation.go index 5d0d04f17..7304bffd5 100644 --- a/pkg/runtime/agent_delegation.go +++ b/pkg/runtime/agent_delegation.go @@ -218,12 +218,12 @@ func (r *LocalRuntime) swapCurrentAgent(ctx context.Context, sessionID string, f evts.Emit(AgentSwitching(true, from.Name(), to.Name())) r.executeOnAgentSwitchHooks(ctx, from, sessionID, from.Name(), to.Name(), agentSwitchKindTransferTask) r.setCurrentAgent(to.Name()) - evts.Emit(AgentInfo(to.Name(), getAgentModelID(to).String(), to.Description(), to.WelcomeMessage())) + evts.Emit(AgentInfo(to.Name(), agentModelLabel(to), to.Description(), to.WelcomeMessage())) return func() { r.setCurrentAgent(from.Name()) 
evts.Emit(AgentSwitching(false, to.Name(), from.Name())) r.executeOnAgentSwitchHooks(ctx, from, sessionID, to.Name(), from.Name(), agentSwitchKindTransferTaskReturn) - evts.Emit(AgentInfo(from.Name(), getAgentModelID(from).String(), from.Description(), from.WelcomeMessage())) + evts.Emit(AgentInfo(from.Name(), agentModelLabel(from), from.Description(), from.WelcomeMessage())) } } diff --git a/pkg/runtime/harness.go b/pkg/runtime/harness.go new file mode 100644 index 000000000..431afc3fa --- /dev/null +++ b/pkg/runtime/harness.go @@ -0,0 +1,407 @@ +package runtime + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "strings" + "time" + + baseharness "github.com/rumpl/harness" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" + + "github.com/docker/docker-agent/pkg/agent" + "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/codingharness" + "github.com/docker/docker-agent/pkg/session" + "github.com/docker/docker-agent/pkg/tools" +) + +func (r *LocalRuntime) runHarnessAgent(ctx context.Context, sess *session.Session, a *agent.Agent, baseExtra []chat.Message, events EventSink) string { + ctx, span := r.startSpan(ctx, "runtime.harness", trace.WithAttributes(traceAttributesForHarness(sess, a)...)) + defer span.End() + + provider, err := codingharness.NewProvider(a.Harness()) + if err != nil { + msg := fmt.Sprintf("failed to configure harness: %v", err) + events.Emit(ErrorWithCode(ErrorCodeModelError, msg)) + r.notifyError(ctx, a, sess.ID, msg) + span.RecordError(err) + span.SetStatus(codes.Error, "harness configuration error") + return turnEndReasonError + } + + modelID := agentModelLabel(a) + events.Emit(AgentInfo(a.Name(), modelID, a.Description(), a.WelcomeMessage())) + + endReason := turnEndReasonNormal + defer func() { + if ctx.Err() != nil && endReason == turnEndReasonNormal { + endReason = turnEndReasonCanceled + } + 
r.executeTurnEndHooks(context.WithoutCancel(ctx), sess, a, endReason, events) + }() + + turnStartMsgs := r.executeTurnStartHooks(ctx, sess, a, events) + messages := sess.GetMessages(a, append(baseExtra, turnStartMsgs...)...) + stop, msg, rewritten := r.executeBeforeLLMCallHooks(ctx, sess, a, modelID, 1, messages) + if stop { + slog.WarnContext(ctx, "before_llm_call hook signalled run termination", + "agent", a.Name(), "session_id", sess.ID, "reason", msg) + r.emitHookDrivenShutdown(ctx, a, sess, msg, events) + endReason = turnEndReasonHookBlocked + return endReason + } + if rewritten != nil { + messages = rewritten + } + messages = r.applyBeforeLLMCallTransforms(ctx, sess, a, modelID, messages) + + prompt := harnessPromptFromMessages(messages) + var streamed strings.Builder + var finalResult string + var usage *chat.Usage + var cost float64 + toolCallSeq := 0 + pendingToolCalls := make(map[string]harnessToolCall) + startToolCall := func(ev baseharness.Event) harnessToolCall { + toolCallSeq++ + pending := newHarnessToolCall(toolCallSeq, ev, "") + pendingToolCalls[pending.key] = pending + events.Emit(PartialToolCall(pending.call, pending.definition, a.Name())) + return pending + } + emitToolCallDelta := func(ev baseharness.Event) { + if ev.ToolArgs == "" { + return + } + pending, ok := pendingToolCallForEvent(pendingToolCalls, ev) + if !ok { + if ev.ToolName == "" { + return + } + pending = startToolCall(ev) + } + events.Emit(PartialToolCall(tools.ToolCall{ + ID: pending.call.ID, + Type: pending.call.Type, + Function: tools.FunctionCall{ + Name: pending.call.Function.Name, + Arguments: ev.ToolArgs, + }, + }, tools.Tool{}, a.Name())) + } + completeToolCall := func(ev baseharness.Event) { + pending, ok := pendingToolCallForEvent(pendingToolCalls, ev) + if !ok { + return + } + result := harnessToolResult(ev) + events.Emit(ToolCallResponse(pending.call.ID, pending.definition, result, result.Output, a.Name())) + delete(pendingToolCalls, pending.key) + } + 
completeRemainingToolCalls := func(result *tools.ToolCallResult) { + if result == nil { + return + } + for key, pending := range pendingToolCalls { + events.Emit(ToolCallResponse(pending.call.ID, pending.definition, result, result.Output, a.Name())) + delete(pendingToolCalls, key) + } + } + + err = baseharness.Run(ctx, provider, prompt, func(ev baseharness.Event) { + switch ev.Type { + case baseharness.EventText: + if ev.Text == "" { + return + } + if isHarnessReplayText(streamed.String(), ev.Text) { + return + } + streamed.WriteString(ev.Text) + events.Emit(AgentChoice(a.Name(), sess.ID, ev.Text)) + case baseharness.EventReasoning: + if ev.Reasoning != "" { + events.Emit(AgentChoiceReasoning(a.Name(), sess.ID, ev.Reasoning)) + } + case baseharness.EventToolCallStart: + startToolCall(ev) + case baseharness.EventToolCallDelta: + emitToolCallDelta(ev) + case baseharness.EventToolCall: + if shouldSkipHarnessToolCall(ev) { + return + } + if pending, ok := pendingToolCallForEvent(pendingToolCalls, ev); ok { + if arguments := harnessToolCallArguments(ev); arguments != "" { + pending.call.Function.Arguments = arguments + pendingToolCalls[pending.key] = pending + } + events.Emit(ToolCall(pending.call, pending.definition, a.Name())) + return + } + toolCallSeq++ + pending := newHarnessToolCall(toolCallSeq, ev, harnessToolCallArguments(ev)) + pendingToolCalls[pending.key] = pending + events.Emit(ToolCall(pending.call, pending.definition, a.Name())) + case baseharness.EventToolResult: + completeToolCall(ev) + case baseharness.EventResult: + if ev.Result != "" { + finalResult = ev.Result + } + if ev.Usage != nil { + usage = harnessUsage(ev.Usage) + cost = ev.Usage.TotalCostUSD + } + } + }) + if err != nil { + if ctx.Err() != nil { + completeRemainingToolCalls(tools.ResultError("External harness was canceled.")) + span.RecordError(ctx.Err()) + span.SetStatus(codes.Error, "harness canceled") + endReason = turnEndReasonCanceled + return endReason + } + msg := fmt.Sprintf("harness 
%s failed: %v", provider.Name(), err) + completeRemainingToolCalls(tools.ResultError(msg)) + events.Emit(ErrorWithCode(ErrorCodeModelError, msg)) + r.notifyError(ctx, a, sess.ID, msg) + span.RecordError(err) + span.SetStatus(codes.Error, "harness run error") + endReason = turnEndReasonError + return endReason + } + + completeRemainingToolCalls(harnessToolCompletedResult()) + + content := strings.TrimSpace(streamed.String()) + if content == "" && strings.TrimSpace(finalResult) != "" { + content = strings.TrimSpace(finalResult) + events.Emit(AgentChoice(a.Name(), sess.ID, content)) + } + if content == "" { + content = strings.TrimSpace(finalResult) + } + + r.executeAfterLLMCallHooks(ctx, sess, a, content) + r.recordHarnessAssistantMessage(sess, a, content, modelID, usage, cost, events) + r.executeStopHooks(ctx, sess, a, content, events) + + span.SetAttributes(attribute.Int("content.length", len(content))) + span.SetStatus(codes.Ok, "harness completed") + return endReason +} + +func agentModelLabel(a *agent.Agent) string { + if a == nil { + return "" + } + if a.HasHarness() { + return codingharness.Label(a.Harness()) + } + return getAgentModelID(a).String() +} + +func traceAttributesForHarness(sess *session.Session, a *agent.Agent) []attribute.KeyValue { + return []attribute.KeyValue{ + attribute.String("agent", a.Name()), + attribute.String("session.id", sess.ID), + attribute.String("harness.type", a.Harness().Type), + } +} + +type harnessToolCall struct { + key string + call tools.ToolCall + definition tools.Tool +} + +func newHarnessToolCall(seq int, ev baseharness.Event, arguments string) harnessToolCall { + name := ev.ToolName + if name == "" { + name = "tool" + } + key := harnessToolEventID(ev) + callID := key + if callID == "" { + callID = fmt.Sprintf("harness-%d", seq) + key = callID + } else { + callID = "harness-" + callID + } + return harnessToolCall{ + key: key, + call: tools.ToolCall{ + ID: callID, + Type: "function", + Function: tools.FunctionCall{ + 
Name: name, + Arguments: arguments, + }, + }, + definition: tools.Tool{ + Name: name, + Category: "harness", + Description: "Tool call reported by an external coding harness", + }, + } +} + +func pendingToolCallForEvent(pending map[string]harnessToolCall, ev baseharness.Event) (harnessToolCall, bool) { + key := harnessToolEventID(ev) + if key != "" { + pending, ok := pending[key] + return pending, ok + } + if len(pending) != 1 { + return harnessToolCall{}, false + } + for _, pending := range pending { + return pending, true + } + return harnessToolCall{}, false +} + +func harnessToolResult(ev baseharness.Event) *tools.ToolCallResult { + output := ev.ToolOutput + if output == "" { + output = "Completed by external harness." + } + if ev.ToolError { + return tools.ResultError(output) + } + return tools.ResultSuccess(output) +} + +func harnessToolCompletedResult() *tools.ToolCallResult { + return tools.ResultSuccess("Completed by external harness.") +} + +func harnessToolCallArguments(ev baseharness.Event) string { + args := strings.TrimSpace(ev.ToolArgs) + if args == "" { + return "" + } + var obj map[string]any + if json.Unmarshal([]byte(args), &obj) == nil { + return args + } + wrapped, _ := json.Marshal(map[string]string{"input": ev.ToolArgs}) + return string(wrapped) +} + +func shouldSkipHarnessToolCall(ev baseharness.Event) bool { + return strings.TrimSpace(ev.ToolName) != "" && strings.TrimSpace(ev.ToolArgs) == "" && harnessToolEventID(ev) == "" +} + +func isHarnessReplayText(existing, next string) bool { + if existing == "" || next == "" { + return false + } + existing = normalizeHarnessText(existing) + next = normalizeHarnessText(next) + return next == existing +} + +func normalizeHarnessText(s string) string { + return strings.TrimSpace(strings.ReplaceAll(s, "\r\n", "\n")) +} + +func harnessToolEventID(ev baseharness.Event) string { + return ev.ToolID +} + +func harnessUsage(u *baseharness.Usage) *chat.Usage { + if u == nil { + return nil + } + return 
&chat.Usage{ + InputTokens: int64(u.InputTokens), + OutputTokens: int64(u.OutputTokens), + CachedInputTokens: int64(u.CacheReadInputTokens), + CacheWriteTokens: int64(u.CacheCreationInputTokens), + } +} + +func (r *LocalRuntime) recordHarnessAssistantMessage(sess *session.Session, a *agent.Agent, content, modelID string, usage *chat.Usage, cost float64, events EventSink) { + if strings.TrimSpace(content) == "" && usage == nil { + return + } + + msg := chat.Message{ + Role: chat.MessageRoleAssistant, + Content: content, + CreatedAt: r.now().Format(time.RFC3339), + Usage: usage, + Model: modelID, + Cost: cost, + FinishReason: chat.FinishReasonStop, + } + addAgentMessage(sess, a, &msg, events) + + if usage == nil { + return + } + input := usage.InputTokens + usage.CachedInputTokens + usage.CacheWriteTokens + sess.SetUsage(input, usage.OutputTokens) + msgUsage := &MessageUsage{ + Usage: *usage, + Cost: cost, + Model: modelID, + FinishReason: chat.FinishReasonStop, + } + usageEvent := SessionUsage(sess, 0) + usageEvent.LastMessage = msgUsage + events.Emit(NewTokenUsageEvent(sess.ID, a.Name(), usageEvent)) +} + +func harnessPromptFromMessages(messages []chat.Message) string { + var b strings.Builder + for _, msg := range messages { + content := harnessMessageContent(msg) + if strings.TrimSpace(content) == "" { + continue + } + fmt.Fprintf(&b, "<%s>\n%s\n</%s>\n\n", msg.Role, content, msg.Role) + } + return strings.TrimSpace(b.String()) +} + +func harnessMessageContent(msg chat.Message) string { + var parts []string + if msg.Content != "" { + parts = append(parts, msg.Content) + } + for _, part := range msg.MultiContent { + switch part.Type { + case chat.MessagePartTypeText: + if part.Text != "" { + parts = append(parts, part.Text) + } + case chat.MessagePartTypeFile: + if part.File != nil && part.File.Path != "" { + parts = append(parts, "Attached file: "+part.File.Path) + } + case chat.MessagePartTypeImageURL: + if part.ImageURL != nil && part.ImageURL.URL != "" { + parts = 
append(parts, "Attached image: "+part.ImageURL.URL) + } + case chat.MessagePartTypeDocument: + if part.Document == nil { + continue + } + if part.Document.Source.InlineText != "" { + parts = append(parts, fmt.Sprintf("Attached document %s:\n%s", part.Document.Name, part.Document.Source.InlineText)) + } else { + parts = append(parts, fmt.Sprintf("Attached document: %s (%s)", part.Document.Name, part.Document.MimeType)) + } + } + } + return strings.Join(parts, "\n\n") +} diff --git a/pkg/runtime/harness_test.go b/pkg/runtime/harness_test.go new file mode 100644 index 000000000..1792156de --- /dev/null +++ b/pkg/runtime/harness_test.go @@ -0,0 +1,190 @@ +package runtime + +import ( + "os" + "path/filepath" + stdruntime "runtime" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/agent" + "github.com/docker/docker-agent/pkg/config/latest" + "github.com/docker/docker-agent/pkg/session" + "github.com/docker/docker-agent/pkg/team" +) + +func TestHarnessAgentRunStream(t *testing.T) { + if stdruntime.GOOS == "windows" { + t.Skip("shell script shim test") + } + + binDir := t.TempDir() + writeHarnessScript(t, binDir, "codex", `#!/bin/sh +printf '%s\n' '{"type":"item.completed","item":{"type":"agent_message","text":"harness done"}}' +`) + t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + rt := newHarnessRuntime(t, "codex") + sess := session.New(session.WithUserMessage("do the task")) + events := collectRuntimeEvents(t, rt, sess) + + assert.True(t, hasEventType(t, events, &AgentChoiceEvent{})) + assert.Equal(t, "harness done", sess.GetLastAssistantMessageContent()) + + var sawHarnessModel bool + for _, ev := range events { + if info, ok := ev.(*AgentInfoEvent); ok && info.Model == "codex" { + sawHarnessModel = true + } + } + assert.True(t, sawHarnessModel, "expected AgentInfo event with codex harness label") +} + +func TestHarnessToolCallCompletes(t 
*testing.T) { + if stdruntime.GOOS == "windows" { + t.Skip("shell script shim test") + } + + binDir := t.TempDir() + writeHarnessScript(t, binDir, "codex", `#!/bin/sh +printf '%s\n' '{"type":"item.started","item":{"type":"command_execution","command":"npm test"}}' +printf '%s\n' '{"type":"item.completed","item":{"type":"agent_message","text":"tests passed"}}' +`) + t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + rt := newHarnessRuntime(t, "codex") + events := collectRuntimeEvents(t, rt, session.New(session.WithUserMessage("run tests"))) + + var toolCall *ToolCallEvent + var toolResponse *ToolCallResponseEvent + for _, ev := range events { + switch ev := ev.(type) { + case *ToolCallEvent: + toolCall = ev + case *ToolCallResponseEvent: + toolResponse = ev + } + } + require.NotNil(t, toolCall) + require.NotNil(t, toolResponse) + assert.Equal(t, toolCall.ToolCall.ID, toolResponse.ToolCallID) + require.NotNil(t, toolResponse.Result) + assert.False(t, toolResponse.Result.IsError) +} + +func TestHarnessShowsClaudeCodeToolCallAlongsideText(t *testing.T) { + if stdruntime.GOOS == "windows" { + t.Skip("shell script shim test") + } + + binDir := t.TempDir() + writeHarnessScript(t, binDir, "claude", `#!/bin/sh +printf '%s\n' '{"type":"assistant","message":{"content":[{"type":"text","text":"I will create the file."},{"type":"tool_use","id":"toolu_write","name":"Write","input":{"file_path":"/tmp/poem.md","content":"roses"}}]}}' +printf '%s\n' '{"type":"result","result":"created"}' +`) + t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + rt := newHarnessRuntime(t, "claude-code") + events := collectRuntimeEvents(t, rt, session.New(session.WithUserMessage("write poem"))) + + var sawText bool + var toolCall *ToolCallEvent + for _, ev := range events { + switch ev := ev.(type) { + case *AgentChoiceEvent: + if strings.Contains(ev.Content, "I will create the file") { + sawText = true + } + case *ToolCallEvent: + toolCall = ev + } + } 
+ assert.True(t, sawText) + require.NotNil(t, toolCall) + assert.Equal(t, "Write", toolCall.ToolCall.Function.Name) + assert.Contains(t, toolCall.ToolCall.Function.Arguments, "/tmp/poem.md") +} + +func TestHarnessSuppressesDuplicateClaudeCodeToolCall(t *testing.T) { + if stdruntime.GOOS == "windows" { + t.Skip("shell script shim test") + } + + binDir := t.TempDir() + writeHarnessScript(t, binDir, "claude", `#!/bin/sh +printf '%s\n' '{"type":"stream_event","event":{"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_1","name":"Bash"}}}' +printf '%s\n' '{"type":"stream_event","event":{"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"command\":\"uname -a\"}"}}}' +printf '%s\n' '{"type":"stream_event","event":{"type":"content_block_stop","index":1}}' +printf '%s\n' '{"type":"assistant","message":{"content":[{"type":"tool_use","id":"toolu_1","name":"Bash","input":{"command":"uname -a"}}]}}' +printf '%s\n' '{"type":"result","result":"done"}' +`) + t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + rt := newHarnessRuntime(t, "claude-code") + events := collectRuntimeEvents(t, rt, session.New(session.WithUserMessage("run uname"))) + + var toolCalls []ToolCallEvent + var partialArgs strings.Builder + for _, ev := range events { + switch ev := ev.(type) { + case *ToolCallEvent: + toolCalls = append(toolCalls, *ev) + case *PartialToolCallEvent: + partialArgs.WriteString(ev.ToolCall.Function.Arguments) + } + } + require.Len(t, toolCalls, 1) + assert.Equal(t, "Bash", toolCalls[0].ToolCall.Function.Name) + assert.Contains(t, partialArgs.String(), "uname -a") +} + +func TestHarnessSuppressesReplayedClaudeCodeFinalText(t *testing.T) { + if stdruntime.GOOS == "windows" { + t.Skip("shell script shim test") + } + + binDir := t.TempDir() + writeHarnessScript(t, binDir, "claude", `#!/bin/sh +printf '%s\n' 
'{"type":"stream_event","event":{"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello"}}}' +printf '%s\n' '{"type":"stream_event","event":{"type":"content_block_delta","delta":{"type":"text_delta","text":" world"}}}' +printf '%s\n' '{"type":"assistant","message":{"content":[{"type":"text","text":"Hello world"}]}}' +printf '%s\n' '{"type":"result","result":"Hello world"}' +`) + t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + rt := newHarnessRuntime(t, "claude-code") + events := collectRuntimeEvents(t, rt, session.New(session.WithUserMessage("say hello"))) + + var chunks []string + for _, ev := range events { + if choice, ok := ev.(*AgentChoiceEvent); ok { + chunks = append(chunks, choice.Content) + } + } + assert.Equal(t, []string{"Hello", " world"}, chunks) +} + +func writeHarnessScript(t *testing.T, dir, name, content string) { + t.Helper() + require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0o755)) +} + +func newHarnessRuntime(t *testing.T, harnessType string) *LocalRuntime { + t.Helper() + root := agent.New("root", "You are an external coder.", agent.WithHarness(&latest.HarnessConfig{Type: harnessType})) + rt, err := NewLocalRuntime(team.New(team.WithAgents(root)), WithSessionCompaction(false), WithModelStore(mockModelStore{})) + require.NoError(t, err) + return rt +} + +func collectRuntimeEvents(t *testing.T, rt *LocalRuntime, sess *session.Session) []Event { + t.Helper() + var events []Event + for ev := range rt.RunStream(t.Context(), sess) { + events = append(events, ev) + } + return events +} diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go index e2cafbb94..aaa1ba6cd 100644 --- a/pkg/runtime/loop.go +++ b/pkg/runtime/loop.go @@ -264,6 +264,11 @@ func (r *LocalRuntime) runStreamLoop(ctx context.Context, sess *session.Session, r.finalizeEventChannel(ctx, sess, streamReason, prevElicitationCh, events) }() + if a.HasHarness() { + streamReason = r.runHarnessAgent(ctx, sess, a, 
slices.Concat(ls.sessionStartMsgs, ls.userPromptMsgs), sink) + return + } + // Response cache lookup. On a hit, replay the stored answer and // skip the model entirely. The matching storage half is // implemented as the cache_response stop-hook builtin (see diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go index 534f776db..8c81081a7 100644 --- a/pkg/runtime/runtime.go +++ b/pkg/runtime/runtime.go @@ -550,7 +550,7 @@ func NewLocalRuntime(agents *team.Team, opts ...Opt) (*LocalRuntime, error) { return nil, err } - if defaultAgent.Model(context.TODO()) == nil { + if defaultAgent.Model(context.TODO()) == nil && !defaultAgent.HasHarness() { return nil, fmt.Errorf("agent %s has no valid model", defaultAgent.Name()) } @@ -855,6 +855,9 @@ func (r *LocalRuntime) TitleGenerator() *sessiontitle.Generator { // getAgentModelID returns the model ID for an agent. The zero ID is // returned when no model is configured. func getAgentModelID(a *agent.Agent) modelsdev.ID { + if a == nil { + return modelsdev.ID{} + } if model := a.Model(context.TODO()); model != nil { return model.ID() } @@ -1013,7 +1016,11 @@ func (r *LocalRuntime) EmitStartupInfo(ctx context.Context, sess *session.Sessio // Emit agent and team information immediately for fast sidebar display // Use getEffectiveModelID to account for active fallback cooldowns modelID := r.getEffectiveModelID(a) - if !send(AgentInfo(a.Name(), modelID.String(), a.Description(), a.WelcomeMessage())) { + modelLabel := modelID.String() + if a.HasHarness() { + modelLabel = agentModelLabel(a) + } + if !send(AgentInfo(a.Name(), modelLabel, a.Description(), a.WelcomeMessage())) { return } if !send(TeamInfo(r.agentDetailsFromTeam(), r.CurrentAgentName())) { diff --git a/pkg/teamloader/teamloader.go b/pkg/teamloader/teamloader.go index ba74d6329..a7fd35a03 100644 --- a/pkg/teamloader/teamloader.go +++ b/pkg/teamloader/teamloader.go @@ -182,33 +182,41 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c 
opts = append(opts, agent.WithCache(c)) } - models, err := getModelsForAgent(ctx, cfg, &agentConfig, autoModel, runConfig) - if err != nil { - // Return auto model fallback errors and DMR not installed errors directly - // without wrapping to provide cleaner messages - if _, ok := errors.AsType[*config.AutoModelFallbackError](err); ok || errors.Is(err, dmr.ErrNotInstalled) { - return nil, err + if agentConfig.Harness != nil { + harnessCfg := *agentConfig.Harness + if harnessCfg.Model == "" { + harnessCfg.Model = agentConfig.Model } - return nil, fmt.Errorf("failed to get models: %w", err) - } - for _, model := range models { - opts = append(opts, agent.WithModel(model)) - } - - // Load fallback models if configured - fallbackModelRefs := agentConfig.GetFallbackModels() - if len(fallbackModelRefs) > 0 { - fallbackModels, err := getFallbackModelsForAgent(ctx, cfg, &agentConfig, runConfig) + opts = append(opts, agent.WithHarness(&harnessCfg)) + } else { + models, err := getModelsForAgent(ctx, cfg, &agentConfig, autoModel, runConfig) if err != nil { - return nil, fmt.Errorf("failed to get fallback models: %w", err) + // Return auto model fallback errors and DMR not installed errors directly + // without wrapping to provide cleaner messages + if _, ok := errors.AsType[*config.AutoModelFallbackError](err); ok || errors.Is(err, dmr.ErrNotInstalled) { + return nil, err + } + return nil, fmt.Errorf("failed to get models: %w", err) + } + for _, model := range models { + opts = append(opts, agent.WithModel(model)) } - for _, model := range fallbackModels { - opts = append(opts, agent.WithFallbackModel(model)) + + // Load fallback models if configured + fallbackModelRefs := agentConfig.GetFallbackModels() + if len(fallbackModelRefs) > 0 { + fallbackModels, err := getFallbackModelsForAgent(ctx, cfg, &agentConfig, runConfig) + if err != nil { + return nil, fmt.Errorf("failed to get fallback models: %w", err) + } + for _, model := range fallbackModels { + opts = append(opts, 
agent.WithFallbackModel(model)) + } + opts = append(opts, + agent.WithFallbackRetries(agentConfig.GetFallbackRetries()), + agent.WithFallbackCooldown(agentConfig.GetFallbackCooldown()), + ) } - opts = append(opts, - agent.WithFallbackRetries(agentConfig.GetFallbackRetries()), - agent.WithFallbackCooldown(agentConfig.GetFallbackCooldown()), - ) } agentTools, warnings := getToolsForAgent(ctx, &agentConfig, parentDir, runConfig, loadOpts.toolsetRegistry, configName, expander) @@ -267,7 +275,7 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c // Build agent default models map agentDefaultModels := make(map[string]string) for _, agent := range cfg.Agents { - if agent.Model != "" { + if agent.Harness == nil && agent.Model != "" { agentDefaultModels[agent.Name] = agent.Model } } diff --git a/pkg/teamloader/teamloader_test.go b/pkg/teamloader/teamloader_test.go index 6381ca5a5..a7a89f08a 100644 --- a/pkg/teamloader/teamloader_test.go +++ b/pkg/teamloader/teamloader_test.go @@ -202,6 +202,30 @@ func TestOverrideModel(t *testing.T) { } } +func TestLoadHarnessAgentWithoutModel(t *testing.T) { + t.Setenv("OPENAI_API_KEY", "dummy") + + data := []byte(`agents: + root: + model: openai/gpt-4o + sub_agents: [coder] + coder: + description: External coder + instruction: You are a coding agent. + harness: + type: codex +`) + + team, err := Load(t.Context(), config.NewBytesSource("harness.yaml", data), &config.RuntimeConfig{}) + require.NoError(t, err) + + coder, err := team.Agent("coder") + require.NoError(t, err) + require.True(t, coder.HasHarness()) + require.Equal(t, "codex", coder.Harness().Type) + require.Nil(t, coder.Model(t.Context())) +} + func TestToolsetInstructions(t *testing.T) { t.Setenv("OPENAI_API_KEY", "dummy")