From 64201422810c4097aed78fad0400aee8587a518a Mon Sep 17 00:00:00 2001
From: Djordje Lukic <djordje.lukic@docker.com>
Date: Wed, 20 May 2026 00:40:47 +0200
Subject: [PATCH] feat: add external coding harness agents

Add support for harness-backed agents that delegate coding tasks to external
coding CLIs through github.com/rumpl/harness.

Harness agents can now be configured with a `harness` block in agent YAML and
used as sub-agents by an orchestrator. The runtime detects harness-backed
agents, builds a prompt from the current session, streams text/reasoning output
back into docker-agent, records final assistant responses, and forwards usage
when reported by the harness.

Supported harness types:
- claude-code
- codex
- pi
- opencode

Also update config validation and schema support so harness agents do not need
docker-agent model providers, add an example configuration, and handle external
harness tool call lifecycle events including partial tool calls and tool
results.

Signed-off-by: Djordje Lukic <djordje.lukic@docker.com>
---
 agent-schema.json                             |  58 ++-
 examples/README.md                            |   2 +
 .../coding_harness_background_agents.yaml     |  41 ++
 examples/coding_harnesses.yaml                |  20 +
 go.mod                                        |   1 +
 go.sum                                        |   2 +
 pkg/agent/agent.go                            |  13 +
 pkg/agent/opts.go                             |  11 +
 pkg/codingharness/provider.go                 |  71 +++
 pkg/codingharness/provider_test.go            |  32 ++
 pkg/config/config_test.go                     |  43 ++
 pkg/config/examples_test.go                   |   4 +-
 pkg/config/gather.go                          |   5 +-
 pkg/config/latest/types.go                    |  18 +
 pkg/config/latest/validate.go                 |  37 ++
 pkg/config/overrides.go                       |   4 +
 pkg/config/schema_test.go                     |   1 +
 pkg/runtime/agent_delegation.go               |   4 +-
 pkg/runtime/harness.go                        | 407 ++++++++++++++++++
 pkg/runtime/harness_test.go                   | 190 ++++++++
 pkg/runtime/loop.go                           |   5 +
 pkg/runtime/runtime.go                        |  11 +-
 pkg/teamloader/teamloader.go                  |  56 +--
 pkg/teamloader/teamloader_test.go             |  24 ++
 24 files changed, 1027 insertions(+), 33 deletions(-)
 create mode 100644 examples/coding_harness_background_agents.yaml
 create mode 100644 examples/coding_harnesses.yaml
 create mode 100644 pkg/codingharness/provider.go
 create mode 100644 pkg/codingharness/provider_test.go
 create mode 100644 pkg/runtime/harness.go
 create mode 100644 pkg/runtime/harness_test.go

diff --git a/agent-schema.json b/agent-schema.json
index 9ed93b491..6c8c3a0ab 100644
--- a/agent-schema.json
+++ b/agent-schema.json
@@ -2,7 +2,7 @@
   "$schema": "http://json-schema.org/draft-07/schema#",
   "$id": "https://github.com/docker/docker-agent/blob/main/agent-schema.json",
   "title": "Docker Agent Configuration",
-  "description": "Configuration schema for Docker Agent v8",
+  "description": "Configuration schema for Docker Agent v9",
   "type": "object",
   "properties": {
     "version": {
@@ -17,7 +17,8 @@
         "5",
         "6",
         "7",
-        "8"
+        "8",
+        "9"
       ],
       "examples": [
         "0",
@@ -28,7 +29,8 @@
         "5",
         "6",
         "7",
-        "8"
+        "8",
+        "9"
       ]
     },
     "providers": {
@@ -530,6 +532,10 @@
           "type": "string",
           "description": "Instructions for the agent"
         },
+        "harness": {
+          "$ref": "#/definitions/HarnessConfig",
+          "description": "External coding harness to run this agent with instead of a docker-agent model provider"
+        },
         "code_mode_tools": {
           "type": "boolean",
           "description": "Enable Code Mode for tools"
@@ -749,6 +755,52 @@
       },
       "additionalProperties": false
     },
+    "HarnessConfig": {
+      "type": "object",
+      "description": "Configuration for running an agent through an external coding-agent CLI via github.com/rumpl/harness",
+      "properties": {
+        "type": {
+          "type": "string",
+          "description": "External coding harness provider to use",
+          "enum": [
+            "claude-code",
+            "codex",
+            "pi",
+            "opencode"
+          ],
+          "examples": [
+            "claude-code",
+            "codex"
+          ]
+        },
+        "model": {
+          "type": "string",
+          "description": "Optional model name passed to the external harness. When omitted, the external CLI uses its own default model."
+        },
+        "effort": {
+          "type": "string",
+          "description": "Claude Code effort level forwarded as --effort",
+          "enum": [
+            "low",
+            "medium",
+            "high",
+            "max"
+          ]
+        },
+        "agent": {
+          "type": "string",
+          "description": "opencode agent name forwarded as --agent"
+        },
+        "thinking": {
+          "type": "boolean",
+          "description": "Enable opencode thinking output"
+        }
+      },
+      "required": [
+        "type"
+      ],
+      "additionalProperties": false
+    },
     "FallbackConfig": {
       "type": "object",
       "description": "Configuration for fallback model behavior when the primary model fails",
diff --git a/examples/README.md b/examples/README.md
index 57acbf170..e3a640b64 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -159,6 +159,8 @@ remote MCP endpoints.
 | [`writer.yaml`](writer.yaml) | Story writing supervisor with specialized sub-agents. |
 | [`finance.yaml`](finance.yaml) | Financial research orchestrating analysts. |
 | [`background_agents.yaml`](background_agents.yaml) | Parallel research delegated to background sub-agents. |
+| [`coding_harnesses.yaml`](coding_harnesses.yaml) | Orchestrator delegating coding tasks to external harness-backed sub-agents. |
+| [`coding_harness_background_agents.yaml`](coding_harness_background_agents.yaml) | Orchestrator running external coding harnesses concurrently via background agents. |
 | [`dev-team.yaml`](dev-team.yaml) | Product-manager-led team (designer + engineer) with shared memory. |
 | [`multi-code.yaml`](multi-code.yaml) | Tech-lead routing tasks to a frontend and a Go expert. |
 | [`coder.yaml`](coder.yaml) | Coding agent with planner, implementer, and librarian sub-agents. |
diff --git a/examples/coding_harness_background_agents.yaml b/examples/coding_harness_background_agents.yaml
new file mode 100644
index 000000000..10b602f78
--- /dev/null
+++ b/examples/coding_harness_background_agents.yaml
@@ -0,0 +1,41 @@
+agents:
+  root:
+    model: anthropic/claude-sonnet-4-5
+    description: Orchestrator that runs external coding harnesses in parallel
+    instruction: |
+      You coordinate coding work across external coding harnesses.
+
+      When the user asks for work that can be split into independent tasks:
+      1. Start multiple specialists with `run_background_agent` before waiting
+         for any one task to finish.
+      2. Use `list_background_agents` to monitor running work.
+      3. Use `view_background_agent` to inspect each task's output.
+      4. Synthesize the results into a concise final answer with changed files,
+         tests run, and any follow-up work.
+
+      Use claude-coder for broad refactors and multi-file changes. Use
+      codex-coder for focused implementation, test generation, or isolated fixes.
+    sub_agents:
+      - claude-coder
+      - codex-coder
+    toolsets:
+      - type: think
+      - type: background_agents
+
+  claude-coder:
+    description: Claude Code running as an external coding harness
+    instruction: |
+      You are a senior software engineer. Complete the assigned coding task in
+      the repository. Keep changes focused, run relevant tests when possible,
+      and summarize files changed, validation performed, and any blockers.
+    harness:
+      type: claude-code
+
+  codex-coder:
+    description: Codex running as an external coding harness
+    instruction: |
+      You are a pragmatic software engineer. Complete the assigned coding task
+      in the repository. Prefer small, well-tested changes and report the exact
+      validation commands you ran.
+    harness:
+      type: codex
diff --git a/examples/coding_harnesses.yaml b/examples/coding_harnesses.yaml
new file mode 100644
index 000000000..9ed6482ea
--- /dev/null
+++ b/examples/coding_harnesses.yaml
@@ -0,0 +1,20 @@
+agents:
+  root:
+    model: anthropic/claude-sonnet-4-5
+    description: Orchestrator
+    instruction: Route coding tasks to the appropriate specialist.
+    sub_agents:
+      - claude-coder
+      - codex-coder
+
+  claude-coder:
+    description: Claude Code for complex refactors
+    instruction: You are a senior software engineer.
+    harness:
+      type: claude-code
+
+  codex-coder:
+    description: Codex for code generation
+    instruction: You are a software engineer.
+    harness:
+      type: codex
diff --git a/go.mod b/go.mod
index 2e5512c0e..407c66bc1 100644
--- a/go.mod
+++ b/go.mod
@@ -53,6 +53,7 @@ require (
 	github.com/openai/openai-go/v3 v3.36.0
 	github.com/pb33f/libopenapi v0.36.4
 	github.com/rivo/uniseg v0.4.7
+	github.com/rumpl/harness v0.0.0-20260519225334-1d956be4fff1
 	github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82
 	github.com/spf13/cobra v1.10.2
 	github.com/stretchr/testify v1.11.1
diff --git a/go.sum b/go.sum
index 6fd5d370d..a67665d05 100644
--- a/go.sum
+++ b/go.sum
@@ -454,6 +454,8 @@ github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
 github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
+github.com/rumpl/harness v0.0.0-20260519225334-1d956be4fff1 h1:yyMEKlqFxa0ujeH4hrBD0rYj5TRD/h98zspUSbFaCl8=
+github.com/rumpl/harness v0.0.0-20260519225334-1d956be4fff1/go.mod h1:D0KcsF5BBYJDBeIQYXMNZpGYFgGMeQ4uOKKX81SwUv0=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/sebdah/goldie/v2 v2.8.0 h1:dZb9wR8q5++oplmEiJT+U/5KyotVD+HNGCAc5gNr8rc=
 github.com/sebdah/goldie/v2 v2.8.0/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI=
diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go
index d50a6c577..c98102e1d 100644
--- a/pkg/agent/agent.go
+++ b/pkg/agent/agent.go
@@ -42,6 +42,7 @@ type Agent struct {
 	addPromptFiles          []string
 	tools                   []tools.Tool
 	commands                types.Commands
+	harness                 *latest.HarnessConfig
 	hooks                   *latest.HooksConfig
 	cache                   *cache.Cache
 
@@ -160,6 +161,9 @@ func (a *Agent) Model(ctx context.Context) provider.Provider {
 		selected = (*overrides)[rand.Intn(len(*overrides))]
 		poolSize = len(*overrides)
 	} else {
+		if len(a.models) == 0 {
+			return nil
+		}
 		selected = a.models[rand.Intn(len(a.models))]
 		poolSize = len(a.models)
 	}
@@ -268,6 +272,15 @@ func (a *Agent) Commands() types.Commands {
 	return a.commands
 }
 
+// Harness returns the agent's external coding harness configuration, or nil if none is set.
+func (a *Agent) Harness() *latest.HarnessConfig {
+	return a.harness
+}
+
+// HasHarness reports whether this agent is backed by an external coding
+// harness, i.e. whether a harness configuration has been set on it.
+func (a *Agent) HasHarness() bool { return a.harness != nil }
+
 // Hooks returns the hooks configuration for this agent.
 func (a *Agent) Hooks() *latest.HooksConfig {
 	return a.hooks
diff --git a/pkg/agent/opts.go b/pkg/agent/opts.go
index 9657d00d3..0b7e19232 100644
--- a/pkg/agent/opts.go
+++ b/pkg/agent/opts.go
@@ -170,6 +170,17 @@ func WithCommands(commands types.Commands) Opt {
 	}
 }
 
+// WithHarness gives the agent its own copy of the harness config; nil clears it.
+func WithHarness(harness *latest.HarnessConfig) Opt {
+	return func(a *Agent) {
+		a.harness = nil
+		if harness != nil {
+			snapshot := *harness
+			a.harness = &snapshot
+		}
+	}
+}
+
 func WithLoadTimeWarnings(warnings []string) Opt {
 	return func(a *Agent) {
 		for _, w := range warnings {
diff --git a/pkg/codingharness/provider.go b/pkg/codingharness/provider.go
new file mode 100644
index 000000000..58f434df7
--- /dev/null
+++ b/pkg/codingharness/provider.go
@@ -0,0 +1,71 @@
+package codingharness
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+
+	baseharness "github.com/rumpl/harness"
+	"github.com/rumpl/harness/claudecode"
+	"github.com/rumpl/harness/codex"
+	"github.com/rumpl/harness/opencode"
+	"github.com/rumpl/harness/pi"
+
+	"github.com/docker/docker-agent/pkg/config/latest"
+)
+
+const ( // values accepted for HarnessConfig.Type
+	TypeClaudeCode = "claude-code" // handled by newClaudeCodeProvider
+	TypeCodex      = "codex"       // handled by codex.New
+	TypePi         = "pi"          // handled by pi.New
+	TypeOpenCode   = "opencode"    // handled by newOpenCodeProvider
+)
+
+// NewProvider returns the harness provider selected by cfg.Type, or an error
+// when cfg is nil or names an unsupported harness type.
+func NewProvider(cfg *latest.HarnessConfig) (baseharness.Provider, error) {
+	if cfg == nil {
+		return nil, errors.New("harness config is nil")
+	}
+	switch kind := cfg.Type; kind {
+	case TypeCodex:
+		return codex.New(cfg.Model), nil
+	case TypePi:
+		return pi.New(cfg.Model), nil
+	case TypeClaudeCode:
+		return newClaudeCodeProvider(cfg), nil
+	case TypeOpenCode:
+		return newOpenCodeProvider(cfg), nil
+	}
+	return nil, fmt.Errorf("unsupported harness type %q", cfg.Type)
+}
+
+// Label returns cfg.Type, or "type/model" when a model is set; "" for nil cfg.
+func Label(cfg *latest.HarnessConfig) string {
+	if cfg == nil {
+		return ""
+	}
+	if model := strings.TrimSpace(cfg.Model); model != "" {
+		return cfg.Type + "/" + model
+	}
+	return cfg.Type
+}
+
+// newClaudeCodeProvider builds the Claude Code provider, passing effort only when set.
+func newClaudeCodeProvider(cfg *latest.HarnessConfig) baseharness.Provider {
+	if cfg.Effort == "" {
+		return claudecode.New(cfg.Model)
+	}
+	return claudecode.New(cfg.Model, claudecode.WithEffort(claudecode.Effort(cfg.Effort)))
+}
+
+func newOpenCodeProvider(cfg *latest.HarnessConfig) baseharness.Provider { // maps HarnessConfig onto opencode options
+	var opts []opencode.Option
+	if cfg.Agent != "" { // agent name is optional; omitted when empty
+		opts = append(opts, opencode.WithAgent(cfg.Agent))
+	}
+	if cfg.Thinking { // thinking output is opt-in
+		opts = append(opts, opencode.WithThinking())
+	}
+	return opencode.New(cfg.Model, opts...)
+}
diff --git a/pkg/codingharness/provider_test.go b/pkg/codingharness/provider_test.go
new file mode 100644
index 000000000..33a3cca69
--- /dev/null
+++ b/pkg/codingharness/provider_test.go
@@ -0,0 +1,32 @@
+package codingharness
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/config/latest"
+)
+
+func TestNewProviderOmitsModelFlagWhenModelEmpty(t *testing.T) {
+	provider, err := NewProvider(&latest.HarnessConfig{Type: TypeCodex})
+	require.NoError(t, err)
+
+	printed := provider.PrintCommand("do it")
+	require.Contains(t, printed, "codex exec --json")
+	require.NotContains(t, printed, " -m ")
+}
+
+func TestNewProviderUsesConfiguredModel(t *testing.T) {
+	provider, err := NewProvider(&latest.HarnessConfig{Type: TypeClaudeCode, Model: "claude-sonnet-4-5", Effort: "high"})
+	require.NoError(t, err)
+
+	printed := provider.PrintCommand("do it")
+	require.Contains(t, printed, "--model 'claude-sonnet-4-5'")
+	require.Contains(t, printed, "--effort high")
+}
+
+func TestLabel(t *testing.T) {
+	require.Equal(t, "codex", Label(&latest.HarnessConfig{Type: TypeCodex})) // no model: type only
+	require.Equal(t, "codex/gpt-5", Label(&latest.HarnessConfig{Type: TypeCodex, Model: "gpt-5"})) // model set: type/model
+}
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index d80d80490..87606d48a 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -466,6 +466,49 @@ func TestApplyModelOverrides(t *testing.T) {
 	}
 }
 
+func TestValidateConfig_HarnessAgentsSkipModelValidation(t *testing.T) {
+	t.Parallel()
+
+	cfg := &latest.Config{
+		Agents: []latest.AgentConfig{
+			{Name: "root", Model: "openai/gpt-4o", SubAgents: []string{"coder"}},
+			{Name: "coder", Harness: &latest.HarnessConfig{Type: "codex"}}, // harness-backed: no Model set
+		},
+	}
+
+	require.NoError(t, validateConfig(cfg))
+	_, exists := cfg.Models[""] // the harness agent's empty model reference must not appear in cfg.Models
+	assert.False(t, exists)
+}
+
+func TestValidateConfig_HarnessValidation(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name    string
+		harness *latest.HarnessConfig
+		wantErr string
+	}{
+		{name: "valid claude code", harness: &latest.HarnessConfig{Type: "claude-code", Effort: "high"}},
+		{name: "missing type", harness: &latest.HarnessConfig{}, wantErr: "harness.type is required"},
+		{name: "bad type", harness: &latest.HarnessConfig{Type: "vim"}, wantErr: "unsupported harness.type"},
+		{name: "bad effort", harness: &latest.HarnessConfig{Type: "claude-code", Effort: "ultra"}, wantErr: "harness.effort must be one of"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			cfg := &latest.Config{Agents: []latest.AgentConfig{{Name: "root", Harness: tc.harness}}}
+			err := cfg.Validate()
+			if tc.wantErr == "" {
+				require.NoError(t, err)
+				return
+			}
+			require.ErrorContains(t, err, tc.wantErr)
+		})
+	}
+}
+
 func TestValidateConfig_ExternalSubAgentReferences(t *testing.T) {
 	t.Parallel()
 
diff --git a/pkg/config/examples_test.go b/pkg/config/examples_test.go
index a31798a30..a297f65a5 100644
--- a/pkg/config/examples_test.go
+++ b/pkg/config/examples_test.go
@@ -53,7 +53,9 @@ func TestParseExamples(t *testing.T) {
 			require.NotEmpty(t, cfg.Agents.First().Description, "Description should not be empty in %s", file)
 
 			for _, agent := range cfg.Agents {
-				require.NotEmpty(t, agent.Model)
+				if agent.Harness == nil {
+					require.NotEmpty(t, agent.Model)
+				}
 				require.NotEmpty(t, agent.Instruction, "Instruction should not be empty in %s", file)
 			}
 
diff --git a/pkg/config/gather.go b/pkg/config/gather.go
index dcca91db6..a10a43a14 100644
--- a/pkg/config/gather.go
+++ b/pkg/config/gather.go
@@ -53,8 +53,11 @@ func gatherMissingEnvVars(ctx context.Context, cfg *latest.Config, modelsGateway
 func GatherEnvVarsForModels(cfg *latest.Config) []string {
 	requiredEnv := map[string]bool{}
 
-	// Inspect only the models that are actually used by agents
+	// Inspect only the models that are actually used by docker-agent model-backed agents.
 	for _, agent := range cfg.Agents {
+		if agent.Harness != nil {
+			continue
+		}
 		modelNames := strings.SplitSeq(agent.Model, ",")
 		for modelName := range modelNames {
 			modelName = strings.TrimSpace(modelName)
diff --git a/pkg/config/latest/types.go b/pkg/config/latest/types.go
index 0eb22cb4e..1dc1a2737 100644
--- a/pkg/config/latest/types.go
+++ b/pkg/config/latest/types.go
@@ -368,6 +368,23 @@ func (d Duration) MarshalJSON() ([]byte, error) {
 	return json.Marshal(d.String())
 }
 
+// HarnessConfig configures an agent that delegates execution to an external
+// coding-agent CLI through github.com/rumpl/harness instead of using a
+// docker-agent model provider.
+type HarnessConfig struct {
+	// Type identifies the external harness provider: claude-code, codex, pi, or opencode. Required.
+	Type string `json:"type,omitempty"`
+	// Model is passed to harnesses that accept a model flag. When omitted,
+	// docker-agent lets the external CLI use its own default model.
+	Model string `json:"model,omitempty"`
+	// Effort is forwarded to Claude Code's --effort flag (low, medium, high, or max; claude-code only).
+	Effort string `json:"effort,omitempty"`
+	// Agent is forwarded to opencode's --agent flag (opencode only).
+	Agent string `json:"agent,omitempty"`
+	// Thinking enables opencode's --thinking flag (opencode only).
+	Thinking bool `json:"thinking,omitempty"`
+}
+
 // AgentConfig represents a single agent configuration
 type AgentConfig struct {
 	Name           string
@@ -377,6 +394,7 @@ type AgentConfig struct {
 	WelcomeMessage string          `json:"welcome_message,omitempty"`
 	Toolsets       []Toolset       `json:"toolsets,omitempty"`
 	Instruction    string          `json:"instruction,omitempty"`
+	Harness        *HarnessConfig  `json:"harness,omitempty"`
 	SubAgents      []string        `json:"sub_agents,omitempty"`
 	Handoffs       []string        `json:"handoffs,omitempty"`
 
diff --git a/pkg/config/latest/validate.go b/pkg/config/latest/validate.go
index 744d15d86..71db737b1 100644
--- a/pkg/config/latest/validate.go
+++ b/pkg/config/latest/validate.go
@@ -37,6 +37,9 @@ func (t *Config) Validate() error {
 		if err := agent.validateFallback(); err != nil {
 			return err
 		}
+		if err := agent.validateHarness(); err != nil {
+			return err
+		}
 
 		for j := range agent.Toolsets {
 			if err := agent.Toolsets[j].validate(); err != nil {
@@ -70,6 +73,40 @@ func (a *AgentConfig) validateFallback() error {
 	return nil
 }
 
+// validateHarness checks the optional harness block. A supported type is
+// required, and provider-specific options (effort, agent, thinking) are only
+// accepted for the harness type that understands them.
+func (a *AgentConfig) validateHarness() error {
+	h := a.Harness
+	if h == nil {
+		return nil
+	}
+
+	switch h.Type {
+	case "":
+		return errors.New("harness.type is required")
+	case "claude-code", "codex", "pi", "opencode":
+	default:
+		return fmt.Errorf("unsupported harness.type %q (must be one of: claude-code, codex, pi, opencode)", h.Type)
+	}
+
+	// Cases are ordered so the first violation reported follows field order:
+	// effort (wrong type, then bad value), agent, thinking.
+	isClaudeCode, isOpenCode := h.Type == "claude-code", h.Type == "opencode"
+	switch {
+	case h.Effort != "" && !isClaudeCode:
+		return errors.New("harness.effort can only be used with harness.type 'claude-code'")
+	case h.Effort != "" && h.Effort != "low" && h.Effort != "medium" && h.Effort != "high" && h.Effort != "max":
+		return errors.New("harness.effort must be one of: low, medium, high, max")
+	case h.Agent != "" && !isOpenCode:
+		return errors.New("harness.agent can only be used with harness.type 'opencode'")
+	case h.Thinking && !isOpenCode:
+		return errors.New("harness.thinking can only be used with harness.type 'opencode'")
+	}
+
+	return nil
+}
+
 func (t *Toolset) validate() error {
 	// Attributes used on the wrong toolset type.
 	if len(t.Shell) > 0 && t.Type != "script" {
diff --git a/pkg/config/overrides.go b/pkg/config/overrides.go
index 842027ccd..735e124d8 100644
--- a/pkg/config/overrides.go
+++ b/pkg/config/overrides.go
@@ -84,6 +84,10 @@ func ensureModelsExist(cfg *latest.Config) error {
 	// Expand alloy model compositions in agent model references and ensure resulting
 	// referenced models exist.
 	for _, agent := range cfg.Agents {
+		if agent.Harness != nil {
+			continue
+		}
+
 		expandedModel, err := expandAlloyModelRef(cfg, agent.Model)
 		if err != nil {
 			return fmt.Errorf("agent '%s': %w", agent.Name, err)
diff --git a/pkg/config/schema_test.go b/pkg/config/schema_test.go
index 44f8e95b1..2c3d6c981 100644
--- a/pkg/config/schema_test.go
+++ b/pkg/config/schema_test.go
@@ -87,6 +87,7 @@ func TestSchemaMatchesGoTypes(t *testing.T) {
 	definitionMap := map[string]reflect.Type{
 		"AgentConfig":           reflect.TypeFor[latest.AgentConfig](),
 		"FallbackConfig":        reflect.TypeFor[latest.FallbackConfig](),
+		"HarnessConfig":         reflect.TypeFor[latest.HarnessConfig](),
 		"ModelConfig":           reflect.TypeFor[latest.ModelConfig](),
 		"Metadata":              reflect.TypeFor[latest.Metadata](),
 		"ProviderConfig":        reflect.TypeFor[latest.ProviderConfig](),
diff --git a/pkg/runtime/agent_delegation.go b/pkg/runtime/agent_delegation.go
index 5d0d04f17..7304bffd5 100644
--- a/pkg/runtime/agent_delegation.go
+++ b/pkg/runtime/agent_delegation.go
@@ -218,12 +218,12 @@ func (r *LocalRuntime) swapCurrentAgent(ctx context.Context, sessionID string, f
 	evts.Emit(AgentSwitching(true, from.Name(), to.Name()))
 	r.executeOnAgentSwitchHooks(ctx, from, sessionID, from.Name(), to.Name(), agentSwitchKindTransferTask)
 	r.setCurrentAgent(to.Name())
-	evts.Emit(AgentInfo(to.Name(), getAgentModelID(to).String(), to.Description(), to.WelcomeMessage()))
+	evts.Emit(AgentInfo(to.Name(), agentModelLabel(to), to.Description(), to.WelcomeMessage()))
 	return func() {
 		r.setCurrentAgent(from.Name())
 		evts.Emit(AgentSwitching(false, to.Name(), from.Name()))
 		r.executeOnAgentSwitchHooks(ctx, from, sessionID, to.Name(), from.Name(), agentSwitchKindTransferTaskReturn)
-		evts.Emit(AgentInfo(from.Name(), getAgentModelID(from).String(), from.Description(), from.WelcomeMessage()))
+		evts.Emit(AgentInfo(from.Name(), agentModelLabel(from), from.Description(), from.WelcomeMessage()))
 	}
 }
 
diff --git a/pkg/runtime/harness.go b/pkg/runtime/harness.go
new file mode 100644
index 000000000..431afc3fa
--- /dev/null
+++ b/pkg/runtime/harness.go
@@ -0,0 +1,407 @@
+package runtime
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"strings"
+	"time"
+
+	baseharness "github.com/rumpl/harness"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
+
+	"github.com/docker/docker-agent/pkg/agent"
+	"github.com/docker/docker-agent/pkg/chat"
+	"github.com/docker/docker-agent/pkg/codingharness"
+	"github.com/docker/docker-agent/pkg/session"
+	"github.com/docker/docker-agent/pkg/tools"
+)
+
+// runHarnessAgent runs one turn of a harness-backed agent: it streams harness
+// events to the sink, records the final reply, and returns the turn-end reason.
+func (r *LocalRuntime) runHarnessAgent(ctx context.Context, sess *session.Session, a *agent.Agent, baseExtra []chat.Message, events EventSink) string {
+	ctx, span := r.startSpan(ctx, "runtime.harness", trace.WithAttributes(traceAttributesForHarness(sess, a)...))
+	defer span.End()
+
+	provider, err := codingharness.NewProvider(a.Harness())
+	if err != nil {
+		msg := fmt.Sprintf("failed to configure harness: %v", err)
+		events.Emit(ErrorWithCode(ErrorCodeModelError, msg))
+		r.notifyError(ctx, a, sess.ID, msg)
+		span.RecordError(err)
+		span.SetStatus(codes.Error, "harness configuration error")
+		return turnEndReasonError
+	}
+
+	modelID := agentModelLabel(a)
+	events.Emit(AgentInfo(a.Name(), modelID, a.Description(), a.WelcomeMessage()))
+
+	endReason := turnEndReasonNormal
+	defer func() {
+		if ctx.Err() != nil && endReason == turnEndReasonNormal {
+			endReason = turnEndReasonCanceled
+		}
+		r.executeTurnEndHooks(context.WithoutCancel(ctx), sess, a, endReason, events) // hooks get a context that is not canceled with the turn
+	}()
+
+	turnStartMsgs := r.executeTurnStartHooks(ctx, sess, a, events)
+	messages := sess.GetMessages(a, append(baseExtra[:len(baseExtra):len(baseExtra)], turnStartMsgs...)...) // capped so append never writes into the caller's backing array
+	stop, msg, rewritten := r.executeBeforeLLMCallHooks(ctx, sess, a, modelID, 1, messages)
+	if stop {
+		slog.WarnContext(ctx, "before_llm_call hook signalled run termination",
+			"agent", a.Name(), "session_id", sess.ID, "reason", msg)
+		r.emitHookDrivenShutdown(ctx, a, sess, msg, events)
+		endReason = turnEndReasonHookBlocked
+		return endReason
+	}
+	if rewritten != nil {
+		messages = rewritten
+	}
+	messages = r.applyBeforeLLMCallTransforms(ctx, sess, a, modelID, messages)
+
+	prompt := harnessPromptFromMessages(messages)
+	var streamed strings.Builder
+	var finalResult string
+	var usage *chat.Usage
+	var cost float64
+	toolCallSeq := 0
+	pendingToolCalls := make(map[string]harnessToolCall) // tool calls that have not yet received a result
+	startToolCall := func(ev baseharness.Event) harnessToolCall {
+		toolCallSeq++
+		pending := newHarnessToolCall(toolCallSeq, ev, "")
+		pendingToolCalls[pending.key] = pending
+		events.Emit(PartialToolCall(pending.call, pending.definition, a.Name()))
+		return pending
+	}
+	emitToolCallDelta := func(ev baseharness.Event) {
+		if ev.ToolArgs == "" {
+			return
+		}
+		pending, ok := pendingToolCallForEvent(pendingToolCalls, ev)
+		if !ok {
+			if ev.ToolName == "" {
+				return
+			}
+			pending = startToolCall(ev)
+		}
+		events.Emit(PartialToolCall(tools.ToolCall{
+			ID:   pending.call.ID,
+			Type: pending.call.Type,
+			Function: tools.FunctionCall{
+				Name:      pending.call.Function.Name,
+				Arguments: ev.ToolArgs,
+			},
+		}, tools.Tool{}, a.Name()))
+	}
+	completeToolCall := func(ev baseharness.Event) {
+		pending, ok := pendingToolCallForEvent(pendingToolCalls, ev)
+		if !ok {
+			return
+		}
+		result := harnessToolResult(ev)
+		events.Emit(ToolCallResponse(pending.call.ID, pending.definition, result, result.Output, a.Name()))
+		delete(pendingToolCalls, pending.key)
+	}
+	completeRemainingToolCalls := func(result *tools.ToolCallResult) {
+		if result == nil {
+			return
+		}
+		for key, pending := range pendingToolCalls {
+			events.Emit(ToolCallResponse(pending.call.ID, pending.definition, result, result.Output, a.Name()))
+			delete(pendingToolCalls, key)
+		}
+	}
+
+	err = baseharness.Run(ctx, provider, prompt, func(ev baseharness.Event) {
+		switch ev.Type {
+		case baseharness.EventText:
+			if ev.Text == "" {
+				return
+			}
+			if isHarnessReplayText(streamed.String(), ev.Text) {
+				return
+			}
+			streamed.WriteString(ev.Text)
+			events.Emit(AgentChoice(a.Name(), sess.ID, ev.Text))
+		case baseharness.EventReasoning:
+			if ev.Reasoning != "" {
+				events.Emit(AgentChoiceReasoning(a.Name(), sess.ID, ev.Reasoning))
+			}
+		case baseharness.EventToolCallStart:
+			startToolCall(ev)
+		case baseharness.EventToolCallDelta:
+			emitToolCallDelta(ev)
+		case baseharness.EventToolCall:
+			if shouldSkipHarnessToolCall(ev) {
+				return
+			}
+			if pending, ok := pendingToolCallForEvent(pendingToolCalls, ev); ok {
+				if arguments := harnessToolCallArguments(ev); arguments != "" {
+					pending.call.Function.Arguments = arguments
+					pendingToolCalls[pending.key] = pending
+				}
+				events.Emit(ToolCall(pending.call, pending.definition, a.Name()))
+				return
+			}
+			toolCallSeq++
+			pending := newHarnessToolCall(toolCallSeq, ev, harnessToolCallArguments(ev))
+			pendingToolCalls[pending.key] = pending
+			events.Emit(ToolCall(pending.call, pending.definition, a.Name()))
+		case baseharness.EventToolResult:
+			completeToolCall(ev)
+		case baseharness.EventResult:
+			if ev.Result != "" {
+				finalResult = ev.Result
+			}
+			if ev.Usage != nil {
+				usage = harnessUsage(ev.Usage)
+				cost = ev.Usage.TotalCostUSD
+			}
+		}
+	})
+	if err != nil {
+		if ctx.Err() != nil {
+			completeRemainingToolCalls(tools.ResultError("External harness was canceled."))
+			span.RecordError(ctx.Err())
+			span.SetStatus(codes.Error, "harness canceled")
+			endReason = turnEndReasonCanceled
+			return endReason
+		}
+		msg := fmt.Sprintf("harness %s failed: %v", provider.Name(), err)
+		completeRemainingToolCalls(tools.ResultError(msg))
+		events.Emit(ErrorWithCode(ErrorCodeModelError, msg))
+		r.notifyError(ctx, a, sess.ID, msg)
+		span.RecordError(err)
+		span.SetStatus(codes.Error, "harness run error")
+		endReason = turnEndReasonError
+		return endReason
+	}
+
+	completeRemainingToolCalls(harnessToolCompletedResult())
+
+	content := strings.TrimSpace(streamed.String())
+	if fallback := strings.TrimSpace(finalResult); content == "" && fallback != "" {
+		// Nothing was streamed: surface the harness's final result instead.
+		content = fallback
+		events.Emit(AgentChoice(a.Name(), sess.ID, content))
+	}
+
+	r.executeAfterLLMCallHooks(ctx, sess, a, content)
+	r.recordHarnessAssistantMessage(sess, a, content, modelID, usage, cost, events)
+	r.executeStopHooks(ctx, sess, a, content, events)
+
+	span.SetAttributes(attribute.Int("content.length", len(content)))
+	span.SetStatus(codes.Ok, "harness completed")
+	return endReason
+}
+
+func agentModelLabel(a *agent.Agent) string { // label passed as the model in AgentInfo events
+	if a == nil {
+		return ""
+	}
+	if a.HasHarness() { // harness-backed agents are labelled by harness type (and model, when set)
+		return codingharness.Label(a.Harness())
+	}
+	return getAgentModelID(a).String()
+}
+
+func traceAttributesForHarness(sess *session.Session, a *agent.Agent) []attribute.KeyValue { // span attributes for the runtime.harness span
+	return []attribute.KeyValue{
+		attribute.String("agent", a.Name()),
+		attribute.String("session.id", sess.ID),
+		attribute.String("harness.type", a.Harness().Type), // dereferences a.Harness(): a must be harness-backed
+	}
+}
+
+type harnessToolCall struct { // a tool call reported by the harness, tracked until its result arrives
+	key        string         // lookup key in the pending map: harness tool ID, or the synthesized call ID
+	call       tools.ToolCall // call as emitted on the event sink
+	definition tools.Tool     // synthetic definition (category "harness") emitted alongside the call
+}
+
+// newHarnessToolCall builds the pending-call record for ev. Calls are keyed by
+// the harness tool ID when present, otherwise by a synthesized "harness-<seq>" ID.
+func newHarnessToolCall(seq int, ev baseharness.Event, arguments string) harnessToolCall {
+	toolName := ev.ToolName
+	if toolName == "" {
+		toolName = "tool"
+	}
+	lookupKey := harnessToolEventID(ev)
+	id := "harness-" + lookupKey
+	if lookupKey == "" {
+		id = fmt.Sprintf("harness-%d", seq)
+		lookupKey = id
+	}
+	return harnessToolCall{
+		key: lookupKey,
+		call: tools.ToolCall{
+			ID:   id,
+			Type: "function",
+			Function: tools.FunctionCall{
+				Name:      toolName,
+				Arguments: arguments,
+			},
+		},
+		definition: tools.Tool{
+			Name:        toolName,
+			Category:    "harness",
+			Description: "Tool call reported by an external coding harness",
+		},
+	}
+}
+
+// pendingToolCallForEvent finds the pending call that ev refers to. Events with
+// a tool ID match by key; events without one match only a sole pending call.
+func pendingToolCallForEvent(pending map[string]harnessToolCall, ev baseharness.Event) (harnessToolCall, bool) {
+	if id := harnessToolEventID(ev); id != "" {
+		match, found := pending[id]
+		return match, found
+	}
+	if len(pending) == 1 {
+		for _, only := range pending {
+			return only, true
+		}
+	}
+	return harnessToolCall{}, false
+}
+
+// harnessToolResult turns a harness tool event into a result, defaulting empty output to a generic message.
+func harnessToolResult(ev baseharness.Event) *tools.ToolCallResult {
+	text := ev.ToolOutput
+	if text == "" {
+		text = "Completed by external harness."
+	}
+	if !ev.ToolError {
+		return tools.ResultSuccess(text)
+	}
+	return tools.ResultError(text)
+}
+
+func harnessToolCompletedResult() *tools.ToolCallResult { // generic success result, e.g. handed to completeRemainingToolCalls after a run
+	return tools.ResultSuccess("Completed by external harness.")
+}
+
+// harnessToolCallArguments returns the event's tool arguments as JSON text:
+// "" when blank, the trimmed text when it decodes as a JSON object, and
+// otherwise the raw, untrimmed text wrapped as {"input": ...}.
+func harnessToolCallArguments(ev baseharness.Event) string {
+	trimmed := strings.TrimSpace(ev.ToolArgs)
+	var decoded map[string]any
+	if trimmed == "" || json.Unmarshal([]byte(trimmed), &decoded) == nil {
+		return trimmed
+	}
+	encoded, _ := json.Marshal(map[string]string{"input": ev.ToolArgs}) // a map[string]string always marshals
+	return string(encoded)
+}
+
+func shouldSkipHarnessToolCall(ev baseharness.Event) bool { // true for a named tool event with blank arguments and no tool ID
+	return strings.TrimSpace(ev.ToolName) != "" && strings.TrimSpace(ev.ToolArgs) == "" && harnessToolEventID(ev) == ""
+}
+
+// isHarnessReplayText reports whether next repeats existing once both are
+// normalized (CRLF to LF, surrounding whitespace trimmed); empty inputs never match.
+func isHarnessReplayText(existing, next string) bool {
+	if existing == "" || next == "" {
+		return false
+	}
+	return normalizeHarnessText(next) == normalizeHarnessText(existing)
+}
+
+func normalizeHarnessText(s string) string { // converts CRLF to LF, then trims surrounding whitespace
+	return strings.TrimSpace(strings.ReplaceAll(s, "\r\n", "\n"))
+}
+
+func harnessToolEventID(ev baseharness.Event) string { // returns ev.ToolID, the ID used to key and look up harness tool calls
+	return ev.ToolID
+}
+
+// harnessUsage converts harness-reported token usage into chat.Usage, mapping
+// cache-read tokens to CachedInputTokens and cache-creation tokens to CacheWriteTokens; nil stays nil.
+func harnessUsage(u *baseharness.Usage) *chat.Usage {
+	if u == nil {
+		return nil
+	}
+	converted := chat.Usage{InputTokens: int64(u.InputTokens), OutputTokens: int64(u.OutputTokens)}
+	converted.CachedInputTokens = int64(u.CacheReadInputTokens)
+	converted.CacheWriteTokens = int64(u.CacheCreationInputTokens)
+	return &converted
+}
+
+// recordHarnessAssistantMessage appends the harness's final answer to the
+// session as an assistant message and, when usage was reported, updates the
+// session token counts and emits a token usage event. It does nothing when
+// content is blank and usage is nil.
+func (r *LocalRuntime) recordHarnessAssistantMessage(sess *session.Session, a *agent.Agent, content, modelID string, usage *chat.Usage, cost float64, events EventSink) {
+	if usage == nil && strings.TrimSpace(content) == "" {
+		return
+	}
+
+	// Record the reply in the session before any usage accounting.
+	addAgentMessage(sess, a, &chat.Message{
+		Role:         chat.MessageRoleAssistant,
+		Content:      content,
+		CreatedAt:    r.now().Format(time.RFC3339),
+		Usage:        usage,
+		Model:        modelID,
+		Cost:         cost,
+		FinishReason: chat.FinishReasonStop,
+	}, events)
+	if usage == nil {
+		return
+	}
+
+	// Cached and cache-write tokens count toward the session's input total.
+	inputTokens := usage.InputTokens + usage.CachedInputTokens + usage.CacheWriteTokens
+	sess.SetUsage(inputTokens, usage.OutputTokens)
+	last := &MessageUsage{Usage: *usage, Cost: cost, Model: modelID, FinishReason: chat.FinishReasonStop}
+	report := SessionUsage(sess, 0)
+	report.LastMessage = last
+	events.Emit(NewTokenUsageEvent(sess.ID, a.Name(), report))
+}
+
+// harnessPromptFromMessages renders messages as role-tagged blocks of the form
+// "<role>\ncontent\n</role>", skipping messages whose content is blank.
+func harnessPromptFromMessages(messages []chat.Message) string {
+	var prompt strings.Builder
+	for _, m := range messages {
+		if body := harnessMessageContent(m); strings.TrimSpace(body) != "" {
+			fmt.Fprintf(&prompt, "<%s>\n%s\n</%s>\n\n", m.Role, body, m.Role)
+		}
+	}
+	return strings.TrimSpace(prompt.String())
+}
+
+// harnessMessageContent flattens msg into prompt text: its content, then one entry per
+// multi-content part (text, file path, image URL, or document), joined by blank lines.
+func harnessMessageContent(msg chat.Message) string {
+	var segments []string
+	add := func(text string) { segments = append(segments, text) }
+	if msg.Content != "" {
+		add(msg.Content)
+	}
+	for _, p := range msg.MultiContent {
+		switch p.Type {
+		case chat.MessagePartTypeText:
+			if p.Text != "" {
+				add(p.Text)
+			}
+		case chat.MessagePartTypeFile:
+			if p.File != nil && p.File.Path != "" {
+				add("Attached file: " + p.File.Path)
+			}
+		case chat.MessagePartTypeImageURL:
+			if p.ImageURL != nil && p.ImageURL.URL != "" {
+				add("Attached image: " + p.ImageURL.URL)
+			}
+		case chat.MessagePartTypeDocument:
+			if doc := p.Document; doc != nil && doc.Source.InlineText != "" {
+				add(fmt.Sprintf("Attached document %s:\n%s", doc.Name, doc.Source.InlineText))
+			} else if doc != nil {
+				add(fmt.Sprintf("Attached document: %s (%s)", doc.Name, doc.MimeType))
+			}
+		}
+	}
+	return strings.Join(segments, "\n\n")
+}
diff --git a/pkg/runtime/harness_test.go b/pkg/runtime/harness_test.go
new file mode 100644
index 000000000..1792156de
--- /dev/null
+++ b/pkg/runtime/harness_test.go
@@ -0,0 +1,190 @@
+package runtime
+
+import (
+	"os"
+	"path/filepath"
+	stdruntime "runtime"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/agent"
+	"github.com/docker/docker-agent/pkg/config/latest"
+	"github.com/docker/docker-agent/pkg/session"
+	"github.com/docker/docker-agent/pkg/team"
+)
+
+func TestHarnessAgentRunStream(t *testing.T) { // end-to-end run against a fake "codex" script that prints one agent_message
+	if stdruntime.GOOS == "windows" {
+		t.Skip("shell script shim test")
+	}
+
+	binDir := t.TempDir()
+	writeHarnessScript(t, binDir, "codex", `#!/bin/sh
+printf '%s\n' '{"type":"item.completed","item":{"type":"agent_message","text":"harness done"}}'
+`)
+	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) // shim dir goes first on PATH
+
+	rt := newHarnessRuntime(t, "codex")
+	sess := session.New(session.WithUserMessage("do the task"))
+	events := collectRuntimeEvents(t, rt, sess)
+
+	assert.True(t, hasEventType(t, events, &AgentChoiceEvent{}))
+	assert.Equal(t, "harness done", sess.GetLastAssistantMessageContent()) // the reply must be recorded in the session
+
+	var sawHarnessModel bool
+	for _, ev := range events {
+		if info, ok := ev.(*AgentInfoEvent); ok && info.Model == "codex" {
+			sawHarnessModel = true
+		}
+	}
+	assert.True(t, sawHarnessModel, "expected AgentInfo event with codex harness label")
+}
+
+func TestHarnessToolCallCompletes(t *testing.T) { // a started command with no completion event must still get a non-error response
+	if stdruntime.GOOS == "windows" {
+		t.Skip("shell script shim test")
+	}
+
+	binDir := t.TempDir()
+	writeHarnessScript(t, binDir, "codex", `#!/bin/sh
+printf '%s\n' '{"type":"item.started","item":{"type":"command_execution","command":"npm test"}}'
+printf '%s\n' '{"type":"item.completed","item":{"type":"agent_message","text":"tests passed"}}'
+`)
+	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) // shim dir goes first on PATH
+
+	rt := newHarnessRuntime(t, "codex")
+	events := collectRuntimeEvents(t, rt, session.New(session.WithUserMessage("run tests")))
+
+	var toolCall *ToolCallEvent
+	var toolResponse *ToolCallResponseEvent
+	for _, ev := range events { // keeps the last event of each kind
+		switch ev := ev.(type) {
+		case *ToolCallEvent:
+			toolCall = ev
+		case *ToolCallResponseEvent:
+			toolResponse = ev
+		}
+	}
+	require.NotNil(t, toolCall)
+	require.NotNil(t, toolResponse)
+	assert.Equal(t, toolCall.ToolCall.ID, toolResponse.ToolCallID) // the response must reference the emitted call
+	require.NotNil(t, toolResponse.Result)
+	assert.False(t, toolResponse.Result.IsError)
+}
+
+func TestHarnessShowsClaudeCodeToolCallAlongsideText(t *testing.T) { // one assistant message with text and tool_use must surface both
+	if stdruntime.GOOS == "windows" {
+		t.Skip("shell script shim test")
+	}
+
+	binDir := t.TempDir()
+	writeHarnessScript(t, binDir, "claude", `#!/bin/sh
+printf '%s\n' '{"type":"assistant","message":{"content":[{"type":"text","text":"I will create the file."},{"type":"tool_use","id":"toolu_write","name":"Write","input":{"file_path":"/tmp/poem.md","content":"roses"}}]}}'
+printf '%s\n' '{"type":"result","result":"created"}'
+`)
+	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) // shim dir goes first on PATH
+
+	rt := newHarnessRuntime(t, "claude-code") // harness type "claude-code" is served by the shim named "claude"
+	events := collectRuntimeEvents(t, rt, session.New(session.WithUserMessage("write poem")))
+
+	var sawText bool
+	var toolCall *ToolCallEvent
+	for _, ev := range events {
+		switch ev := ev.(type) {
+		case *AgentChoiceEvent:
+			if strings.Contains(ev.Content, "I will create the file") {
+				sawText = true
+			}
+		case *ToolCallEvent:
+			toolCall = ev
+		}
+	}
+	assert.True(t, sawText)
+	require.NotNil(t, toolCall)
+	assert.Equal(t, "Write", toolCall.ToolCall.Function.Name)
+	assert.Contains(t, toolCall.ToolCall.Function.Arguments, "/tmp/poem.md") // tool input must reach the call arguments
+}
+
+func TestHarnessSuppressesDuplicateClaudeCodeToolCall(t *testing.T) { // a streamed tool_use repeated in the assistant message yields one call
+	if stdruntime.GOOS == "windows" {
+		t.Skip("shell script shim test")
+	}
+
+	binDir := t.TempDir()
+	writeHarnessScript(t, binDir, "claude", `#!/bin/sh
+printf '%s\n' '{"type":"stream_event","event":{"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_1","name":"Bash"}}}'
+printf '%s\n' '{"type":"stream_event","event":{"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"command\":\"uname -a\"}"}}}'
+printf '%s\n' '{"type":"stream_event","event":{"type":"content_block_stop","index":1}}'
+printf '%s\n' '{"type":"assistant","message":{"content":[{"type":"tool_use","id":"toolu_1","name":"Bash","input":{"command":"uname -a"}}]}}'
+printf '%s\n' '{"type":"result","result":"done"}'
+`)
+	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) // shim dir goes first on PATH
+
+	rt := newHarnessRuntime(t, "claude-code")
+	events := collectRuntimeEvents(t, rt, session.New(session.WithUserMessage("run uname")))
+
+	var toolCalls []ToolCallEvent
+	var partialArgs strings.Builder // concatenation of all partial tool-call argument fragments
+	for _, ev := range events {
+		switch ev := ev.(type) {
+		case *ToolCallEvent:
+			toolCalls = append(toolCalls, *ev)
+		case *PartialToolCallEvent:
+			partialArgs.WriteString(ev.ToolCall.Function.Arguments)
+		}
+	}
+	require.Len(t, toolCalls, 1) // both reports share id toolu_1, so only one ToolCallEvent is expected
+	assert.Equal(t, "Bash", toolCalls[0].ToolCall.Function.Name)
+	assert.Contains(t, partialArgs.String(), "uname -a")
+}
+
+func TestHarnessSuppressesReplayedClaudeCodeFinalText(t *testing.T) { // text already streamed as deltas must not be emitted again
+	if stdruntime.GOOS == "windows" {
+		t.Skip("shell script shim test")
+	}
+
+	binDir := t.TempDir()
+	writeHarnessScript(t, binDir, "claude", `#!/bin/sh
+printf '%s\n' '{"type":"stream_event","event":{"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello"}}}'
+printf '%s\n' '{"type":"stream_event","event":{"type":"content_block_delta","delta":{"type":"text_delta","text":" world"}}}'
+printf '%s\n' '{"type":"assistant","message":{"content":[{"type":"text","text":"Hello world"}]}}'
+printf '%s\n' '{"type":"result","result":"Hello world"}'
+`)
+	t.Setenv("PATH", binDir+string(os.PathListSeparator)+os.Getenv("PATH")) // shim dir goes first on PATH
+
+	rt := newHarnessRuntime(t, "claude-code")
+	events := collectRuntimeEvents(t, rt, session.New(session.WithUserMessage("say hello")))
+
+	var chunks []string
+	for _, ev := range events {
+		if choice, ok := ev.(*AgentChoiceEvent); ok {
+			chunks = append(chunks, choice.Content)
+		}
+	}
+	assert.Equal(t, []string{"Hello", " world"}, chunks) // only the two deltas; the assistant and result replays add nothing
+}
+
+func writeHarnessScript(t *testing.T, dir, name, content string) { // writes content to dir/name with mode 0o755, failing the test on error
+	t.Helper()
+	require.NoError(t, os.WriteFile(filepath.Join(dir, name), []byte(content), 0o755))
+}
+
+func newHarnessRuntime(t *testing.T, harnessType string) *LocalRuntime { // runtime whose only agent, "root", uses the given harness type
+	t.Helper()
+	root := agent.New("root", "You are an external coder.", agent.WithHarness(&latest.HarnessConfig{Type: harnessType}))
+	rt, err := NewLocalRuntime(team.New(team.WithAgents(root)), WithSessionCompaction(false), WithModelStore(mockModelStore{}))
+	require.NoError(t, err)
+	return rt
+}
+
+func collectRuntimeEvents(t *testing.T, rt *LocalRuntime, sess *session.Session) []Event { // drains RunStream and returns every event in order
+	t.Helper()
+	var events []Event
+	for ev := range rt.RunStream(t.Context(), sess) {
+		events = append(events, ev)
+	}
+	return events
+}
diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go
index e2cafbb94..aaa1ba6cd 100644
--- a/pkg/runtime/loop.go
+++ b/pkg/runtime/loop.go
@@ -264,6 +264,11 @@ func (r *LocalRuntime) runStreamLoop(ctx context.Context, sess *session.Session,
 		r.finalizeEventChannel(ctx, sess, streamReason, prevElicitationCh, events)
 	}()
 
+	if a.HasHarness() {
+		streamReason = r.runHarnessAgent(ctx, sess, a, slices.Concat(ls.sessionStartMsgs, ls.userPromptMsgs), sink)
+		return
+	}
+
 	// Response cache lookup. On a hit, replay the stored answer and
 	// skip the model entirely. The matching storage half is
 	// implemented as the cache_response stop-hook builtin (see
diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go
index 534f776db..8c81081a7 100644
--- a/pkg/runtime/runtime.go
+++ b/pkg/runtime/runtime.go
@@ -550,7 +550,7 @@ func NewLocalRuntime(agents *team.Team, opts ...Opt) (*LocalRuntime, error) {
 		return nil, err
 	}
 
-	if defaultAgent.Model(context.TODO()) == nil {
+	if defaultAgent.Model(context.TODO()) == nil && !defaultAgent.HasHarness() {
 		return nil, fmt.Errorf("agent %s has no valid model", defaultAgent.Name())
 	}
 
@@ -855,6 +855,9 @@ func (r *LocalRuntime) TitleGenerator() *sessiontitle.Generator {
 // getAgentModelID returns the model ID for an agent. The zero ID is
 // returned when no model is configured.
 func getAgentModelID(a *agent.Agent) modelsdev.ID {
+	if a == nil {
+		return modelsdev.ID{}
+	}
 	if model := a.Model(context.TODO()); model != nil {
 		return model.ID()
 	}
@@ -1013,7 +1016,11 @@ func (r *LocalRuntime) EmitStartupInfo(ctx context.Context, sess *session.Sessio
 	// Emit agent and team information immediately for fast sidebar display
 	// Use getEffectiveModelID to account for active fallback cooldowns
 	modelID := r.getEffectiveModelID(a)
-	if !send(AgentInfo(a.Name(), modelID.String(), a.Description(), a.WelcomeMessage())) {
+	modelLabel := modelID.String()
+	if a.HasHarness() {
+		modelLabel = agentModelLabel(a)
+	}
+	if !send(AgentInfo(a.Name(), modelLabel, a.Description(), a.WelcomeMessage())) {
 		return
 	}
 	if !send(TeamInfo(r.agentDetailsFromTeam(), r.CurrentAgentName())) {
diff --git a/pkg/teamloader/teamloader.go b/pkg/teamloader/teamloader.go
index ba74d6329..a7fd35a03 100644
--- a/pkg/teamloader/teamloader.go
+++ b/pkg/teamloader/teamloader.go
@@ -182,33 +182,41 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c
 			opts = append(opts, agent.WithCache(c))
 		}
 
-		models, err := getModelsForAgent(ctx, cfg, &agentConfig, autoModel, runConfig)
-		if err != nil {
-			// Return auto model fallback errors and DMR not installed errors directly
-			// without wrapping to provide cleaner messages
-			if _, ok := errors.AsType[*config.AutoModelFallbackError](err); ok || errors.Is(err, dmr.ErrNotInstalled) {
-				return nil, err
+		if agentConfig.Harness != nil {
+			harnessCfg := *agentConfig.Harness
+			if harnessCfg.Model == "" {
+				harnessCfg.Model = agentConfig.Model
 			}
-			return nil, fmt.Errorf("failed to get models: %w", err)
-		}
-		for _, model := range models {
-			opts = append(opts, agent.WithModel(model))
-		}
-
-		// Load fallback models if configured
-		fallbackModelRefs := agentConfig.GetFallbackModels()
-		if len(fallbackModelRefs) > 0 {
-			fallbackModels, err := getFallbackModelsForAgent(ctx, cfg, &agentConfig, runConfig)
+			opts = append(opts, agent.WithHarness(&harnessCfg))
+		} else {
+			models, err := getModelsForAgent(ctx, cfg, &agentConfig, autoModel, runConfig)
 			if err != nil {
-				return nil, fmt.Errorf("failed to get fallback models: %w", err)
+				// Return auto model fallback errors and DMR not installed errors directly
+				// without wrapping to provide cleaner messages
+				if _, ok := errors.AsType[*config.AutoModelFallbackError](err); ok || errors.Is(err, dmr.ErrNotInstalled) {
+					return nil, err
+				}
+				return nil, fmt.Errorf("failed to get models: %w", err)
+			}
+			for _, model := range models {
+				opts = append(opts, agent.WithModel(model))
 			}
-			for _, model := range fallbackModels {
-				opts = append(opts, agent.WithFallbackModel(model))
+
+			// Load fallback models if configured
+			fallbackModelRefs := agentConfig.GetFallbackModels()
+			if len(fallbackModelRefs) > 0 {
+				fallbackModels, err := getFallbackModelsForAgent(ctx, cfg, &agentConfig, runConfig)
+				if err != nil {
+					return nil, fmt.Errorf("failed to get fallback models: %w", err)
+				}
+				for _, model := range fallbackModels {
+					opts = append(opts, agent.WithFallbackModel(model))
+				}
+				opts = append(opts,
+					agent.WithFallbackRetries(agentConfig.GetFallbackRetries()),
+					agent.WithFallbackCooldown(agentConfig.GetFallbackCooldown()),
+				)
 			}
-			opts = append(opts,
-				agent.WithFallbackRetries(agentConfig.GetFallbackRetries()),
-				agent.WithFallbackCooldown(agentConfig.GetFallbackCooldown()),
-			)
 		}
 
 		agentTools, warnings := getToolsForAgent(ctx, &agentConfig, parentDir, runConfig, loadOpts.toolsetRegistry, configName, expander)
@@ -267,7 +275,7 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c
 	// Build agent default models map
 	agentDefaultModels := make(map[string]string)
 	for _, agent := range cfg.Agents {
-		if agent.Model != "" {
+		if agent.Harness == nil && agent.Model != "" {
 			agentDefaultModels[agent.Name] = agent.Model
 		}
 	}
diff --git a/pkg/teamloader/teamloader_test.go b/pkg/teamloader/teamloader_test.go
index 6381ca5a5..a7a89f08a 100644
--- a/pkg/teamloader/teamloader_test.go
+++ b/pkg/teamloader/teamloader_test.go
@@ -202,6 +202,30 @@ func TestOverrideModel(t *testing.T) {
 	}
 }
 
+func TestLoadHarnessAgentWithoutModel(t *testing.T) { // a harness sub-agent with no model must load with its harness set and a nil model
+	t.Setenv("OPENAI_API_KEY", "dummy")
+
+	data := []byte(`agents:
+  root:
+    model: openai/gpt-4o
+    sub_agents: [coder]
+  coder:
+    description: External coder
+    instruction: You are a coding agent.
+    harness:
+      type: codex
+`)
+
+	team, err := Load(t.Context(), config.NewBytesSource("harness.yaml", data), &config.RuntimeConfig{})
+	require.NoError(t, err)
+
+	coder, err := team.Agent("coder")
+	require.NoError(t, err)
+	require.True(t, coder.HasHarness())
+	require.Equal(t, "codex", coder.Harness().Type)
+	require.Nil(t, coder.Model(t.Context())) // no model provider is resolved for a harness agent
+}
+
 func TestToolsetInstructions(t *testing.T) {
 	t.Setenv("OPENAI_API_KEY", "dummy")