diff --git a/.codex/plans/2026-04-09-hooks-cli-endpoints.md b/.codex/plans/2026-04-09-hooks-cli-endpoints.md new file mode 100644 index 000000000..d4919d5a3 --- /dev/null +++ b/.codex/plans/2026-04-09-hooks-cli-endpoints.md @@ -0,0 +1,123 @@ +# Completar `agh hooks` no CLI e no Transporte + +## Summary + +- A causa raiz é estrutural: a entrega anterior parou em handlers HTTP locais, com filtros parciais e sem fechar a superfície compartilhada. Hoje: + - o catálogo só filtra `workspace` e `agent` + - os runs não filtram `outcome` + - a taxonomia de eventos não aceita filtros + - o `DaemonClient` não expõe hooks + - o CLI não tem comando `hooks` + - o UDS não registra `/api/hooks/*`, então mesmo um client novo falharia com `404` +- A correção deve ser feita de ponta a ponta: ampliar os tipos/filtros de domínio, mover a lógica de endpoint de hooks para a camada compartilhada de API, registrar as rotas também no UDS, adicionar os 3 métodos no client e expor `agh hooks list|info|events|runs`. +- `hooks sources` fica removido do escopo. `hooks info <name>` retorna todos os hooks resolvidos com aquele nome, em ordem de catálogo. 
+ +## API & Types + +- Adicionar em `contract` os DTOs de query compartilhados: + - `HookCatalogQuery { Workspace, Agent, Event, Source, Mode string }` + - `HookRunsQuery { Session, Event, Outcome, Since string; Last int }` + - `HookEventsQuery { Family string; SyncOnly bool }` +- Adicionar aliases no client para esses tipos e para os records: + - `HookCatalogRecord = contract.HookCatalogPayload` + - `HookRunRecord = contract.HookRunPayload` + - `HookEventRecord = contract.HookEventPayload` +- Ampliar os tipos de domínio para suportar os filtros sem hacks: + - `hookspkg.CatalogFilter` ganha `Event HookEvent`, `Source *HookSource`, `Mode HookMode` + - `hookspkg.EventFilter` novo com `Family HookEventFamily` e `SyncOnly bool` + - `store.HookRunQuery` ganha `Outcome HookRunOutcome` + - adicionar `Validate()` para `HookRunOutcome` e `HookEventFamily` +- Ampliar o catálogo para suportar `hooks info` sem endpoint dedicado: + - `hookspkg.CatalogEntry` ganha `ExecutorKind HookExecutorKind` + - `contract.HookCatalogPayload` ganha `ExecutorKind string` +- Manter `last` como nome público do filtro de hooks. O handler traduz `last` para `store.HookRunQuery.Limit`; não renomear o restante da API para `last`. + +## Transport Changes + +- Extrair a lógica de hooks de `httpapi` para `internal/api/core`, com métodos compartilhados em `BaseHandlers`: + - `HookCatalog` + - `HookRuns` + - `HookEvents` +- Registrar as mesmas três rotas em ambos os transportes: + - `GET /api/hooks/catalog` + - `GET /api/hooks/runs` + - `GET /api/hooks/events` +- HTTP/UDS devem usar a mesma implementação compartilhada; não duplicar parsing, validação nem payload mapping entre `httpapi` e `udsapi`. 
+- Regras exatas de parsing/validação: + - `catalog`: aceitar `workspace`, `agent`, `event`, `source`, `mode`; resolver `workspace` como hoje; validar `event`, `source` e `mode` antes de consultar o observer + - `runs`: exigir `session`; aceitar `event`, `outcome`, `since`, `last`; validar sessão, `event`, `outcome`, `last >= 0`; `since` na API continua timestamp absoluto (`RFC3339`/`RFC3339Nano`) + - `events`: aceitar `family` e `sync_only`; validar `family`; parsear `sync_only` com `strconv.ParseBool` +- O observer/core passa a aceitar filtro de eventos: + - `QueryHookEvents(ctx, filter hookspkg.EventFilter)` +- O store/session DB passa a filtrar `hook_runs` também por `outcome`, preservando a ordenação cronológica ascendente na resposta final mesmo quando `last` é usado. + +## CLI + +- Registrar `newHooksCommand(deps)` no root command. +- Adicionar no `DaemonClient`: + - `HookCatalog(ctx, HookCatalogQuery) ([]HookCatalogRecord, error)` + - `HookRuns(ctx, HookRunsQuery) ([]HookRunRecord, error)` + - `HookEvents(ctx, HookEventsQuery) ([]HookEventRecord, error)` +- Implementar builders de query string no client: + - `event`, `source`, `mode`, `outcome`, `since`, `last`, `family`, `sync_only` +- Implementar `internal/cli/hooks.go` com 4 subcomandos: + - `list` + - flags: `--workspace`, `--agent`, `--event`, `--source`, `--mode` + - tabela humana: `Order Name Event Source Mode Priority` + - JSON: slice completo de `HookCatalogRecord` + - Toon: `hooks[n]{order,name,event,source,skill_source,mode,required,priority}` + - `info <name>` + - flag: `--workspace` + - faz `HookCatalog`, filtra por nome no cliente e retorna todos os matches + - human: um bloco por hook com cabeçalho e seções para campos principais, `Matcher` e `Metadata` + - JSON: slice completo de `HookCatalogRecord` já filtrado pelo nome + - Toon: array `hooks[n]{name,event,source,skill_source,mode,required,priority,timeout_ms,executor_kind}` seguido dos blocos `matcher[...]` e `metadata[...]` + - `events` + - 
flags: `--family`, `--sync-only` + - tabela humana: `Event Family Sync Payload Patch` + - JSON: slice completo de `HookEventRecord` + - Toon: `events[n]{event,family,sync_eligible,payload_schema,patch_schema}` + - `runs` + - flag obrigatória: `--session` + - flags opcionais: `--event`, `--outcome`, `--since`, `--last` + - `--since` continua aceitando RFC3339 ou duração relativa no CLI; o comando converte para timestamp absoluto antes de chamar o client + - tabela humana: `Hook Event Outcome Duration Error` + - JSON: slice completo de `HookRunRecord` + - Toon: `runs[n]{hook_name,event,outcome,duration_ms,error,recorded_at}` +- Não adicionar `--agent` nem `--source` em `info`; o comportamento acordado para colisão de nome é mostrar todos. + +## Test Plan + +- `internal/store/sessiondb` + - filtra `HookRunQuery` por `outcome` + - combina `event + outcome + since + last` + - mantém ordenação cronológica ascendente após aplicar `last` +- `internal/hooks` / `internal/observe` + - `CatalogFilter` filtra por `event`, `source` e `mode` + - `EventFilter` filtra por `family` e `sync_only` + - `CatalogEntry` expõe `ExecutorKind` +- `internal/api/httpapi` + - `catalog` propaga `event/source/mode` + - `runs` propaga `outcome/since/last` + - `events` propaga `family/sync_only` + - casos inválidos retornam `400` + - integração continua cobrindo resposta real de `/api/hooks/*` +- `internal/api/udsapi` + - atualizar teste de rotas para incluir os 3 endpoints de hooks + - adicionar pelo menos um smoke test de handler para garantir que hooks funcionam no transporte usado pelo CLI +- `internal/cli/client_test.go` + - cobrir os 3 métodos novos + - verificar encoding de `last`, `sync_only`, `source`, `mode`, `outcome` +- `internal/cli/hooks_test.go` + - `list`, `info`, `events`, `runs` em `human`, `json` e `toon` + - `info` retorna múltiplos matches do mesmo nome + - `runs` falha sem `--session` + - `runs --since 5m` vira timestamp absoluto antes da chamada +- Fechamento: rodar testes 
focados das áreas alteradas e depois `make verify` + +## Assumptions + +- `hooks sources` foi removido da spec e não será implementado agora. +- `Order` no catálogo continua sendo ordem do pipeline dentro de cada evento; como `event` sempre é exibido, não há necessidade de redefinir isso como ordem global. +- A API pública de hooks aceita `since` absoluto; duração relativa é responsabilidade do CLI. +- Não haverá endpoint `info`; `info` é uma composição de `HookCatalog` + filtro por nome no cliente. diff --git a/.compozy/tasks/web-ui-redesign/_meta.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/_meta.md similarity index 71% rename from .compozy/tasks/web-ui-redesign/_meta.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/_meta.md index db509d36a..e1cc894c4 100644 --- a/.compozy/tasks/web-ui-redesign/_meta.md +++ b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/_meta.md @@ -1,6 +1,6 @@ --- created_at: 2026-04-09T05:04:36.506547Z -updated_at: 2026-04-09T11:28:57.651031Z +updated_at: 2026-04-09T18:31:55.557592Z --- ## Summary diff --git a/.compozy/tasks/web-ui-redesign/_tasks.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/_tasks.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/_tasks.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/_tasks.md diff --git a/.compozy/tasks/web-ui-redesign/_techspec.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/_techspec.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/_techspec.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/_techspec.md diff --git a/.compozy/tasks/web-ui-redesign/adrs/adr-001.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/adrs/adr-001.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/adrs/adr-001.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/adrs/adr-001.md diff --git 
a/.compozy/tasks/web-ui-redesign/adrs/adr-002.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/adrs/adr-002.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/adrs/adr-002.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/adrs/adr-002.md diff --git a/.compozy/tasks/web-ui-redesign/adrs/adr-003.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/adrs/adr-003.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/adrs/adr-003.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/adrs/adr-003.md diff --git a/.compozy/tasks/web-ui-redesign/adrs/adr-004.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/adrs/adr-004.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/adrs/adr-004.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/adrs/adr-004.md diff --git a/.compozy/tasks/web-ui-redesign/memory/MEMORY.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/MEMORY.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/memory/MEMORY.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/MEMORY.md diff --git a/.compozy/tasks/web-ui-redesign/memory/task_01.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_01.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/memory/task_01.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_01.md diff --git a/.compozy/tasks/web-ui-redesign/memory/task_02.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_02.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/memory/task_02.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_02.md diff --git a/.compozy/tasks/web-ui-redesign/memory/task_03.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_03.md similarity index 100% rename from 
.compozy/tasks/web-ui-redesign/memory/task_03.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_03.md diff --git a/.compozy/tasks/web-ui-redesign/memory/task_04.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_04.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/memory/task_04.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_04.md diff --git a/.compozy/tasks/web-ui-redesign/memory/task_05.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_05.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/memory/task_05.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_05.md diff --git a/.compozy/tasks/web-ui-redesign/memory/task_06.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_06.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/memory/task_06.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_06.md diff --git a/.compozy/tasks/web-ui-redesign/memory/task_07.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_07.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/memory/task_07.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_07.md diff --git a/.compozy/tasks/web-ui-redesign/memory/task_08.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_08.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/memory/task_08.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/memory/task_08.md diff --git a/.compozy/tasks/web-ui-redesign/task_01.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_01.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/task_01.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_01.md diff --git 
a/.compozy/tasks/web-ui-redesign/task_02.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_02.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/task_02.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_02.md diff --git a/.compozy/tasks/web-ui-redesign/task_03.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_03.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/task_03.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_03.md diff --git a/.compozy/tasks/web-ui-redesign/task_04.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_04.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/task_04.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_04.md diff --git a/.compozy/tasks/web-ui-redesign/task_05.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_05.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/task_05.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_05.md diff --git a/.compozy/tasks/web-ui-redesign/task_06.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_06.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/task_06.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_06.md diff --git a/.compozy/tasks/web-ui-redesign/task_07.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_07.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/task_07.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_07.md diff --git a/.compozy/tasks/web-ui-redesign/task_08.md b/.compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_08.md similarity index 100% rename from .compozy/tasks/web-ui-redesign/task_08.md rename to .compozy/tasks/_archived/20260409-183155-web-ui-redesign/task_08.md diff --git a/.compozy/tasks/extensability/_meta.md 
b/.compozy/tasks/extensability/_meta.md new file mode 100644 index 000000000..cf56d3631 --- /dev/null +++ b/.compozy/tasks/extensability/_meta.md @@ -0,0 +1,9 @@ +--- +created_at: 2026-04-09T18:31:42.353443Z +updated_at: 2026-04-09T18:32:06.45853Z +--- + +## Summary +- Total: 0 +- Completed: 0 +- Pending: 0 diff --git a/.compozy/tasks/hooks/_meta.md b/.compozy/tasks/hooks/_meta.md new file mode 100644 index 000000000..4e78ba9fb --- /dev/null +++ b/.compozy/tasks/hooks/_meta.md @@ -0,0 +1,9 @@ +--- +created_at: 2026-04-09T18:31:42.353794Z +updated_at: 2026-04-09T22:02:29.188502Z +--- + +## Summary +- Total: 12 +- Completed: 12 +- Pending: 0 diff --git a/.compozy/tasks/hooks/_tasks.md b/.compozy/tasks/hooks/_tasks.md new file mode 100644 index 000000000..52b4a5bf7 --- /dev/null +++ b/.compozy/tasks/hooks/_tasks.md @@ -0,0 +1,18 @@ +# Lifecycle Hooks Platform — Task List + +## Tasks + +| # | Title | Status | Complexity | Dependencies | +|---|-------|--------|------------|--------------| +| 01 | Core types and hook taxonomy | completed | medium | — | +| 02 | Declaration normalization, matchers, and ordering | completed | medium | task_01 | +| 03 | Executor contracts and implementations | completed | medium | task_01 | +| 04 | Generic pipeline with sync composition and guards | completed | high | task_02, task_03 | +| 05 | Async worker pool | completed | medium | task_01 | +| 06 | Hooks struct with typed dispatch, registry, and Notifier | completed | high | task_04, task_05 | +| 07 | Migrate skills hook parsing to new declarations | completed | medium | task_01 | +| 08 | Config and agent-definition hook declarations | completed | medium | task_01 | +| 09 | Wire Hooks in daemon — replace notifierFanout | completed | critical | task_06, task_07, task_08 | +| 10 | Integrate session, input, prompt, event, and agent dispatch | completed | high | task_06, task_09 | +| 11 | Integrate turn, message, and context dispatch | completed | medium | task_10 | +| 12 | Hook 
observability storage and HTTP introspection | completed | medium | task_09 | diff --git a/.compozy/tasks/hooks/_techspec.md b/.compozy/tasks/hooks/_techspec.md new file mode 100644 index 000000000..20153552e --- /dev/null +++ b/.compozy/tasks/hooks/_techspec.md @@ -0,0 +1,452 @@ +# TechSpec: Lifecycle Hooks Platform + +## Executive Summary + +This TechSpec introduces a first-class hooks platform for AGH. The platform defines a typed lifecycle taxonomy, a centralized dispatcher with per-event type-safe functions, and a multi-source declaration model so extensions can observe, block, enrich, and transform runtime operations without changing core packages for each new capability. + +The implementation strategy is to create a dedicated `internal/hooks` package that exposes typed dispatch functions (not a generic event bus), uses Go generics internally for shared infrastructure, and wires into `internal/session`, `internal/daemon`, `internal/skills`, `internal/config`, and future tool and permission paths. The primary trade-off is additional runtime and configuration complexity in exchange for a stable extensibility contract that matches AGH's documented platform ambitions. Because AGH is greenfield alpha, the system should define the full contract now rather than evolve through incompatible one-off seams. + +The dispatcher replaces the existing `notifierFanout` by implementing the `session.Notifier` interface, unifying hook dispatch and session notification into a single path. + +## System Architecture + +### Component Overview + +- `internal/hooks` + - `Hooks`: main struct owning the registry, async worker pool, and typed dispatch functions. Implements `session.Notifier`. + - `Registry`: hot-reloadable registry using `sync.RWMutex` with build-then-swap semantics (same pattern as `skills.Registry`). Stores pre-sorted `map[HookEvent][]*ResolvedHook` snapshots. 
+ - `pipeline[P, R]`: generic internal type that executes sync hooks as a sequential pipeline and schedules async hooks to the worker pool. Each typed dispatch function instantiates a concrete pipeline. + - `Executors`: native callback executor, subprocess executor, and a future Wasm-ready executor seam. + - `Matchers`: event-specific filtering by tool name, agent name/type, workspace, session type, message kind, and provider. + - `Telemetry`: emits structured hook lifecycle records into `internal/observe`, including patch audit trail for security-relevant families. + - `Worker pool`: fixed-size goroutine pool (stdlib channel + WaitGroup) for async hook execution with bounded shutdown. +- `internal/skills` + - Parses `metadata.agh.hooks` into typed hook declarations; no longer owns dispatch. +- `internal/config` + - Parses hook declarations from policy, user, and workspace config and feeds them into the registry. +- `internal/session` + - Receives the `Hooks` dispatcher as its `Notifier`. Calls typed dispatch functions at session, input/prompt, event recording, agent lifecycle, and future turn/message seams. +- `internal/daemon` + - Composition root. Creates the `Hooks` dispatcher, wires native hooks, declaration providers, executor implementations, and reload triggers. Replaces `notifierFanout` and `skillsHookDispatcher`. +- `internal/tools` and permission flow + - Integrate with `tool.*` and `permission.*` hook families through the same typed dispatch functions. + +### Hook Taxonomy + +Events are classified as **sync-eligible** or **async-only**. Sync-eligible events accept both sync and async hooks. Async-only events reject sync hook registration. 
+ +#### Sync-Eligible Events + +- `session.pre_create`, `session.post_create` +- `session.pre_resume`, `session.post_resume` +- `session.pre_stop`, `session.post_stop` +- `input.pre_submit` +- `prompt.post_assemble` +- `agent.pre_start`, `agent.spawned`, `agent.crashed`, `agent.stopped` +- `turn.start`, `turn.end` +- `message.start`, `message.end` +- `tool.pre_call`, `tool.post_call`, `tool.post_error` +- `permission.request` +- `context.pre_compact`, `context.post_compact` + +#### Async-Only Events + +- `event.pre_record`, `event.post_record` +- `message.delta` +- `permission.resolved`, `permission.denied` + +Rationale: async-only events are either high-frequency (`message.delta`, `event.*`) where subprocess fork/exec per invocation is a denial-of-service, or post-decision observations (`permission.resolved`, `permission.denied`) where mutation is semantically invalid. Note: `event.pre_record` and `event.post_record` use "pre/post" naming to indicate timing relative to the record operation, but as async-only events they are observation-only — hooks cannot mutate or block the record. + +### Dispatch Model + +- **Sync hooks compose as a sequential pipeline.** Each sync hook receives the payload as modified by all previous hooks in the chain. Hook A receives the original, returns a patch. The patch is applied, producing a modified payload. Hook B receives the modified payload. This continues through all sync hooks in order. +- **Pipeline short-circuit**: an explicit deny from any hook stops the pipeline. A `required` hook that fails (error or timeout) stops the pipeline with an error. A non-required hook that fails is skipped. +- **Async hooks are observational/background hooks.** They may emit side effects and telemetry but may not block or mutate the completed primary operation. Async hooks are submitted to the worker pool after sync pipeline completion. +- `required` is valid only for sync hooks on sync-eligible events. 
+- **Dispatch depth guard**: a counter in `context.Context` tracks dispatch nesting. Max depth is 3. Exceeding it returns an error immediately — prevents circular dispatch (e.g., a hook on `event.pre_record` triggering `recordEvent` which fires `event.pre_record` again). +- **Permission invariant**: hooks in the `permission.*` family may observe, enrich context, or deny — but may **never** upgrade a deny to an allow. This is enforced in code by the dispatcher, not by documentation. Any patch attempting deny→allow is rejected and logged as `hook.dispatch.permission_escalation_blocked`. +- Dispatch order is deterministic: + 1. Go-native hooks + 2. settings/config hooks + 3. agent-definition hooks + 4. skill hooks +- Within each source class: + 1. higher priority runs first (descending) + 2. stable name order (ascending lexicographic) +- Default priorities by source: native=1000, config=500, agent-definition=100, skill=0. +- Skill hooks preserve existing skill-source precedence (Bundled → Marketplace → User → Additional → Workspace) as sub-ordering before name. + +## Implementation Design + +### Core Interfaces + +```go +type RegisteredHook struct { + Name string + Event HookEvent + Source HookSource + Mode HookMode // sync | async + Required bool + Priority int + Timeout time.Duration + Matcher HookMatcher + Executor Executor +} +``` + +```go +type Executor interface { + Kind() HookExecutorKind + Execute(ctx context.Context, hook RegisteredHook, payload []byte) ([]byte, error) +} +``` + +```go +// Generic internal pipeline — package-private +type pipeline[P any, R any] struct { + hooks *Hooks + event HookEvent + apply func(P, R) P // typed patch applicator + encode func(P) ([]byte, error) // serialize payload for subprocess/Wasm executors + decode func([]byte) (R, error) // deserialize patch from subprocess/Wasm executors +} +``` + +Each typed dispatch function provides concrete `encode`/`decode` functions at initialization. 
Native Go executors bypass serialization entirely — they receive the typed payload directly via a type-safe callback. The `[]byte` boundary exists only for subprocess and future Wasm executors. + +```go +// Typed public dispatch functions — one per event +func (h *Hooks) DispatchSessionPreCreate(ctx context.Context, payload SessionPreCreatePayload) (SessionPreCreatePayload, error) +func (h *Hooks) DispatchToolPreCall(ctx context.Context, payload ToolPreCallPayload) (ToolCallPatch, error) +func (h *Hooks) DispatchPromptPostAssemble(ctx context.Context, payload PromptPayload) (PromptPayload, error) +// ... one function per event in the taxonomy +``` + +```go +// Hooks implements session.Notifier +var _ session.Notifier = (*Hooks)(nil) + +func (h *Hooks) OnSessionCreated(ctx context.Context, session *session.Session) +func (h *Hooks) OnSessionStopped(ctx context.Context, session *session.Session) +func (h *Hooks) OnAgentEvent(ctx context.Context, sessionID string, event acp.AgentEvent) +``` + +### Data Models + +- `HookDecl` + - declarative hook source record from config, agent definitions, or skills +- `RegisteredHook` + - normalized hook ready for dispatch, with resolved source, mode, matcher, timeout, and executor binding +- `ResolvedHook` + - pre-sorted hook in the registry snapshot, with executor and matcher pre-compiled +- `HookRunRecord` + - persisted observability record: hook name, event, source, mode, duration, outcome, dispatch depth, and `PatchApplied json.RawMessage` (populated for security-relevant families: `permission.*`, `prompt.*`, `tool.*`, `input.*`) +- Event-specific payload and patch types (one pair per event): + - `SessionPreCreatePayload` / `SessionCreatePatch` + - `PromptPayload` / `PromptPatch` + - `ToolPreCallPayload` / `ToolCallPatch` + - `ToolPostCallPayload` / `ToolResultPatch` + - `InputPreSubmitPayload` / `InputPreSubmitPatch` + - `ContextCompactPayload` / `ContextCompactionPatch` + - Other events use payload-only types (no patch) when 
they are async-only or observation-only. + +### Declaration Model + +- Go-native declarations are registered from the composition root. +- Settings/config declarations are loaded from existing AGH config layers and may be used for organizational policy. +- Agent-definition declarations are scoped to one agent type and execute only for matching sessions. +- Skill declarations remain the portable, user-facing mechanism for reusable procedures and automation. +- The declaration schema supports: + - `name` + - `event` + - `mode` (sync | async — validated against event eligibility) + - `required` (valid only for sync hooks) + - `priority` (default: 0 for skills, 100 for agent-definitions, 500 for config) + - `timeout` + - `matcher` + - `executor` + - `metadata` for observability labels + +### Matcher Model + +- `session.*`: session type, workspace id/root, agent name +- `input/prompt.*`: agent name, workspace id/root, input class +- `event.*`: ACP event type, turn id, agent name +- `tool.*`: tool name, namespace, read-only flag +- `permission.*`: tool name, decision class +- `message.*`: message role and delta type +- `context.*`: compaction reason and strategy + +### Hot Reload + +The registry uses `sync.RWMutex` with build-then-swap semantics, following the same pattern as `skills.Registry`: + +- **Read path**: `RLock`, copy slice reference for target event, `RUnlock`, dispatch against snapshot. Zero allocations in critical section. +- **Write path**: `Rebuild(ctx)` reads all 4 sources, validates declarations, builds pre-sorted snapshot map, then `Lock` + swap + `Unlock`. If validation fails, old snapshot stays. +- **Triggers**: `skills.Watcher` notifies on skill changes. Config and agent-definition changes use analogous watch or explicit refresh. +- **Version counter**: `atomic.Int64` bumped on each swap for staleness detection. +- **Consistency**: in-flight dispatches operate on the snapshot they read — concurrent reloads do not affect them. 
+ +### Async Worker Pool + +Async hooks execute in a fixed-size worker pool using Go stdlib primitives: + +- **Pool size**: configurable, default 4 workers +- **Queue**: buffered channel, configurable capacity, default 64 +- **Backpressure**: non-blocking send with `select`/`default` — full buffer drops the hook with structured log `hook.dispatch.async_dropped` and metric +- **Workers**: `select { case task := <-ch: execute(task) case <-ctx.Done(): return }` +- **Shutdown**: close channel, workers drain with deadline (10s), `sync.WaitGroup.Wait()` +- **Ownership**: `Hooks` struct owns the pool, starts on init, joins on `Close()` +- **Panic recovery**: each worker wraps execution in `recover()` to prevent a panicking hook from killing the pool +- **Shutdown ordering**: `Hooks.Close()` runs **after** session manager shutdown (so `session.post_stop` hooks can fire during session teardown) and **before** database close (so async hooks that write telemetry can complete). In the daemon shutdown sequence: stop sessions → `Hooks.Close()` (drain async pool) → close HTTP/UDS servers → close database → release lock. + +### API Endpoints + +- `GET /api/hooks/catalog?workspace=:id&agent=:name` + - Returns resolved active hooks after precedence, matching defaults, and source attribution. Shows the sequential pipeline order. +- `GET /api/hooks/runs?session=:id&event=:event` + - Returns recent hook execution records including patch diffs for security-relevant families. +- `GET /api/hooks/events` + - Returns the supported hook taxonomy with sync eligibility classification and event-specific payload/patch schema names. 
+ +## Integration Points + +- Local subprocess execution for skill/config/agent shell hooks +- Existing `internal/observe` pipeline for run records and metrics +- Existing `internal/session` lifecycle — `Hooks` replaces `notifierFanout` as the `session.Notifier` +- Existing `internal/skills` watcher for reload triggers +- Future Wasm executor seam — same `Executor` contract, no taxonomy change + +## Migration from Current Hooks Implementation + +AGH is greenfield alpha with zero legacy tolerance. The migration is a hard cut-over — delete old code, replace with new. No compatibility shims, no mapping layers, no deprecation period. + +### Code to Delete + +| File / Symbol | Current Role | Replacement | +|--------------|-------------|-------------| +| `internal/skills/hooks.go` — `HookRunner`, `RunHooks`, `runHook`, `orderSkillsForHooks`, `skillHasHookEvent`, `hookCapture*` | Subprocess hook execution, ordering, output capture | `internal/hooks` subprocess executor + `pipeline[P,R]` ordering | +| `internal/skills/types.go` — `HookDecl`, `HookEvent`, `HookPayload`, `HookResult` (hook-related types only) | Hook declaration and execution types | `internal/hooks` typed declarations and per-event payload/patch types | +| `internal/skills/registry.go` — `cloneHookDecls` | Deep-copy helper for old HookDecl | Not needed — new declarations are handled by the hooks registry | +| `internal/daemon/notifier.go` — `notifierFanout`, `skillsHookDispatcher`, `sessionHookPhase` | Session notification fanout and hook dispatch bridge | `internal/hooks.Hooks` implementing `session.Notifier` | +| `internal/skills/hooks_test.go` | Tests for old HookRunner | New tests in `internal/hooks` | +| Related test helpers in `registry_test.go` | `newSkillWithHook`, hook-related assertions | Updated to use new declaration types | + +### Event Name Migration + +Old event names are deleted, not mapped: + +| Old Name | New Name | +|----------|----------| +| `on_session_created` | `session.post_create` | 
+| `on_session_stopped` | `session.post_stop` | + +Existing skill YAML frontmatter using old names will fail validation at parse time with a clear error message pointing to the new name. The `validHookEvent` function in the skill loader is rewritten to accept only the new dotted taxonomy. + +### Skill Declaration Schema Migration + +Old schema (5 fields): +```yaml +hooks: + - event: on_session_created + command: ./setup.sh + args: ["--init"] + timeout: 5s + env: + KEY: value +``` + +New schema (up to 9 fields, backward-compatible subset): +```yaml +hooks: + - event: session.post_create + command: ./setup.sh + args: ["--init"] + timeout: 5s + env: + KEY: value + # New optional fields: + mode: async # default: async for skill hooks + priority: 0 # default: 0 for skills + matcher: # optional — narrows when hook fires + agent_name: claude +``` + +The minimal migration for existing skills: change `on_session_created` → `session.post_create` and `on_session_stopped` → `session.post_stop`. All other fields remain compatible. New fields are optional with sensible defaults. + +### Subprocess Payload Migration + +Old payload (JSON via stdin): +```json +{"session_id": "...", "agent_name": "...", "workspace": "...", "event": "on_session_created"} +``` + +New payload (JSON via stdin, event-specific): +```json +{"session_id": "...", "agent_name": "...", "workspace": "...", "event": "session.post_create", "session_type": "...", "workspace_id": "..."} +``` + +The new payload is a superset — it includes all old fields plus event-specific fields. Existing subprocess scripts that read `session_id`, `agent_name`, and `workspace` will continue to work without changes. The `event` field changes from `on_session_created` to `session.post_create`. + +### Migration Sequencing + +The migration happens atomically in build order step 10 (daemon wiring): +1. Steps 1-6: build the new `internal/hooks` package (no old code touched yet) +2. 
Step 7: rewrite `internal/skills` hook parsing to emit new `HookDecl` types, delete old types and `HookRunner` +3. Step 10: delete `notifierFanout` and `skillsHookDispatcher`, wire `Hooks` as `session.Notifier` + +There is no transitional state where both old and new dispatch paths coexist. + +## Impact Analysis + +| Component | Impact Type | Description and Risk | Required Action | +|-----------|-------------|---------------------|-----------------| +| `internal/hooks` | new | Central platform package; medium design risk | Implement registry, pipeline, typed dispatch functions, worker pool, executors | +| `internal/skills` | modified | Moves from owning dispatch to supplying declarations; low risk | Parse richer hook schema and export declarations | +| `internal/config` | modified | Adds hook config parsing and validation; medium risk | Extend config schema and precedence handling | +| `internal/session` | modified | Receives `Hooks` as `Notifier`, calls typed dispatch at lifecycle points; medium risk | Wire typed dispatch invocations at session/input/event/agent paths | +| `internal/daemon` | modified | Replaces `notifierFanout` and `skillsHookDispatcher` with `Hooks`; medium risk | Compose Hooks, wire reload triggers, update shutdown sequence | +| `internal/observe` | modified | Stores hook execution telemetry with patch audit; low risk | Add HookRunRecord with PatchApplied field | +| `internal/tools` | future-modified | Consumes `tool.*` hooks when tool registry lands; medium risk | Reuse typed dispatch functions | +| web/API clients | modified | Optional hook introspection UI; low risk | Consume catalog/run endpoints if needed | + +## Testing Approach + +### Unit Tests + +- Declaration parsing from all four sources +- Matcher evaluation for each hook family +- Ordering: source → priority → name (no specificity) +- Sequential pipeline patch composition: verify hook N sees output of hook N-1 +- Pipeline short-circuit on deny and required-hook failure +- Async 
worker pool: submission, backpressure drop, graceful shutdown with drain +- Permission invariant: deny→allow escalation is rejected for every source type +- Event eligibility: sync registration rejected for async-only events +- Dispatch depth guard: depth > 3 returns error +- Patch audit trail: security-relevant families persist patches, others do not (unless debug mode) +- Registry hot reload: concurrent read during swap returns consistent snapshot +- Invalid contract cases: + - `required` on async hooks + - sync mode on async-only events + - patch type mismatch + - unknown event + - illegal matcher fields for event family + +### Integration Tests + +- Session create/resume/stop hook flow with real SQLite and subprocess executors +- Sequential pipeline with multiple hooks patching the same field — verify composition order +- Prompt submit + event record hook chains +- Agent crash classification and crash-hook dispatch +- Async hook execution with bounded shutdown and observability emission +- Config + agent + skill source coexistence in one workspace +- Hot reload: add/remove skill mid-session, verify next dispatch uses updated registry +- Permission escalation attempt: verify deny→allow is blocked end-to-end +- Dispatch depth: hook triggering event that fires same hook — verify depth guard +- Hook introspection HTTP endpoints with patch audit data +- Tool and permission family integration tests once corresponding runtime path exists in the same implementation stream + +## Development Sequencing + +### Build Order + +1. Create `internal/hooks` core types: `HookEvent` enum with sync eligibility, `HookSource`, `HookMode`, `RegisteredHook`, event-specific payload/patch types — no dependencies +2. Implement declaration normalization, matcher evaluation, and ordering (source → priority → name) — depends on step 1 +3. Implement executor contracts and native/subprocess executors — depends on step 1 +4. 
Implement generic `pipeline[P, R]` with sequential sync composition, deny short-circuit, depth guard, and permission invariant — depends on steps 2 and 3 +5. Implement async worker pool (channel + goroutines + WaitGroup) — depends on step 1 +6. Implement `Hooks` struct with typed dispatch functions, registry with RWMutex snapshot swap, and `session.Notifier` implementation — depends on steps 4 and 5 +7. Extend `internal/skills` hook parsing to emit rich `HookDecl` with new schema fields — depends on step 1 +8. Extend `internal/config` with hook declarations and validation — depends on step 1 +9. Extend agent-definition loading to emit hook declarations — depends on step 1 +10. Wire `Hooks` in `internal/daemon`: hard cut-over — delete `notifierFanout` and `skillsHookDispatcher`, wire `Hooks` as `session.Notifier`, connect reload triggers from skills watcher, update shutdown sequence (stop sessions → `Hooks.Close()` → close servers → close DB) — depends on steps 6, 7, 8, and 9 +11. Integrate `session.*`, `input.*`, `prompt.*`, `event.*`, and `agent.*` dispatch points in session manager — depends on steps 6 and 10 +12. Add `turn.*` and `message.*` dispatch from normalized ACP event flow — depends on step 11 +13. Add `context.*` dispatch around compaction paths — depends on step 11 +14. Add `tool.*` and `permission.*` integrations through the typed dispatch functions — depends on step 6 and the corresponding tool/permission runtime paths +15. Add hook observability storage (HookRunRecord with patch audit) and HTTP introspection endpoints — depends on steps 10 through 14 +16. 
Complete full-package verification and cross-source integration tests — depends on all previous steps + +### Technical Dependencies + +- Existing config precedence rules in `internal/config` +- Existing session lifecycle and `session.Notifier` interface +- Existing `skills.Registry` pattern (RWMutex + snapshot swap) and `skills.Watcher` +- Existing observe/event store primitives +- Future tool registry and permission pipeline for `tool.*` and `permission.*` execution points + +## Monitoring and Observability + +- Metrics + - hook dispatch count by event, source, mode, outcome + - sync hook wall-clock latency (per-hook and full pipeline) + - async queue depth and drain time + - async hook drop count (`hook.dispatch.async_dropped`) + - hook block/deny count + - hook timeout count + - permission escalation block count + - dispatch depth violations count + - registry reload count and duration +- Structured logs + - `hook.dispatch.started` — includes dispatch depth + - `hook.dispatch.completed` — includes pipeline trace + - `hook.dispatch.blocked` — includes deny source + - `hook.dispatch.failed` — includes error and required status + - `hook.dispatch.async_dropped` — includes queue depth at time of drop + - `hook.dispatch.permission_escalation_blocked` — security event + - `hook.dispatch.depth_exceeded` — includes event chain + - `hook.registry.reloaded` — includes version, hook count delta +- Alerting thresholds + - repeated required-hook failure + - async queue backlog above threshold + - p95 sync dispatch latency above configured budget + - permission escalation attempts (any occurrence) + - dispatch depth violations (any occurrence) + +## Technical Considerations + +### Key Decisions + +- The platform uses a dedicated `internal/hooks` package instead of embedding platform behavior in `internal/skills`. (ADR-001) +- The taxonomy is broad because AGH's docs position hooks as foundational extensibility infrastructure. 
(ADR-002) +- Sync hooks are the only mutation/blocking hooks; async hooks are side-effect/observer hooks. Mutation is event-specific and typed. (ADR-003) +- Declarations come from Go-native callbacks, settings/config, agent definitions, and skills. (ADR-004) +- The public API uses typed per-event dispatch functions, not a generic `Dispatch(ctx, any)`. Internally, Go generics share infrastructure without `any`. (ADR-005, ADR-007) +- Sync hooks compose as a sequential pipeline — each hook sees the output of the previous. (ADR-006) +- Async hooks execute in a stdlib worker pool with bounded channel and graceful shutdown. (ADR-008) +- Permission hooks are deny-only — the dispatcher rejects any deny→allow escalation. (ADR-009) +- Patch audit trail is persisted for security-relevant families (`permission.*`, `prompt.*`, `tool.*`, `input.*`). (ADR-010) +- Ordering uses source → priority → name. Specificity is removed. (ADR-011) +- Events are classified as sync-eligible or async-only. A dispatch depth guard (max 3) prevents circular dispatch. (ADR-012) +- The registry is hot-reloadable using RWMutex + snapshot swap. The dispatcher replaces `notifierFanout` by implementing `session.Notifier`. 
(ADR-013) + +### Known Risks + +- Overly broad first implementation could stall delivery + - Mitigation: strict build order and package-local milestones +- Tool and permission hook families depend on adjacent runtime work + - Mitigation: implement shared dispatcher now and integrate those families as part of the same program of work +- Async hooks can become silent failure sinks + - Mitigation: explicit telemetry, queue limits, drop metrics, and shutdown draining rules +- Sequential pipeline makes ordering load-bearing — later hooks can overwrite earlier patches + - Mitigation: introspection API exposes full pipeline trace with before/after state per hook, patch audit trail persists forensic records +- Hot reload introduces concurrency between dispatch and registry swap + - Mitigation: RWMutex + snapshot isolation ensures in-flight dispatches are never affected by concurrent reloads +- Generic internals add moderate code complexity + - Mitigation: generics are limited to the `pipeline[P, R]` type and executor interface — the public API is fully concrete + +## Architecture Decision Records + +- [ADR-001: Centralize Hooks in internal/hooks](adrs/adr-001.md) — Creates one typed registry and dispatcher for every hook family and source. +- [ADR-002: Use a Dotted Hook Taxonomy with Rich Families](adrs/adr-002.md) — Defines the full extensibility-facing lifecycle taxonomy. +- [ADR-003: Use Typed Patch Protocols and Hybrid Failure Policy](adrs/adr-003.md) — Limits mutation to typed patch surfaces and keeps fail-closed behavior explicit via `required`. +- [ADR-004: Support Four Declaration Sources with Ordered Dispatch](adrs/adr-004.md) — Combines native, config, agent, and skill hooks into one deterministic dispatch model. +- [ADR-005: Use Typed Per-Event Dispatch Functions Instead of Generic Dispatcher](adrs/adr-005.md) — Replaces the generic `Dispatch(ctx, HookInvocation)` with typed functions. Resolves the "event bus" contradiction. 
+- [ADR-006: Sequential Pipeline for Sync Hook Patch Composition](adrs/adr-006.md) — Each sync hook sees the output of the previous, Kubernetes-style. +- [ADR-007: Use Go Generics for Internal Dispatcher Type Safety](adrs/adr-007.md) — Shares infrastructure without `any` via `pipeline[P, R]`. +- [ADR-008: Stdlib Worker Pool for Async Hook Execution](adrs/adr-008.md) — Channel + goroutines + WaitGroup, following `consolidation.Runtime` pattern. +- [ADR-009: Permission Hooks Are Deny-Only](adrs/adr-009.md) — Dispatcher rejects deny→allow escalation. Architecturally impossible, not just discouraged. +- [ADR-010: Persist Patch Audit Trail for Security-Relevant Families](adrs/adr-010.md) — HookRunRecord stores patches for `permission.*`, `prompt.*`, `tool.*`, `input.*`. +- [ADR-011: Simplify Ordering to Source, Priority, Name](adrs/adr-011.md) — Removes undefined specificity sort key. Supersedes ordering details in ADR-004. +- [ADR-012: Classify Events into Sync-Eligible and Async-Only with Dispatch Depth Guard](adrs/adr-012.md) — Prevents subprocess fork-bomb on `message.delta` and circular dispatch stack overflow. +- [ADR-013: Hot-Reloadable Registry with RWMutex Snapshot Swap](adrs/adr-013.md) — Same pattern as `skills.Registry`. Dispatcher replaces `notifierFanout`. diff --git a/.compozy/tasks/hooks/adrs/adr-001.md b/.compozy/tasks/hooks/adrs/adr-001.md new file mode 100644 index 000000000..0014171fc --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-001.md @@ -0,0 +1,79 @@ +# ADR-001: Centralize Hooks in internal/hooks + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +AGH already has several adjacent seams that participate in lifecycle behavior: session notifiers, skill subprocess hooks, prompt assembly, event recording, and future tool and permission pipelines. 
The current hook behavior is split across `internal/skills`, `internal/daemon/notifier.go`, and session lifecycle code, which makes it difficult to grow from a narrow session hook runner into a platform-grade extensibility surface. + +The project docs and extensibility analysis treat hooks as foundational infrastructure, not a narrow feature. If the system keeps adding hook-like logic in package-local ways, each new seam will introduce a slightly different contract, precedence rule, and observability shape. + +## Decision + +Create a dedicated `internal/hooks` package as the canonical platform for hook taxonomy, declaration normalization, matching, ordering, execution, and telemetry. + +`internal/hooks` will own: + +- hook event taxonomy and payload typing +- registry and normalized `RegisteredHook` records +- dispatch ordering and sync/async execution rules +- executor interfaces and concrete executors +- hook observability records + +Other packages will only publish declarations or invoke dispatch at well-defined runtime boundaries. + +## Alternatives Considered + +### Alternative 1: Keep hooks inside `internal/skills` + +- **Description**: Extend the existing `HookRunner` in `internal/skills` until it covers more lifecycle events. +- **Pros**: Lowest immediate code movement. Builds on code that already exists. +- **Cons**: Couples the platform hook model to skills. Makes config, agent-definition, and native hooks second-class. Encourages more package-local special cases. +- **Why rejected**: The hook platform needs to outgrow skill subprocess hooks. Keeping ownership in `internal/skills` would make the architecture harder to reason about and harder to extend cleanly. + +### Alternative 2: Add a generic event bus + +- **Description**: Introduce a generic publish/subscribe bus and model hooks as subscribers. +- **Pros**: Flexible fan-out model. Familiar pattern. 
+- **Cons**: Conflicts with AGH's architectural preference for typed direct calls over a generic event bus. Makes blocking, mutation, and precedence rules harder to enforce. +- **Why rejected**: Hooks are not generic observers only. They need typed contracts, deterministic ordering, and barrier behavior, which fit a specialized dispatcher better than a generic bus. + +## Consequences + +### Positive + +- Establishes one stable hook contract for the platform. +- Decouples declaration sources from execution. +- Makes future Wasm and extension executors additive instead of architectural rewrites. +- Improves testability and observability by consolidating hook behavior. + +### Negative + +- Introduces a new core package and cross-package integration work. +- Requires moving responsibility out of currently narrower implementations. + +### Risks + +- The new package could become too abstract and drift away from current runtime needs. + - Mitigation: wire it directly into existing session and daemon seams first, then expand. +- The first version could overreach and slow delivery. + - Mitigation: keep the build order explicit and land families incrementally under one contract. + +## Implementation Notes + +- `internal/skills` remains a declaration source and subprocess executor consumer. +- `internal/daemon` becomes the composition root for registry, providers, executors, and telemetry. +- `internal/session` emits hook invocations at core lifecycle boundaries. 
+ +## References + +- `.compozy/tasks/extensability/analysis.md` +- `.compozy/tasks/extensability/analysis/analysis-claude-code.md` +- `.compozy/tasks/_archived/20260408-201357-skills-v2/_techspec.md` +- `.resources/claude-code/wiki/concepts/hook-system.md` diff --git a/.compozy/tasks/hooks/adrs/adr-002.md b/.compozy/tasks/hooks/adrs/adr-002.md new file mode 100644 index 000000000..d8e8cb108 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-002.md @@ -0,0 +1,94 @@ +# ADR-002: Use a Dotted Hook Taxonomy with Rich Families + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +The current AGH implementation only exposes `on_session_created` and `on_session_stopped` through skill subprocess hooks. That is enough for narrow lifecycle notifications, but it is not enough for the extensibility model described in AGH's own research and related systems. + +Claude Code, Pi, Hermes, and GoClaw all expose families of seams rather than isolated lifecycle hooks. The AGH extensibility analysis also places hooks ahead of the extension architecture and the tool registry, which means the taxonomy must support more than session start/stop if it is going to anchor the platform. 
+ +## Decision + +Adopt dotted hook names and define a broad taxonomy of hook families now: + +- `session.*` +- `input.*` +- `prompt.*` +- `event.*` +- `agent.*` +- `turn.*` +- `message.*` +- `tool.*` +- `permission.*` +- `context.*` + +The initial event set is: + +- `session.pre_create`, `session.post_create` +- `session.pre_resume`, `session.post_resume` +- `session.pre_stop`, `session.post_stop` +- `input.pre_submit` +- `prompt.post_assemble` +- `event.pre_record`, `event.post_record` +- `agent.pre_start`, `agent.spawned`, `agent.crashed`, `agent.stopped` +- `turn.start`, `turn.end` +- `message.start`, `message.delta`, `message.end` +- `tool.pre_call`, `tool.post_call`, `tool.post_error` +- `permission.request`, `permission.resolved`, `permission.denied` +- `context.pre_compact`, `context.post_compact` + +## Alternatives Considered + +### Alternative 1: Keep the existing `on_*` names + +- **Description**: Continue with names such as `on_session_created` and add more events in the same style. +- **Pros**: Smallest change from current code. +- **Cons**: Harder to group and evolve semantically. Less consistent with richer families and event-specific payload typing. +- **Why rejected**: Dotted names scale better across families and align with the broader architecture direction. + +### Alternative 2: Define only the first six lifecycle events + +- **Description**: Limit the taxonomy to the six lifecycle events proposed in the early analysis batch. +- **Pros**: Smaller initial scope. +- **Cons**: Underspecifies the platform and forces future specs to redefine the taxonomy. +- **Why rejected**: AGH wants a robust implementation-driving spec. A tiny taxonomy would undercut the extensibility model documented elsewhere. + +## Consequences + +### Positive + +- Gives the platform a stable long-term naming model. +- Makes it easier to define family-specific payloads and matchers. +- Matches the extensibility posture described in research and reference systems. 
+ +### Negative + +- Commits the project to a larger public hook surface. +- Expands the amount of payload typing, testing, and documentation work. + +### Risks + +- A broad taxonomy could outrun the current runtime if implemented without sequence. + - Mitigation: keep the build order explicit and wire hook families incrementally. +- Some hook families depend on adjacent work, especially `tool.*` and `permission.*`. + - Mitigation: define the taxonomy and shared dispatcher now, integrate those families as corresponding runtime subsystems land. + +## Implementation Notes + +- Event names should be compile-time enumerated, not ad hoc strings. +- Each family should map to a specific payload type and matcher contract. +- Existing `on_session_created` and `on_session_stopped` behavior should be replaced by the new taxonomy rather than preserved as a long-term compatibility layer. + +## References + +- `.compozy/tasks/extensability/analysis.md` +- `.compozy/tasks/extensability/analysis/analysis-claude-code.md` +- `.resources/claude-code/wiki/concepts/hook-system.md` +- `.resources/pi/wiki/concepts/extension-and-customization-system.md` diff --git a/.compozy/tasks/hooks/adrs/adr-003.md b/.compozy/tasks/hooks/adrs/adr-003.md new file mode 100644 index 000000000..b0cddb0e7 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-003.md @@ -0,0 +1,88 @@ +# ADR-003: Use Typed Patch Protocols and Hybrid Failure Policy + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +To become load-bearing extensibility points, hooks need to do more than emit side effects. They need to block, enrich, and transform operations. At the same time, AGH should avoid a protocol that lets arbitrary executors replace whole runtime payloads, because that would make validation, debugging, and future evolution brittle. + +The platform also needs a clear answer to failure semantics. A fully fail-open system is too weak for policy enforcement. 
A fully fail-closed system is too brittle operationally. + +## Decision + +Use typed, event-specific patch payloads instead of whole-payload replacement, and apply a hybrid failure policy: + +- sync hooks may block or patch +- async hooks are observational only +- `required` is valid only for sync hooks +- explicit deny always blocks +- execution failure is fail-open unless the hook is marked `required` + +Examples: + +- `input.pre_submit` may patch the submitted message or add context blocks +- `event.post_record` may patch selected event fields +- `tool.pre_call` may patch tool arguments through a typed `ToolCallPatch` +- `tool.post_call` may patch a typed result surface, not replace arbitrary executor output + +## Alternatives Considered + +### Alternative 1: Whole-payload replacement + +- **Description**: Let hooks return a fully new payload object for any event. +- **Pros**: Maximum flexibility. Very simple contract at first glance. +- **Cons**: Hard to validate, hard to evolve safely, and easy to break invariants accidentally. +- **Why rejected**: The platform needs stable, auditable contracts. Whole-payload replacement is too loose. + +### Alternative 2: Fail-open everywhere + +- **Description**: Hook failures never block operations unless a hook explicitly denies. +- **Pros**: Operationally resilient. +- **Cons**: Makes policy hooks weak unless every denial path is explicit and perfect. +- **Why rejected**: Some hooks need to enforce real guarantees. + +### Alternative 3: Fail-closed everywhere + +- **Description**: Any hook failure blocks the primary operation. +- **Pros**: Strong safety posture. +- **Cons**: Turns every timeout or executor hiccup into a platform stall. +- **Why rejected**: Too brittle for a daemon expected to remain available under partial hook failure. + +## Consequences + +### Positive + +- Keeps mutation narrow, typed, and evolvable. +- Allows strong enforcement where it is intentional. 
+- Preserves resilience for observational and best-effort automation. + +### Negative + +- Requires event-family-specific patch schemas. +- Requires additional validation logic and test coverage. + +### Risks + +- Patch surfaces may become inconsistent across hook families. + - Mitigation: define each patch type next to the corresponding event payload type and validate centrally. +- Operators may misunderstand `required`. + - Mitigation: surface hook source, mode, and required status through catalog and run introspection. + +## Implementation Notes + +- Async hooks must not mutate the completed primary operation. +- Required hooks should emit stronger telemetry and alerting signals. +- Typed patch merge rules should be deterministic and declared per event family. + +## References + +- `.compozy/tasks/extensability/analysis.md` +- `.resources/claude-code/wiki/concepts/hook-system.md` +- `.resources/pi/wiki/concepts/extension-and-customization-system.md` +- `.resources/hermes/website/docs/user-guide/features/hooks.md` diff --git a/.compozy/tasks/hooks/adrs/adr-004.md b/.compozy/tasks/hooks/adrs/adr-004.md new file mode 100644 index 000000000..ce5b64a21 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-004.md @@ -0,0 +1,88 @@ +# ADR-004: Support Four Declaration Sources with Ordered Dispatch + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +AGH already has multiple configuration and capability sources: compiled Go behavior, hierarchical config, agent definitions, and skills. A hook platform that only supports one of these would force users back into core code changes or duplicate mechanisms depending on the use case. + +At the same time, mixing many declaration sources without explicit precedence would create unpredictable behavior and hard-to-debug conflicts. + +## Decision + +Support four declaration sources inside the hook platform: + +1. Go-native callbacks +2. settings/config hooks +3. agent-definition hooks +4. 
skill hooks + +Dispatch order is source-aware and deterministic: + +1. Go-native hooks +2. settings/config hooks +3. agent-definition hooks +4. skill hooks + +Within the same source class: + +- higher priority first +- then higher specificity (superseded by ADR-011, which removes specificity from the sort key) +- then stable name order + +Skill hooks preserve existing skill-source precedence semantics as a tie-breaker for equivalent declarations. + +## Alternatives Considered + +### Alternative 1: Support only Go-native and skills + +- **Description**: Restrict hooks to compiled callbacks and skill frontmatter. +- **Pros**: Smallest scope. Uses mechanisms already close to current code. +- **Cons**: Weak for policy/config-driven automation and agent-specific behavior. +- **Why rejected**: Too narrow for the extensibility posture this spec is meant to support. + +### Alternative 2: Add future extension manifests immediately + +- **Description**: Include a fifth declarative source for extension/plugin manifests now. +- **Pros**: Most future-looking option. +- **Cons**: Pulls extension architecture details into this spec too early. +- **Why rejected**: The executor seam should be future-ready, but extension-manifest design belongs to the extension architecture spec. + +## Consequences + +### Positive + +- Supports policy, reusable procedures, agent specialization, and first-party native logic under one platform. +- Keeps future extension integration additive rather than disruptive. +- Gives users a clear ownership model for different hook use cases. + +### Negative + +- Increases conflict-resolution complexity. +- Requires stronger introspection and debugging tooling. + +### Risks + +- Operators may not understand why one hook source overrides another. + - Mitigation: expose resolved catalogs with source attribution and ordering. +- Source proliferation may tempt teams to duplicate the same behavior in multiple places. + - Mitigation: document intended ownership for each source class and warn on suspicious overlaps. 
+ +## Implementation Notes + +- Settings/config hooks are the right place for organization and workspace policy. +- Agent-definition hooks are the right place for agent-type-specific startup, input, and lifecycle behavior. +- Skill hooks remain the portable declarative automation path for reusable procedures. + +## References + +- `.compozy/tasks/extensability/analysis.md` +- `.compozy/tasks/_archived/20260408-201357-skills-v2/_techspec.md` +- `.resources/pi/wiki/concepts/extension-and-customization-system.md` +- `.resources/claude-code/wiki/concepts/hook-system.md` diff --git a/.compozy/tasks/hooks/adrs/adr-005.md b/.compozy/tasks/hooks/adrs/adr-005.md new file mode 100644 index 000000000..6bdabed39 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-005.md @@ -0,0 +1,76 @@ +# ADR-005: Use Typed Per-Event Dispatch Functions Instead of Generic Dispatcher + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +The original techspec proposed a generic `Dispatcher` interface with a single `Dispatch(ctx context.Context, inv HookInvocation) (DispatchResult, error)` method. A multi-advisor council review identified that this is structurally an event bus with a generic envelope — conflicting with AGH's architectural principle of "no event bus, no NATS, no reflection-based routing" and "direct function calls through interfaces." + +The existing `session.Notifier` interface uses typed method calls (`OnSessionCreated`, `OnSessionStopped`, `OnAgentEvent`). The project's coding standards prohibit `interface{}`/`any` when a concrete type is known. + +The hooks platform needs centralized infrastructure (registry, ordering, matching, timeout) to avoid duplication across 27+ events, but the external API must preserve type safety. + +## Decision + +Replace the generic `Dispatch(ctx, HookInvocation)` interface with typed per-event dispatch functions. 
The `internal/hooks` package exposes functions like: + +- `DispatchSessionPreCreate(ctx, SessionPreCreatePayload) (SessionCreatePatch, error)` +- `DispatchToolPreCall(ctx, ToolPreCallPayload) (ToolCallPatch, error)` +- `DispatchPromptPostAssemble(ctx, PromptPayload) (PromptPatch, error)` + +Each function has concrete input and output types. Internally, the dispatcher uses Go generics (a package-private `pipeline[P Payload, R Patch]` type) to share registry, ordering, matching, and execution infrastructure without duplicating logic. The public API has zero `any` — type safety is enforced at compile time. + +The dispatcher implements the existing `session.Notifier` interface, replacing the current `notifierFanout`. + +## Alternatives Considered + +### Alternative 1: Generic Dispatcher with HookInvocation envelope + +- **Description**: Single `Dispatch(ctx, HookInvocation)` method accepting any event. +- **Pros**: Minimal API surface. One dispatch path for everything. +- **Cons**: `HookInvocation` payload is necessarily `interface{}`. Type errors surface at runtime, not compile time. Structurally identical to an event bus, violating AGH's architecture principles. +- **Why rejected**: Introduces the generic event bus that the project explicitly prohibits and moves type safety from compile time to runtime. + +### Alternative 2: Typed Notifier expansion without centralized package + +- **Description**: Expand the existing `session.Notifier` pattern with per-family interfaces (`SessionHooks`, `ToolHooks`, `PromptHooks`), each consumed where needed, wired in daemon. +- **Pros**: Fully consistent with "no event bus" principle. No centralized package. +- **Cons**: Duplicates ordering, matching, timeout, and execution logic across every family. The daemon composition root becomes enormous. In practice, a shared infrastructure package emerges anyway. +- **Why rejected**: The duplication cost exceeds the architectural purity benefit. 
The centralized infrastructure is justified when the external API remains typed. + +## Consequences + +### Positive + +- Zero `any` or `interface{}` in the public dispatch API — compile-time type safety. +- Respects the spirit of "no event bus" — no generic envelope, no string-based routing. +- Centralizes ordering, matching, and execution logic in one testable package. +- The dispatcher can implement `session.Notifier`, providing a clean migration path from the existing `notifierFanout`. + +### Negative + +- More functions to maintain as the taxonomy grows (one per event). +- Internal generics add moderate complexity to the `internal/hooks` package implementation. + +### Risks + +- Go generics have limitations (no method-level type parameters on interfaces) that may require creative internal design. + - Mitigation: use generic standalone functions internally, not generic interface methods. + +## Implementation Notes + +- The generic internal dispatcher should be a package-private type. +- Each public dispatch function instantiates the generic pipeline with concrete types. +- The `session.Notifier` implementation bridges the typed dispatch functions to the existing notification contract. + +## References + +- Council review: Gap #4 (Dispatcher is an event bus by another name) +- Council review: Gap #2 (HookInvocation is `any` in disguise) +- ADR-001: Centralize Hooks in internal/hooks diff --git a/.compozy/tasks/hooks/adrs/adr-006.md b/.compozy/tasks/hooks/adrs/adr-006.md new file mode 100644 index 000000000..27d2a5b26 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-006.md @@ -0,0 +1,78 @@ +# ADR-006: Sequential Pipeline for Sync Hook Patch Composition + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +The original techspec defined typed patch protocols (ADR-003) but did not specify how multiple sync hooks compose their patches when targeting the same event. 
A council review identified this as the most consequential gap in the spec — without defined merge semantics, multi-hook sync chains produce undefined behavior on the first real-world conflict. + +Two fundamental approaches exist: (1) sequential pipeline where each hook sees the output of the previous hook, or (2) parallel patches where all hooks see the original and patches merge at the end. + +Kubernetes mutating admission controllers faced this exact problem and chose sequential application. HTTP middleware frameworks (Express, Gin, Kong) use the same model with explicit `next()` chain threading. + +## Decision + +Sync hooks compose as a sequential pipeline. Each hook receives the payload as modified by all previous hooks in the chain. The dispatch order is deterministic: source class (native → config → agent → skill), then priority (descending), then name (ascending lexicographic). + +Concretely: + +1. Hook A receives the original payload, returns Patch A. +2. Patch A is applied to the payload, producing Payload'. +3. Hook B receives Payload', returns Patch B. +4. Patch B is applied to Payload', producing Payload''. +5. The final patched payload is returned to the caller. + +If any hook returns an explicit deny, the pipeline short-circuits and the operation is blocked. If a `required` hook fails (error or timeout), the pipeline short-circuits with an error. If a non-required hook fails, it is skipped and the pipeline continues. + +## Alternatives Considered + +### Alternative 1: Parallel patches with merge at the end + +- **Description**: All hooks receive the original payload. Patches are collected and merged using a conflict resolution strategy (last-writer-wins, deep merge, or reject-on-conflict). +- **Pros**: Hooks are independent and don't observe each other's mutations. +- **Cons**: Requires a merge operator per patch type. 
Conflict resolution is inherently ambiguous — "last writer wins" depends on ordering anyway, and "reject on conflict" makes the system fragile to adding new hooks. +- **Why rejected**: Kubernetes tried parallel webhook patches initially and migrated to sequential. The merge problem is harder than the ordering problem. + +### Alternative 2: Reject on conflict + +- **Description**: If two hooks patch the same field, the operation fails. +- **Pros**: Safe — no silent overwrites. +- **Cons**: Extremely fragile. Adding a hook can break existing hooks without either changing. Hostile to extensibility. +- **Why rejected**: An extensibility platform that breaks when you extend it is self-defeating. + +## Consequences + +### Positive + +- Simple, deterministic, debuggable — the introspection endpoint can show payload state before and after each hook. +- No merge operator needed — patches apply directly. +- Ordering is already defined (ADR-004), so the pipeline order is unambiguous. + +### Negative + +- Ordering is load-bearing — changing a hook's priority or source can change the final result. +- Later hooks can silently overwrite earlier hooks' patches. + +### Risks + +- A low-priority skill hook could overwrite a high-priority config hook's patch because it runs later in the pipeline. + - Mitigation: the introspection API exposes the full pipeline trace. The audit trail (ADR-010) persists patches for security-relevant families. + +## Implementation Notes + +- The generic internal dispatcher applies patches using a `func(payload P, patch R) P` applicator passed at dispatch-function registration time. +- The `HookRunRecord` captures the patch returned by each hook for observability. +- Short-circuit on deny must still execute async hooks that were already matched. 
+ +## References + +- Council review: Gap #1 (Patch merge semantics undefined) +- ADR-003: Use Typed Patch Protocols and Hybrid Failure Policy +- ADR-004: Support Four Declaration Sources with Ordered Dispatch +- Kubernetes Mutating Admission Webhooks: sequential patch application model diff --git a/.compozy/tasks/hooks/adrs/adr-007.md b/.compozy/tasks/hooks/adrs/adr-007.md new file mode 100644 index 000000000..07143b858 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-007.md @@ -0,0 +1,84 @@ +# ADR-007: Use Go Generics for Internal Dispatcher Type Safety + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +ADR-005 establishes that the public dispatch API uses typed per-event functions. Internally, the dispatcher needs shared infrastructure for registry lookup, ordering, matching, timeout enforcement, sync pipeline execution, and async scheduling. Without a shared implementation, this logic would be duplicated across 27+ event dispatch functions. + +AGH's coding standards prohibit `interface{}`/`any` when a concrete type is known. The question is how to share infrastructure without resorting to `any`-typed internals. + +## Decision + +Use Go generics for the internal dispatcher pipeline. Define a package-private generic type: + +```go +type pipeline[P any, R any] struct { + registry *Registry + event HookEvent + apply func(P, R) P +} + +func (p *pipeline[P, R]) execute(ctx context.Context, payload P) (P, error) +``` + +Each public dispatch function creates a `pipeline` with concrete types: + +```go +func DispatchSessionPreCreate(ctx context.Context, d *Hooks, payload SessionPreCreatePayload) (SessionPreCreatePayload, error) { + return d.sessionPreCreate.execute(ctx, payload) +} +``` + +The `apply` function is a typed patch applicator provided at initialization time. The executor interface uses generics to serialize/deserialize payloads for subprocess hooks with full type information. 
+ +## Alternatives Considered + +### Alternative 1: Type assertion at controlled boundaries + +- **Description**: Internal dispatcher uses `any` for payload/patch. Each public function casts at entry and exit. +- **Pros**: Simpler implementation. No generics complexity. +- **Cons**: Violates the `any` prohibition. Type errors at runtime, not compile time. A subprocess hook returning the wrong patch type is only caught during deserialization. +- **Why rejected**: Moves type safety from compile time to runtime, directly violating project coding standards. + +### Alternative 2: No shared infrastructure — per-family dispatchers + +- **Description**: Each event family implements its own dispatch loop with concrete types. +- **Pros**: Zero generics, zero `any`. Maximum type safety. +- **Cons**: Massive duplication of ordering, matching, timeout, and execution logic across families. Inconsistent behavior across families as implementations drift. +- **Why rejected**: Duplication across 9+ families is unjustifiable. This is the exact problem generics were designed to solve. + +## Consequences + +### Positive + +- Zero `any` anywhere in the dispatch path — compile-time type safety end to end. +- Shared infrastructure is written and tested once. +- Adding a new event requires only defining payload/patch types and a one-line pipeline instantiation. + +### Negative + +- Go generics have limitations (no method-level type parameters, no variance) that may require workarounds. +- Internal code is slightly harder to read for developers unfamiliar with Go generics. + +### Risks + +- Generic type parameter proliferation could make the internal code hard to navigate. + - Mitigation: limit generics to the pipeline type and executor interface. Keep the registry non-generic (it stores `RegisteredHook` with event metadata, not payloads). + +## Implementation Notes + +- The `pipeline[P, R]` type is package-private. 
+- The `apply func(P, R) P` pattern lets each event family define its own merge behavior without the pipeline knowing the patch structure. +- Subprocess executors use JSON serialization with the concrete types, not `json.RawMessage`. + +## References + +- Council review: Gap #2 (HookInvocation is `any` in disguise) +- ADR-005: Use Typed Per-Event Dispatch Functions diff --git a/.compozy/tasks/hooks/adrs/adr-008.md b/.compozy/tasks/hooks/adrs/adr-008.md new file mode 100644 index 000000000..4d26b90ca --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-008.md @@ -0,0 +1,86 @@ +# ADR-008: Stdlib Worker Pool for Async Hook Execution + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +The hooks platform needs async hook execution for observational/background hooks. AGH's concurrency rules require every goroutine to have explicit ownership and shutdown via `context.Context`, with no fire-and-forget goroutines. + +The project already has a similar pattern in `internal/memory/consolidation/runtime.go` — a single-worker goroutine consuming from a buffered channel with `sync.WaitGroup` for shutdown. + +Research into Go worker pool libraries (`pond`, `ants`, `conc`, `gammazero/workerpool`) showed that none are current dependencies, and the hook dispatch throughput (tens of hooks per session event) does not justify the complexity of an external library. + +## Decision + +Implement async hook execution using Go stdlib primitives: a fixed-size goroutine pool consuming from a buffered channel, with `sync.WaitGroup` for shutdown tracking and `context.WithTimeout` for drain deadline. 
+ +Design: + +- **Pool size**: configurable, default 4 workers +- **Queue**: buffered channel, configurable capacity, default 64 +- **Backpressure**: non-blocking send with `select`/`default` — if the buffer is full, the hook invocation is dropped with a structured log (`hook.dispatch.async_dropped`) and a metric increment +- **Shutdown**: close the channel, workers drain remaining items with a deadline (default 10s), `sync.WaitGroup.Wait()` with the drain context +- **Ownership**: the `Hooks` struct (dispatcher) owns the pool, starts workers on initialization, and joins them on `Close()` +- **Each worker**: `select { case task, ok := <-ch: if !ok { return }; execute(task) case <-ctx.Done(): return }` — the `ok` check lets a worker exit once the closed channel has been drained instead of spinning on zero-value tasks + +## Alternatives Considered + +### Alternative 1: External library (pond, conc, ants) + +- **Description**: Use a third-party worker pool library. +- **Pros**: Pre-built API, potentially more features (metrics, dynamic resizing). +- **Cons**: Adds a dependency for ~50 lines of stdlib code. No existing pool library in go.mod. The project's dependency posture is conservative. +- **Why rejected**: The throughput does not justify an external dependency. The stdlib pattern is already proven in the codebase. + +### Alternative 2: Goroutine-per-hook with semaphore + +- **Description**: Spawn a goroutine per async hook, limited by a counting semaphore. +- **Pros**: More parallelism per dispatch. +- **Cons**: Diffuse ownership — each goroutine needs individual WaitGroup tracking. Harder to reason about backpressure and shutdown ordering. +- **Why rejected**: Violates the explicit ownership principle. The pool model is simpler and sufficient. + +### Alternative 3: Fire-and-forget with timeout + +- **Description**: Spawn goroutines without tracking, relying on context timeout for cleanup. +- **Pros**: Simplest implementation. +- **Cons**: Directly violates "no fire-and-forget goroutines" rule. +- **Why rejected**: Non-negotiable rule violation. 
+ +## Consequences + +### Positive + +- Zero new dependencies. +- Consistent with existing `consolidation.Runtime` pattern. +- Explicit ownership and shutdown — daemon can wait for all async hooks to complete during graceful shutdown. +- Backpressure via drop + metric prevents queue buildup from affecting sync path latency. + +### Negative + +- A slow hook occupies a worker for its full duration, reducing pool throughput. + - Mitigation: per-hook timeout (from `RegisteredHook.Timeout`) limits the blast radius. +- Dropped hooks are lost — no retry mechanism. + - Mitigation: async hooks are observational. Dropping an observation is acceptable; the metric and log ensure visibility. + +### Risks + +- Default pool size (4) may be too small for workloads with many async hooks per event. + - Mitigation: configurable via daemon config. Monitor `async queue depth` metric. + +## Implementation Notes + +- The pool is started in the `Hooks` constructor and stopped in `Hooks.Close()`. +- `Hooks.Close()` must be called in the daemon shutdown sequence before closing the database, so async hooks that write telemetry can complete. +- Each worker wraps execution in a recover() to prevent a panicking hook from killing the pool. + +## References + +- Council review: Gap #3 (Async lanes without concurrency contract) +- `internal/memory/consolidation/runtime.go` — existing single-worker channel pattern +- AGH CLAUDE.md concurrency rules diff --git a/.compozy/tasks/hooks/adrs/adr-009.md b/.compozy/tasks/hooks/adrs/adr-009.md new file mode 100644 index 000000000..201e7c907 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-009.md @@ -0,0 +1,70 @@ +# ADR-009: Permission Hooks Are Deny-Only — Never Upgrade Deny to Allow + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +The hook taxonomy includes a `permission.*` family with events `permission.request`, `permission.resolved`, and `permission.denied`. 
Sync hooks on `permission.request` can patch the operation before the permission decision is made. A security review identified that if a hook can return an allow-patch on a request that the base policy would deny, the hook has escalated privileges — bypassing the entire permission model. + +This is the most dangerous security gap in the original spec. Permission enforcement is a trust boundary, and hooks from skills or agent definitions should not be able to weaken it. + +## Decision + +Enforce an immutable invariant in the dispatcher: **permission hooks may observe, enrich context, or add a deny (deny with a custom reason), but may never upgrade a deny to an allow.** + +This is enforced in code, not documentation: + +- The `permission.request` pipeline applies patches but the dispatcher validates that no patch transitions the decision from deny to allow. If a patch attempts this, the patch is rejected, the hook is logged as a security violation (`hook.dispatch.permission_escalation_blocked`), and the original deny stands. +- `permission.resolved` and `permission.denied` are async-only events (observational) — they cannot patch at all. + +## Alternatives Considered + +### Alternative 1: Allow upgrade from native and config sources only + +- **Description**: Permit deny→allow escalation, but restrict it to Go-native and config hook sources. Skills and agent definitions cannot escalate. +- **Pros**: More flexible for operators who want config-level permission overrides. +- **Cons**: Creates a trust model where "who declared the hook" determines security capability. Easy to confuse and hard to audit. A config typo could silently escalate permissions. +- **Why rejected**: The complexity of a source-based trust model for permission escalation is not justified. Operators who need to change permission policy should change the permission policy, not use hooks to override it. 
+ +### Alternative 2: Permission hooks are async-only + +- **Description**: Remove sync capability from the entire permission family. +- **Pros**: Eliminates the escalation vector entirely. +- **Cons**: Loses the ability for hooks to add denials — for example, a hook that blocks tool calls outside business hours, or a hook that enforces rate limits. +- **Why rejected**: Deny-only sync hooks are valuable for policy enforcement. Removing them to avoid the escalation problem is overly restrictive. + +## Consequences + +### Positive + +- Permission escalation via hooks is architecturally impossible, not just discouraged. +- The permission model remains the authoritative trust boundary. +- Deny-only hooks still enable valuable use cases (rate limiting, time-based restrictions, audit denials). + +### Negative + +- Hooks cannot be used to implement "soft allow" overrides for testing or development. + - Workaround: change the base permission policy instead. + +### Risks + +- The invariant enforcement code must be correct — a bug here is a security vulnerability. + - Mitigation: dedicated unit tests for every deny→allow transition attempt across all source types. + +## Implementation Notes + +- The invariant check lives in the `permission.request` pipeline, after each hook's patch is returned and before it is applied. +- The check compares the pre-patch decision state with the post-patch decision state. +- `permission.resolved` and `permission.denied` are classified as async-only events (see ADR-012). 
+ +## References + +- Council review: Gap #5 (Permission hooks can escalate privileges) +- ADR-003: Use Typed Patch Protocols and Hybrid Failure Policy +- ADR-012: Classify Events into Sync-Eligible and Async-Only diff --git a/.compozy/tasks/hooks/adrs/adr-010.md b/.compozy/tasks/hooks/adrs/adr-010.md new file mode 100644 index 000000000..37c623cd8 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-010.md @@ -0,0 +1,74 @@ +# ADR-010: Persist Patch Audit Trail for Security-Relevant Hook Families + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +The original techspec defined `HookRunRecord` for observability but did not specify whether the actual patch content is persisted. A security review identified that if `prompt.post_assemble` rewrites a system prompt, `tool.pre_call` modifies tool arguments, or `permission.request` attempts to alter a decision, there is no forensic record of what changed. + +The existing hook implementation in `internal/skills/hooks.go` uses `hookCaptureSummary` which actively truncates output for logging. This is correct for logs but insufficient for an audit trail of security-relevant mutations. + +## Decision + +The `HookRunRecord` includes a `PatchApplied` field that stores the serialized patch returned by each hook. This field is populated for security-relevant hook families: + +- `permission.*` — always persisted +- `prompt.*` — always persisted +- `tool.*` — always persisted +- `input.*` — always persisted + +For other families (`session.*`, `event.*`, `message.*`, `context.*`, `agent.*`, `turn.*`), the patch field is optional — populated only when a debug/verbose mode is enabled via config. + +The patch is stored as JSON in the `HookRunRecord` alongside the existing fields (hook name, event, source, duration, outcome). + +## Alternatives Considered + +### Alternative 1: Persist patches for all families always + +- **Description**: Every hook execution records its full patch content. 
+- **Pros**: Complete audit trail for everything. +- **Cons**: `message.delta` can fire hundreds of times per session. Storing patches for high-frequency, low-risk events wastes storage without security benefit. +- **Why rejected**: The storage cost for high-frequency events is disproportionate to the security value. + +### Alternative 2: Only structured logs, no persistent audit + +- **Description**: Patches are logged via `slog` but not stored in the database. +- **Pros**: Simple. No schema changes. +- **Cons**: Logs are ephemeral and hard to correlate with specific sessions. Insufficient for forensic analysis of a security incident. +- **Why rejected**: Security-relevant mutations require durable, queryable records, not ephemeral logs. + +## Consequences + +### Positive + +- Forensic capability for security-relevant mutations — operators can trace exactly what a hook changed. +- The introspection API (`/api/hooks/runs`) can expose patch diffs for debugging. +- Audit records enable post-incident analysis of prompt injection, permission escalation attempts, and tool argument manipulation. + +### Negative + +- Additional storage per hook execution for security-relevant families. + - Mitigation: patches are small typed structs. Storage is bounded by session lifetime. + +### Risks + +- Patch content may contain sensitive data (user input, tool arguments). + - Mitigation: the audit store follows the same access controls as the session event store. Redaction policies can be applied at the API layer. + +## Implementation Notes + +- `HookRunRecord.PatchApplied` is `json.RawMessage` (nullable) — populated by the pipeline after each hook returns. +- The pipeline passes the serialized patch to the telemetry emitter regardless of whether the patch was successfully applied (captures rejected patches too, especially for permission escalation attempts per ADR-009). +- The observability storage uses the existing `internal/observe` pipeline and per-session SQLite store. 
+ +## References + +- Council review: Gap #7 (No audit trail for patches) +- ADR-003: Use Typed Patch Protocols and Hybrid Failure Policy +- ADR-009: Permission Hooks Are Deny-Only diff --git a/.compozy/tasks/hooks/adrs/adr-011.md b/.compozy/tasks/hooks/adrs/adr-011.md new file mode 100644 index 000000000..953e44069 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-011.md @@ -0,0 +1,71 @@ +# ADR-011: Simplify Ordering to Source, Priority, Name — Remove Specificity + +## Status + +Accepted (supersedes ordering details in ADR-004) + +## Date + +2026-04-09 + +## Context + +ADR-004 established the ordering rule as "source class → priority → specificity → name." A council review identified that "specificity" is never defined — there is no comparator for determining which matcher is more specific. CSS specificity works because it has three well-defined levels (id > class > element). Hook matchers have heterogeneous fields per family with no natural hierarchy. + +Without a formal comparator, specificity is indeterminate, making the ordering non-deterministic in practice. + +## Decision + +Remove specificity from the ordering rule. The final ordering is: + +1. **Source class** (native → config → agent-definition → skill) +2. **Priority** (descending — higher priority runs first) +3. **Name** (ascending lexicographic — stable tie-breaker) + +Within the skill source class, existing skill-source precedence (Bundled → Marketplace → User → Additional → Workspace) is used as a sub-ordering before name. + +If a user needs a hook to run before another, they adjust the priority value. The introspection endpoint shows the resolved order, making it easy to verify. + +## Alternatives Considered + +### Alternative 1: Define specificity as matcher field count + +- **Description**: More matcher fields = more specific. `tool.name=grep AND tool.namespace=fs` (2 fields) beats `tool.name=grep` (1 field). +- **Pros**: Deterministic. 
+- **Cons**: Arbitrary — field count does not reflect semantic specificity. A matcher on two low-relevance fields would outrank a matcher on one high-relevance field. +- **Why rejected**: The metric is not meaningful. It would produce surprising results that are harder to reason about than explicit priority. + +### Alternative 2: Specificity as explicit weight field + +- **Description**: Add a `specificity: int` field to the matcher declaration. +- **Pros**: User-controlled. +- **Cons**: Functionally identical to the existing `priority` field. Two numeric ordering dimensions is confusing — users would not know when to use priority vs. specificity. +- **Why rejected**: Conceptual duplication with no added value. + +## Consequences + +### Positive + +- Ordering is fully deterministic with well-defined comparators at every level. +- Simple to implement, test, and explain. +- The introspection endpoint shows exactly why hooks are ordered the way they are. + +### Negative + +- Loses the "most specific matcher wins" semantic, which can be intuitive in some cases. + - Mitigation: explicit priority covers the same use cases with less ambiguity. + +### Risks + +- Users may expect CSS-like specificity behavior. + - Mitigation: document the ordering rule clearly in the hook declaration schema and surface it in the catalog endpoint. + +## Implementation Notes + +- The ordering function is a single `sort.SliceStable` with three-level comparison. +- When a declaration does not specify a priority, the default depends on the source class: native hooks default to priority 1000, config to 500, agent to 100, and skill to 0 — providing sensible defaults that match the source ordering. 
+ +## References + +- Council review: Gap #8 (Specificity as phantom sort key) +- ADR-004: Support Four Declaration Sources with Ordered Dispatch diff --git a/.compozy/tasks/hooks/adrs/adr-012.md b/.compozy/tasks/hooks/adrs/adr-012.md new file mode 100644 index 000000000..c14772364 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-012.md @@ -0,0 +1,103 @@ +# ADR-012: Classify Events into Sync-Eligible and Async-Only with Dispatch Depth Guard + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +Two related gaps were identified by the council review: + +1. **Event eligibility**: `message.delta` fires at token-streaming frequency (hundreds per second). A subprocess sync hook on `message.delta` would fork/exec per token — a denial-of-service by configuration. The spec did not define which events are eligible for sync hooks. + +2. **Circular dispatch**: A sync hook on `event.pre_record` could trigger logic that calls `recordEvent`, which fires `event.pre_record` again. Without re-entrancy detection, this causes stack overflow and daemon crash. 
+ +## Decision + +### Event Eligibility + +Each event in the taxonomy has a fixed sync eligibility classification: + +**Sync-eligible** (hooks may be sync or async): +- `session.pre_create`, `session.post_create` +- `session.pre_resume`, `session.post_resume` +- `session.pre_stop`, `session.post_stop` +- `input.pre_submit` +- `prompt.post_assemble` +- `agent.pre_start`, `agent.spawned`, `agent.crashed`, `agent.stopped` +- `turn.start`, `turn.end` +- `message.start`, `message.end` +- `tool.pre_call`, `tool.post_call`, `tool.post_error` +- `permission.request` +- `context.pre_compact`, `context.post_compact` + +**Async-only** (hooks must be async — sync registration is rejected): +- `event.pre_record`, `event.post_record` +- `message.delta` +- `permission.resolved`, `permission.denied` + +The eligibility is enforced at registration time: if a hook declaration specifies `mode: sync` for an async-only event, the registry rejects it with a clear error message. + +### Dispatch Depth Guard + +The dispatcher injects a depth counter into `context.Context` via `context.WithValue`. Each dispatch call increments the counter. If the counter exceeds max depth (3), the dispatch returns an error immediately without executing any hooks. The depth is recorded in the `HookRunRecord` for observability. + +## Alternatives Considered + +### Alternative 1: Allow sync everywhere with aggressive timeout cap + +- **Description**: All events accept sync hooks, but high-frequency events cap timeout at 50ms. +- **Pros**: Maximum flexibility. +- **Cons**: Does not prevent the fork/exec-per-token problem — even a fast subprocess has fork overhead. 50ms per token adds seconds of latency to a streaming response. +- **Why rejected**: Mitigation is insufficient. The performance impact is unacceptable even with capped timeouts. + +### Alternative 2: Subprocess hooks are always async, only native hooks can be sync + +- **Description**: Restrict sync mode to Go-native hooks. 
+- **Pros**: Eliminates subprocess latency from the sync path entirely. +- **Cons**: Kills legitimate use cases — a subprocess sync hook on `tool.pre_call` that validates arguments before execution is a core use case. +- **Why rejected**: Too restrictive for the platform's extensibility goals. + +### Alternative 3: Event name set in context for exact cycle detection + +- **Description**: Track which events are currently in-flight in the context. Block re-entrant dispatch of the same event. +- **Pros**: More precise — allows A→B→A but blocks A→A. +- **Cons**: Does not prevent indirect cycles (A→B→C→A) unless all events are tracked. More complex, allocates a set per dispatch chain. +- **Why rejected**: Depth-based detection is simpler and catches both direct and indirect cycles. + +## Consequences + +### Positive + +- High-frequency events cannot be blocked by slow sync hooks — prevents performance degradation by design. +- Circular dispatch crashes are prevented — bounded depth with clear error. +- Eligibility classification is simple to enforce and document. + +### Negative + +- Some events cannot have sync hooks even if a use case exists. + - Mitigation: the classification can be revised if a compelling use case emerges. +- Depth limit of 3 may be restrictive for legitimate deep hook chains. + - Mitigation: depth > 3 is a design smell. If needed, the limit can be made configurable. + +### Risks + +- The eligibility classification may need revision as the platform evolves. + - Mitigation: the classification is centralized in the event taxonomy definition, easy to update. + +## Implementation Notes + +- Event eligibility is a property of the `HookEvent` enum — stored as a boolean `SyncEligible` field. +- The depth counter key is a package-private `contextKey` type to avoid collisions. +- The depth guard is checked at the top of every dispatch function, before registry lookup. 
+ +## References + +- Council review: Gap #9 (Circular dispatch) +- Council review: Gap #10 (Event eligibility for sync mode) +- ADR-002: Use a Dotted Hook Taxonomy with Rich Families +- ADR-009: Permission Hooks Are Deny-Only (permission.resolved and permission.denied are async-only) diff --git a/.compozy/tasks/hooks/adrs/adr-013.md b/.compozy/tasks/hooks/adrs/adr-013.md new file mode 100644 index 000000000..537e86c00 --- /dev/null +++ b/.compozy/tasks/hooks/adrs/adr-013.md @@ -0,0 +1,88 @@ +# ADR-013: Hot-Reloadable Registry with RWMutex Snapshot Swap + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +AGH is an Agent Operating System where agents can self-configure at runtime — installing skills, modifying agent definitions, and changing workspace config. If the hook registry is immutable and requires a daemon restart, agent-driven changes are not reflected until restart, breaking a core platform capability. + +The original techspec did not address whether hooks can be registered/deregistered while sessions are active, or how the dispatcher relates to the existing `session.Notifier`. + +The AGH codebase already implements hot reload in `skills.Registry` using `sync.RWMutex` with build-then-swap semantics. The `skills.Watcher` polls filesystem directories every 3 seconds, compares snapshots, and triggers `RefreshGlobal()` on change. This is a proven pattern in the codebase. + +Research into other systems confirmed this approach: Kubernetes uses informer/watch with atomic swap, Caddy uses `atomic.Value` for full config swap, and the Go community canonical pattern for concurrent-read/rare-write is `sync.RWMutex` guarding an immutable snapshot. + +## Decision + +The hook registry uses `sync.RWMutex` guarding an immutable snapshot map, with build-then-swap semantics — the same pattern as `skills.Registry`. + +Design: + +- **Storage**: `map[HookEvent][]*ResolvedHook` — a pre-sorted slice per event, ready for dispatch. 
+- **Read path**: `RLock`, copy the slice reference for the target event, `RUnlock`, dispatch against that snapshot. The critical section is a map lookup and slice reference copy — no allocations. +- **Write path**: rebuild the complete map from all 4 sources (native Go callbacks, config, agent definitions, skills), validate all declarations, sort each event's hooks, then `Lock` + swap + `Unlock`. +- **Trigger**: the existing `skills.Watcher` can notify the hook registry on skill changes. Config and agent definition changes use analogous filesystem watch or explicit refresh calls. +- **Consistency**: a dispatch in progress operates on the snapshot it read at the start. A concurrent reload does not affect in-flight dispatches. +- **Version counter**: an `atomic.Int64` bumped on every swap, allowing consumers to detect staleness cheaply (same pattern as `skills.Registry.globalVersion`). + +The `Hooks` dispatcher replaces the existing `notifierFanout` by implementing the `session.Notifier` interface. The `notifierFanout` struct and `skillsHookDispatcher` become unnecessary — the `Hooks` dispatcher encapsulates fanout, ordering, sync/async execution, and telemetry. + +## Alternatives Considered + +### Alternative 1: Immutable registry, restart required + +- **Description**: Build the registry once at boot, require daemon restart for changes. +- **Pros**: Zero concurrency complexity. No race conditions. +- **Cons**: Agents cannot self-configure and see changes reflected. Breaks a core AGH capability. Restart disrupts all active sessions. +- **Why rejected**: AGH's agent-first model requires runtime reconfigurability. + +### Alternative 2: atomic.Pointer for lock-free reads + +- **Description**: Use `atomic.Pointer[snapshotMap]` instead of `sync.RWMutex`. +- **Pros**: Zero contention on read path — strictly better performance. +- **Cons**: AGH does not use `atomic.Pointer` anywhere in the codebase. Inconsistent with established patterns. 
The performance difference is negligible for hook dispatch throughput. +- **Why rejected**: Consistency with existing codebase patterns outweighs the marginal performance benefit. + +## Consequences + +### Positive + +- Agents can install skills, modify config, and see hook changes reflected without restart. +- Consistent with the proven `skills.Registry` pattern — familiar to anyone working on the codebase. +- In-flight dispatches are not affected by concurrent reloads — snapshot isolation. +- The dispatcher replaces `notifierFanout`, simplifying the notification architecture. + +### Negative + +- RWMutex has non-zero read-side cost (atomic operations for lock/unlock). + - Mitigation: the critical section is a map lookup, not computation. Negligible in practice. +- Reload triggers need coordination — the skills watcher, config watcher, and agent definition changes must all trigger a registry rebuild. + - Mitigation: a single `Rebuild()` method that reads all sources. Any trigger calls `Rebuild()`. + +### Risks + +- A malformed declaration during reload could invalidate the entire registry. + - Mitigation: build-then-validate-then-swap. If validation fails, the old snapshot stays and the error is logged. +- Frequent rebuilds (e.g., watcher polling every 3s) could cause write contention. + - Mitigation: snapshot comparison (like `skills.Registry.reloadGlobal`) skips the swap if nothing changed. + +## Implementation Notes + +- The `Hooks` struct owns the `sync.RWMutex`, the snapshot map, and the async worker pool. +- `Hooks.Rebuild(ctx)` is the public method for triggering a reload. It is safe to call concurrently. +- The daemon's `boot.go` wires the `Hooks` dispatcher as the `session.Notifier` and connects reload triggers. +- The `Hooks.Close()` method must be called during shutdown, after session stop but before database close, to drain async workers. 
+ +## References + +- Council review: Gap #11/12 (Hot config reload + Notifier relationship) +- `internal/skills/registry.go` — existing RWMutex + snapshot swap pattern +- `internal/skills/watcher.go` — existing filesystem polling pattern +- Kubernetes informer/watch pattern +- Caddy atomic config swap pattern diff --git a/.compozy/tasks/hooks/memory/MEMORY.md b/.compozy/tasks/hooks/memory/MEMORY.md new file mode 100644 index 000000000..306eab30b --- /dev/null +++ b/.compozy/tasks/hooks/memory/MEMORY.md @@ -0,0 +1,37 @@ +# Workflow Memory + +Keep only durable, cross-task context here. Do not duplicate facts that are obvious from the repository, PRD documents, or git history. + +## Current State +- Task 01 established the `internal/hooks` base package with stdlib-only types, taxonomy metadata, and test coverage. Follow-on tasks should extend this package without importing other `internal/` packages back into the base layer. + +## Shared Decisions +- Event payloads in `internal/hooks` are snapshot structs that duplicate the needed session and ACP fields by value to preserve the package's dependency-free boundary. +- Task 02 mirrors skill-source precedence inside `internal/hooks` with a local `HookSkillSource` enum so ordering can preserve Bundled → Marketplace → User → Additional → Workspace without importing `internal/skills`. +- Task 07 confirmed the base `internal/hooks` package cannot import `internal/session` once `internal/skills` depends on `internal/hooks`; notifier/session bridge code must stay in an upper layer such as `internal/daemon` rather than in the base hooks package. +- Task 08 widened `session.Notifier.OnAgentEvent` to accept `any`; ACP-specific downcasting now stays in upper layers (for example `internal/observe`) so config-driven hook declarations can reuse `internal/hooks` without reintroducing a `config -> hooks -> acp -> config` import cycle. 
+- Task 09 kept the `session.Notifier` bridge as a daemon-local adapter over `Hooks` instead of moving the interface implementation into `internal/hooks`; `Hooks` remains the authoritative runtime, but the adapter is required to avoid the existing `session -> config/skills -> hooks` import cycle. +- Task 10 added a separate `session.HookDispatcher` seam for the load-bearing runtime dispatch path; `session.Notifier` remains for observer/dream fan-out, while session lifecycle hook execution should go through `SessionManagerDeps.Hooks`. +- Task 11 requires new session-scoped hook families to be wired in lockstep across `session.HookDispatcher`, the daemon-local `hooksNotifier` bridge, and the daemon test fake, because the session package still cannot depend directly on the hooks runtime implementation. + +## Shared Learnings +- `Executor` and `HookExecutorKind` now exist in the base package, so later executor and registry tasks can build on the contract instead of redefining it. +- `HookDecl.PrioritySet` is needed to distinguish explicit `priority: 0` from an unset priority; later parsers must set this when a declaration explicitly supplies a priority value. +- `RegisteredHook` intentionally carries normalized dispatch metadata only; declaration-specific shell details such as command, args, env, and working directory must stay bound inside executor instances during normalization/resolution instead of being copied onto the registered hook. +- Go filenames ending in `*_wasm.go` are treated as GOARCH-specific build files; future non-wasm stub seams should avoid that suffix unless they are intentionally wasm-only. +- Task 04 added the package-private generic sync `pipeline[P, R]` plus `encodeJSON`/`decodeJSON` helpers and a depth guard; future typed dispatchers should feed it one hook snapshot per dispatch rather than re-selecting hooks mid-pipeline. 
+- Native Go hooks now have a typed bypass via `NewTypedNativeExecutor`, while subprocess/Wasm-style executors stay on the byte serialization boundary; future task wiring should prefer the typed native path for in-process callbacks. +- Task 05 added a package-private `asyncPool` that passes a pool-owned worker context into each submitted task; future async dispatch wiring must layer per-hook timeout logic inside the submitted closure instead of expecting the pool to read `RegisteredHook.Timeout` directly. +- Task 06 added source-specific declaration-provider seams plus an executor resolver to `Hooks`; future task_07/task_08 loaders can feed `HookDecl` slices directly into those providers without changing rebuild semantics, while native hooks still require an explicit resolver binding. +- Task 06 matches async hooks against the pre-pipeline payload, then runs the matched async set after the sync pipeline against the final payload snapshot; later tasks should preserve that behavior so already-matched async hooks still fire even when the sync path short-circuits. +- Skill-owned hook declarations need a final normalization pass after `skills.Skill.Source` and provenance are resolved so each `hooks.HookDecl` carries the correct `Source` and `SkillSource` metadata for ordering and marketplace policy checks. +- Task 09 added `skills.Watcher.SetAfterRefresh`; hooks rebuilds should attach there so a watcher refresh updates the skills registry and swaps the hooks snapshot in the same change cycle before the next lifecycle dispatch. +- The permission deny-only invariant must treat ACP `reject-once` and `reject-always` decisions as denied states, not just generic `deny`/`block` strings; otherwise subprocess `permission.request` hooks can escalate rejected requests into allows. 
+- Task 11 introduced `Manager.runContextCompaction` as the session seam for `context.pre_compact` and `context.post_compact`; future compaction paths should route through that helper so hook patches change the actual compaction inputs instead of being observed out-of-band. +- Task 12 stores hook execution audits in each session DB via a `hook_runs` table; observer introspection reopens the per-session store on demand for `/api/hooks/runs` rather than depending on an in-memory recorder. +- Task 12 routes session-managed hook telemetry through context-carried writers first and only falls back to the observer sink when no live session recorder is available; future hook telemetry should preserve that preference to avoid duplicate write paths. + +## Open Risks +- `Hooks.OnAgentEvent` remains intentionally conservative in task_06 because the current notifier surface lacks enough typed data for the full taxonomy; task_10 still owns the direct session/runtime integrations for richer event families. + +## Handoffs diff --git a/.compozy/tasks/hooks/memory/task_01.md b/.compozy/tasks/hooks/memory/task_01.md new file mode 100644 index 000000000..24105e207 --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_01.md @@ -0,0 +1,34 @@ +# Task Memory: task_01.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Create the dependency-free `internal/hooks` base package for task_01. +- Deliver the 27-event hook taxonomy, sync-eligibility lookup, core enums/structs, event payload and patch models, and unit tests. +- Succeed with focused hook-package tests plus clean repository verification before tracking is marked complete. + +## Important Decisions +- Keep `internal/hooks` stdlib-only for this task even when payloads mirror existing `session` or `acp` data. 
+- Add validation on `RegisteredHook` to enforce `required` and sync-eligibility rules because the task tests require those failure modes. +- Define the `Executor` interface and `HookExecutorKind` in the base package now so task_03 can implement concrete executors without moving type contracts later. + +## Learnings +- The documented async-only events are `event.pre_record`, `event.post_record`, `message.delta`, `permission.resolved`, and `permission.denied`. +- The current hook implementation still lives under `internal/skills/types.go` with the legacy `on_session_created` and `on_session_stopped` names. +- Package-local coverage reached 89.7% after adding targeted enum and validation tests. + +## Files / Surfaces +- `internal/hooks/doc.go` +- `internal/hooks/events.go` +- `internal/hooks/types.go` +- `internal/hooks/payloads.go` +- `internal/hooks/*_test.go` +- `.compozy/tasks/hooks/task_01.md` +- `.compozy/tasks/hooks/_tasks.md` + +## Errors / Corrections +- Pre-change baseline confirmed the package is not implemented yet: `go test ./internal/hooks` failed because the directory did not exist. +- The first `make verify` run failed on one `staticcheck` lint in `internal/hooks/types_test.go`; fixed by rewriting the ordering assertion and rerunning the full pipeline successfully. + +## Ready for Next Run +- Task 01 is implemented and verified. Next tasks can consume `internal/hooks` types directly and should keep tracking-only file changes out of the automatic code commit unless explicitly required. diff --git a/.compozy/tasks/hooks/memory/task_02.md b/.compozy/tasks/hooks/memory/task_02.md new file mode 100644 index 000000000..f23e8e853 --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_02.md @@ -0,0 +1,33 @@ +# Task Memory: task_02.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. 
+ +## Objective Snapshot +- Implement declaration normalization, matcher evaluation, and deterministic ordering in `internal/hooks`, with unit coverage above 80% and repository-wide verification via `make verify`. + +## Important Decisions +- Added `ValidateHookDecl` / `NormalizeHookDecl` split so later config and agent-definition loaders can reuse declaration validation without requiring concrete executors. +- Added internal-only `HookSkillSource` and `HookDecl.PrioritySet` fields in `internal/hooks/types.go` to preserve skill precedence ordering and explicit zero priorities without importing `internal/skills`. +- Kept executor inference strict: shell fields require `subprocess`, `native` executors are limited to native hook sources, and non-native declarations without command or executor kind fail normalization. + +## Learnings +- Permission matcher uses `PermissionToolCall.Kind` as the tool-name surface because the current permission payload does not expose a separate `ToolName` field. +- `internal/hooks` package coverage reached 87.4% after adding family-level matcher tests and declaration-slice normalization tests. +- Full repository verification passed after the implementation (`make verify`). + +## Files / Surfaces +- `internal/hooks/types.go` +- `internal/hooks/normalize.go` +- `internal/hooks/matcher.go` +- `internal/hooks/ordering.go` +- `internal/hooks/types_test.go` +- `internal/hooks/normalize_test.go` +- `internal/hooks/matcher_test.go` +- `internal/hooks/ordering_test.go` + +## Errors / Corrections +- Initial package coverage was 79.5%; added broader matcher-family coverage and declaration-slice tests to raise it above the 80% task gate. +- Self-review caught that `SkillSource` should be rejected for non-skill declarations; normalization now enforces that invariant. + +## Ready for Next Run +- Task 02 is implemented and verified. 
Next dependent tasks can consume `ValidateHookDecl`, `NormalizeHookDecl(s)`, the family matcher helpers, and `SortResolvedHooks` / `OrderedResolvedHooks`. diff --git a/.compozy/tasks/hooks/memory/task_03.md b/.compozy/tasks/hooks/memory/task_03.md new file mode 100644 index 000000000..42b6121a4 --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_03.md @@ -0,0 +1,34 @@ +# Task Memory: task_03.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Implement `internal/hooks` executors for native callbacks, subprocess hooks, and a wasm stub, with focused tests and verification for task_03. + +## Important Decisions +- Kept the task_01 executor contract intact and moved executor-specific types into dedicated files instead of changing the public shape midstream. +- Bound subprocess command, args, env, and working directory inside `SubprocessExecutor`; `RegisteredHook` remains metadata-only. +- Used `executor_wasm_stub.go` instead of `executor_wasm.go` because Go treats the latter as a wasm-architecture file and excludes it from normal builds. + +## Learnings +- Porting the old `internal/skills` runner directly worked cleanly once the process-group helpers, env allowlist, timeout handling, and 8KB capture logic moved into `internal/hooks`. +- The subprocess tests can validate graceful shutdown and descendant cleanup with real shell scripts; no test doubles were needed. 
+ +## Files / Surfaces +- `internal/hooks/executor.go` +- `internal/hooks/executor_native.go` +- `internal/hooks/executor_subprocess.go` +- `internal/hooks/executor_subprocess_unix.go` +- `internal/hooks/executor_subprocess_windows.go` +- `internal/hooks/executor_wasm_stub.go` +- `internal/hooks/executor_test.go` +- `internal/hooks/executor_subprocess_unix_test.go` +- `internal/hooks/types.go` + +## Errors / Corrections +- Fixed a real build issue after the first pass: `executor_wasm.go` was excluded from non-wasm builds because the filename matched Go's GOARCH suffix rules. +- Fixed a test bug where `t.Setenv` was used together with `t.Parallel()`. + +## Ready for Next Run +- Focused validation is green: `go test ./internal/hooks -count=1` and `go test ./internal/hooks -cover -count=1` passed with `86.0%` coverage before the full repo gate. +- `make verify` passed after the executor implementation landed. diff --git a/.compozy/tasks/hooks/memory/task_04.md b/.compozy/tasks/hooks/memory/task_04.md new file mode 100644 index 000000000..6a70a056a --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_04.md @@ -0,0 +1,36 @@ +# Task Memory: task_04.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Implement task_04's generic sync pipeline, dispatch depth guard, permission deny-only enforcement, and unit tests in `internal/hooks`. +- Pre-change baseline: `internal/hooks` has ordering, matcher, normalization, payloads, and executors, but no `pipeline.go`, `depth.go`, or `permission.go`, and no tests for pipeline/guard behavior. + +## Important Decisions +- The approved design is already captured by `_techspec.md` and ADR-005/006/007/009/012, so implementation can proceed directly without reopening design. 
+- Keep the pipeline centered on `ResolvedHook` plus the existing executor contract so task_06 can wrap it without back-importing other packages. +- Select and sort the hook snapshot once per dispatch, then run the sequential pipeline against that fixed order while each hook still sees the patched payload from earlier hooks. +- Treat permission deny→allow attempts as rejected patches instead of hook failures so the original deny stands and later hooks still see the unchanged denied payload. + +## Learnings +- `PermissionRequestPayload` carries both `Decision` and `DecisionClass`, while `PermissionRequestPatch` can mutate both, so deny-only enforcement needs to validate the effective decision after every patch. +- Native executors currently still implement the byte-based `Executor` interface, so the pipeline will need an explicit typed bypass to satisfy the task requirement that native callbacks avoid serialization. +- Per-hook timeout enforcement needs to happen in the pipeline as well as inside subprocess execution so required native hooks can time out cleanly on `ctx.Done()`. + +## Files / Surfaces +- `internal/hooks/types.go` +- `internal/hooks/payloads.go` +- `internal/hooks/ordering.go` +- `internal/hooks/executor.go` +- `internal/hooks/executor_native.go` +- `internal/hooks/executor_subprocess.go` +- `internal/hooks/pipeline.go` +- `internal/hooks/depth.go` +- `internal/hooks/permission.go` +- `internal/hooks/pipeline_test.go` + +## Errors / Corrections +- Corrected the permission test helper so explicit deny takes precedence over any patched decision value, matching the production guard semantics. + +## Ready for Next Run +- Task 04 is implemented and verified. Next dependency is task_05 (async worker pool) plus task_06 wiring the new pipeline into the `Hooks` struct and typed dispatch functions. 
diff --git a/.compozy/tasks/hooks/memory/task_05.md b/.compozy/tasks/hooks/memory/task_05.md new file mode 100644 index 000000000..cf0e7510c --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_05.md @@ -0,0 +1,31 @@ +# Task Memory: task_05.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Implement the async hook worker pool in `internal/hooks/pool.go` with configurable worker count, queue capacity, drop-on-full backpressure, panic recovery, and bounded shutdown. +- Prove the task requirements with focused `internal/hooks` tests, `-race`, and package coverage >=80%. + +## Important Decisions +- Kept the pool package-private as `asyncPool` with `asyncTask` and `asyncPoolConfig`; task 06 can own it from the `Hooks` struct without widening the public hooks surface. +- `Submit` uses a buffered channel protected by an `RWMutex` so non-blocking sends cannot race a concurrent `Close()` and panic on a closed channel. +- On shutdown deadline, the pool discards any still-buffered tasks before canceling worker contexts so queued async hooks are abandoned instead of running after the deadline. + +## Learnings +- The worker context is pool-owned; future async hook dispatch code must wrap any per-hook timeout inside the submitted `task.run` closure. +- A closed channel plus a canceled context are both selectable, so abandoning queued work after the drain deadline requires explicitly draining the buffer before cancellation. + +## Files / Surfaces +- `internal/hooks/pool.go` +- `internal/hooks/pool_test.go` +- `.compozy/tasks/hooks/memory/MEMORY.md` +- `.compozy/tasks/hooks/task_05.md` +- `.compozy/tasks/hooks/_tasks.md` + +## Errors / Corrections +- Initial panic-recovery test could fill the single-slot queue before the worker started; fixed by waiting for the first task to begin before submitting the recovery task. 
+- Initial shutdown implementation could still run a buffered task after the drain deadline because the worker `select` could choose the closed channel over `ctx.Done()`; fixed by discarding queued tasks on timeout before canceling workers. + +## Ready for Next Run +- Verification evidence: `go test -race -cover ./internal/hooks` passed with 83.9% coverage, and `make verify` passed after the pool/test changes. +- Remaining follow-up is task 06 wiring: submit async hook executions through `asyncPool` and apply per-hook timeout inside each submitted closure. diff --git a/.compozy/tasks/hooks/memory/task_06.md b/.compozy/tasks/hooks/memory/task_06.md new file mode 100644 index 000000000..ba55aa8ff --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_06.md @@ -0,0 +1,44 @@ +# Task Memory: task_06.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Implement task_06's `internal/hooks` `Hooks` struct, typed dispatchers, atomic registry rebuild/swap, `session.Notifier` bridge, and unit tests. +- Pre-change baseline: `internal/hooks` has the base types, normalization, executors, pipeline, and async pool, but no `hooks.go`, `dispatch.go`, or `notifier.go`, and no `Hooks` implementation. + +## Important Decisions +- Use four declaration-provider seams plus one executor resolver so the registry can already rebuild from native/config/agent/skill sources without pulling in task_07/task_08 package dependencies yet. +- Compare rebuild results using normalized/sorted hook metadata fingerprints instead of executor pointer identity so unchanged declarations skip the swap and preserve the version counter. +- Keep `OnAgentEvent` conservative for task_06: bridge only the notifier surface that can be derived from the current ACP event payloads, and leave broader session/input/prompt/event integration for task_10. 
+ +## Learnings +- The explicit dispatch surface is large enough that package coverage only cleared the `>=80%` gate after adding direct family-level tests for the typed patch applicators, not just smoke calls and pipeline tests. +- Matching async hooks before the sync pipeline and then executing them with the pool-owned worker context preserved the ADR requirement that already-matched async hooks still run after sync short-circuit paths. + +## Files / Surfaces +- `.codex/ledger/2026-04-09-MEMORY-hooks-struct.md` +- `.compozy/tasks/hooks/memory/MEMORY.md` +- `internal/hooks/hooks.go` +- `internal/hooks/dispatch.go` +- `internal/hooks/notifier.go` +- `internal/hooks/hooks_test.go` +- `internal/hooks/dispatch_integration_test.go` +- `.compozy/tasks/hooks/memory/task_06.md` + +## Errors / Corrections +- The first compile pass exposed three concrete issues: a helper name collision with `pipeline_test.go`, a missing `time` import in `notifier.go`, and an invalid direct struct comparison in a no-hooks dispatch test because the payload carried slices. +- Focused coverage initially landed at `72.3%`; corrected by adding family-level typed dispatch tests plus a full exported-dispatch smoke pass, bringing `internal/hooks` coverage to `85.2%`. +- The first local commit unintentionally included unrelated staged review-doc deletions that were already present in the worktree; corrected by restoring those docs in a separate follow-up commit instead of rewriting history. + +## Ready for Next Run +- Task 06 implementation is complete and verified. +- Verification evidence: + - `go test ./internal/hooks -count=1` + - `go test -race -cover ./internal/hooks -count=1` with `85.2%` coverage + - `go test -tags integration ./internal/hooks -count=1` + - `make verify` +- Final handoff turn re-ran the same verification commands successfully before reporting completion. 
+- Local commits created during this run: + - `b2b3a01 feat: implement hooks dispatcher core` + - `2bd9796 docs: restore review docs` +- Remaining follow-up belongs to later tasks: task_09 wires `Hooks` into the daemon and task_10 expands runtime event integration beyond the current notifier bridge. diff --git a/.compozy/tasks/hooks/memory/task_07.md b/.compozy/tasks/hooks/memory/task_07.md new file mode 100644 index 000000000..9bf659f46 --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_07.md @@ -0,0 +1,42 @@ +# Task Memory: task_07.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Replace `internal/skills` hook-owned types and runner with `internal/hooks.HookDecl` declarations, migrate skill loader parsing to the dotted taxonomy, update fixtures/tests, and leave the task with clean verification. + +## Important Decisions +- Treat task 07 as a hard cut-over: legacy `on_*` skill hook events are rejected with replacement guidance instead of being silently remapped. +- Remove the `internal/hooks` notifier adapter because switching `internal/skills` to import `internal/hooks` exposed a package cycle through `internal/session`. +- Keep the daemon compiling in task 07 by constructing transient `hooks.Hooks` dispatchers inside `internal/daemon` until task 09 owns the full notifier-fanout replacement. + +## Learnings +- The skill loader needs a post-parse normalization pass after source and provenance assignment so each `hooks.HookDecl` carries stable `Source` and `SkillSource` values. +- Strict YAML decoding with `KnownFields(true)` works for the new hook schema and still leaves room for descriptive legacy-event errors before the declaration hits `hooks.ValidateHookDecl`. 
+ +## Files / Surfaces +- `internal/skills/types.go` +- `internal/skills/loader.go` +- `internal/skills/registry.go` +- `internal/skills/hook_decl.go` +- `internal/skills/loader_test.go` +- `internal/skills/registry_test.go` +- `internal/skills/testdata/loader/*` +- `internal/daemon/boot.go` +- `internal/daemon/notifier.go` +- `internal/daemon/notifier_test.go` +- `internal/daemon/notifier_integration_test.go` +- `internal/hooks/hooks_test.go` +- `internal/hooks/agent_event.go` + +## Errors / Corrections +- The first cut introduced an import cycle: `internal/session -> internal/skills -> internal/hooks -> internal/session`. Fixed by removing the base-package notifier bridge and updating hooks tests to call the typed dispatchers directly. +- `make verify` exposed an unused `durationValue` helper left behind in the loader rewrite; removing it restored a clean lint/build pass. + +## Ready for Next Run +- Verification is clean: + - `go test ./internal/hooks ./internal/skills ./internal/daemon -count=1` + - `go test -tags integration ./internal/daemon -run 'TestNotifierFanout' -count=1` + - `go test -coverprofile=/tmp/internal-skills.cover ./internal/skills -count=1` -> `81.3%` + - `make verify` +- Task 09 should replace the temporary daemon-side bridge helpers with the final hooks-platform notifier wiring. diff --git a/.compozy/tasks/hooks/memory/task_08.md b/.compozy/tasks/hooks/memory/task_08.md new file mode 100644 index 000000000..94eaf616a --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_08.md @@ -0,0 +1,49 @@ +# Task Memory: task_08.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Extend `internal/config` and agent-definition loading so config layers and agent defs can declare hooks, expose combined `[]hooks.HookDecl` for the registry, enforce task_02 validation/defaults, and satisfy task_08 tests plus full repo verification. 
+ +## Important Decisions +- Added package-level `HookDeclarations(cfg Config, agents []AgentDef) ([]hooks.HookDecl, error)` as the registry-facing export. +- Config hook declarations are parsed into `Config.Hooks.Declarations` and merged across precedence layers by declaration name, with later layers replacing matching names. +- Agent-definition hooks are always scoped to the defining agent name via `matcher.agent_name`; mismatched explicit values fail parsing. +- Agent frontmatter decoding now supports strict YAML first and strict TOML fallback so hook declarations work in both metadata formats. +- `session.Notifier.OnAgentEvent` now accepts `any`, with `internal/observe` performing ACP downcasting, to avoid a new import cycle caused by config-driven hook declarations. + +## Learnings +- `internal/hooks.ValidateHookDecl` is enough for load-time validation, while `NormalizeHookDecl` is the right path for applying default priority and executor kind before registry consumption. +- The existing frontmatter splitter is format-agnostic; YAML/TOML support can be layered entirely in the decode callback without changing `internal/frontmatter`. 
+ +## Files / Surfaces +- `internal/config/config.go` +- `internal/config/merge.go` +- `internal/config/agent.go` +- `internal/config/bootstrap.go` +- `internal/config/hooks.go` +- `internal/config/hooks_test.go` +- `internal/workspace/clone.go` +- `internal/session/interfaces.go` +- `internal/hooks/agent_event.go` +- `internal/observe/observer.go` +- `internal/daemon/notifier.go` +- `internal/daemon/notifier_test.go` +- `internal/daemon/daemon_test.go` +- `internal/session/manager_test.go` +- `internal/cli/cli_integration_test.go` +- `internal/api/httpapi/httpapi_integration_test.go` +- `internal/api/udsapi/udsapi_integration_test.go` +- `internal/hooks/hooks_test.go` + +## Errors / Corrections +- Initial implementation covered YAML agent frontmatter only; corrected by adding strict TOML fallback to satisfy the task requirement for YAML/TOML agent definitions. +- Fixed a compile error in the TOML unknown-field path by using `toml.Key.String()` instead of passing a single `toml.Key` into the config overlay helper that expects a slice. + +## Ready for Next Run +- Verification complete after the final code change: + - `go test ./internal/config -count=1` + - `go test -cover ./internal/config -count=1` (`82.5%`) + - `make verify` (exit `0`) +- Task tracking is updated locally and the code-only commit is `ee38729` (`feat: add config hook declarations`). +- Committed `HEAD` was re-verified with `make verify` (exit `0`). diff --git a/.compozy/tasks/hooks/memory/task_09.md b/.compozy/tasks/hooks/memory/task_09.md new file mode 100644 index 000000000..2b4adfa1f --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_09.md @@ -0,0 +1,35 @@ +# Task Memory: task_09.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. 
+ +## Objective Snapshot +- Replace daemon `notifierFanout` / `skillsHookDispatcher` with a single `internal/hooks.Hooks` composition in `boot.go`, wire skill reload + native callbacks, update shutdown ordering, and replace notifier-era tests with task_09 coverage. + +## Important Decisions +- Hard cut-over only: delete notifier fanout/dispatcher code instead of keeping compatibility layers. +- Preserve observer and dream-session side effects through native hook callbacks owned by the daemon/hooks composition, not through separate post-session callback lists. +- Keep the `session.Notifier` seam in `internal/daemon` as a thin `hooksNotifier` adapter over `Hooks`; moving the interface implementation into `internal/hooks` would reintroduce the current `session -> config/skills -> hooks` package cycle. + +## Learnings +- Current `internal/hooks.Hooks` already owns registry rebuild and async worker-pool lifecycle, but only exposes a no-op `OnAgentEvent`; task_09 still needs the session lifecycle notifier bridge in addition to daemon composition. +- The existing skills watcher only refreshes the skills registry; task_09 needs a daemon-level callback so the same watcher cycle also triggers `Hooks.Rebuild()`. +- Skill hook metadata now rejects the legacy per-hook `name` field; task fixtures must use the current declaration shape (`event`, executor fields, matcher fields, etc.) during daemon integration tests. 
+ +## Files / Surfaces +- `internal/daemon/boot.go` +- `internal/daemon/daemon.go` +- `internal/daemon/hooks_bridge.go` +- `internal/daemon/notifier.go` +- `internal/daemon/notifier_integration_test.go` +- `internal/daemon/notifier_test.go` +- `internal/daemon/daemon_test.go` +- `internal/daemon/daemon_integration_test.go` +- `internal/hooks/payloads.go` +- `internal/skills/watcher.go` + +## Errors / Corrections +- `go test -tags integration ./internal/daemon` initially failed because the new skill-hook integration fixtures still used the deprecated per-hook `name` field; corrected the fixtures to match the current parser contract. + +## Ready for Next Run +- Verification complete after the daemon hooks cut-over: focused daemon/hooks tests, daemon integration tests, `internal/daemon` coverage (`80.4%`), and `make verify` all passed. +- Local code commit created: `4b3d39e` (`refactor: wire daemon hooks runtime`). Workflow memory and PRD tracking updates were kept out of the commit. diff --git a/.compozy/tasks/hooks/memory/task_10.md b/.compozy/tasks/hooks/memory/task_10.md new file mode 100644 index 000000000..fb80a384b --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_10.md @@ -0,0 +1,47 @@ +# Task Memory: task_10.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Wire typed hook dispatch into the session manager lifecycle and runtime paths required by task_10: session create/resume/stop, input submission, prompt assembly, event recording, and agent lifecycle. +- Preserve the current session manager cleanup/finalization semantics while adding sync barriers where required and async observation where the taxonomy marks events async-only. +- Finish with task-specific tests, clean `make verify`, tracking updates, and one local commit. 
+ +## Important Decisions +- Keep `internal/session` on a narrow local hook-dispatch interface instead of importing the concrete `internal/hooks.Hooks` runtime directly; `internal/daemon` will adapt the real runtime into that seam. +- Treat the approved task spec, techspec, and ADRs as the validated design context for this implementation run; no extra design loop is needed unless a contradiction appears. +- Keep `session.Notifier` for legacy observer/dream fan-out only; session lifecycle hook execution now flows through `SessionManagerDeps.Hooks` so post-create/post-stop dispatch is not duplicated. + +## Learnings +- Current baseline before edits: `internal/session` only has `Notifier`; `startupPrompt` only assembles and returns a string; `recordEvent` writes directly to the recorder; and `internal/hooks.OnAgentEvent` is still a no-op for the richer task_10 agent lifecycle events. +- The permission escalation invariant initially missed ACP's `reject-once` and `reject-always` deny states, which allowed a subprocess `permission.request` patch to escalate a rejected request; fixing the classifier closed the real bug and made the end-to-end test pass. +- Task 09-style daemon tests that manually invoked `Notifier.OnSessionCreated`/`OnSessionStopped` no longer exercised the load-bearing path after task_10; they now need to call `SessionManagerDeps.Hooks.DispatchSessionPostCreate/PostStop` instead. 
+ +## Files / Surfaces +- `internal/session/manager.go` +- `internal/session/manager_lifecycle.go` +- `internal/session/manager_helpers.go` +- `internal/session/manager_prompt.go` +- `internal/session/manager_hooks.go` +- `internal/session/interfaces.go` +- `internal/daemon/hooks_bridge.go` +- `internal/daemon/boot.go` +- `internal/daemon/daemon.go` +- `internal/daemon/daemon_integration_test.go` +- `internal/session/manager_test.go` +- `internal/session/manager_hooks_test.go` +- `internal/session/manager_integration_test.go` +- `internal/daemon/notifier_test.go` +- `internal/daemon/daemon_test.go` +- `internal/hooks/dispatch.go` +- `internal/hooks/dispatch_integration_test.go` +- `internal/hooks/pipeline.go` +- `internal/hooks/permission.go` +- `internal/hooks/permission_test.go` + +## Errors / Corrections +- Corrected the permission deny-state classifier so `reject-once` and `reject-always` are treated as denials by the dispatcher guard. +- Corrected daemon integration tests to use the new hook-dispatch seam instead of the legacy notifier callbacks. + +## Ready for Next Run +- Task 10 is implemented and verified. The remaining follow-on work is task 11, which can reuse the same session-owned hook-dispatch seam for `turn.*`, `message.*`, and `context.*` wiring. diff --git a/.compozy/tasks/hooks/memory/task_11.md b/.compozy/tasks/hooks/memory/task_11.md new file mode 100644 index 000000000..145cc4324 --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_11.md @@ -0,0 +1,36 @@ +# Task Memory: task_11.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Wire `turn.*`, `message.*`, and `context.*` typed hook dispatch into the session ACP flow and compaction wrapper, then close the task with passing verification and session coverage at or above 80%. 
+ +## Important Decisions +- `turn.start` fires after `input.pre_submit` patching and before prompt streaming begins; `turn.end` fires once at terminal `done`/`error` boundaries with a stream-close fallback. +- Assistant `agent_message` and `thought` ACP events are treated as message chunks for hook dispatch; any non-message ACP event closes the open message before downstream processing continues. +- Context compaction uses a dedicated `Manager.runContextCompaction` wrapper so `context.pre_compact` can patch the effective compaction params and `context.post_compact` observes the final result without expanding unrelated runtime surfaces. + +## Learnings +- `message.start` can safely patch the first streamed assistant chunk before it is emitted and persisted. +- `message.delta` non-blocking behavior is best verified with the real hooks async runtime rather than a fake dispatcher. +- Extending `session.HookDispatcher` also requires matching daemon bridge and daemon test-fake updates or the repo-wide verification gate fails in `internal/daemon`. + +## Files / Surfaces +- `internal/session/interfaces.go` +- `internal/session/manager_hooks.go` +- `internal/session/manager_prompt.go` +- `internal/session/manager_test.go` +- `internal/session/manager_hooks_test.go` +- `internal/session/manager_integration_test.go` +- `internal/daemon/hooks_bridge.go` +- `internal/daemon/notifier_test.go` +- `internal/daemon/daemon_test.go` + +## Errors / Corrections +- `internal/daemon/notifier_test.go` initially used flattened payload fields for aliased hook payload structs; corrected the test to build payloads through embedded `PayloadBase`, `SessionContext`, and `TurnContext`. +- `internal/session/manager_hooks.go` needed a nil-safe local clock function inside `runContextCompaction` to satisfy staticcheck before the final `make verify` pass. 
+ +## Ready for Next Run +- Fresh post-commit verification succeeded with `go test -tags integration ./internal/session`, `go test -cover ./internal/session` (`81.9%`), and `make verify`. +- Local code commit created: `04aab8f` (`feat: integrate turn message context dispatch`). +- Task tracking and workflow memory updates were intentionally left unstaged per the automatic-commit staging rule for tracking-only files. diff --git a/.compozy/tasks/hooks/memory/task_12.md b/.compozy/tasks/hooks/memory/task_12.md new file mode 100644 index 000000000..2c308fe29 --- /dev/null +++ b/.compozy/tasks/hooks/memory/task_12.md @@ -0,0 +1,45 @@ +# Task Memory: task_12.md + +Keep only task-local execution context here. Do not duplicate facts that are obvious from the repository, task file, PRD documents, or git history. + +## Objective Snapshot +- Implement hook run persistence, HTTP introspection endpoints, and telemetry/logging/metrics for task 12. + +## Important Decisions +- Treat the existing hooks PRD/techspec as the approved design baseline and keep scope focused on persistence/introspection rather than expanding runtime hook families. +- Use the per-session SQLite store for hook run history and let the observer own read/query access for the HTTP endpoints. +- Persist hook run audits in a dedicated `hook_runs` table inside each session DB and query them through observer-owned store openers so `/api/hooks/runs` can inspect historical sessions without a live recorder. +- Reuse active session dispatch context as the preferred hook run writer; fall back to the observer sink only when the pipeline is outside a session-managed recorder path. +- Gate `PatchApplied` persistence by event family unless debug logging is enabled: always keep audit patches for `permission.*`, `prompt.*`, `tool.*`, and `input.*`, omit them for other families by default. 
+ +## Learnings +- The current hook runtime has no telemetry sink or registry introspection surface yet; the observer API is still limited to global event summaries and health. +- Active session hook dispatch can reuse the existing recorder path if hook telemetry is passed through context instead of opening a second writer by default. +- `session.pre_create` remains best-effort for persistent telemetry because the per-session DB may not exist yet; the observer intentionally skips writes when the session DB path has not been created. +- The hooks runtime can expose catalog and taxonomy introspection without leaking internal ordering logic into HTTP handlers by centralizing that translation inside `internal/hooks`. + +## Files / Surfaces +- `internal/hooks` +- `internal/store/sessiondb` +- `internal/observe` +- `internal/api/httpapi` +- `internal/api/contract` +- `internal/daemon` +- `internal/session` +- `internal/api/core` +- `internal/api/testutil` +- `internal/store/types.go` + +## Errors / Corrections +- Adjusted session-managed dispatch paths to carry hook recorder context so task 12 writes use the existing session DB writer instead of opening duplicate handles. +- Added observer and HTTP test seams for hook catalog, run, and events queries because the previous observer interface only covered event summaries and health. 
+ +## Ready for Next Run +- Verification evidence: +- `go test ./internal/hooks ./internal/store/sessiondb ./internal/observe ./internal/api/httpapi ./internal/daemon ./internal/session -count=1` +- `go test -tags integration ./internal/api/httpapi ./internal/hooks -count=1` +- `go test -cover ./internal/hooks ./internal/store/sessiondb ./internal/observe ./internal/api/httpapi -count=1` with `internal/hooks 82.7%`, `internal/store/sessiondb 82.4%`, `internal/observe 81.4%`, and `internal/api/httpapi 81.4%` +- `make verify` before commit +- `git commit -m "feat: add hooks observability api"` created `945db92` +- `make verify` on committed `HEAD` +- Follow-up risk to remember: if persistent auditing is ever required for `session.pre_create`, the session creation flow will need an earlier store allocation point instead of the current best-effort skip behavior. diff --git a/.compozy/tasks/hooks/task_01.md b/.compozy/tasks/hooks/task_01.md new file mode 100644 index 000000000..79451c690 --- /dev/null +++ b/.compozy/tasks/hooks/task_01.md @@ -0,0 +1,90 @@ +--- +status: completed +title: Core types and hook taxonomy +type: backend +complexity: medium +dependencies: [] +--- + +# Task 1: Core types and hook taxonomy + +## Overview + +Define the foundational types for the hooks platform in a new `internal/hooks` package: the `HookEvent` enum with sync eligibility classification, `HookSource`, `HookMode`, `RegisteredHook`, `ResolvedHook`, and all event-specific payload/patch type pairs. This is the dependency-free base that every subsequent task builds on. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `internal/hooks/` package that imports only the standard library — no imports from other `internal/` packages +- MUST define `HookEvent` as a typed string enum with compile-time constants for all 27 events in the taxonomy +- MUST define a `SyncEligible` property per event — `message.delta`, `event.pre_record`, `event.post_record`, `permission.resolved`, `permission.denied` are async-only +- MUST define `HookSource` enum: Native, Config, AgentDefinition, Skill +- MUST define `HookMode` enum: Sync, Async +- MUST define `RegisteredHook` struct per TechSpec "Core Interfaces" section +- MUST define all event-specific payload and patch type pairs per TechSpec "Data Models" section +- MUST define `HookRunRecord` with `PatchApplied json.RawMessage` field +- SHOULD define `HookDecl` as the declarative source record for config/agent/skill declarations + + +## Subtasks +- [x] 1.1 Create `internal/hooks/` package directory and `doc.go` +- [x] 1.2 Define `HookEvent` enum with all 27 events and sync eligibility lookup +- [x] 1.3 Define `HookSource`, `HookMode`, `HookExecutorKind` enums +- [x] 1.4 Define `RegisteredHook`, `ResolvedHook`, `HookDecl` structs +- [x] 1.5 Define event-specific payload/patch type pairs for each family +- [x] 1.6 Define `HookRunRecord` observability struct with patch audit field +- [x] 1.7 Write unit tests for type validation and sync eligibility classification + +## Implementation Details + +Create new files in `internal/hooks/`: +- `events.go` — HookEvent enum, sync eligibility map, event family constants +- `types.go` — RegisteredHook, ResolvedHook, HookDecl, HookRunRecord, HookSource, HookMode +- `payloads.go` 
— All event-specific payload and patch types + +Reference TechSpec "Hook Taxonomy", "Core Interfaces", and "Data Models" sections for type definitions. + +### Relevant Files +- `internal/skills/types.go` — Current HookDecl/HookEvent definitions (lines 55-70) to understand what's being replaced +- `internal/session/interfaces.go` — Session type used in payload types (line 22) +- `internal/acp/types.go` — AgentEvent type referenced in event payloads + +### Dependent Files +- `internal/hooks/` — All subsequent hooks package files will import these types + +### Related ADRs +- [ADR-002: Use a Dotted Hook Taxonomy with Rich Families](../adrs/adr-002.md) — Defines the event taxonomy +- [ADR-005: Use Typed Per-Event Dispatch Functions](../adrs/adr-005.md) — Requires concrete payload/patch types per event +- [ADR-012: Classify Events into Sync-Eligible and Async-Only](../adrs/adr-012.md) — Defines which events are async-only + +## Deliverables +- `internal/hooks/events.go` with complete HookEvent enum and sync eligibility +- `internal/hooks/types.go` with all core structs +- `internal/hooks/payloads.go` with all event-specific payload/patch types +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] All 27 HookEvent constants are defined and have non-empty string values + - [x] `SyncEligible` returns true for `session.pre_create` and false for `message.delta` + - [x] `SyncEligible` returns false for all 5 async-only events + - [x] `SyncEligible` returns true for all sync-eligible events + - [x] `HookSource` ordering: Native < Config < AgentDefinition < Skill + - [x] `RegisteredHook` with `Required=true` and `Mode=Async` fails validation + - [x] `RegisteredHook` with `Mode=Sync` on async-only event fails validation + - [x] All payload/patch types serialize to JSON and back without data loss +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `make lint` passes with zero 
warnings +- No imports from other `internal/` packages (except stdlib) +- All 27 events defined with correct sync eligibility diff --git a/.compozy/tasks/hooks/task_02.md b/.compozy/tasks/hooks/task_02.md new file mode 100644 index 000000000..05324b8c3 --- /dev/null +++ b/.compozy/tasks/hooks/task_02.md @@ -0,0 +1,91 @@ +--- +status: completed +title: Declaration normalization, matchers, and ordering +type: backend +complexity: medium +dependencies: + - task_01 +--- + +# Task 2: Declaration normalization, matchers, and ordering + +## Overview + +Implement the declaration normalization pipeline that converts raw `HookDecl` from any source into `ResolvedHook`, the matcher evaluation system for event-specific filtering, and the deterministic ordering function (source → priority → name). This is the resolution engine that the dispatcher depends on. + + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST normalize `HookDecl` into `ResolvedHook` with validated source, mode, matcher, timeout, and executor binding +- MUST reject declarations with `mode: sync` on async-only events at normalization time +- MUST reject declarations with `required: true` and `mode: async` +- MUST implement per-family matcher evaluation: session (type, workspace, agent), tool (name, namespace, read-only), permission (tool, decision class), message (role, delta type), context (reason, strategy) +- MUST implement ordering: source class (Native→Config→AgentDef→Skill) → priority (desc) → name (asc lexicographic) +- MUST apply default priorities per source: native=1000, config=500, agent-definition=100, skill=0 +- MUST preserve skill sub-ordering (Bundled→Marketplace→User→Additional→Workspace) before 
name + + +## Subtasks +- [x] 2.1 Implement `HookDecl` → `ResolvedHook` normalization with validation +- [x] 2.2 Implement matcher types and evaluation functions per event family +- [x] 2.3 Implement deterministic ordering function with three-level sort +- [x] 2.4 Implement skill sub-ordering using existing `SkillSource` precedence +- [x] 2.5 Write unit tests for normalization, matchers, and ordering + +## Implementation Details + +Create new files in `internal/hooks/`: +- `normalize.go` — Declaration normalization and validation +- `matcher.go` — Matcher types and evaluation per event family +- `ordering.go` — Deterministic sort function + +Reference TechSpec "Matcher Model" and "Dispatch Model" sections. Reference ADR-011 for ordering rules. + +### Relevant Files +- `internal/hooks/types.go` (task_01) — ResolvedHook, HookDecl, HookSource types +- `internal/hooks/events.go` (task_01) — Sync eligibility for validation +- `internal/skills/hooks.go:220-252` — Current `orderSkillsForHooks` for skill sub-ordering pattern +- `internal/skills/types.go:30-37` — SkillSource enum and ordering + +### Dependent Files +- `internal/hooks/` — Pipeline (task_04) and registry (task_06) depend on these functions + +### Related ADRs +- [ADR-004: Support Four Declaration Sources with Ordered Dispatch](../adrs/adr-004.md) — Source ordering rules +- [ADR-011: Simplify Ordering to Source, Priority, Name](../adrs/adr-011.md) — Removes specificity, defines ordering +- [ADR-012: Classify Events into Sync-Eligible and Async-Only](../adrs/adr-012.md) — Validation of sync mode vs event eligibility + +## Deliverables +- `internal/hooks/normalize.go` with normalization and validation +- `internal/hooks/matcher.go` with per-family matcher evaluation +- `internal/hooks/ordering.go` with deterministic sort +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] Normalization rejects `mode: sync` on `message.delta` with clear error + - [x] Normalization rejects `required: 
true` on async hook + - [x] Normalization applies default priority 1000 for Native source when priority unset + - [x] Normalization applies default priority 0 for Skill source + - [x] Session matcher matches on workspace ID and agent name, rejects non-matching + - [x] Tool matcher matches on tool name and namespace, handles wildcard + - [x] Permission matcher matches on tool name and decision class + - [x] Ordering sorts Native before Config before AgentDef before Skill + - [x] Ordering sorts higher priority first within same source + - [x] Ordering sorts names ascending for equal source and priority + - [x] Skill sub-ordering: Bundled before Marketplace before User before Workspace + - [x] Ordering is stable across multiple sorts with same input +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Invalid declarations fail with descriptive error messages +- Ordering is fully deterministic — same input always produces same output diff --git a/.compozy/tasks/hooks/task_03.md b/.compozy/tasks/hooks/task_03.md new file mode 100644 index 000000000..cf1bd28fd --- /dev/null +++ b/.compozy/tasks/hooks/task_03.md @@ -0,0 +1,92 @@ +--- +status: completed +title: Executor contracts and implementations +type: backend +complexity: medium +dependencies: + - task_01 +--- + +# Task 3: Executor contracts and implementations + +## Overview + +Implement the `Executor` interface and two concrete executors: a native Go callback executor (for in-process hooks) and a subprocess executor (for skill/config/agent shell hooks). The subprocess executor replaces the current `HookRunner.runHook()` in `internal/skills/hooks.go`, reusing its proven patterns for timeout, signal handling, environment allowlisting, and output capture. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST define `Executor` interface with `Kind() HookExecutorKind` and `Execute(ctx, RegisteredHook, []byte) ([]byte, error)` +- MUST implement `NativeExecutor` that calls Go callbacks directly, bypassing serialization +- MUST implement `SubprocessExecutor` that runs shell commands with JSON payload via stdin, captures stdout/stderr +- MUST reuse environment allowlist pattern from existing `internal/skills/hooks.go:292-315` +- MUST enforce timeout via `context.WithTimeout` with graceful shutdown (signal, wait, kill) +- MUST capture stdout/stderr with existing 8KB limit pattern +- SHOULD leave a Wasm executor seam (empty `WasmExecutor` struct implementing the interface with `ErrNotImplemented`) + + +## Subtasks +- [x] 3.1 Define `Executor` interface and `HookExecutorKind` enum +- [x] 3.2 Implement `NativeExecutor` for Go callback hooks +- [x] 3.3 Implement `SubprocessExecutor` with timeout, signal handling, env allowlist, capture +- [x] 3.4 Add Wasm executor stub returning `ErrNotImplemented` +- [x] 3.5 Write unit tests for both executors including timeout and error paths + +## Implementation Details + +Create new files in `internal/hooks/`: +- `executor.go` — Executor interface and kind enum +- `executor_native.go` — Native Go callback executor +- `executor_subprocess.go` — Subprocess executor (port from skills/hooks.go) +- `executor_subprocess_unix.go` / `executor_subprocess_windows.go` — Platform-specific process management (port from skills/hook_process_*.go) +- `executor_wasm.go` — Stub + +Reference existing `internal/skills/hooks.go` lines 127-204 for subprocess execution pattern and lines 292-315 for environment 
allowlist. + +### Relevant Files +- `internal/skills/hooks.go:127-204` — Current `runHook()` subprocess execution to port +- `internal/skills/hooks.go:292-315` — Environment allowlist (`hookAllowedEnvVars`) +- `internal/skills/hooks.go:339-401` — `hookCapture` output limiting pattern +- `internal/skills/hook_process_unix.go` — Unix-specific process group/signal handling +- `internal/skills/hook_process_windows.go` — Windows-specific process handling + +### Dependent Files +- `internal/hooks/` — Pipeline (task_04) calls executors + +### Related ADRs +- [ADR-005: Use Typed Per-Event Dispatch Functions](../adrs/adr-005.md) — Executor uses `[]byte` at serialization boundary + +## Deliverables +- `internal/hooks/executor.go` with Executor interface +- `internal/hooks/executor_native.go` with NativeExecutor +- `internal/hooks/executor_subprocess.go` with SubprocessExecutor +- Platform-specific process management files +- `internal/hooks/executor_wasm.go` stub +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] NativeExecutor calls Go callback with correct payload and returns result + - [x] NativeExecutor returns error when callback panics (recovered) + - [x] SubprocessExecutor runs `echo` command and captures stdout as result + - [x] SubprocessExecutor passes JSON payload via stdin + - [x] SubprocessExecutor enforces timeout — command exceeding timeout is killed + - [x] SubprocessExecutor graceful shutdown — SIGTERM sent before SIGKILL + - [x] SubprocessExecutor filters environment to allowlist only + - [x] SubprocessExecutor captures stderr on non-zero exit + - [x] SubprocessExecutor respects 8KB stdout/stderr capture limit + - [x] WasmExecutor returns ErrNotImplemented +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Subprocess executor handles timeout gracefully without zombie processes +- Environment allowlist prevents ambient secret leakage diff --git 
a/.compozy/tasks/hooks/task_04.md b/.compozy/tasks/hooks/task_04.md new file mode 100644 index 000000000..edd97a4de --- /dev/null +++ b/.compozy/tasks/hooks/task_04.md @@ -0,0 +1,101 @@ +--- +status: completed +title: Generic pipeline with sync composition and guards +type: backend +complexity: high +dependencies: + - task_02 + - task_03 +--- + +# Task 4: Generic pipeline with sync composition and guards + +## Overview + +Implement the core `pipeline[P, R]` generic type that executes sync hooks as a sequential pipeline (each hook sees the output of the previous) and includes the dispatch depth guard and permission deny-only invariant. This is the most complex component — it ties together ordering, executors, and typed patch composition into a single execution engine. + + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST implement `pipeline[P, R]` as a package-private generic type with `apply`, `encode`, `decode` functions +- MUST execute sync hooks sequentially — hook N receives the payload patched by hook N-1 +- MUST short-circuit on explicit deny from any hook +- MUST short-circuit on `required` hook failure (error or timeout) with error +- MUST skip non-required hook failures and continue pipeline +- MUST implement dispatch depth guard via `context.WithValue` counter, max depth 3 +- MUST enforce permission deny-only invariant: reject any patch that attempts deny→allow, log as `hook.dispatch.permission_escalation_blocked` +- MUST provide `encode`/`decode` functions for subprocess executor serialization boundary +- MUST use native executor path (no serialization) for Go callback hooks + + +## Subtasks +- [x] 4.1 Implement `pipeline[P, R]` generic struct with 
`execute(ctx, payload) (P, error)` method +- [x] 4.2 Implement sequential sync hook composition with patch application loop +- [x] 4.3 Implement pipeline short-circuit on deny and required-hook failure +- [x] 4.4 Implement dispatch depth guard with context counter (max 3) +- [x] 4.5 Implement permission deny-only invariant check +- [x] 4.6 Implement encode/decode bridge for subprocess executors vs native bypass + +## Implementation Details + +Create new files in `internal/hooks/`: +- `pipeline.go` — Generic pipeline type, sequential execution, short-circuit logic +- `depth.go` — Dispatch depth context key and guard functions +- `permission.go` — Permission invariant check + +Reference TechSpec "Core Interfaces" section for `pipeline[P, R]` design. Reference ADR-006 for sequential composition, ADR-009 for permission invariant, ADR-012 for depth guard. + +### Relevant Files +- `internal/hooks/ordering.go` (task_02) — Provides sorted hook list for pipeline execution +- `internal/hooks/executor.go` (task_03) — Executor interface called by pipeline +- `internal/hooks/executor_native.go` (task_03) — Native executor bypasses serialization +- `internal/hooks/executor_subprocess.go` (task_03) — Subprocess executor uses encode/decode +- `internal/hooks/events.go` (task_01) — HookEvent for depth guard context key + +### Dependent Files +- `internal/hooks/` — Hooks struct (task_06) wraps pipelines for each event + +### Related ADRs +- [ADR-005: Use Typed Per-Event Dispatch Functions](../adrs/adr-005.md) — Pipeline is the internal engine behind typed dispatch +- [ADR-006: Sequential Pipeline for Sync Hook Patch Composition](../adrs/adr-006.md) — Defines sequential composition model +- [ADR-007: Use Go Generics for Internal Dispatcher Type Safety](../adrs/adr-007.md) — Defines generic pipeline approach +- [ADR-009: Permission Hooks Are Deny-Only](../adrs/adr-009.md) — Permission invariant enforcement +- [ADR-012: Classify Events into Sync-Eligible and 
Async-Only](../adrs/adr-012.md) — Dispatch depth guard + +## Deliverables +- `internal/hooks/pipeline.go` with generic pipeline implementation +- `internal/hooks/depth.go` with depth guard +- `internal/hooks/permission.go` with deny-only invariant +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] Pipeline with 3 sync hooks — hook 2 sees payload patched by hook 1, hook 3 sees payload patched by hook 2 + - [x] Pipeline with explicit deny from hook 2 — hook 3 never executes, deny returned + - [x] Pipeline with required hook timeout — pipeline returns error, subsequent hooks skipped + - [x] Pipeline with non-required hook failure — hook skipped, pipeline continues + - [x] Pipeline with no matching hooks — returns original payload unchanged + - [x] Depth guard: dispatch at depth 1 succeeds + - [x] Depth guard: dispatch at depth 3 succeeds (at limit) + - [x] Depth guard: dispatch at depth 4 returns error immediately + - [x] Depth guard: nested dispatch increments depth from parent context + - [x] Permission invariant: patch that keeps deny returns deny (allowed) + - [x] Permission invariant: patch that changes deny→allow is rejected and logged + - [x] Permission invariant: patch that changes allow→deny is allowed (deny escalation ok) + - [x] Native executor path skips serialization — callback receives typed payload directly + - [x] Subprocess executor path uses encode/decode for JSON serialization +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Sequential composition is deterministic — same hooks + same input always produces same output +- Permission escalation is architecturally impossible via pipeline enforcement +- Depth guard prevents stack overflow from circular dispatch diff --git a/.compozy/tasks/hooks/task_05.md b/.compozy/tasks/hooks/task_05.md new file mode 100644 index 000000000..5aa6306b1 --- /dev/null +++ b/.compozy/tasks/hooks/task_05.md @@ -0,0 
+1,82 @@ +--- +status: completed +title: Async worker pool +type: backend +complexity: medium +dependencies: + - task_01 +--- + +# Task 5: Async worker pool + +## Overview + +Implement the fixed-size goroutine worker pool for async hook execution using Go stdlib primitives (buffered channel, WaitGroup, context). This mirrors the single-worker pattern in `internal/memory/consolidation/runtime.go` but extends it to N workers with backpressure and graceful shutdown. + + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST implement a fixed-size goroutine pool with configurable worker count (default 4) +- MUST use a buffered channel as work queue with configurable capacity (default 64) +- MUST implement non-blocking send with `select`/`default` for backpressure — full buffer drops the hook +- MUST log dropped hooks as `hook.dispatch.async_dropped` with queue depth +- MUST implement graceful shutdown: close channel, drain with deadline (10s), `sync.WaitGroup.Wait()` +- MUST wrap each worker's execution in `recover()` to prevent panicking hooks from killing the pool +- MUST use `select { case task := <-ch: ... 
case <-ctx.Done(): return }` in each worker +- MUST track all goroutines via `sync.WaitGroup` — no fire-and-forget + + +## Subtasks +- [x] 5.1 Define async task type and pool configuration struct +- [x] 5.2 Implement worker pool with N goroutines consuming from buffered channel +- [x] 5.3 Implement non-blocking submit with drop-on-full backpressure +- [x] 5.4 Implement graceful shutdown with drain deadline +- [x] 5.5 Write unit tests including backpressure, shutdown, and panic recovery + +## Implementation Details + +Create new file in `internal/hooks/`: +- `pool.go` — Worker pool struct, Start, Submit, Close methods + +Reference `internal/memory/consolidation/runtime.go` lines 39-51, 135-150 for the existing single-worker channel pattern. Reference TechSpec "Async Worker Pool" section. + +### Relevant Files +- `internal/memory/consolidation/runtime.go:39-150` — Single-worker channel pattern to extend +- `internal/hooks/types.go` (task_01) — HookRunRecord for async execution telemetry + +### Dependent Files +- `internal/hooks/` — Hooks struct (task_06) owns and operates the pool + +### Related ADRs +- [ADR-008: Stdlib Worker Pool for Async Hook Execution](../adrs/adr-008.md) — Defines pool design + +## Deliverables +- `internal/hooks/pool.go` with complete worker pool implementation +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] Pool starts N workers — submitting N tasks runs them concurrently + - [x] Submit to pool with available capacity succeeds + - [x] Submit to full pool drops the task and returns false + - [x] Dropped task is logged with queue depth + - [x] Graceful shutdown: pending tasks in channel are drained before Close returns + - [x] Shutdown with deadline: tasks exceeding deadline are abandoned, Close returns + - [x] Panicking task is recovered — worker continues processing next task + - [x] Context cancellation stops all workers + - [x] Pool with 0 submitted tasks shuts down cleanly + - [x] Concurrent submit from 
multiple goroutines is safe (no data race with -race) +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `-race` flag passes with concurrent submit/shutdown +- No goroutine leaks — all workers join on Close() diff --git a/.compozy/tasks/hooks/task_06.md b/.compozy/tasks/hooks/task_06.md new file mode 100644 index 000000000..061762d08 --- /dev/null +++ b/.compozy/tasks/hooks/task_06.md @@ -0,0 +1,100 @@ +--- +status: completed +title: Hooks struct with typed dispatch, registry, and Notifier +type: backend +complexity: high +dependencies: + - task_04 + - task_05 +--- + +# Task 6: Hooks struct with typed dispatch, registry, and Notifier + +## Overview + +Assemble the main `Hooks` struct that owns the hot-reloadable registry (RWMutex + snapshot swap), the async worker pool, and exposes typed dispatch functions for every event in the taxonomy. The `Hooks` struct also implements `session.Notifier` to serve as the replacement for the current `notifierFanout`. + + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST define `Hooks` struct owning: `sync.RWMutex`, snapshot map `map[HookEvent][]*ResolvedHook`, async worker pool, version counter `atomic.Int64` +- MUST implement `Rebuild(ctx)` — builds complete snapshot from all 4 sources, validates, sorts, swaps under write lock. Skip swap if unchanged. 
+- MUST implement read path: `RLock`, copy slice reference, `RUnlock`, dispatch against snapshot +- MUST implement typed dispatch functions for all 27 events (one function per event) using `pipeline[P, R]` +- MUST implement `session.Notifier` interface: `OnSessionCreated`, `OnSessionStopped`, `OnAgentEvent` +- MUST implement `Close()` for graceful async pool shutdown +- MUST provide compile-time interface check: `var _ session.Notifier = (*Hooks)(nil)` +- MUST use functional options pattern for constructor: `NewHooks(opts ...Option)` + + +## Subtasks +- [x] 6.1 Define `Hooks` struct with registry, pool, mutex, version counter, and observer dependencies +- [x] 6.2 Implement `NewHooks(opts ...Option)` constructor with functional options +- [x] 6.3 Implement `Rebuild(ctx)` with build-then-validate-then-swap semantics +- [x] 6.4 Implement typed dispatch functions for all event families using pipeline +- [x] 6.5 Implement `session.Notifier` interface bridging to typed dispatch +- [x] 6.6 Implement `Close()` for pool shutdown +- [x] 6.7 Write unit tests for registry swap, dispatch, and Notifier implementation + +## Implementation Details + +Create new files in `internal/hooks/`: +- `hooks.go` — Main Hooks struct, constructor, Close, Rebuild +- `dispatch.go` — All typed dispatch functions +- `notifier.go` — session.Notifier implementation + +Follow `internal/skills/registry.go` pattern for RWMutex + snapshot swap (lines 28+, `reloadGlobal`). Reference TechSpec "Hot Reload" and "Core Interfaces" sections. 
+ +### Relevant Files +- `internal/hooks/pipeline.go` (task_04) — Pipeline engine for sync execution +- `internal/hooks/pool.go` (task_05) — Async worker pool +- `internal/hooks/ordering.go` (task_02) — Sorts hooks for registry snapshot +- `internal/hooks/normalize.go` (task_02) — Validates declarations +- `internal/skills/registry.go:28-120` — Existing RWMutex + snapshot swap pattern +- `internal/session/interfaces.go:150-155` — Notifier interface to implement + +### Dependent Files +- `internal/daemon/boot.go` — Will wire Hooks in task_09 +- `internal/session/` — Will receive Hooks as Notifier in task_09 + +### Related ADRs +- [ADR-005: Use Typed Per-Event Dispatch Functions](../adrs/adr-005.md) — Typed dispatch function design +- [ADR-013: Hot-Reloadable Registry with RWMutex Snapshot Swap](../adrs/adr-013.md) — Registry design + +## Deliverables +- `internal/hooks/hooks.go` with Hooks struct, constructor, Rebuild, Close +- `internal/hooks/dispatch.go` with all typed dispatch functions +- `internal/hooks/notifier.go` with session.Notifier implementation +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] `NewHooks()` creates Hooks with empty registry and started pool + - [x] `Rebuild()` with valid declarations populates registry snapshot + - [x] `Rebuild()` with invalid declaration keeps old snapshot and returns error + - [x] `Rebuild()` with unchanged declarations skips swap (version unchanged) + - [x] `Rebuild()` bumps version counter on successful swap + - [x] Concurrent `Rebuild()` and dispatch do not race (test with `-race`) + - [x] Typed dispatch function returns original payload when no hooks match + - [x] Typed dispatch function applies patches from matching hooks in order + - [x] `OnSessionCreated` calls the appropriate dispatch function + - [x] `OnSessionStopped` calls the appropriate dispatch function + - [x] `Close()` drains async pool and returns + - [x] Compile-time check: `var _ session.Notifier = (*Hooks)(nil)` 
compiles +- Integration tests: + - [x] Full dispatch with native + subprocess hooks on same event — ordering correct +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `session.Notifier` compile-time check passes +- Registry swap is atomic — no partial state visible to readers +- `-race` flag passes with concurrent rebuild + dispatch diff --git a/.compozy/tasks/hooks/task_07.md b/.compozy/tasks/hooks/task_07.md new file mode 100644 index 000000000..ac0263a6c --- /dev/null +++ b/.compozy/tasks/hooks/task_07.md @@ -0,0 +1,94 @@ +--- +status: completed +title: Migrate skills hook parsing to new declarations +type: refactor +complexity: medium +dependencies: + - task_01 +--- + +# Task 7: Migrate skills hook parsing to new declarations + +## Overview + +Migrate `internal/skills` from owning hook dispatch to supplying typed declarations to the new hooks platform. This is the hard cut-over for skills: delete `HookRunner`, old `HookDecl`/`HookEvent`/`HookPayload`/`HookResult`, rewrite the loader to parse the new schema (dotted event names, mode, priority, matcher), and update all tests. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST delete `internal/skills/hooks.go` entirely (HookRunner, RunHooks, runHook, orderSkillsForHooks, all helpers) +- MUST delete `internal/skills/hook_process_unix.go` and `hook_process_windows.go` +- MUST delete `internal/skills/hooks_test.go` +- MUST remove `HookDecl`, `HookEvent`, `HookPayload`, `HookResult` from `internal/skills/types.go` +- MUST add new `Hooks []hooks.HookDecl` field to `Skill` struct using types from `internal/hooks` +- MUST rewrite `parseHookDecls()` in `loader.go` to parse new schema: `event` (dotted), `command`, `args`, `timeout`, `env`, `mode`, `priority`, `matcher` +- MUST rewrite `validHookEvent()` to validate against `internal/hooks` event taxonomy +- MUST remove `cloneHookDecls()` from `registry.go` and update `cloneSkill()` +- MUST update all test files that reference old hook types +- MUST update testdata YAML files with new event names (`on_session_created` → `session.post_create`) + + +## Subtasks +- [x] 7.1 Delete `hooks.go`, `hook_process_unix.go`, `hook_process_windows.go`, `hooks_test.go` +- [x] 7.2 Remove old hook types from `types.go`, add new `Hooks` field using `hooks.HookDecl` +- [x] 7.3 Rewrite `parseHookDecls()` and `validHookEvent()` in `loader.go` +- [x] 7.4 Remove `cloneHookDecls()` from `registry.go`, update `cloneSkill()` +- [x] 7.5 Update testdata YAML and all affected tests +- [x] 7.6 Write new unit tests for the rewritten hook parsing + +## Implementation Details + +Modify existing files in `internal/skills/`: +- Delete: `hooks.go`, `hook_process_unix.go`, `hook_process_windows.go`, `hooks_test.go` +- Modify: `types.go` (remove old types, add hooks.HookDecl 
reference), `loader.go` (rewrite parsing), `registry.go` (remove cloneHookDecls) +- Update testdata: `testdata/loader/hooks-only/SKILL.md`, `testdata/loader/invalid-hook-command/SKILL.md`, `testdata/loader/combined/SKILL.md` + +Reference TechSpec "Migration from Current Hooks Implementation" section for exact mapping. + +### Relevant Files +- `internal/skills/types.go:55-70` — Old HookDecl, HookEvent to delete +- `internal/skills/hooks.go` — Entire file to delete +- `internal/skills/loader.go:290-343` — parseHookDecls, validHookEvent to rewrite +- `internal/skills/registry.go:762-779` — cloneHookDecls to delete +- `internal/skills/registry.go:704` — cloneSkill hook clone call to update +- `internal/skills/hooks_test.go` — Entire file to delete +- `internal/skills/testdata/loader/hooks-only/SKILL.md` — Update event name +- `internal/hooks/types.go` (task_01) — New HookDecl type to import + +### Dependent Files +- `internal/daemon/notifier.go` — skillsHookDispatcher references HookRunner (deleted in task_09) +- `internal/daemon/boot.go` — hookRunner creation (deleted in task_09) + +### Related ADRs +- [ADR-002: Use a Dotted Hook Taxonomy](../adrs/adr-002.md) — New event names + +## Deliverables +- Deleted files: `hooks.go`, `hook_process_*.go`, `hooks_test.go` +- Modified files: `types.go`, `loader.go`, `registry.go` +- Updated testdata files +- New unit tests for rewritten parsing with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] Parse YAML with `event: session.post_create` succeeds + - [x] Parse YAML with old `event: on_session_created` fails with descriptive error mentioning new name + - [x] Parse YAML with unknown event `event: foo.bar` fails validation + - [x] Parse YAML with new optional fields: mode, priority, matcher + - [x] Parse YAML with minimal fields (just event + command) uses defaults + - [x] `cloneSkill()` correctly deep-copies the new Hooks field + - [x] Skill struct `Hooks` field is `[]hooks.HookDecl` type +- Test coverage target: 
>=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `make lint` passes — no references to deleted types +- `make build` passes — no broken imports +- Zero references to `HookRunner`, old `HookDecl`, `HookPayload` in `internal/skills/` diff --git a/.compozy/tasks/hooks/task_08.md b/.compozy/tasks/hooks/task_08.md new file mode 100644 index 000000000..d31343fb1 --- /dev/null +++ b/.compozy/tasks/hooks/task_08.md @@ -0,0 +1,91 @@ +--- +status: completed +title: Config and agent-definition hook declarations +type: backend +complexity: medium +dependencies: + - task_01 +--- + +# Task 8: Config and agent-definition hook declarations + +## Overview + +Extend `internal/config` to parse hook declarations from TOML config layers (policy, user, workspace) and extend agent-definition loading to emit hook declarations. Both feed into the hooks registry as declaration sources alongside Go-native and skill hooks. + + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST extend config schema to support hook declarations under a `[hooks]` or `[[hooks.declarations]]` section +- MUST parse hook fields: name, event, mode, required, priority, timeout, matcher, executor (command, args, env) +- MUST validate declarations at config load time using hooks normalization from task_02 +- MUST respect existing config precedence (policy → user → workspace) +- MUST extend agent-definition parsing to support hooks in agent def YAML/TOML +- MUST scope agent-definition hooks to execute only for matching agent type sessions +- MUST export a function that returns `[]hooks.HookDecl` for the registry to consume +- SHOULD apply default priority 500 for config 
hooks and 100 for agent-definition hooks + + +## Subtasks +- [x] 8.1 Extend config schema with hook declaration fields +- [x] 8.2 Implement TOML parsing for hook declarations in config loader +- [x] 8.3 Implement hook declaration parsing in agent-definition loader +- [x] 8.4 Implement validation using hooks normalization functions +- [x] 8.5 Export `HookDeclarations()` function from config for registry consumption +- [x] 8.6 Write unit tests for config and agent-definition hook parsing + +## Implementation Details + +Modify existing files: +- `internal/config/config.go` — Add hooks section to config struct +- `internal/config/loader.go` or relevant loading file — Parse hook declarations from TOML +- Agent definition loading code — Parse hooks from agent definitions + +Reference TechSpec "Declaration Model" section. Reference existing config loading patterns in `internal/config/`. + +### Relevant Files +- `internal/config/config.go:104-112` — Current SkillsConfig with AllowedMarketplaceHooks +- `internal/config/` — Config loading and validation patterns +- `internal/hooks/types.go` (task_01) — HookDecl type to use +- `internal/hooks/normalize.go` (task_02) — Normalization functions for validation + +### Dependent Files +- `internal/hooks/hooks.go` (task_06) — Registry Rebuild consumes these declarations +- `internal/daemon/boot.go` — Wiring (task_09) connects config declarations to registry + +### Related ADRs +- [ADR-004: Support Four Declaration Sources](../adrs/adr-004.md) — Config and agent-definition as sources +- [ADR-011: Simplify Ordering](../adrs/adr-011.md) — Default priority 500 for config, 100 for agent-def + +## Deliverables +- Extended config schema with hook declarations +- Config hook parsing and validation +- Agent-definition hook parsing +- Export function for registry consumption +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] Parse valid TOML hook declaration with all fields + - [x] Parse TOML hook with minimal 
fields (name + event + command) — defaults applied + - [x] Invalid event in config hook fails validation with descriptive error + - [x] Config hook `required: true` with `mode: async` fails validation + - [x] Config hooks from multiple precedence levels merge correctly + - [x] Agent-definition hook parsed from YAML with agent name scope + - [x] Agent-definition hook scoped to agent type — matcher includes agent name + - [x] `HookDeclarations()` returns combined config + agent-def declarations + - [x] Empty config hooks section returns empty list (not nil) +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Config hooks integrate with existing config precedence +- Agent-definition hooks are properly scoped to agent type diff --git a/.compozy/tasks/hooks/task_09.md b/.compozy/tasks/hooks/task_09.md new file mode 100644 index 000000000..d8a408d13 --- /dev/null +++ b/.compozy/tasks/hooks/task_09.md @@ -0,0 +1,99 @@ +--- +status: completed +title: Wire Hooks in daemon — replace notifierFanout +type: refactor +complexity: critical +dependencies: + - task_06 + - task_07 + - task_08 +--- + +# Task 9: Wire Hooks in daemon — replace notifierFanout + +## Overview + +Hard cut-over in the daemon composition root: delete `notifierFanout` and `skillsHookDispatcher`, wire the new `Hooks` struct as the `session.Notifier`, connect reload triggers from the skills watcher, and update the shutdown sequence to `stop sessions → Hooks.Close() → close servers → close DB`. This is the most critical integration point — it connects the entire hooks platform to the running daemon. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST delete `notifierFanout` struct and all its methods from `daemon/notifier.go` +- MUST delete `skillsHookDispatcher` struct and all its methods from `daemon/notifier.go` +- MUST delete `sessionHookPhase` interface from `daemon/notifier.go` +- MUST create `Hooks` in `boot.go` using `hooks.NewHooks()` with functional options +- MUST register Go-native hooks, config declarations, and skill declarations into `Hooks` +- MUST pass `Hooks` as `session.Notifier` to session manager +- MUST wire `observe.Observer` as a notifier within `Hooks` (or compose separately — preserve existing observability) +- MUST connect skills watcher reload to `Hooks.Rebuild()` +- MUST connect dream handler callback (postSessionStopped) to `Hooks` via native hook or direct callback +- MUST update shutdown sequence: stop sessions → `Hooks.Close()` → shutdown HTTP → shutdown UDS → close DB → release lock +- MUST remove `hookRunner` creation from boot.go + + +## Subtasks +- [x] 9.1 Delete `notifierFanout`, `skillsHookDispatcher`, `sessionHookPhase` from `daemon/notifier.go` +- [x] 9.2 Create and configure `Hooks` in `boot.go` with all declaration providers +- [x] 9.3 Wire `Hooks` as `session.Notifier` in session manager creation +- [x] 9.4 Wire skills watcher to trigger `Hooks.Rebuild()` on change +- [x] 9.5 Migrate dream handler and observer callbacks to work with `Hooks` +- [x] 9.6 Update shutdown sequence in `daemon.go` +- [x] 9.7 Write integration tests for daemon wiring and hot reload + +## Implementation Details + +Modify existing files: +- `internal/daemon/notifier.go` — Delete almost entire file, keep only minimal content or delete file 
entirely +- `internal/daemon/boot.go` — Replace notifier composition (lines 197-273) with Hooks creation and wiring +- `internal/daemon/daemon.go` — Update shutdown sequence (lines 369-442) to include Hooks.Close() + +Reference TechSpec "Migration from Current Hooks Implementation" and "Async Worker Pool" (shutdown ordering) sections. + +### Relevant Files +- `internal/daemon/notifier.go:21-148` — notifierFanout and skillsHookDispatcher to delete +- `internal/daemon/boot.go:122` — hookRunner creation to delete +- `internal/daemon/boot.go:197-273` — Notifier composition to rewrite +- `internal/daemon/daemon.go:369-442` — Shutdown sequence to update +- `internal/hooks/hooks.go` (task_06) — Hooks struct to instantiate +- `internal/session/interfaces.go:150-155` — Notifier interface that Hooks implements +- `internal/observe/observer.go` — Observer that was previously wired into notifierFanout + +### Dependent Files +- `internal/session/` — Receives new Notifier (no code change needed — same interface) +- `internal/observe/` — May need adjustment if observer was composed in notifierFanout + +### Related ADRs +- [ADR-013: Hot-Reloadable Registry with RWMutex Snapshot Swap](../adrs/adr-013.md) — Dispatcher replaces notifierFanout + +## Deliverables +- Deleted/gutted `daemon/notifier.go` +- Rewritten notifier composition in `boot.go` +- Updated shutdown sequence in `daemon.go` +- Integration tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] `Hooks` is created with valid options and starts pool + - [x] `Hooks` is wired as session.Notifier — compile-time check + - [x] Shutdown sequence calls `Hooks.Close()` after session stop +- Integration tests: + - [x] Daemon boot creates `Hooks`, registers declarations, builds initial registry + - [x] Skills watcher file change triggers `Hooks.Rebuild()` — new hooks visible in next dispatch + - [x] Session create fires `session.post_create` hooks via `Hooks.OnSessionCreated` + - [x] Session stop fires 
`session.post_stop` hooks via `Hooks.OnSessionStopped` + - [x] Graceful shutdown drains async hooks before closing database +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `make verify` passes (fmt, lint, test, build) +- Zero references to `notifierFanout` or `skillsHookDispatcher` in codebase +- Daemon boots, runs sessions, and shuts down cleanly with new Hooks diff --git a/.compozy/tasks/hooks/task_10.md b/.compozy/tasks/hooks/task_10.md new file mode 100644 index 000000000..049af4fd3 --- /dev/null +++ b/.compozy/tasks/hooks/task_10.md @@ -0,0 +1,96 @@ +--- +status: completed +title: Integrate session, input, prompt, event, and agent dispatch +type: backend +complexity: high +dependencies: + - task_06 + - task_09 +--- + +# Task 10: Integrate session, input, prompt, event, and agent dispatch + +## Overview + +Wire typed dispatch calls into the session manager at all `session.*`, `input.*`, `prompt.*`, `event.*`, and `agent.*` lifecycle points. This transforms the session manager from notifying after-the-fact to dispatching sync barriers before operations (pre_create, pre_resume, pre_stop) and async observations after them. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST add `session.pre_create` sync dispatch before session creation — can block or patch +- MUST add `session.post_create` async dispatch after session activation +- MUST add `session.pre_resume` sync dispatch before session resume +- MUST add `session.post_resume` async dispatch after resume +- MUST add `session.pre_stop` sync dispatch before session stop +- MUST add `session.post_stop` async dispatch after session stopped +- MUST add `input.pre_submit` sync dispatch before user input is processed — can patch message +- MUST add `prompt.post_assemble` sync dispatch after prompt assembly — can patch assembled prompt +- MUST add `event.pre_record` and `event.post_record` async dispatch around event recording +- MUST add `agent.pre_start`, `agent.spawned`, `agent.crashed`, `agent.stopped` dispatch at agent lifecycle points +- MUST handle pre_create dispatch failure by aborting session creation with error +- MUST preserve existing session manager error handling and cleanup patterns + + +## Subtasks +- [x] 10.1 Add session lifecycle dispatch (pre/post create, resume, stop) in manager_lifecycle.go +- [x] 10.2 Add input.pre_submit dispatch in the input/prompt processing path +- [x] 10.3 Add prompt.post_assemble dispatch in the prompt assembly path +- [x] 10.4 Add event dispatch (pre/post record) in the event recording path +- [x] 10.5 Add agent lifecycle dispatch (pre_start, spawned, crashed, stopped) +- [x] 10.6 Write integration tests for session lifecycle hooks and permission escalation e2e + +## Implementation Details + +Modify existing files: +- `internal/session/manager_lifecycle.go` — Add dispatch calls around 
Create, Resume, Stop +- `internal/session/manager_helpers.go` — Add dispatch at activation and watch points +- `internal/session/manager_prompt.go` — Add dispatch around prompt assembly +- Event recording path — Add dispatch around event persistence + +The session manager needs access to typed dispatch functions. Either inject `*hooks.Hooks` directly or define a narrow interface consumed by session that `Hooks` implements. + +### Relevant Files +- `internal/session/manager_lifecycle.go` — Create (line 82), Stop (lines 317-402) +- `internal/session/manager_helpers.go` — activateAndWatch, notifier.OnSessionCreated call +- `internal/session/manager_prompt.go` — Prompt assembly path +- `internal/session/interfaces.go` — May need extended interface or Hooks injection +- `internal/hooks/hooks.go` (task_06) — Typed dispatch functions to call + +### Dependent Files +- `internal/session/` — Multiple files modified with dispatch calls + +### Related ADRs +- [ADR-006: Sequential Pipeline for Sync Hook Patch Composition](../adrs/adr-006.md) — Pre-create can block creation + +## Deliverables +- Modified session manager files with dispatch calls at all lifecycle points +- Integration tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] `session.pre_create` hook that denies — session creation returns error + - [x] `session.pre_create` hook that patches — patched payload used for creation + - [x] `session.post_create` async hook fires after session is active + - [x] `input.pre_submit` hook patches message — patched message is processed + - [x] `prompt.post_assemble` hook patches prompt — patched prompt is used + - [x] `agent.crashed` hook fires when agent process crashes + - [x] `event.pre_record` and `event.post_record` fire around event persistence (async-only) +- Integration tests: + - [x] Full session lifecycle: create → input → prompt → agent events → stop — all hooks fire in order + - [x] Permission escalation e2e: permission.request hook attempts 
deny→allow — blocked by invariant + - [x] Pre-stop hook with required flag — hook failure prevents clean stop, error propagated +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Sync barriers can block operations — pre_create deny prevents session creation +- Existing session manager error handling and cleanup are preserved +- No regression in session lifecycle without hooks configured diff --git a/.compozy/tasks/hooks/task_11.md b/.compozy/tasks/hooks/task_11.md new file mode 100644 index 000000000..df355501a --- /dev/null +++ b/.compozy/tasks/hooks/task_11.md @@ -0,0 +1,85 @@ +--- +status: completed +title: Integrate turn, message, and context dispatch +type: backend +complexity: medium +dependencies: + - task_10 +--- + +# Task 11: Integrate turn, message, and context dispatch + +## Overview + +Wire typed dispatch calls for `turn.*`, `message.*`, and `context.*` hook families into the ACP event flow and compaction paths. Turn and message hooks observe the normalized ACP event stream. Context hooks wrap the compaction/context-window management operations. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST add `turn.start` and `turn.end` dispatch at turn boundaries in the ACP event flow +- MUST add `message.start` sync dispatch at message begin +- MUST add `message.delta` async-only dispatch for streaming tokens +- MUST add `message.end` sync dispatch at message completion +- MUST add `context.pre_compact` sync dispatch before compaction — can patch compaction params +- MUST add `context.post_compact` async dispatch after compaction completes +- MUST ensure `message.delta` dispatch does not block token streaming (async-only enforced by event eligibility) +- MUST ensure `turn.start`/`turn.end` fire at correct ACP event boundaries + + +## Subtasks +- [x] 11.1 Add turn.start and turn.end dispatch in ACP event processing +- [x] 11.2 Add message.start, message.delta, and message.end dispatch in message flow +- [x] 11.3 Add context.pre_compact and context.post_compact dispatch around compaction +- [x] 11.4 Verify message.delta is async-only and does not block streaming +- [x] 11.5 Write unit tests for turn, message, and context hooks + +## Implementation Details + +Modify existing files in: +- ACP event processing path — Where agent events are normalized and dispatched +- Compaction/context management path — Where context window compaction happens +- Session event flow — Where turn and message boundaries are identified + +### Relevant Files +- `internal/session/` — ACP event processing and turn management +- `internal/acp/` — Agent event types and turn boundaries +- `internal/hooks/hooks.go` (task_06) — Typed dispatch functions +- `internal/hooks/events.go` (task_01) — message.delta is async-only + +### Dependent Files 
+- ACP event flow files — Modified with dispatch calls + +### Related ADRs +- [ADR-012: Classify Events into Sync-Eligible and Async-Only](../adrs/adr-012.md) — message.delta is async-only + +## Deliverables +- Modified ACP event flow with turn and message dispatch +- Modified compaction path with context dispatch +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] `turn.start` fires when a new turn begins in ACP event flow + - [x] `turn.end` fires when a turn completes + - [x] `message.start` sync hook fires at message begin + - [x] `message.delta` dispatches asynchronously — does not block token delivery + - [x] `message.end` sync hook fires at message completion + - [x] `context.pre_compact` hook patches compaction params — patched params used + - [x] `context.post_compact` fires after compaction completes +- Integration tests: + - [x] Full message flow: start → deltas → end — hooks fire at correct points + - [x] Compaction with pre_compact hook — verifies patched params are used +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Token streaming is not blocked by message.delta hooks +- Turn boundaries correctly identified in ACP event flow diff --git a/.compozy/tasks/hooks/task_12.md b/.compozy/tasks/hooks/task_12.md new file mode 100644 index 000000000..d6fe1f5dd --- /dev/null +++ b/.compozy/tasks/hooks/task_12.md @@ -0,0 +1,97 @@ +--- +status: completed +title: Hook observability storage and HTTP introspection +type: backend +complexity: medium +dependencies: + - task_09 +--- + +# Task 12: Hook observability storage and HTTP introspection + +## Overview + +Implement `HookRunRecord` persistence in the observability store and three HTTP introspection endpoints: catalog (resolved hooks with ordering), runs (execution history with patch audit), and events (taxonomy with eligibility). This provides the debugging and forensic capabilities the platform needs. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST persist `HookRunRecord` to the observability store with all fields: hook name, event, source, mode, duration, outcome, dispatch depth, `PatchApplied` +- MUST populate `PatchApplied` for security-relevant families: `permission.*`, `prompt.*`, `tool.*`, `input.*` +- MUST omit `PatchApplied` for other families unless debug mode is enabled +- MUST implement `GET /api/hooks/catalog?workspace=:id&agent=:name` — returns resolved hooks with source attribution and pipeline order +- MUST implement `GET /api/hooks/runs?session=:id&event=:event` — returns execution history including patch diffs +- MUST implement `GET /api/hooks/events` — returns taxonomy with sync eligibility and payload/patch schema names +- MUST integrate telemetry emission into the dispatch pipeline (called after each hook execution) +- MUST add metrics: dispatch count, latency, queue depth, drop count, permission escalation blocks, depth violations + + +## Subtasks +- [x] 12.1 Add `HookRunRecord` schema to observability store +- [x] 12.2 Implement telemetry emitter called by pipeline after each hook execution +- [x] 12.3 Implement `GET /api/hooks/catalog` endpoint +- [x] 12.4 Implement `GET /api/hooks/runs` endpoint with patch audit data +- [x] 12.5 Implement `GET /api/hooks/events` endpoint +- [x] 12.6 Add structured log events and metrics per TechSpec monitoring section +- [x] 12.7 Write tests for storage, endpoints, and telemetry + +## Implementation Details + +Create/modify files: +- `internal/observe/` — Add HookRunRecord to store schema, add WriteHookRecord method +- `internal/api/httpapi/` — Add hook introspection endpoint handlers +- 
`internal/hooks/telemetry.go` — Telemetry emitter integrated into pipeline + +Reference TechSpec "Monitoring and Observability" and "API Endpoints" sections. + +### Relevant Files +- `internal/observe/observer.go` — Observer struct and Registry interface +- `internal/store/sessiondb/` — Per-session event store (for run records) +- `internal/api/httpapi/` — HTTP handler patterns +- `internal/api/contract/` — Shared contract types for API responses +- `internal/hooks/pipeline.go` (task_04) — Pipeline calls telemetry after each hook +- `internal/hooks/types.go` (task_01) — HookRunRecord struct + +### Dependent Files +- `internal/observe/` — New methods for hook telemetry +- `internal/api/httpapi/` — New endpoint handlers +- `internal/store/sessiondb/` — New table/schema for run records + +### Related ADRs +- [ADR-010: Persist Patch Audit Trail for Security-Relevant Families](../adrs/adr-010.md) — Patch audit storage + +## Deliverables +- HookRunRecord persistence in observability store +- Three HTTP introspection endpoints +- Telemetry emitter in dispatch pipeline +- Structured logs and metrics +- Unit and integration tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [x] HookRunRecord persisted with all fields for security-relevant family (permission.request) + - [x] HookRunRecord PatchApplied is null for non-security family (session.post_create) in normal mode + - [x] HookRunRecord PatchApplied is populated for non-security family when debug mode enabled + - [x] Telemetry emitter records duration and outcome correctly + - [x] Permission escalation block generates `hook.dispatch.permission_escalation_blocked` log +- Integration tests: + - [x] `GET /api/hooks/catalog` returns hooks sorted by pipeline order with source attribution + - [x] `GET /api/hooks/catalog?workspace=X` filters to workspace-scoped hooks + - [x] `GET /api/hooks/runs?session=X` returns execution history with patch diffs + - [x] `GET /api/hooks/events` returns all 27 events with 
correct sync eligibility + - [x] Dispatch → store → query cycle: hook fires, record persisted, endpoint returns it +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Patch audit data available for forensic analysis via runs endpoint +- Catalog endpoint shows resolved pipeline order for debugging +- All structured log events from TechSpec monitoring section are emitted diff --git a/.compozy/tasks/session-resilience/_tasks.md b/.compozy/tasks/session-resilience/_tasks.md new file mode 100644 index 000000000..57e3ff7df --- /dev/null +++ b/.compozy/tasks/session-resilience/_tasks.md @@ -0,0 +1,10 @@ +# Session Resilience — Task List + +## Tasks + +| # | Title | Status | Complexity | Dependencies | +|---|-------|--------|------------|--------------| +| 01 | StopReason + StopCause types | pending | medium | — | +| 02 | Stop classification + cause propagation | pending | medium | task_01 | +| 03 | Data layer propagation | pending | medium | task_01 | +| 04 | Resume repair + config + verification | pending | high | task_02, task_03 | diff --git a/.compozy/tasks/session-resilience/_techspec.md b/.compozy/tasks/session-resilience/_techspec.md new file mode 100644 index 000000000..14a48abb0 --- /dev/null +++ b/.compozy/tasks/session-resilience/_techspec.md @@ -0,0 +1,410 @@ +# TechSpec: Session Resilience + +## Executive Summary + +This TechSpec adds two resilience capabilities to AGH's session lifecycle: a canonical stop reason taxonomy and infrastructure-level repair on resume. Together they close the gaps identified in the extensibility analysis (P3, P4) where AGH has no classification of why sessions stopped and no validation when resuming after a crash. 
+ +The implementation strategy adds a `StopReason` type in `internal/store` (avoiding import cycles), an explicit `StopCause` signal in the session lifecycle (so classification doesn't rely on context inference), and a validation pipeline in `Resume()`. These ship as two standalone PRs with **zero hooks dependency** — hook integration points are prepared as seams but don't block delivery. + +Loop/recursion guards (P5) — including the LoopGuard with SHA-256 hashing, graduated verdicts, and iteration budgets — are **deferred to Phase 2** after the hooks platform is fully wired and real session data shows which loop patterns occur in practice. The hooks techspec already defines `tool.pre_call` as a sync-eligible deny-capable hook point, which is the correct intervention surface for pre-execution guards. Phase 2 will use that hook point, not the post-execution `tool.post_call` observation model originally proposed. + +The primary trade-off is shipping safety incrementally: Phase 1 provides observability and crash recovery without loop protection. This is acceptable because ACP agents (Claude Code, Codex, Gemini CLI) have their own in-loop guards, and AGH's iteration budget can be added as a trivial `tool.pre_call` native hook during hooks task 10 (turn/tool dispatch integration) without a separate techspec. + +## System Architecture + +### Component Overview + +- **`StopReason` type** — Lives in `internal/store` (same package as `SessionMeta`) to avoid import cycles between `session` and `store`. Constants for 10 canonical values. Validated at the store boundary. + +- **`StopCause`** — New type in `internal/session` that explicitly signals WHY a stop was requested. Set by the caller of `Stop()` and by `handleProcessExit()`. Eliminates the need to infer `user_canceled` vs `shutdown` vs `completed` from `ctx.Err()`. + +- **Stop reason classification** — Logic in `finalizeStopped()` that maps `StopCause` + `waitErr` + process exit status → `StopReason`. 
Deterministic, testable, no context inference. + +- **Resume repair pipeline** — Validation functions in `Resume()` that check infrastructure state before starting the ACP agent. Classifies previous crashes from meta state. Hook dispatch points are prepared as no-op seams until the hooks platform wires them. + +### Data Flow + +``` +Stop request (with StopCause) + ↓ +finalizeStopped(ctx, session, waitErr) + ↓ +classifyStopReason(cause, waitErr, detail) + ↓ +StopReason persisted to: + → SessionMeta (JSON on disk) + → Session.Info() (in-memory) + → Global DB sessions table + → session_stopped event +``` + +``` +Resume(ctx, id) + ↓ +ReadSessionMeta() + ↓ +classifyPreviousStop(meta) → set StopReason if crashed + ↓ +validateInfrastructure(meta) → workspace, agent def, event store, meta fields + ↓ +[hook seam: session.pre_resume — no-op until hooks wired] + ↓ +resolveResumeWorkspace() → driver.Start() → activateAndWatch() + ↓ +[hook seam: session.post_resume — no-op until hooks wired] +``` + +## Implementation Design + +### Core Interfaces + +```go +// internal/store/types.go — StopReason lives here to avoid import cycles +type StopReason string + +const ( + StopCompleted StopReason = "completed" + StopUserCanceled StopReason = "user_canceled" + StopMaxIterations StopReason = "max_iterations" + StopLoopDetected StopReason = "loop_detected" + StopTimeout StopReason = "timeout" + StopBudgetExceeded StopReason = "budget_exceeded" + StopError StopReason = "error" + StopAgentCrashed StopReason = "agent_crashed" + StopHookStopped StopReason = "hook_stopped" + StopShutdown StopReason = "shutdown" +) + +func ValidStopReason(r StopReason) bool +``` + +```go +// internal/session/stop_cause.go — explicit stop request signal +type StopCause int + +const ( + CauseNone StopCause = iota // not yet stopped + CauseCompleted // agent finished naturally + CauseUserRequested // user called Stop() + CauseShutdown // daemon shutting down + CauseHookDenied // hook denied
continuation + CauseProcessExited // subprocess exited unexpectedly +) +``` + +### Data Models + +**SessionMeta** (extended — `internal/store/types.go`): + +```go +type SessionMeta struct { + // ... existing fields ... + StopReason *StopReason `json:"stop_reason,omitempty"` + StopDetail string `json:"stop_detail,omitempty"` +} +``` + +`Validate()` on `SessionMeta` checks `StopReason` membership via `ValidStopReason()` when non-nil. + +**SessionInfo** (extended — `internal/session/session.go`): + +```go +type SessionInfo struct { + // ... existing fields ... + StopReason store.StopReason + StopDetail string +} +``` + +**Session** (extended — `internal/session/session.go`): + +```go +type Session struct { + // ... existing fields ... + stopCause StopCause // set by prepareStop() or handleProcessExit() + stopReason store.StopReason + stopDetail string +} +``` + +**Global DB sessions table** (extended — `internal/store/globaldb/`): + +```sql +ALTER TABLE sessions ADD COLUMN stop_reason TEXT; +ALTER TABLE sessions ADD COLUMN stop_detail TEXT; +``` + +**SessionStateUpdate** (extended): + +```go +type SessionStateUpdate struct { + // ... existing fields ... 
+ StopReason *string + StopDetail string +} +``` + +### Stop Cause Propagation + +The `StopCause` is set explicitly at each stop initiation point: + +| Call Site | StopCause | How | +|-----------|-----------|-----| +| `Manager.Stop(ctx, id)` | `CauseUserRequested` | New parameter or method variant | +| `daemon.Shutdown()` → `sessions.Stop()` | `CauseShutdown` | Set on session before calling Stop | +| `handleProcessExit()` — clean exit | `CauseCompleted` | `waitErr == nil` and `stopWasRequested()` is false | +| `handleProcessExit()` — unexpected exit | `CauseProcessExited` | `waitErr != nil` | +| Future: hook denied continuation | `CauseHookDenied` | Hook pipeline sets cause before stopping | +| Future: iteration budget exhausted | `CauseUserRequested` + detail | Guard calls `Manager.Stop()` with budget reason | + +### Stop Reason Classification Logic + +In `finalizeStopped(ctx, session, waitErr)`: + +```go +func classifyStopReason(cause StopCause, waitErr error, detail string) (store.StopReason, string) { + switch cause { + case CauseShutdown: + return store.StopShutdown, "daemon shutdown" + case CauseHookDenied: + return store.StopHookStopped, detail + case CauseUserRequested: + // Check if detail indicates a guard trigger + if strings.Contains(detail, "max_iterations") { + return store.StopMaxIterations, detail + } + if strings.Contains(detail, "loop_detected") { + return store.StopLoopDetected, detail + } + if strings.Contains(detail, "budget_exceeded") { + return store.StopBudgetExceeded, detail + } + return store.StopUserCanceled, detail + case CauseProcessExited: + if waitErr != nil { + return store.StopAgentCrashed, waitErr.Error() + } + return store.StopError, "process exited unexpectedly" + case CauseCompleted: + return store.StopCompleted, "" + default: + if waitErr != nil { + return store.StopError, waitErr.Error() + } + return store.StopCompleted, "" + } +} +``` + +No context inference. No checking `ctx.Err()` to guess intent. The cause is explicit. 
+ +### Resume Repair Pipeline + +Inserted into `Resume()` after `ReadSessionMeta()` and before `resolveResumeWorkspace()`: + +``` +1. ReadSessionMeta(metaPath) + +2. classifyPreviousStop(meta): + - meta.State == "active" → StopReason = "agent_crashed", detail = "daemon crashed while session active" + - meta.State == "stopping" → StopReason = "agent_crashed", detail = "stop did not complete" + - meta.State == "starting" → StopReason = "error", detail = "start did not complete" + - meta.State == "stopped" → use existing StopReason from meta (already classified) + +3. validateInfrastructure(meta) → returns []error (independent checks): + a. os.Stat(workspace.RootDir) — exists and accessible? + b. config.ResolveAgent(meta.AgentName) — agent definition still present? + c. os.Stat(sessionDBPath) — event store file exists and size > 0? + d. meta.ID, meta.AgentName, meta.WorkspaceID all non-empty? + +4. IF meta was classified as crashed in step 2: + - Update meta.StopReason and meta.StopDetail + - WriteMeta to persist the classification + - Log structured event: session.resume.crash_classified + +5. [HOOK SEAM] — prepared for session.pre_resume dispatch + When hooks platform wires this event, payload includes: + session_id, agent_name, workspace_id, previous_stop_reason, previous_stop_detail + +6-9. [existing] resolveResumeWorkspace() → ResolveAgent() → driver.Start() → activateAndWatch() + +10. [HOOK SEAM] — prepared for session.post_resume dispatch + Payload includes: session_id, agent_name, resume_from_crash (bool) +``` + +### API Endpoints + +Existing endpoints extended, no new endpoints: + +- `GET /api/sessions` — response includes `stop_reason` and `stop_detail` on stopped sessions +- `GET /api/sessions/:id` — response includes `stop_reason` and `stop_detail` +- Session SSE events include `stop_reason` in the `session_stopped` event payload + +## Integration Points + +- **Store** (`internal/store`) — `StopReason` type defined here. 
`SessionMeta` extended with new fields. +- **Global DB** (`internal/store/globaldb`) — New columns, updated queries for `RegisterSession()`, `UpdateSessionState()`, `ReconcileSessions()`, scan helpers. +- **Observer** (`internal/observe`) — `OnSessionStopped` passes StopReason to global DB update. +- **API contract** (`internal/api/contract`) — `SessionResponse` includes stop_reason/stop_detail. Conversion in `internal/api/core/conversions.go`. +- **Session query** (`internal/session/query.go`) — `sessionInfoFromMeta()` maps StopReason from meta. +- **Config** (`internal/config`) — New `[session.limits]` section with `timeout` field. `LoopGuardConfig` deferred to Phase 2. +- **Daemon** (`internal/daemon`) — `Shutdown()` sets `CauseShutdown` on sessions before stopping them. `boot.go` wiring unchanged in Phase 1. +- **Hooks** (`internal/hooks`) — Seams prepared for `session.pre_resume` and `session.post_resume`. Payloads extended with `PreviousStopReason` and `ResumeFromCrash` fields when hooks land. + +## Impact Analysis + +| Component | Impact Type | Description and Risk | Required Action | +|-----------|-------------|---------------------|-----------------| +| `internal/store/types.go` | modified | Add `StopReason` type, constants, validation. Add fields to `SessionMeta`. Low risk. | Define type, update Validate() | +| `internal/session/session.go` | modified | Add `stopCause`, `stopReason`, `stopDetail` fields. Update `Info()`, `Meta()`. Low risk. | Add fields, update snapshot methods | +| `internal/session/stop_cause.go` | new | `StopCause` enum. Low risk. | Simple type + constants | +| `internal/session/manager_lifecycle.go` | modified | `classifyStopReason()` in `finalizeStopped()`. Repair pipeline in `Resume()`. `Stop()` accepts/propagates cause. Medium risk — core lifecycle.
| Classification logic, validation pipeline, cause propagation | +| `internal/store/globaldb/` | modified | Add columns, update `RegisterSession`, `UpdateSessionState`, `ReconcileSessions`, scan helpers, migration SQL. Low risk. | Schema + query updates | +| `internal/session/query.go` | modified | `sessionInfoFromMeta()` maps StopReason. Low risk. | Add field mapping | +| `internal/api/contract/contract.go` | modified | Add stop_reason/stop_detail to session response types. Low risk. | Add fields | +| `internal/api/core/conversions.go` | modified | Include StopReason in session info conversion. Low risk. | Update conversion | +| `internal/observe/observer.go` | modified | Pass StopReason in `OnSessionStopped` → global DB. Low risk. | Pass through fields | +| `internal/config/config.go` | modified | Add `SessionLimitsConfig` with `timeout`. Low risk. | Add struct, defaults, TOML parsing | +| `internal/config/merge.go` | modified | Merge session limits config. Low risk. | Add merge logic | +| `internal/daemon/daemon.go` | modified | Set `CauseShutdown` on sessions during shutdown. Low risk. 
| Propagate cause | + +## Testing Approach + +### Unit Tests + +**StopReason validation** (`internal/store/types_test.go`): +- All 10 constants pass `ValidStopReason()` +- Empty string and arbitrary strings fail +- SessionMeta.Validate() rejects invalid StopReason values + +**Stop classification** (`manager_lifecycle_test.go`): +- Table-driven: (StopCause, waitErr, detail) → expected (StopReason, StopDetail) +- `CauseShutdown` → always `StopShutdown` regardless of waitErr +- `CauseUserRequested` → `StopUserCanceled` +- `CauseUserRequested` + detail "max_iterations" → `StopMaxIterations` +- `CauseProcessExited` + waitErr → `StopAgentCrashed` +- `CauseProcessExited` + nil waitErr → `StopError` +- `CauseCompleted` → `StopCompleted` +- `CauseHookDenied` → `StopHookStopped` +- Precedence: shutdown wins over all other signals + +**Resume repair** (`manager_lifecycle_test.go`): +- Crash classification: meta.State="active" → crashed, "stopping" → crashed, "starting" → error, "stopped" → preserved +- Missing workspace dir → descriptive error with path +- Missing agent definition → descriptive error with agent name +- Missing/zero-size event store → descriptive error +- Invalid meta fields → descriptive error per field +- Multiple failures: all checks run independently, all errors collected +- Crashed session: StopReason written to meta.json on disk + +**StopCause propagation** (`manager_lifecycle_test.go`): +- `Stop()` sets `CauseUserRequested` +- Daemon shutdown sets `CauseShutdown` +- Process clean exit sets `CauseCompleted` +- Process unexpected exit sets `CauseProcessExited` + +### Integration Tests + +**Stop reason end-to-end** (`manager_integration_test.go`): +- Create → Stop explicitly → verify `StopUserCanceled` in meta JSON, global DB, API response +- Create → kill subprocess → verify `StopAgentCrashed` in meta, global DB, API +- Create → daemon shutdown → verify `StopShutdown` + +**Resume after crash** (`manager_integration_test.go`): +- Create → write meta with 
State="active" (simulate crash) → Resume → verify crash classified, StopReason set in meta +- Create → delete workspace dir → Resume → verify descriptive error returned +- Create → remove agent from config → Resume → verify descriptive error +- Create → truncate event store to 0 bytes → Resume → verify descriptive error +- Create → crash → Resume → verify session activates successfully after classification + +**API propagation** (`httpapi_integration_test.go`): +- Stop session → GET /api/sessions/:id → verify stop_reason and stop_detail in JSON response +- List sessions → verify stopped sessions include stop_reason + +## Development Sequencing + +### Build Order + +1. **StopReason type in `internal/store`** — Define type, constants, `ValidStopReason()`. Add `StopReason` and `StopDetail` fields to `SessionMeta`. Update `Validate()`. — no dependencies + +2. **StopCause type in `internal/session`** — Define `StopCause` enum. Add `stopCause`, `stopReason`, `stopDetail` fields to `Session`. Update `Info()`, `Meta()`. — depends on step 1 + +3. **Global DB schema** — Add columns to sessions table. Update `RegisterSession()`, `UpdateSessionState()`, `ReconcileSessions()`, scan helpers. Migration SQL. — depends on step 1 + +4. **Stop reason classification** — Implement `classifyStopReason()`. Wire into `finalizeStopped()`. Propagate `StopCause` through `Stop()`, `handleProcessExit()`, `daemon.Shutdown()`. — depends on steps 1, 2, 3 + +5. **Resume repair pipeline** — Implement `classifyPreviousStop()`, `validateInfrastructure()`. Insert into `Resume()`. Prepare hook seams (no-op functions). — depends on steps 1, 4 + +6. **API and query propagation** — Update `sessionInfoFromMeta()` in `query.go`. Update contract types. Update conversions. Update Observer. — depends on steps 3, 4 + +7. **Config extension** — Add `SessionLimitsConfig` with `timeout` field. TOML parsing, merge logic, defaults. — no dependencies (can parallel with steps 1-6) + +8. 
**Full verification** — Integration tests, `make verify` — depends on all previous steps + +### Technical Dependencies + +- **No hooks dependency** — Phase 1 ships without the hooks platform. Hook seams are prepared as no-op function calls. +- **Existing session lifecycle** — `finalizeStopped()`, `Resume()`, `Stop()`, `Session`, `SessionMeta` +- **Existing global DB** — sessions table, registration/update/reconcile functions +- **Existing config** — TOML parsing, merge infrastructure + +## Phase 2: Loop/Recursion Guards (Deferred) + +Phase 2 is explicitly deferred until: +1. The hooks platform is fully wired (tasks 8-12 of hooks techspec) +2. Real session telemetry shows loop patterns and frequency +3. StopReason data from Phase 1 informs which guard types are needed + +### Phase 2 Design Direction (from council + Codex review) + +- **Hook point**: `tool.pre_call` (sync, deny-capable) — NOT `tool.post_call` (async, observation-only). The hooks platform already supports deny on `tool.pre_call`. This enables pre-execution blocking. +- **Architecture**: Decompose into sensor (evidence accumulation on `tool.post_call` async) + actuator (policy enforcement on `tool.pre_call` sync deny). Two components, not a monolithic LoopGuard. +- **Minimal guard first**: A simple `max_turns` counter as a native hook on `turn.start` (~30 lines) should ship as part of hooks task 10 (turn dispatch integration), before the full LoopGuard. +- **Full LoopGuard later**: SHA-256 hashing, same-args detection, outcome-aware detection, ping-pong patterns — only when production data justifies the complexity. +- **Package placement**: Separate package (`internal/guard` or similar) with interface-based injection, not embedded in `internal/session`. +- **Config**: Start global-only. Design structs so per-agent overrides can be added as a backward-compatible extension. 
+ +### Phase 2 Stop Reasons (Pre-wired) + +`StopMaxIterations`, `StopLoopDetected`, and `StopBudgetExceeded` are defined in Phase 1's enum but not yet produced by any code path. They are reserved for Phase 2 guards to use. The classification logic in `classifyStopReason()` already handles them via the `detail` string on `CauseUserRequested`. + +## Monitoring and Observability + +- **Metrics** + - `session.stop_reason` counter by reason — distribution of how sessions end + - `session.resume.crash_recovered` counter — sessions resumed after crash + - `session.resume.validation_failed` counter by check type + +- **Structured logs** + - `session.stopped` — includes `stop_reason`, `stop_detail`, `session_id`, `agent_name`, `duration` + - `session.resume.crash_classified` — includes previous state, classified reason + - `session.resume.validation_failed` — includes check name, error details + - `session.resume.succeeded` — includes session_id, resume_from_crash + +- **Alerting thresholds** + - High crash rate (>10% of sessions stop with `agent_crashed` in a window) + - Resume validation failure rate (>20% — indicates environment instability) + +## Technical Considerations + +### Key Decisions + +- `StopReason` type lives in `internal/store`, not `internal/session`, to avoid import cycles between packages that both need the type. (ADR-001, updated) +- Stop classification uses an explicit `StopCause` signal, not `ctx.Err()` inference. Each stop initiation point sets the cause explicitly. (Council + Codex feedback) +- Resume repair validates infrastructure only (workspace, agent def, event store). ACP message repair is the agent's responsibility. (ADR-003) +- Loop/recursion guards deferred to Phase 2 pending hooks platform completion and production data. The correct hook point is `tool.pre_call` (sync deny), not `tool.post_call` (async observe). (Council + Codex review) +- All resilience config is global-only. Per-agent overrides deferred. 
(ADR-004) +- Phase 1 has zero hooks dependency. Hook seams prepared but not wired. + +### Known Risks + +- **Incomplete stop classification at alpha**: Some `StopCause` values (`CauseHookDenied`) won't be produced until hooks are wired. Until then, a stop that reaches `classifyStopReason()` without an explicit cause falls through to the default branch: `StopError` when `waitErr` is non-nil, `StopCompleted` otherwise. This is acceptable and self-correcting. +- **Schema migration**: Adding columns to global DB. Greenfield alpha — delete and recreate DB if needed. +- **No loop protection in Phase 1**: ACP agents provide their own guards. The gap is agents without built-in guards (early third-party ACP agents). Mitigation: Phase 2 minimal guard ships with hooks task 10. + +## Architecture Decision Records + +- [ADR-001: Canonical StopReason Enum on SessionMeta](adrs/adr-001.md) — Single ~10-value enum persisted on SessionMeta, classified in finalizeStopped(). Type lives in `internal/store` to avoid import cycles. +- [ADR-003: Infrastructure-Level Repair on Resume](adrs/adr-003.md) — Validation pipeline in Resume() checking workspace, agent def, event store, and meta consistency +- [ADR-004: Global-Only Configuration for Resilience Limits](adrs/adr-004.md) — All limits in agh.toml, no per-agent or per-session overrides +- [ADR-005: Defer Loop Guards to Phase 2](adrs/adr-005.md) — Full LoopGuard deferred until hooks platform complete and production data available. Minimal guard ships with hooks task 10 on `tool.pre_call` sync. diff --git a/.compozy/tasks/session-resilience/adrs/adr-001.md b/.compozy/tasks/session-resilience/adrs/adr-001.md new file mode 100644 index 000000000..064af1bf3 --- /dev/null +++ b/.compozy/tasks/session-resilience/adrs/adr-001.md @@ -0,0 +1,64 @@ +# ADR-001: Canonical StopReason Enum on SessionMeta + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +AGH sessions have 4 states (Starting, Active, Stopping, Stopped) but no classification of WHY a session stopped.
`finalizeStopped()` records an error event with the raw `waitErr` message but doesn't classify the cause. This makes observability, debugging, billing, and session repair impossible to automate. + +Research across 6 agent harnesses shows that every system needs stop reason classification, but approaches diverge: Claude Code uses a two-layer system (10 internal + 5 SDK-facing), OpenClaw uses a two-layer protocol/internal split, GoClaw distributes reasons across 8 implicit exit paths, Hermes uses free-form strings, OpenFang distributes across ~17 error types, and Pi-Mono uses a 5-value enum. + +## Decision + +Add a single canonical `StopReason` string enum with ~10 values to `SessionMeta` (persisted JSON), `SessionInfo` (in-memory snapshot), and the global DB catalog. A supplementary `StopDetail` string field carries human-readable context (e.g., the actual error message). + +**Type ownership**: `StopReason` lives in `internal/store` (same package as `SessionMeta`) to avoid import cycles. Both `internal/session` and `internal/store/globaldb` need the type — placing it in `internal/session` would create a cycle since `session` already imports `store`. + +**Classification mechanism**: An explicit `StopCause` type in `internal/session` signals WHY a stop was requested (user, shutdown, hook denied, process exited, completed). `finalizeStopped()` maps `StopCause` + `waitErr` → `StopReason` deterministically. No inference from `ctx.Err()`. + +No two-layer split. AGH is an orchestrator, not an LLM caller — it doesn't need retry/failover logic that requires rich internal error categories. + +## Alternatives Considered + +### Alternative 1: Two-Layer (Internal + ACP Surface) + +- **Description**: Rich internal enum (~15 values including rate_limit, auth, billing) plus simple ACP-facing surface (completed, error, cancelled). OpenClaw pattern. +- **Pros**: Better for retry/failover decisions +- **Cons**: AGH doesn't retry LLM calls — agents do. 
The internal layer would be unused complexity. +- **Why rejected**: AGH observes agent outcomes, it doesn't drive them. One layer is sufficient. + +### Alternative 2: Minimal 5-Value Enum + +- **Description**: completed, error, cancelled, timeout, crashed only. +- **Pros**: Simplest possible +- **Cons**: Can't distinguish max_iterations from loop_detected from budget_exceeded — all critical for different remediation paths. +- **Why rejected**: Loses too much signal for observability and automated recovery. + +## Consequences + +### Positive + +- Single source of truth for why sessions stopped +- Enables automated session repair (crash classification) +- Enables observability dashboards and alerting by stop reason +- Enables billing classification (budget_exceeded vs completed vs error) + +### Negative + +- Classification logic in finalizeStopped() must handle all edge cases +- New field requires migration of global DB schema + +### Risks + +- Classification may be ambiguous (e.g., agent crashed during shutdown — is it `agent_crashed` or `shutdown`?). Mitigation: prefer the most specific reason; `shutdown` only when the daemon initiated the stop. + +## References + +- Cross-harness analysis in `.compozy/tasks/session-resilience/analysis_*.md` +- Current finalizeStopped: `internal/session/manager_lifecycle.go:317-402` diff --git a/.compozy/tasks/session-resilience/adrs/adr-002.md b/.compozy/tasks/session-resilience/adrs/adr-002.md new file mode 100644 index 000000000..8a9b94efd --- /dev/null +++ b/.compozy/tasks/session-resilience/adrs/adr-002.md @@ -0,0 +1,72 @@ +# ADR-002: Event-Level LoopGuard in internal/session + +## Status + +Superseded by ADR-005 + +## Date + +2026-04-09 + +## Context + +AGH has zero loop/recursion protection. A misconfigured agent can loop forever, burning tokens and blocking the daemon. Research shows 5/6 harnesses implement iteration budgets and 3/6 implement SHA-256-based tool call cycle detection. 
+ +AGH is unique because it doesn't run the LLM loop — ACP agents do. AGH observes tool_call and tool_result events via the Notifier/Hooks path. Loop detection must work at the event-observation level, not inside the execution loop. + +## Decision + +Implement a `LoopGuard` struct in `internal/session` that: + +1. Receives tool_call events via a native Go hook on `tool.post_call` +2. Maintains a sliding window of SHA-256 hashed tool calls per session +3. Tracks outcomes (call hash + result hash) for no-progress detection +4. Emits 4 graduated verdicts: Allow, Warn, Block, CircuitBreak +5. On Warn: injects guidance into next prompt via hook payload +6. On Block: individual tool call is flagged (agent decides how to handle) +7. On CircuitBreak: session is stopped with `StopReason = "loop_detected"` + +Additionally, an `IterationBudget` counter tracks total iterations (tool-call rounds) per session, stopping with `StopReason = "max_iterations"` when exceeded. + +Both are configured globally in `agh.toml`, not per-agent. + +## Alternatives Considered + +### Alternative 1: Hook-Based Guard Only + +- **Description**: Implement loop detection purely as a native Go hook on tool.post_call with per-session state in a map. +- **Pros**: No new types, minimal code +- **Cons**: Not independently testable, harder to configure, state management mixed with hook logic +- **Why rejected**: Loop detection is complex enough to warrant its own struct with proper tests. + +### Alternative 2: Standalone internal/guard Package + +- **Description**: New package with LoopGuard, IterationBudget, DepthTracker as independent components. +- **Pros**: Maximum modularity +- **Cons**: Adds a package for something tightly coupled to session lifecycle. LoopGuard needs session context (session ID, agent name) and hooks integration. +- **Why rejected**: Coupling to session lifecycle makes a separate package artificial. 
+ +## Consequences + +### Positive + +- Prevents infinite loops from burning tokens and blocking the daemon +- Graduated response lets agents self-correct before forced termination +- SHA-256 hashing catches subtle loops (same args, same results, ping-pong) +- Iteration budget provides a hard safety net + +### Negative + +- Event-level detection is inherently delayed vs. in-loop detection (agent may execute 1-2 more calls before AGH reacts) +- Hash-based detection adds ~1KB memory per session (30 hashes x 32 bytes) + +### Risks + +- False positives on legitimate repeated tool calls (e.g., polling). Mitigation: configurable thresholds and poll tool exemptions. +- Agent may ignore Warn/Block guidance. Mitigation: CircuitBreak is a hard stop that the agent cannot override. + +## References + +- GoClaw toolloop.go: 3-layer detection (same-args, same-result, read-only streak) +- OpenFang loop_guard.rs: SHA-256 with 4 verdicts, outcome-aware, ping-pong detection +- OpenClaw tool-loop-detection.ts: 4-detector system with configurable thresholds diff --git a/.compozy/tasks/session-resilience/adrs/adr-003.md b/.compozy/tasks/session-resilience/adrs/adr-003.md new file mode 100644 index 000000000..96a1235fb --- /dev/null +++ b/.compozy/tasks/session-resilience/adrs/adr-003.md @@ -0,0 +1,68 @@ +# ADR-003: Infrastructure-Level Repair on Resume + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +AGH's `Resume()` re-reads `SessionMeta`, re-resolves workspace/agent, and starts the ACP agent subprocess. It performs no consistency checks. If the daemon crashed, sessions can have inconsistent state: meta says "active" but no process exists, workspace dir may have been deleted, agent definition may have changed. + +Research shows divergent approaches: OpenFang runs a 5-phase repair pipeline before every LLM call, Claude Code runs 5 message-sanitization passes, OpenClaw repairs JSONL files and tool pairing. 
But AGH delegates message history to ACP agents — it doesn't own the conversation. AGH owns infrastructure: session metadata, workspace config, event store, and process lifecycle. + +## Decision + +Add an infrastructure-level validation pipeline to `Resume()` that checks what AGH owns: + +1. **Crash classification**: If meta.State is "active"/"stopping"/"starting" and no process exists, classify as crashed and set StopReason +2. **Workspace validation**: Verify workspace root dir exists and is accessible +3. **Agent validation**: Verify agent definition is still resolvable from config +4. **Event store integrity**: Verify the session SQLite DB file exists and is non-zero size +5. **Meta consistency**: Validate all required fields in meta.json +6. **Hook dispatch**: Fire `session.pre_resume` hook (sync-eligible) so extensions can add custom checks or bail +7. **Post-resume hook**: Fire `session.post_resume` after successful activation + +Each check fails independently with a clear error. The pipeline does NOT touch ACP message history — that's the agent's responsibility via `session/load`. + +## Alternatives Considered + +### Alternative 1: Infrastructure + Event Store Audit + +- **Description**: Same checks plus deep event store verification: sequence gap detection, last N events parseable, pending tool_call without tool_result detection. +- **Pros**: Catches more corruption +- **Cons**: Reads the event DB on every resume, adding latency. Event store corruption is handled by SQLite WAL. ACP agents handle their own message consistency. +- **Why rejected**: Over-reaches into territory that SQLite and ACP agents already handle. + +### Alternative 2: Minimal (Meta State Only) + +- **Description**: Only check meta.json state consistency. If state says active but no process, mark as crashed. +- **Pros**: Fastest, simplest +- **Cons**: Doesn't catch deleted workspaces, removed agent definitions, or corrupt event stores. 
User gets an opaque error later instead of a clear diagnostic at resume time. +- **Why rejected**: Resume is a natural checkpoint — worth spending a few milliseconds on validation. + +## Consequences + +### Positive + +- Clear diagnostics when resume fails (not opaque subprocess errors) +- Crash classification enables correct StopReason on previously-crashed sessions +- Hook integration lets extensions add custom repair logic +- Each check is independent — one failing doesn't block others from reporting + +### Negative + +- Adds ~5-10ms to resume path (filesystem stat calls + config resolution) +- Requires coordination with hooks system (assumes hooks platform is implemented) + +### Risks + +- Workspace may exist but be in a bad state (e.g., git repo corrupted). Mitigation: AGH only checks existence and accessibility, not deep state. + +## References + +- Current Resume: `internal/session/manager_lifecycle.go:169-283` +- Hooks techspec: `.compozy/tasks/hooks/_techspec.md` (session.pre_resume, session.post_resume events) diff --git a/.compozy/tasks/session-resilience/adrs/adr-004.md b/.compozy/tasks/session-resilience/adrs/adr-004.md new file mode 100644 index 000000000..b94b73c55 --- /dev/null +++ b/.compozy/tasks/session-resilience/adrs/adr-004.md @@ -0,0 +1,54 @@ +# ADR-004: Global-Only Configuration for Resilience Limits + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +Loop guard thresholds, iteration budgets, and timeout values need configuration. Options: global-only (single agh.toml section), per-agent (agent definition overrides), or per-session (callers set limits on CreateSession). + +## Decision + +All resilience configuration lives in `agh.toml` under `[session.limits]` and `[session.loop_guard]`. No per-agent overrides, no per-session overrides. + +Rationale: AGH is in greenfield alpha with a single operator. Per-agent tuning is premature — we don't yet know which agents need different thresholds. 
Global defaults establish the safety floor. If per-agent tuning proves necessary, it can be added later without breaking the global config. + +## Alternatives Considered + +### Alternative 1: Per-Agent with Global Defaults + +- **Description**: Global defaults in agh.toml, agent definitions can override with tighter/looser values, session-level override for programmatic callers. +- **Pros**: Maximum flexibility for heterogeneous agent workloads +- **Cons**: Three-level precedence is complex. No evidence yet that different agents need different limits. +- **Why rejected**: Premature for alpha. Can be added as a backward-compatible extension later. + +### Alternative 2: Per-Session Only + +- **Description**: No config file defaults. Callers set limits per CreateSession(). +- **Pros**: Maximum flexibility +- **Cons**: No safety net for callers who forget. Every session is unprotected by default. +- **Why rejected**: Safety must be opt-out, not opt-in. + +## Consequences + +### Positive + +- Single place to configure all resilience limits +- No precedence resolution logic needed +- Every session gets the same safety guarantees + +### Negative + +- Can't tune for agents with different behaviors (e.g., research agent needs more iterations than quick-fix agent) +- Must be extended later when multi-agent workloads diverge + +## References + +- GoClaw: per-agent max_iterations with global default of 30 +- Hermes: global max_iterations=90 with subagent override of 50 +- OpenFang: per-agent via AutonomousConfig with default of 50 diff --git a/.compozy/tasks/session-resilience/adrs/adr-005.md b/.compozy/tasks/session-resilience/adrs/adr-005.md new file mode 100644 index 000000000..4ebcbc77b --- /dev/null +++ b/.compozy/tasks/session-resilience/adrs/adr-005.md @@ -0,0 +1,71 @@ +# ADR-005: Defer Loop Guards to Phase 2 + +## Status + +Accepted + +## Date + +2026-04-09 + +## Context + +The original techspec proposed a LoopGuard with SHA-256 hashing, 3 detection strategies 
(same-args, outcome-aware, ping-pong), 4 graduated verdicts, and a 9-parameter config — all wired as native hooks on `tool.post_call` (async) and `turn.end` (sync). + +A council review (5 advisors: Pragmatic Engineer, Architect, Security Advocate, Product Mind, Devil's Advocate) and a Codex code review independently identified the same critical issues: + +1. **Wrong hook point**: `tool.post_call` is async and observational — it cannot prevent execution. The reference implementations (OpenFang, OpenClaw, GoClaw) all operate pre-execution inside the agent loop. AGH's guard would be a "post-mortem classifier masquerading as a safety control" (Security Advocate). +2. **Over-engineering for alpha**: Zero production data on loop frequency, false-positive rates, or which patterns actually occur. The 3-strategy LoopGuard is more complex than many of the agent guards it backs up. +3. **Hooks dependency**: The hooks platform has 6 tasks remaining. Gating safety features on unfinished infrastructure inverts the priority. +4. **Premature complexity**: Engineering time on the full LoopGuard is time NOT spent finishing hooks (P0) and unblocking extensions (P1) and tool registry (P2). + +## Decision + +Defer the full LoopGuard to Phase 2. Phase 1 ships StopReason + resume repair only, with zero hooks dependency. 
+ +Phase 2 design direction (when hooks are complete + production data exists): +- Use `tool.pre_call` (sync, deny-capable) as the enforcement point — NOT `tool.post_call` +- Decompose into sensor (evidence on `tool.post_call` async) + actuator (policy on `tool.pre_call` sync deny) +- Ship a minimal `max_turns` counter (~30 lines) as a native hook during hooks task 10 (turn dispatch), before the full LoopGuard +- Place in a separate package (`internal/guard`) with interface-based injection +- Only add SHA-256 hashing, ping-pong, outcome-aware detection when data justifies it + +## Alternatives Considered + +### Alternative 1: Ship Full LoopGuard Now + +- **Description**: Build all 3 strategies, SHA-256 hashing, 9-parameter config as originally specified +- **Pros**: Maximum protection from day one +- **Cons**: Wrong hook point (post-execution), over-engineered, blocks on hooks platform, no production data to calibrate thresholds +- **Why rejected**: Council unanimity (5/5) and Codex review both flagged this as premature + +### Alternative 2: Ship Simple Counter as Direct Method Call + +- **Description**: IterationBudget as a direct method call in the session manager, no hooks involvement +- **Pros**: Ships immediately, no hooks dependency +- **Cons**: Creates a second code path that hooks would eventually replace. Pragmatic Engineer advocated this but Architect flagged the dual-path problem. 
+- **Why rejected**: Better to wire the minimal guard into hooks task 10 where turn dispatch is already being integrated — one code path, not two + +## Consequences + +### Positive + +- Phase 1 is smaller, faster to ship, with zero hooks dependency +- Engineering time redirected to finishing hooks platform (P0) +- Phase 2 will use the correct hook point (`tool.pre_call` sync deny) +- Production data from Phase 1's StopReason telemetry will inform Phase 2 guard design + +### Negative + +- No loop protection until Phase 2 +- ACP agents without built-in guards (early third-party agents) are unprotected + +### Risks + +- Phase 2 may slip indefinitely. Mitigation: assign the minimal `max_turns` counter to hooks task 10, not a separate workstream. + +## References + +- Council review: 5 advisors unanimously agreed on deferral +- Codex review: "no-go on full spec" — recommended splitting into two deliverables +- Hooks techspec: `tool.pre_call` is sync-eligible with deny support (`events.go:91`, `dispatch.go:306`) diff --git a/.compozy/tasks/session-resilience/analysis_claude_code.md b/.compozy/tasks/session-resilience/analysis_claude_code.md new file mode 100644 index 000000000..04ce17574 --- /dev/null +++ b/.compozy/tasks/session-resilience/analysis_claude_code.md @@ -0,0 +1,334 @@ +# Claude Code: Session Resilience Analysis + +Deep analysis of Claude Code's source at `/Users/pedronauck/dev/knowledge/.resources/claude-code/` covering stop reason taxonomy, session repair on resume, and loop/recursion guards. + +--- + +## 1. Stop Reason Taxonomy + +Claude Code uses a two-layer stop reason system: **internal query loop reasons** (why the `query()` generator returned) and **SDK result subtypes** (what the external caller sees). + +### 1.1 Internal Query Loop Terminal Reasons + +The `query()` function in `query.ts` returns a `Terminal` object with a `reason` string. 
Every exit path from the query loop is explicitly named: + +| Reason | Trigger | Location | +|--------|---------|----------| +| `'completed'` | Model finished naturally (no tool calls, stop hooks pass) | `query.ts:1264, 1357` | +| `'aborted_streaming'` | User cancelled during model streaming (`abortController.signal.aborted`) | `query.ts:1051` | +| `'aborted_tools'` | User cancelled during tool execution | `query.ts:1515` | +| `'max_turns'` | Turn count exceeded `maxTurns` limit | `query.ts:1711` | +| `'hook_stopped'` | A Stop hook set `preventContinuation = true` | `query.ts:1520` | +| `'stop_hook_prevented'` | Stop hook explicitly prevented continuation | `query.ts:1279` | +| `'blocking_limit'` | Token count hit hard limit (auto-compact off, context too large) | `query.ts:646` | +| `'prompt_too_long'` | API returned prompt-too-long error, all recovery failed | `query.ts:1175, 1182` | +| `'image_error'` | Image too large or resize failed | `query.ts:977, 1175` | +| `'model_error'` | Unrecoverable API/model error (catch-all) | `query.ts:996` | + +### 1.2 SDK Result Subtypes (External Interface) + +The `QueryEngine` (`QueryEngine.ts`) translates internal reasons into SDK-facing result messages: + +| Subtype | Meaning | `is_error` | +|---------|---------|------------| +| `'success'` | Turn completed normally | `false` (or `true` if API error occurred) | +| `'error_during_execution'` | Model failed to produce valid response | `true` | +| `'error_max_turns'` | Hit `maxTurns` limit | `true` | +| `'error_max_budget_usd'` | Spend exceeded `maxBudgetUsd` | `true` | +| `'error_max_structured_output_retries'` | Structured output validation failed N times (default 5) | `true` | + +### 1.3 API Error Classification + +The `SDKAssistantMessageErrorSchema` defines the error types surfaced to callers: +- `'authentication_failed'` +- `'billing_error'` +- `'rate_limit'` +- `'invalid_request'` +- `'server_error'` +- `'unknown'` +- `'max_output_tokens'` + +### 1.4 Session End Reasons 
+ +The `SessionEnd` hook (`coreSchemas.ts:747-755`) defines why a session ended: +- `'clear'` -- User cleared the session +- `'resume'` -- Session ended because a different session was resumed +- `'logout'` -- User logged out +- `'prompt_input_exit'` -- User exited at the prompt (Ctrl+D, /exit) +- `'other'` -- Catch-all +- `'bypass_permissions_disabled'` -- Bypass permissions mode was disabled + +### 1.5 API-Level Stop Reasons + +From the Anthropic API, `stop_reason` values include: +- `'end_turn'` -- Normal completion +- `'tool_use'` -- Model wants to use a tool (unreliable per comments in `query.ts:557`) +- `'max_tokens'` -- Output token limit hit (handled via recovery loop) + +The SDK captures `stop_reason` from `message_delta` events during streaming (`QueryEngine.ts:806-808`). + +--- + +## 2. Session Repair on Resume + +### 2.1 Resume Entry Points + +Claude Code supports multiple resume paths: +- `--continue` (most recent session) +- `--resume <session-id>` (specific session) +- `--resume <transcript-path>` (specific transcript file) + +All funnel through `loadConversationForResume()` in `utils/conversationRecovery.ts`. + +### 2.2 Transcript Loading and Chain Reconstruction + +Sessions are persisted as JSONL files with `parentUuid`-linked messages forming a DAG. Resume must reconstruct the linear conversation: + +1. **`loadTranscriptFile()`** -- Parses JSONL, builds `byUuid` map, identifies leaf UUIDs +2. **`buildConversationChain()`** -- Walks `parentUuid` back from the newest non-sidechain leaf to build the linear chain +3. **`recoverOrphanedParallelToolResults()`** -- Finds `tool_result` messages whose `parentUuid` points to the wrong message (due to parallel tool execution race conditions) and inserts them at the correct position +4. 
**`removeExtraFields()`** -- Strips internal fields before deserialization + +### 2.3 Consistency Checks + +**`checkResumeConsistency()`** (`sessionStorage.ts:2224`) -- Finds the latest `turn_duration` checkpoint message in the chain and compares its recorded `messageCount` against the chain's reconstructed position. Emits `tengu_resume_consistency_delta` to BigQuery: +- `delta > 0`: resume loaded MORE messages than were in-session (the common failure mode) +- `delta < 0`: resume loaded FEWER (chain truncation) +- `delta = 0`: round-trip consistent + +### 2.4 Message Deserialization Filters + +`deserializeMessagesWithInterruptDetection()` applies five repair passes in order: + +1. **`migrateLegacyAttachmentTypes()`** -- Transforms `new_file` to `file`, `new_directory` to `directory`, backfills `displayPath` +2. **Strip invalid permission modes** -- Removes `permissionMode` values from deserialized user messages that don't match the current build's valid modes +3. **`filterUnresolvedToolUses()`** -- Removes assistant messages containing `tool_use` blocks that have no matching `tool_result`. This is the primary crash recovery mechanism: if the process crashed between emitting a `tool_use` and receiving the `tool_result`, the orphaned assistant message is dropped +4. **`filterOrphanedThinkingOnlyMessages()`** -- Removes assistant messages that contain ONLY thinking/redacted_thinking blocks without text or tool_use. These arise from streaming yielding separate messages per content block; interleaved user messages prevent merging by `message.id` +5. 
**`filterWhitespaceOnlyAssistantMessages()`** -- Removes assistant messages with only whitespace text (happens when model outputs `\n\n` before thinking and user cancels mid-stream) + +### 2.5 Turn Interruption Detection + +`detectTurnInterruption()` classifies the conversation state after filtering: + +| Last Message Type | Condition | Classification | +|-------------------|-----------|----------------| +| `assistant` | Any | `'none'` (turn completed -- `stop_reason` is always null on persisted messages in the streaming path) | +| `user` (meta/compact) | `isMeta` or `isCompactSummary` | `'none'` | +| `user` (tool result) | Terminal tool (Brief/SendUserFile) | `'none'` | +| `user` (tool result) | Non-terminal | `'interrupted_turn'` -- tool was executing when crash occurred | +| `user` (plain text) | Not meta | `'interrupted_prompt'` -- user submitted prompt, model never responded | +| `attachment` | Any | `'interrupted_turn'` | + +When an interruption is detected: +- `interrupted_turn`: A synthetic "Continue from where you left off." 
user message is appended +- `interrupted_prompt`: The original user message is preserved for auto-continuation +- A synthetic assistant sentinel (`NO_RESPONSE_REQUESTED`) is appended after the last user message to make the conversation API-valid + +### 2.6 State Restoration + +Beyond messages, resume restores: +- **Skill state** (`restoreSkillStateFromMessages()`) -- Scans for `invoked_skills` attachments and re-registers skills to survive across compaction cycles +- **Skill listing suppression** -- Prevents re-announcing available skills +- **File history** (`copyFileHistoryForResume()`) -- Copies file state snapshots +- **Plans** (`copyPlanForResume()`) -- Associates plans with the resumed session +- **Agent context** -- Restores `agentName`, `agentColor`, `agentSetting`, `customTitle`, `tag`, `mode` (coordinator/normal) +- **Worktree session** -- Restores worktree path if present +- **PR context** -- Restores `prNumber`, `prUrl`, `prRepository` +- **Coordinator mode** (`matchSessionMode()`) -- Flips `CLAUDE_CODE_COORDINATOR_MODE` env var to match the resumed session's mode +- **Session start hooks** -- Fires `processSessionStartHooks('resume', { sessionId })` + +### 2.7 Transcript Pre-flush + +The `QueryEngine` writes the user's message to the transcript BEFORE entering the query loop (`QueryEngine.ts:440-463`). This ensures: +- If the process is killed before the API responds, the transcript is resumable from the point the user message was accepted +- `--resume` finds the session even if no API response ever arrived + +### 2.8 Compact Boundary Handling + +Before writing a `compact_boundary` message, a flush of all in-memory messages up through the preserved segment tail is forced. Without this, if the subprocess restarts between turns, `tailUuid` points to a never-written message, and `applyPreservedSegmentRelinks` fails. + +--- + +## 3. 
Loop/Recursion Guards + +### 3.1 Max Turns Limit + +**The primary loop guard.** The `query()` function tracks `turnCount` and checks against `maxTurns` before each recursive iteration (`query.ts:1704-1711`): + +```typescript +if (maxTurns && nextTurnCount > maxTurns) { + yield createAttachmentMessage({ + type: 'max_turns_reached', + maxTurns, + turnCount: nextTurnCount, + }) + return { reason: 'max_turns', turnCount: nextTurnCount } +} +``` + +`maxTurns` can be set via: +- `QueryEngine.config.maxTurns` -- SDK/headless callers +- Agent frontmatter `maxTurns` field -- Per-agent limits +- `AgentDefinitionSchema` validates it as a positive integer + +There is **no hardcoded default** for the main session -- the limit is only enforced when explicitly set. Subagents inherit their definition's `maxTurns` or use the caller-provided value. + +### 3.2 Budget/Cost Guards + +**USD Budget** (`QueryEngine.ts:972-1002`): +```typescript +if (maxBudgetUsd !== undefined && getTotalCost() >= maxBudgetUsd) { + // yields error_max_budget_usd result +} +``` +Checked after every message in the query loop. Yields `error_max_budget_usd`. + +**Token Budget** (`query/tokenBudget.ts`): +- Tracks `continuationCount`, `lastDeltaTokens`, `lastGlobalTurnTokens` +- `COMPLETION_THRESHOLD = 0.9` -- Continues auto-nudging until 90% of budget consumed +- `DIMINISHING_THRESHOLD = 500` -- Early stop if less than 500 new tokens per check for 3+ consecutive checks (diminishing returns detection) +- When continuing, injects a nudge message with percentage and token usage + +### 3.3 Max Output Tokens Recovery + +When the model hits `max_output_tokens`, Claude Code has a multi-stage recovery (`query.ts:1188-1256`): + +1. **Escalation** (first attempt): If using the default 8k cap and no env override, retry at `ESCALATED_MAX_TOKENS` (64k) -- single shot, same request +2. **Multi-turn recovery** (up to `MAX_OUTPUT_TOKENS_RECOVERY_LIMIT = 3`): Injects a meta message "Output token limit hit. 
Resume directly -- no apology, no recap..." and continues the loop +3. **Exhaustion**: After 3 recovery attempts, surfaces the withheld error + +### 3.4 Prompt-Too-Long Recovery Chain + +When the context exceeds the model's limit, a cascading recovery chain fires: + +1. **Context Collapse Drain** -- Commits all staged collapses (cheap, keeps granular context) +2. **Reactive Compact** -- Full conversation summarization as fallback +3. **Surface Error** -- If both fail, yields the error and exits + +Guard against infinite loops: `hasAttemptedReactiveCompact` flag prevents retry spirals. If reactive compact already ran and prompt is still too long, the error surfaces immediately. + +### 3.5 Stop Hook Loop Prevention + +Stop hooks can return `blockingErrors` that cause the query loop to continue (model retries with the error feedback). Key guards: + +- `stopHookActive` flag tracks whether we're already in a stop-hook-retry loop +- `hasAttemptedReactiveCompact` is **preserved** across stop-hook retries to prevent: `compact -> still too long -> error -> stop hook blocking -> compact -> ...` infinite loop (`query.ts:1295-1298`) +- API error messages bypass stop hooks entirely: "hooks evaluating [an error response] create a death spiral: error -> hook blocking -> retry -> error -> ..." 
(`query.ts:1259-1264`) + +### 3.6 API Retry Guards + +`withRetry.ts` enforces: +- `DEFAULT_MAX_RETRIES = 10` (configurable via `CLAUDE_CODE_MAX_RETRIES` env var) +- `MAX_529_RETRIES = 3` (server overload errors) +- `BASE_DELAY_MS = 500` with exponential backoff +- Foreground-only 529 retry: Background queries (summaries, titles, suggestions) bail immediately on 529 to avoid "3-10x gateway amplification" during capacity cascades +- Model fallback: On `FallbackTriggeredError`, switches to `fallbackModel` and retries + +### 3.7 Structured Output Retry Guard + +`QueryEngine.ts:1004-1048`: +- `MAX_STRUCTURED_OUTPUT_RETRIES` defaults to 5 (configurable via env) +- Counts `SyntheticOutputTool` calls per query; exits with `error_max_structured_output_retries` when exceeded + +### 3.8 Subagent Depth/Recursion Guards + +- Subagents run `query()` independently but with their own `maxTurns` (from agent definition or caller) +- Async agents get a new unlinked `AbortController` (run independently of parent) +- Sync agents share parent's `AbortController` (parent cancel kills child) +- Agent definitions support a `querySource` tag (e.g., `'agent:builtin:fork'`) used for recursive-fork guards at the `AgentTool.tsx` call site +- `filterIncompleteToolCalls()` in `runAgent.ts` strips tool calls from parent context that lack results, preventing API errors in forked conversations + +### 3.9 Auto-Compact as Implicit Guard + +Auto-compaction fires when context tokens approach the model's limit, summarizing history. This acts as an implicit loop guard by preventing context exhaustion, but it is NOT itself a turn limiter. 
It works with: +- `snipCompact` -- Removes old messages exceeding a threshold +- `microcompact` -- Per-message budget on tool result size +- `contextCollapse` -- Hierarchical context management (staged collapses) +- Consecutive failure tracking with circuit breaker + +### 3.10 AbortController Chain + +Every long-running operation checks `toolUseContext.abortController.signal`: +- Model streaming loop checks after every chunk +- Tool execution checks before and after each tool +- Stop hooks check after each hook result +- User interrupts (Ctrl+C) set `signal.reason = 'interrupt'` for submit-interrupts (queued message follows) + +--- + +## 4. Key Code References + +| File | Key Content | +|------|-------------| +| `query.ts` | Main query loop with all terminal reasons, recovery chains, and guards | +| `QueryEngine.ts` | SDK-facing orchestrator, budget checks, result subtype translation | +| `utils/conversationRecovery.ts` | `loadConversationForResume()`, `deserializeMessagesWithInterruptDetection()`, `detectTurnInterruption()` | +| `utils/sessionStorage.ts:2224` | `checkResumeConsistency()` -- delta monitoring for write/load drift | +| `utils/messages.ts:2795` | `filterUnresolvedToolUses()` -- primary crash recovery filter | +| `utils/messages.ts:4991` | `filterOrphanedThinkingOnlyMessages()` | +| `query/stopHooks.ts` | Stop hook execution, blocking error handling, loop prevention | +| `query/tokenBudget.ts` | Token budget tracking with diminishing returns detection | +| `services/api/withRetry.ts` | API retry logic with exponential backoff, 529-specific limits | +| `entrypoints/sdk/coreSchemas.ts:747` | `EXIT_REASONS` enum, `HOOK_EVENTS` list | +| `entrypoints/sdk/coreSchemas.ts:1407-1455` | SDK result schemas (success, error subtypes) | +| `tools/AgentTool/runAgent.ts` | Subagent lifecycle, abort controller isolation, `maxTurns` inheritance | +| `coordinator/coordinatorMode.ts` | Coordinator mode matching on resume | +| `bootstrap/state.ts` | Global state including 
`strictToolResultPairing` flag | + +--- + +## 5. Patterns Worth Adopting + +### 5.1 Multi-Pass Message Sanitization on Resume + +Claude Code's `deserializeMessagesWithInterruptDetection()` runs 5 ordered filters that progressively clean the message history. AGH should adopt this pattern: +- Filter orphaned tool calls (no result) +- Filter orphaned thinking blocks +- Filter whitespace-only messages +- Detect and classify turn interruptions +- Append synthetic continuation messages + +This is more robust than trying to validate everything in a single pass. + +### 5.2 Explicit Terminal Reason Taxonomy + +Every exit path from the query loop returns a `{ reason: string }` object. This makes telemetry, debugging, and downstream behavior trivial. AGH should define a Go enum of terminal reasons rather than relying on error types alone. + +### 5.3 Cascading Recovery with One-Shot Guards + +The pattern of `hasAttemptedReactiveCompact` (boolean latch) preventing infinite recovery loops is elegant. For AGH: +- Each recovery mechanism gets a one-shot flag +- Recovery chains cascade (cheap first, expensive last) +- Once exhausted, error surfaces immediately +- Flags are explicitly preserved across stop-hook retries + +### 5.4 Budget Checks in the Hot Loop + +Cost and turn limits are checked inline after every message yield, not after the query completes. This enables fine-grained control and immediate termination. AGH should check budgets at the same granularity (per-event, not per-turn). + +### 5.5 Transcript-Before-Query Pattern + +Writing the user message to the transcript BEFORE entering the query loop ensures sessions are always resumable, even if the API call never completes. This is a crash recovery best practice AGH must adopt. + +### 5.6 Turn Interruption Classification + +The 3-way classification (`none`, `interrupted_prompt`, `interrupted_turn`) with synthetic continuation messages is clean. 
AGH should detect: +- Clean completion (assistant message is last) +- Mid-tool interruption (tool result is last, non-terminal) +- Pre-response interruption (user message is last, unanswered) + +### 5.7 Consistency Delta Monitoring + +`checkResumeConsistency()` compares checkpointed message counts against reconstructed chain lengths. This detects silent data corruption (messages lost or duplicated during write/load round-trips). AGH should implement similar checksums or sequence counters in its SQLite event store. + +### 5.8 Hook-Aware Loop Guards + +Stop hooks that return blocking errors cause the model to retry, but API error responses bypass stop hooks entirely. Without this, errors create death spirals: `error -> hook blocking -> retry -> error -> ...`. AGH's hook system must have the same bypass for error states. + +### 5.9 Subagent Abort Controller Isolation + +Async subagents get unlinked abort controllers so they run independently. Sync subagents share the parent's controller so parent cancel kills child. AGH should implement the same dual strategy for its session spawning. + +### 5.10 Diminishing Returns Detection + +The token budget system's `DIMINISHING_THRESHOLD = 500` check (if the model produces fewer than 500 new tokens for 3+ consecutive continuation nudges, stop early) prevents wasted compute. AGH should adopt similar heuristics for its agentic loop termination. diff --git a/.compozy/tasks/session-resilience/analysis_goclaw.md b/.compozy/tasks/session-resilience/analysis_goclaw.md new file mode 100644 index 000000000..5d8d337d6 --- /dev/null +++ b/.compozy/tasks/session-resilience/analysis_goclaw.md @@ -0,0 +1,363 @@ +# GoClaw: Session Resilience Analysis + +## 1. Stop Reason Taxonomy + +GoClaw has an **implicit** stop reason taxonomy -- there is no single `StopReason` enum. 
Instead, termination reasons are encoded across multiple layers: the LLM provider's `FinishReason`, the agent loop's internal exit paths, and the scheduler/event system's lifecycle events. + +### 1.1 Provider-Level FinishReason + +The `ChatResponse.FinishReason` field (`internal/providers/types.go:77`) carries the raw LLM stop signal: + +| Value | Meaning | +|-------|---------| +| `"stop"` | Model completed naturally (no more tool calls, produced text) | +| `"tool_calls"` | Model wants to call tools (loop continues) | +| `"length"` | Output truncated -- hit `max_tokens` ceiling | + +GoClaw does **not** pass the provider FinishReason through to the caller. It is consumed internally by the loop to decide next steps. + +### 1.2 Agent Loop Exit Paths + +The `runLoop()` function in `internal/agent/loop.go` has these distinct exit paths: + +| Exit Path | Trigger | Result | +|-----------|---------|--------| +| **Natural completion** | `len(resp.ToolCalls) == 0` -- model produces text without requesting tools | Normal `RunResult` with content | +| **Max iterations** | `rs.iteration >= maxIter` (default 30, configurable per-agent and per-request) | Loop ends; last iteration strips all tools and forces text-only response | +| **Budget exceeded (monthly)** | `spentCents >= l.budgetMonthlyCents` (pre-loop check) | Returns error: `"monthly budget exceeded ($X / $Y)"` | +| **Tool budget exceeded** | `rs.totalToolCalls > l.maxToolCalls` | Injects system message forcing summarization, then one more iteration | +| **LLM call error** | Provider returns error (API failure, rate limit, auth) | Returns error: `"LLM call failed (iteration N): <error>"` | +| **Loop detector kill** | Same-args loop, same-result loop, or read-only streak hits critical threshold | `RunResult.LoopKilled = true`, content set to explanation | +| **Truncation retry limit** | `maxTruncationRetries = 3` consecutive truncated outputs | Loop breaks, returns truncation fallback message | +| **Context cancellation** | 
`ctx.Done()` fires (user `/stop` command or parent cancellation) | Returns `ctx.Err()` -- typically `context.Canceled` | +| **Panic recovery** | `recover()` in deferred handler catches panics | Returns error: `"agent loop panic: "` | + +### 1.3 Run-Level Event Classification + +The `Run()` method in `internal/agent/loop_run.go` maps loop outcomes to four event types: + +| Event | Condition | Protocol Constant | +|-------|-----------|-------------------| +| `run.completed` | `err == nil` | `AgentEventRunCompleted` | +| `run.failed` | `err != nil && ctx.Err() == nil` | `AgentEventRunFailed` | +| `run.cancelled` | `err != nil && ctx.Err() != nil` (user cancel) | `AgentEventRunCancelled` | +| `run.retrying` | LLM call being retried (transient provider error) | `AgentEventRunRetrying` | + +### 1.4 Trace Status Taxonomy + +Traces (observability layer in `internal/store/tracing_store.go`) use four statuses -- one in-flight (`running`) and three terminal: + +```go +TraceStatusRunning = "running" +TraceStatusCompleted = "completed" +TraceStatusError = "error" +TraceStatusCancelled = "cancelled" +``` + +The mapping: `run.completed` -> `completed`, `run.failed` -> `error`, `run.cancelled` -> `cancelled`. + +### 1.5 Team Task Outcome Mapping + +In `cmd/gateway_consumer_post_turn.go`, the `resolveTeamTaskOutcome` function maps run results to task lifecycle states: + +| Condition | Task Action | +|-----------|-------------| +| `outcome.Err != nil` | `FailTask` -- agent errored | +| `flags.Completed \|\| flags.Escalated` | Skip -- tool already handled lifecycle | +| `flags.Reviewed` | Renew lock -- task under review | +| `outcome.Result.LoopKilled` | `FailTask` with reason `"loop_detector_kill"` | +| Default (normal completion) | `CompleteTask` with deliverables | + + +## 2. Session Repair on Resume + +GoClaw's session model is **stateless-resume** -- there is no explicit "resume after crash" mechanism with consistency checks. Instead, it relies on layered persistence and startup recovery. 
+ +### 2.1 Session Storage Architecture + +Sessions are persisted in two ways: + +1. **JSON files on disk** (`internal/sessions/manager.go`): Each session is atomically written via temp-file-then-rename (`tmpFile -> Sync -> Rename`). On startup, `loadAll()` reads all `.json` files from the storage directory and populates the in-memory map. There is no schema validation, version check, or integrity verification -- JSON unmarshal errors are silently skipped. + +2. **SQLite database** (`internal/store/sqlitestore/`): For the gateway, sessions are persisted in SQLite with a full schema including `spawn_depth`, `agent_id`, `user_id`, `metadata`, `team_id`, and `tenant_id`. + +### 2.2 Crash Safety: Periodic Checkpoint + +The loop implements periodic checkpointing to limit data loss on crash (`loop.go:750-762`): + +```go +const checkpointInterval = 5 +if rs.iteration > 0 && rs.iteration%checkpointInterval == 0 && len(rs.pendingMsgs) > 0 { + for _, msg := range rs.pendingMsgs { + l.sessions.AddMessage(ctx, req.SessionKey, msg) + } + rs.checkpointFlushedMsgs += len(rs.pendingMsgs) + rs.pendingMsgs = rs.pendingMsgs[:0] + l.sessions.Save(ctx, req.SessionKey) // best-effort persistence +} +``` + +This means: on a crash between checkpoints, up to 5 iterations of messages are lost. The comment explicitly says: "Trade-off: partial visibility to concurrent reads vs full data loss on crash." + +### 2.3 Stale Trace Recovery on Startup + +The tracing collector (`internal/tracing/collector.go:228-246`) performs orphan trace cleanup on startup: + +```go +func (c *Collector) recoverStaleTraces() { + const staleThreshold = 30 * time.Minute + cutoff := time.Now().UTC().Add(-staleThreshold) + recovered, err := c.store.RecoverStaleRunningTraces(ctx, cutoff) +} +``` + +Any trace stuck in `"running"` status from before the crash (older than 30 minutes) is marked as `"error"`. This prevents the UI from showing perpetually-running ghosts. 
+ +### 2.4 Safety Net Trace Finalization + +In `loop_run.go:122-138`, a deferred function ensures root traces are always finalized, even on panic or goroutine leak: + +```go +defer func() { + if traceFinalized { return } + slog.Warn("tracing: safety-net finalizing orphan trace", ...) + l.traceCollector.FinishTrace(safeCtx, traceID, store.TraceStatusError, + "trace finalized by safety net (likely panic or goroutine leak)", "") +}() +``` + +### 2.5 Session Resume Behavior + +When a session is "resumed" (user sends a new message to an existing session key), GoClaw simply: + +1. Loads the full message history from the session store (`GetHistory`) +2. Loads the summary if one exists (`GetSummary`) +3. Rebuilds the LLM messages from scratch via `buildMessages()` +4. Runs a new loop iteration + +There are **no consistency checks** such as: +- Verifying the last message is properly terminated (no half-written assistant turns) +- Detecting orphaned tool calls (tool_use without matching tool_result) +- Repairing incomplete tool execution sequences +- Validating message role alternation + +The system relies on the LLM being robust enough to handle inconsistent history. If a crash left a session with an assistant message containing tool_calls but no corresponding tool results, the next run would simply append the new user message and let the LLM sort it out. + +### 2.6 History Compaction Safety Net + +The `channels/history_compaction.go` notes a "safety net for post-restart scenarios" -- after restart, the in-memory count may be stale, so it re-checks the DB for the real count before deciding whether compaction is needed. 
+ +### 2.7 What GoClaw Does NOT Do on Resume + +- No WAL-style intent logging before operations +- No session state machine (sessions have no status field -- they're just message arrays) +- No "last run outcome" tracking on the session +- No explicit dirty/clean session markers +- No process lock per session to detect unclean shutdown +- No message sequence numbers or gap detection + + +## 3. Loop/Recursion Guards + +GoClaw has the most sophisticated loop detection system of any agent harness I've analyzed. It operates at three layers. + +### 3.1 Layer 1: Same-Args Loop Detection (toolLoopState) + +**File**: `internal/agent/toolloop.go` + +Tracks the last 30 tool calls (`toolLoopHistorySize = 30`) in a sliding window. Each entry records: +- Tool name +- SHA-256 hash of `toolName + stableJSON(args)` (deterministic key ordering) +- SHA-256 hash of the tool result content + +Detection logic (`detect()`): counts records where both argsHash AND resultHash match. Only flags **true no-progress loops** -- same input producing same output. + +| Threshold | Action | +|-----------|--------| +| 3 identical calls (`toolLoopWarningThreshold`) | Inject warning message into conversation: "Try a completely different approach..." | +| 5 identical calls (`toolLoopCriticalThreshold`) | Force-stop loop, set `rs.loopKilled = true`, return explanation to user | + +### 3.2 Layer 2: Same-Result Detection + +**File**: `internal/agent/toolloop.go:246-269` + +Catches a more subtle loop: the agent varies arguments slightly but gets identical results back each time. + +| Threshold | Action | +|-----------|--------| +| 4 same-result calls (`sameResultWarning`) | Warning: "The information is already in your context. Stop re-reading..." 
| +| 6 same-result calls (`sameResultCritical`) | Force-stop with `loopKilled = true` | + +### 3.3 Layer 3: Read-Only Streak Detection (Uniqueness-Aware) + +**File**: `internal/agent/toolloop.go:199-241` + +Detects agents stuck in read-only mode (reading files without ever writing). Uses a **uniqueness ratio** to distinguish legitimate exploration from stuck loops: + +**Tool classification:** +- **Mutating** (resets streak): `write_file`, `edit`, `edit_file`, `spawn`, `message`, `create_image/video/audio`, `tts`, `cron`, `publish_skill`, `sessions_send` +- **Neutral** (no effect on streak): `exec`, `bash`, `mcp_*` prefixed tools +- **Read-only** (increments streak): everything else (`read_file`, `list_files`, etc.) +- **team_tasks**: classified by action -- `list/get/search` are read-only, `progress` is neutral, `create/complete/cancel/comment` are mutating + +**Uniqueness ratio** = `readOnlyUnique / readOnlyStreak` + +| Mode | Ratio | Warning | Critical | +|------|-------|---------|----------| +| Stuck (re-reading same files) | <= 0.6 | 8 consecutive reads | 12 consecutive reads | +| Exploration (unique files) | > 0.6 | 24 consecutive reads | 36 consecutive reads | + +This was specifically designed to fix issue #506 where an agent exploring a monorepo with 11+ unique file reads was falsely killed. 
+ +### 3.4 Iteration Budget Guards + +**File**: `internal/agent/loop.go`, `internal/config/defaults.go` + +| Guard | Default | Configuration | +|-------|---------|---------------| +| `maxIterations` | 30 | Per-agent via DB, per-request via `RunRequest.MaxIterations` (must be lower than agent default) | +| `maxToolCalls` | 0 (unlimited) | Per-agent via `Loop.maxToolCalls` | +| Final iteration tool strip | At `iteration == maxIter` | Removes all tool definitions, injects "[System] Final iteration reached" | +| 75% budget nudge | At `iteration == maxIter*3/4` when no text response yet | Warns: "Start summarizing your findings" | +| Skill evolution nudges | At 70% and 90% of iteration budget | Budget pressure reminders | + +### 3.5 Truncation Retry Guard + +**File**: `internal/agent/loop.go:417-449` + +When the model's output is truncated (`FinishReason == "length"`) or tool call arguments are malformed: + +```go +const maxTruncationRetries = 3 +``` + +After 3 consecutive truncation retries, the loop gives up rather than burning all iterations. Sets a fallback content message. + +### 3.6 Subagent Spawn Depth Limit + +**File**: `internal/tools/subagent_config.go`, `internal/tools/subagent_spawn.go` + +| Guard | Default | Max | +|-------|---------|-----| +| `MaxSpawnDepth` | 1 | 5 (configurable, capped by edition) | +| `MaxConcurrent` subagents | 8 | Configurable | +| `MaxChildrenPerAgent` | 5 | Configurable | +| `MaxRetries` per subagent | 2 | Configurable | + +At max depth, leaf agents have tools removed (`SubagentDenyLeaf`): they cannot spawn further subagents. The system prompt explicitly tells them: "You are a leaf worker and CANNOT spawn further sub-agents." 
+ +### 3.7 Team Task Circuit Breaker + +From `docs/11-agent-teams.md`: +- Tasks auto-fail after 3 dispatch attempts (`maxTaskDispatches`) -- prevents infinite loops when agents can't complete a task +- Lead self-dispatch guard: tasks assigned to the lead agent are auto-failed (prevents dual-session loop) +- Loop detector kills propagate to task failure with reason `"loop_detector_kill"` + +### 3.8 Input Guards + +**File**: `internal/agent/input_guard.go` + +Prevents injection attacks that could cause runaway behavior: +- Scans for: `ignore_instructions`, `role_override`, `system_tags`, `instruction_injection`, `null_bytes`, `delimiter_escape` +- Actions: `"log"`, `"warn"` (default), `"block"` (rejects message), `"off"` +- Message size limit: `DefaultMaxMessageChars = 32000` -- oversized messages are truncated with a system notice + +### 3.9 Context Window Management + +**File**: `internal/agent/pruning.go`, `internal/agent/loop_compact.go` + +Two-phase approach when context exceeds budget: +1. **Phase 1: Prune old tool results** at 70% of history budget -- soft trim (keep head+tail), then hard clear (replace with placeholder) +2. **Phase 2: Mid-loop compaction** -- LLM-based summarization of first ~70% of messages, keeping last ~30% intact + +Per-result guard: any single tool result exceeding 30% of context window is force-trimmed regardless of overall ratio. + +### 3.10 Panic Recovery + +**Files**: `internal/agent/loop.go:36-44`, `internal/safego/recover.go` + +The main `runLoop()` has a top-level `defer recover()` that catches panics and converts them to errors. Parallel tool execution goroutines each have their own `defer safego.Recover()` that converts panics to error results rather than crashing the loop. + + +## 4. 
Key Code References + +| File | Lines | What | +|------|-------|------| +| `internal/agent/loop.go` | 35-45 | Panic recovery in runLoop | +| `internal/agent/loop.go` | 138-141 | maxIterations override logic | +| `internal/agent/loop.go` | 144-153 | Monthly budget pre-check | +| `internal/agent/loop.go` | 156-764 | Main iteration loop with all exit paths | +| `internal/agent/loop.go` | 417-449 | Truncation retry guard | +| `internal/agent/loop.go` | 513-524 | Tool budget exceeded handler | +| `internal/agent/loop.go` | 750-762 | Periodic checkpoint flush (every 5 iterations) | +| `internal/agent/loop_run.go` | 18-245 | Run() with event emission and trace lifecycle | +| `internal/agent/loop_run.go` | 122-138 | Safety-net trace finalization deferred | +| `internal/agent/loop_run.go` | 185-214 | Error vs cancel classification for events/traces | +| `internal/agent/toolloop.go` | 12-33 | All detection threshold constants | +| `internal/agent/toolloop.go` | 59-142 | toolLoopState: record + detect (same-args) | +| `internal/agent/toolloop.go` | 148-197 | recordMutation + read-only streak tracking | +| `internal/agent/toolloop.go` | 205-241 | detectReadOnlyStreak with uniqueness ratio | +| `internal/agent/toolloop.go` | 246-269 | detectSameResult (cross-args same output) | +| `internal/agent/loop_tools.go` | 15-147 | processToolResult with loop detection integration | +| `internal/agent/loop_tools.go` | 149-169 | checkReadOnlyStreak with kill flag | +| `internal/agent/loop_types.go` | 488-498 | RunResult including LoopKilled flag | +| `internal/agent/loop_types.go` | 510-557 | runState with all mutable loop state | +| `internal/agent/loop_finalize.go` | 36-209 | finalizeRun: sanitize, flush, build result | +| `internal/agent/loop_tool_filter.go` | 86-93 | Final iteration: strip tools, force text | +| `internal/agent/pruning.go` | 101-269 | Two-pass context pruning (soft trim + hard clear) | +| `internal/agent/loop_compact.go` | 44-118 | Mid-loop LLM compaction | +| 
`internal/agent/input_guard.go` | 1-99 | Prompt injection detection | +| `internal/config/defaults.go` | 1-13 | All default constants (30 iterations, 200K context, 8192 max tokens) | +| `internal/tools/subagent_config.go` | 7-15 | Subagent defaults (depth 1, max 8 concurrent, 5 children) | +| `internal/tools/subagent_spawn.go` | 38-41 | Spawn depth enforcement | +| `internal/providers/types.go` | 77 | FinishReason field on ChatResponse | +| `internal/store/tracing_store.go` | 13-16 | Trace status constants | +| `internal/tracing/collector.go` | 104 | Startup stale trace recovery | +| `internal/tracing/collector.go` | 228-246 | recoverStaleTraces implementation | +| `internal/sessions/manager.go` | 396-477 | Atomic session persistence (temp+rename) | +| `internal/safego/recover.go` | 1-28 | Panic recovery helper for goroutines | +| `cmd/gateway_consumer_post_turn.go` | 59-211 | Team task outcome mapping (including LoopKilled -> auto-fail) | +| `pkg/protocol/events.go` | 112-121 | Agent event type constants | + + +## 5. Patterns Worth Adopting + +### 5.1 Must Adopt + +1. **Multi-layer loop detection**: GoClaw's three-layer approach (same-args, same-result, read-only streak) catches loops that single-metric detectors miss. The uniqueness ratio for distinguishing exploration from stuck loops is particularly clever. AGH should implement all three layers. + +2. **LoopKilled propagation**: The `RunResult.LoopKilled` flag flows from detector to consumer to task lifecycle. This clear signal path lets higher-level orchestrators make correct decisions (auto-fail tasks, don't announce results). AGH needs this for hook/session state machines. + +3. **Periodic checkpoint flush**: Every 5 iterations, flush pending messages to durable storage. Simple, effective crash safety without full WAL complexity. The explicit trade-off comment is a good pattern: acknowledge what you lose, document why it's acceptable. + +4. 
**Safety-net trace/span finalization**: Deferred cleanup functions that catch orphaned running traces after panics/goroutine leaks. AGH's observe system needs the same for recording events. + +5. **Stale state recovery on startup**: On boot, scan for "running" records older than a threshold and mark them as errors. Essential for any system that persists in-flight state. + +6. **Truncation retry cap**: Limiting retries when the LLM can't fit output into max_tokens prevents burning the entire iteration budget on a hopeless situation. + +### 5.2 Should Adopt + +7. **Budget pressure nudges**: At 70% and 90% of iteration budget, inject "start wrapping up" messages. At 75%, warn if no text response yet. These prevent the common failure mode of agents spending all iterations on tools without producing a response. + +8. **Final iteration tool stripping**: On the last iteration, remove all tool definitions and inject "[System] Final iteration reached. Summarize and respond." This guarantees the model produces a text response instead of requesting more tools. + +9. **Tool classification for streak detection**: Categorizing tools as mutating/neutral/read-only enables nuanced detection. AGH should maintain a similar classification, especially distinguishing ambiguous tools like `exec`. + +10. **Per-result context guard**: Force-trim any single tool result exceeding 30% of context window, independently of overall context pressure. Catches outlier outputs that would otherwise crowd out everything else. + +### 5.3 Consider Adopting + +11. **Spawn depth enforcement via tool stripping**: At max depth, remove spawn-related tools entirely rather than relying on the LLM to obey instructions. Belt-and-suspenders approach. + +12. **Adaptive tool slow-timer**: Track tool execution times, compute adaptive thresholds (2x historical max), emit "tool_slow" events when exceeded. Useful for observability. + +13. 
**Input guard for injection detection**: Regex-based scanning for common prompt injection patterns. Low cost, catches obvious attacks. Configurable action levels (log/warn/block/off). + +### 5.4 Should NOT Adopt + +14. **No session state machine**: GoClaw sessions have no status field -- they're just message arrays. This works for GoClaw's gateway model but AGH explicitly needs session lifecycle states for the ACP protocol. AGH should have a proper state machine. + +15. **Silent JSON unmarshal skip on load**: GoClaw silently skips corrupt session files. AGH should at minimum log warnings and consider quarantining corrupt data. + +16. **No message sequence validation on resume**: GoClaw trusts the LLM to handle inconsistent history. AGH should validate message integrity (proper role alternation, no orphaned tool calls) because ACP agents are more sensitive to protocol violations than web LLM APIs. diff --git a/.compozy/tasks/session-resilience/analysis_hermes.md b/.compozy/tasks/session-resilience/analysis_hermes.md new file mode 100644 index 000000000..717192ddb --- /dev/null +++ b/.compozy/tasks/session-resilience/analysis_hermes.md @@ -0,0 +1,418 @@ +# Hermes: Session Resilience Analysis + +## 1. Stop Reason Taxonomy + +Hermes does **not** define a formal enum for stop reasons. Instead, stop reasons are tracked across two independent dimensions: the **session-level** `end_reason` (persisted to SQLite) and the **turn-level** `_turn_exit_reason` (diagnostic logging only). Additionally, the LLM API's `finish_reason` is captured per-message. + +### 1.1 Session-Level End Reasons (`sessions.end_reason` column) + +These are free-form strings passed to `SessionDB.end_session()`. 
Observed values across the codebase: + +| end_reason | Where set | Meaning | +|---|---|---| +| `"cli_close"` | `cli.py:8536` | User exited the CLI (Ctrl-C, `/exit`, EOF) | +| `"user_exit"` | Tests and examples | Explicit user termination | +| `"new_session"` | `cli.py:3443` | User started a new session (`/new`) | +| `"resumed_other"` | `cli.py:3521` | User switched to a different session via `/resume` | +| `"branched"` | `cli.py:3607` | User branched to a new session via `/branch` | +| `"session_reset"` | `gateway/session.py:763,865` | Gateway session reset (idle timeout, manual `/reset`) | +| `"session_switch"` | `gateway/session.py:919` | Gateway session switched to a named session | +| `"compression"` | `run_agent.py:6049` | Context compression triggered a session split (new child session) | +| `"cron_complete"` | `cron/scheduler.py:803` | Scheduled cron job completed | +| `"timeout"` | Test fixtures | Session timed out | + +**Key observation:** Hermes has no crash-detection end_reason. If the process dies, the session's `ended_at` remains NULL and `end_reason` remains NULL -- the session is effectively "still running" in the database. This is recovered on resume (see Section 2). + +### 1.2 Turn-Level Exit Reasons (`_turn_exit_reason`, diagnostic only) + +These are logged at INFO/WARNING level at the end of every `run_conversation()` call. They are NOT persisted. 
Observed values: + +| _turn_exit_reason | Condition | +|---|---| +| `"text_response(finish_reason=stop)"` | Normal completion -- model returned text without tool calls | +| `"text_response(finish_reason=length)"` | Model hit max output tokens | +| `"interrupted_by_user"` | User sent a new message while agent was running (gateway interrupt) | +| `"interrupted_during_api_call"` | Interrupt detected while waiting for API response | +| `"budget_exhausted"` | `IterationBudget.remaining <= 0` | +| `"max_iterations_reached(N/M)"` | `api_call_count >= max_iterations` | +| `"error_near_max_iterations(msg)"` | API error when near iteration limit | +| `"unknown"` | Default -- set at loop start, overwritten if a specific reason applies | + +### 1.3 LLM API Finish Reasons (per-message `finish_reason`) + +Stored in the `messages.finish_reason` column per assistant message: + +| finish_reason | Meaning | +|---|---| +| `"stop"` | Normal completion | +| `"tool_calls"` | Model wants to call tools | +| `"length"` | Response truncated by max output tokens | +| `"incomplete"` | Codex Responses API: partial response | + +The `stop_reason_map` in `run_agent.py:7737` normalizes Anthropic's stop reasons: +```python +stop_reason_map = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", +} +``` + +### 1.4 Result Dictionary Returned by `run_conversation()` + +Every turn returns a structured dict with: +```python +{ + "final_response": str | None, + "completed": bool, # True if response exists AND api_calls < max_iterations + "partial": bool, # True only when stopped due to invalid tool calls + "interrupted": bool, # True if user interrupt triggered + "interrupt_message": str, # The message that triggered the interrupt + "api_calls": int, + "messages": list, + "model": str, + "input_tokens": int, + "output_tokens": int, + ... +} +``` + +**What's missing from Hermes:** There is no single canonical `stop_reason` field in the result. 
The caller must infer from the combination of `completed`, `partial`, `interrupted`, and `final_response is None`. AGH should define a proper enum. + +--- + +## 2. Session Repair on Resume + +### 2.1 Resume Mechanism + +Hermes supports session resume through three paths: + +1. **CLI `--continue` / `--resume`** (`cli.py:2390-2422`, `cli.py:2583-2640`) +2. **CLI `/resume` slash command** (`cli.py:3488-3560`) for mid-conversation session switching +3. **ACP `_restore()`** (`acp_adapter/session.py:333-405`) for editor reconnections after process restart + +### 2.2 What Happens on Resume + +The resume flow performs these steps: + +1. **Look up session in SQLite** (`SessionDB.get_session(session_id)`) -- validates the session exists +2. **Load full message history** (`SessionDB.get_messages_as_conversation(session_id)`) -- restores all role/content/tool_calls/reasoning fields +3. **Filter out metadata entries** -- strips `role="session_meta"` entries +4. **Clear ended_at/end_reason** (`SessionDB.reopen_session()`) -- marks the session as active again +5. 
**Pass loaded history as `conversation_history`** to the next `run_conversation()` call + +### 2.3 Consistency Checks Performed (or Not Performed) + +**What Hermes DOES check:** +- Session existence in database +- Empty message history (falls back to "starting fresh") +- System prompt caching: on resume, loads the stored `system_prompt` from the session record instead of rebuilding (preserves Anthropic prefix cache) +- Budget warning cleanup: `_strip_budget_warnings_from_history()` removes turn-scoped budget pressure strings from previous turns that could confuse the model +- Todo store hydration: `_hydrate_todo_store()` recovers in-memory todo state from the most recent todo tool response in conversation history +- Preflight context compression: before entering the main loop, checks if restored history already exceeds the model's context threshold (handles model downgrade between sessions) + +**What Hermes does NOT check:** +- **No orphaned tool call detection on resume.** If the session crashed mid-tool-execution (assistant message has tool_calls but some/all tool results are missing), the API call will fail with a mismatched tool_call_id error. The ContextCompressor's `_sanitize_tool_pairs()` only runs after compression, not on resume. +- **No crash marker.** There is no equivalent to `end_reason="crash"`. A crashed session looks identical to an active session (ended_at is NULL). +- **No message integrity validation.** No check for role alternation invariants, no verification that the last message is in a valid state. +- **No checkpoint/rollback.** Although Hermes has a checkpoint system (`checkpoints_enabled`), it is for file-system snapshots (undo tool changes), not for conversation state rollback. + +### 2.4 Crash Recovery is Implicit, Not Explicit + +Hermes relies on the model to handle inconsistent state gracefully. Key patterns: + +1. 
**`ensure_session()`** (`hermes_state.py:502-521`): If `create_session()` failed at startup (transient SQLite lock), the flush path uses INSERT OR IGNORE to create the session retroactively. + +2. **`_get_messages_up_to_last_assistant()`** (`run_agent.py:1955-1984`): Can roll back to the last complete assistant turn, but is only used for trajectory saving, not for resume. + +3. **Error result injection** (`run_agent.py:9204-9229`): When a tool execution error occurs, the code walks backward to find the assistant message with pending tool_calls and fills in error results for any unanswered tool_call_ids. This prevents the "orphan tool call" API error. + +4. **Context compression summary prefix** (`context_compressor.py:28-35`): Explicitly tells the model that earlier turns were compacted and the session state may reflect prior work: +``` +"[CONTEXT COMPACTION] Earlier turns in this conversation were compacted +to save context space. The summary below describes work that was already +completed, and the current session state may still reflect that work..." +``` + +### 2.5 ACP Session Restore (`acp_adapter/session.py:333-405`) + +The ACP path is more robust because it must handle editor reconnections: + +1. Query the database for the session record +2. Validate `source == "acp"` (only restore ACP sessions) +3. Extract `cwd` from `model_config` JSON +4. Restore `provider`, `base_url`, `api_mode` from session metadata +5. Recreate a fresh AIAgent with the original configuration +6. Load conversation history from the database +7. Re-register task-specific cwd overrides for tools + +--- + +## 3. Loop/Recursion Guards + +Hermes implements multiple layers of guards against infinite loops and resource exhaustion. 
+ +### 3.1 Iteration Budget (Primary Guard) + +**File:** `run_agent.py:168-211` + +```python +class IterationBudget: + def __init__(self, max_total: int): + self.max_total = max_total # Default: 90 for parent, 50 for subagents + self._used = 0 + self._lock = threading.Lock() + + def consume(self) -> bool: + """Try to consume one iteration. Returns True if allowed.""" + + def refund(self) -> None: + """Give back one iteration (for execute_code turns).""" +``` + +- **Parent agent:** `max_iterations=90` (default, configurable) +- **Subagent:** `delegation.max_iterations=50` (configurable via `config.yaml`) +- **Per-turn reset:** Budget resets at the start of each `run_conversation()` call (`run_agent.py:7071`) +- **Refund mechanism:** `execute_code` (programmatic tool calling) iterations are refunded so they don't eat the budget +- **Thread-safe:** Uses `threading.Lock` for concurrent subagent access + +The main loop guard (`run_agent.py:7303`): +```python +while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0: +``` + +### 3.2 Budget Pressure Warnings (Soft Guard) + +**File:** `run_agent.py:674-679, 6777-6798` + +Two tiered thresholds injected into tool result content (not as separate messages): + +| Threshold | Level | Message | +|---|---|---| +| 70% of max_iterations | CAUTION | "Iteration N/M. You're approaching the iteration limit. Start wrapping up..." | +| 90% of max_iterations | WARNING | "Iteration N/M. You are almost out of iterations. Respond NOW..." | + +Previous turns' budget warnings are stripped on resume (`_strip_budget_warnings_from_history()`) to prevent models from refusing to make tool calls. + +### 3.3 Max Iterations Handler (Graceful Degradation) + +**File:** `run_agent.py:6840-6990` + +When max iterations are reached, Hermes doesn't just stop -- it asks the model for a summary: +```python +summary_request = ( + "You've reached the maximum number of tool-calling iterations allowed. 
" + "Please provide a final response summarizing what you've found..." +) +``` + +This ensures the user always gets a response, even when the agent runs out of budget. + +### 3.4 Subagent Delegation Depth Limit + +**File:** `tools/delegate_tool.py:37` + +```python +MAX_DEPTH = 2 # parent (0) -> child (1) -> grandchild rejected (2) +MAX_CONCURRENT_CHILDREN = 3 +``` + +- `delegate_task` is in `DELEGATE_BLOCKED_TOOLS`, so children cannot recursively delegate +- But even if they could, `MAX_DEPTH=2` would prevent it +- Maximum 3 concurrent child agents per parent (`MAX_CONCURRENT_CHILDREN`) +- Tasks beyond the limit are silently truncated (`run_agent.py:2903-2929`) + +### 3.5 Tool Call Deduplication (Per-Turn) + +**File:** `run_agent.py:2931-2947` + +```python +@staticmethod +def _deduplicate_tool_calls(tool_calls: list) -> list: + """Remove duplicate (tool_name, arguments) pairs within a single turn.""" + seen: set = set() + unique: list = [] + for tc in tool_calls: + key = (tc.function.name, tc.function.arguments) + if key not in seen: + seen.add(key) + unique.append(tc) +``` + +This catches the common case where models emit the same tool call multiple times in one response. Applied at `run_agent.py:8892`. + +### 3.6 Invalid Tool Call Retry Limits + +Multiple retry counters, most with a hard limit of 3: + +| Counter | Max Retries | What it guards | +|---|---|---| +| `_invalid_tool_retries` | 3 | Model hallucinating tool names that don't exist | +| `_invalid_json_retries` | 3 | Model producing malformed JSON in tool arguments | +| `_empty_content_retries` | 3 | Model returning empty/null responses | +| `_incomplete_scratchpad_retries` | 2 | Unclosed reasoning scratchpad tags | +| `_codex_incomplete_retries` | variable | Codex Responses API returning `finish_reason=incomplete` | +| `_thinking_prefill_retries` | variable | Thinking block signature failures | + +All counters reset at the start of each turn (`run_agent.py:7045-7050`). 
+ +### 3.7 Tool Name Repair (Fuzzy Matching) + +**File:** `run_agent.py:2949-2975` + +Before declaring a tool call invalid, Hermes attempts repair: +1. Try lowercase +2. Try normalized (hyphens/spaces to underscores) +3. Try fuzzy match (difflib, cutoff=0.7) + +### 3.8 Tool Result Size Budget (Context Overflow Prevention) + +**File:** `tools/tool_result_storage.py`, `tools/budget_config.py` + +Three-layer defense against context window overflow from tool outputs: + +| Layer | Threshold | Action | +|---|---|---| +| Per-tool output cap | Tool-specific | Tools pre-truncate their own output | +| Per-result persistence | 100K chars default | Large outputs written to disk, replaced with preview + file path | +| Per-turn aggregate budget | 200K chars | If all tool results in a turn exceed 200K, largest are spilled to disk | + +Special case: `read_file` has `threshold=inf` to prevent infinite persist-read-persist loops. + +### 3.9 Context Compression (Automatic) + +**File:** `agent/context_compressor.py` + +When the conversation approaches the model's context limit (default: 50% threshold), Hermes: +1. Prunes old tool results (cheap, no LLM call) +2. Protects head messages (system prompt + first exchange) +3. Protects tail messages by token budget (~20K tokens) +4. Summarizes middle turns with a structured LLM summary +5. Sanitizes orphaned tool_call/tool_result pairs after compression +6. On subsequent compressions, iteratively updates the previous summary + +Post-compression: `_sanitize_tool_pairs()` fixes orphaned tool results and inserts stub results for orphaned tool calls. 
+ +### 3.10 Gateway Inactivity Timeout + +**File:** `gateway/run.py:7143-7242` + +- Default: 1800 seconds (30 minutes) of inactivity +- Configurable via `HERMES_AGENT_TIMEOUT` env var or `agent.gateway_timeout` config +- Warning at 50% of timeout (default 900s) +- Uses activity tracker (`_touch_activity()`) rather than wall clock +- On timeout: interrupts the agent, sends diagnostic summary to user + +### 3.11 Stale Connection Eviction (Gateway) + +**File:** `gateway/run.py:1870-1918` + +Detects leaked locks from hung/crashed handlers: +- Checks both idle time AND wall-clock age +- Wall-clock TTL: max(10x timeout, 2 hours) +- Logs diagnostic info: last activity, iteration count, current tool + +### 3.12 API Error Classification for Recovery + +**File:** `agent/error_classifier.py` + +Structured error taxonomy (`FailoverReason` enum) with recovery hints: + +| Reason | Recovery | +|---|---| +| `auth` | Refresh/rotate credential | +| `billing` | Rotate credential, then fallback | +| `rate_limit` | Backoff, rotate credential, fallback | +| `overloaded` | Backoff | +| `server_error` | Retry | +| `timeout` | Rebuild client, retry | +| `context_overflow` | Compress context | +| `payload_too_large` | Compress payload | +| `model_not_found` | Fallback to different model | +| `format_error` | Abort or strip + retry | +| `thinking_signature` | Retry (Anthropic-specific) | +| `long_context_tier` | Compress (Anthropic tier gate) | +| `unknown` | Retry with backoff | + +Heuristic for server disconnect + large session: reclassified as `context_overflow` (not `timeout`) when `approx_tokens > context_length * 0.6` or `num_messages > 200`. + +--- + +## 4. 
Key Code References + +| File | Lines | What | +|---|---|---| +| `run_agent.py` | 168-211 | `IterationBudget` class -- thread-safe iteration counter | +| `run_agent.py` | 473, 559-562 | `max_iterations` default (90) and budget initialization | +| `run_agent.py` | 674-679 | Budget pressure thresholds (70%, 90%) | +| `run_agent.py` | 6777-6798 | `_get_budget_warning()` -- tiered budget pressure messages | +| `run_agent.py` | 6840-6990 | `_handle_max_iterations()` -- graceful degradation with summary | +| `run_agent.py` | 7043-7071 | Per-turn retry counter reset and budget reset | +| `run_agent.py` | 7303 | Main loop guard: `while api_call_count < max_iterations and budget.remaining > 0` | +| `run_agent.py` | 9246-9256 | Turn exit: max iterations reached, completion determination | +| `run_agent.py` | 9267-9309 | Turn-exit diagnostic logging | +| `run_agent.py` | 9338-9362 | Result dictionary construction | +| `run_agent.py` | 2931-2947 | `_deduplicate_tool_calls()` -- per-turn dedup | +| `run_agent.py` | 2949-2975 | `_repair_tool_call()` -- fuzzy tool name repair | +| `run_agent.py` | 8770-8860 | Invalid tool call / JSON retry logic with limits | +| `run_agent.py` | 1894-1953 | `_persist_session()` and `_flush_messages_to_session_db()` | +| `run_agent.py` | 9204-9229 | Error result injection for orphaned tool calls | +| `run_agent.py` | 382-407 | `_strip_budget_warnings_from_history()` | +| `hermes_state.py` | 385-399 | `end_session()` and `reopen_session()` | +| `hermes_state.py` | 502-521 | `ensure_session()` -- crash-recovery INSERT OR IGNORE | +| `hermes_state.py` | 951-993 | `get_messages_as_conversation()` -- session restore | +| `cli.py` | 2390-2422 | CLI session resume with validation | +| `cli.py` | 2583-2640 | `_preload_resumed_session()` -- early history load | +| `cli.py` | 3488-3560 | `/resume` slash command -- mid-conversation session switch | +| `acp_adapter/session.py` | 333-405 | `_restore()` -- ACP session restore from database | +| 
`agent/context_compressor.py` | 452-510 | `_sanitize_tool_pairs()` -- fix orphans after compression | +| `agent/context_compressor.py` | 612-745 | `compress()` -- main compression algorithm | +| `agent/error_classifier.py` | 25-58 | `FailoverReason` enum | +| `agent/error_classifier.py` | 231-404 | `classify_api_error()` -- structured error classification | +| `tools/delegate_tool.py` | 36-38 | `MAX_CONCURRENT_CHILDREN=3`, `MAX_DEPTH=2`, `DEFAULT_MAX_ITERATIONS=50` | +| `tools/delegate_tool.py` | 532-540 | Delegation depth limit check | +| `tools/tool_result_storage.py` | 0-36 | Three-layer tool result budget system | +| `tools/budget_config.py` | 1-52 | Budget constants and config | + +--- + +## 5. Patterns Worth Adopting + +### 5.1 Definitely Adopt + +1. **Iteration Budget with Refund** -- The `IterationBudget` pattern (thread-safe counter with `consume()/refund()`) is clean and simple. Refunding cheap RPC-style calls (like `execute_code`) prevents budget exhaustion from non-LLM operations. AGH should implement this with a similar thread-safe counter per session. + +2. **Tiered Budget Pressure Warnings** -- Injecting budget warnings into tool results at 70% and 90% thresholds is elegant. It uses the model's own reasoning to decide when to wrap up rather than hard-cutting. AGH should adopt this pattern, injecting budget pressure into the context at configurable thresholds. + +3. **Graceful Max-Iterations Handler** -- Instead of just stopping, requesting a summary from the model ensures the user always gets a response. AGH should always attempt a summary turn before terminating for budget exhaustion. + +4. **Structured Error Classification** -- The `FailoverReason` enum with recovery action hints (`retryable`, `should_compress`, `should_rotate_credential`, `should_fallback`) is much better than scattered string matching. AGH should define a comparable Go enum with similar recovery hints. + +5. 
**Tool Call Deduplication** -- Per-turn deduplication of identical `(name, arguments)` pairs catches a common model failure mode. Simple and effective. + +6. **Tool Name Repair with Fuzzy Matching** -- Auto-correcting hallucinated tool names (lowercase, normalize, difflib) before declaring failure is a practical resilience pattern. + +7. **Turn-Exit Diagnostic Logging** -- The structured log at the end of every turn (`reason, model, api_calls, budget, last_msg_role, last_tool`) is invaluable for debugging "the agent just stopped" issues. AGH should emit a structured log event at every session turn boundary. + +### 5.2 Adopt with Improvements + +8. **Stop Reason Taxonomy** -- Hermes's approach is too informal. `_turn_exit_reason` is a free-form string only used for logging. `end_reason` is another free-form string. AGH should define a canonical `StopReason` enum that covers: `completed`, `max_iterations`, `budget_exhausted`, `interrupted`, `error`, `timeout`, `crash`, `session_reset`, `session_switch`, `compression`. This enum should be persisted, returned in APIs, and used in metrics. + +9. **Session Repair on Resume** -- Hermes does almost no repair. AGH should explicitly: + - Detect orphaned tool calls (assistant has tool_calls but missing tool results) and inject error stubs + - Validate role alternation invariants + - Set `end_reason="crash"` for sessions with NULL `ended_at` that are being resumed + - Log a structured "session recovered" event with diagnostics + +10. **Context Compression Tool Pair Sanitization** -- Hermes only sanitizes after compression. AGH should also sanitize on session load/resume and after any message list mutation. + +### 5.3 Do Differently + +11. **Budget Reset Per Turn** -- Hermes resets the iteration budget every turn (`run_conversation()` call). For a daemon like AGH that manages long-lived sessions, a per-session cumulative budget with per-turn sub-budgets may be more appropriate. 
The gateway's inactivity timeout is a better overall session guard. + +12. **No Formal Loop Detection** -- Hermes has NO actual loop detection (detecting the agent making the same sequence of tool calls repeatedly). It relies entirely on iteration limits to bound loops. AGH should implement a sliding window check: if the last N tool call sequences are identical, inject a "you appear to be stuck" warning before the budget runs out. + +13. **Crash Recovery** -- Hermes's crash recovery is purely implicit (session with NULL ended_at). AGH, as a daemon, should implement heartbeat-based liveness detection and explicit crash marking. When the daemon restarts, all sessions with NULL ended_at should be inspected and either resumed or marked as `end_reason="crash"`. + +14. **No Formal State Machine** -- Hermes uses boolean flags (`interrupted`, `completed`, `partial`) to represent session state. AGH should use an explicit state machine: `created -> running -> paused -> completed | error | timeout | crashed`. diff --git a/.compozy/tasks/session-resilience/analysis_openclaw.md b/.compozy/tasks/session-resilience/analysis_openclaw.md new file mode 100644 index 000000000..76c2bdee5 --- /dev/null +++ b/.compozy/tasks/session-resilience/analysis_openclaw.md @@ -0,0 +1,363 @@ +# OpenClaw: Session Resilience Analysis + +## 1. Stop Reason Taxonomy + +OpenClaw uses a **two-layer** stop reason taxonomy: an internal FailoverReason system for retry/recovery decisions, and an ACP-facing StopReason for protocol-level communication. 
+ +### ACP-Level StopReason (Protocol Surface) + +Defined via `@agentclientprotocol/sdk`, the StopReason type exposed to ACP clients is intentionally narrow: + +| StopReason | When Emitted | Gateway State | +|---------------|-------------|---------------| +| `end_turn` | Normal completion, errors (mapped), final state events | `state: "final"` or `state: "error"` | +| `max_tokens` | Context window exhausted | `stopReason: "max_tokens"` in gateway payload | +| `cancelled` | User-initiated abort or aborted state | `state: "aborted"` or explicit cancel | + +**Key mapping logic** (`src/acp/translator.ts`, lines 948-964): + +```typescript +if (state === "final") { + const rawStopReason = payload.stopReason as string | undefined; + const stopReason: StopReason = rawStopReason === "max_tokens" ? "max_tokens" : "end_turn"; + await this.finishPrompt(pending.sessionId, pending, stopReason); +} +if (state === "aborted") { + await this.finishPrompt(pending.sessionId, pending, "cancelled"); +} +if (state === "error") { + // ACP has no explicit "server_error" stop reason. Use "end_turn" so clients + // do not treat transient backend errors as deliberate refusals. + void this.finishPrompt(pending.sessionId, pending, "end_turn"); +} +``` + +Design choice: errors are mapped to `end_turn` rather than surfacing a distinct error stop reason. This prevents ACP clients from treating transient backend failures (timeouts, rate limits) as permanent refusals. 
+ +### Internal FailoverReason (Retry/Recovery Engine) + +Defined in `src/agents/pi-embedded-helpers/types.ts`: + +```typescript +export type FailoverReason = + | "auth" // 401 - authentication failure + | "auth_permanent" // 403 - permanent auth rejection + | "format" // 400 - malformed request + | "rate_limit" // 429 - provider rate limiting + | "overloaded" // 503 - provider overloaded + | "billing" // 402 - billing/quota exceeded + | "timeout" // 408 - request timeout + | "model_not_found" // 404 - model unavailable + | "session_expired" // 410 - session no longer exists + | "unknown"; // Catch-all +``` + +Each FailoverReason maps to an HTTP status code via `resolveFailoverStatus()` and drives the retry/failover policy engine. + +### Assistant-Level stopReason (LLM Response) + +The LLM response layer uses its own stop reason set within assistant messages: + +| stopReason | Meaning | +|-------------|---------| +| `stop` | Normal completion (model chose to stop) | +| `toolUse` | Model wants to call a tool (agentic loop continues) | +| `error` | Error during generation | +| `aborted` | Externally aborted | +| `max_tokens` | Context window hit | + +### Session Entry Status + +Persisted session status (`src/config/sessions/types.ts`, line 142): + +```typescript +status?: "running" | "done" | "failed" | "killed" | "timeout"; +``` + +### Unhandled Stop Reason Recovery + +OpenClaw wraps LLM streams to catch unknown/unhandled stop reasons from providers (`src/agents/pi-embedded-runner/run/attempt.stop-reason-recovery.ts`). When a provider returns a stop reason OpenClaw does not recognize, it: +1. Detects the pattern via regex: `/^Unhandled stop reason:\s*(.+)$/i` +2. Patches the assistant message to `stopReason: "error"` with a normalized error message +3. Builds a synthetic error stream to prevent crashes + +--- + +## 2. 
Session Repair on Resume + +OpenClaw performs multiple layers of session repair, from file-level JSONL integrity to transcript-level tool call pairing. + +### 2.1 Session File Repair (`session-file-repair.ts`) + +On resume, `repairSessionFileIfNeeded()` performs JSONL integrity repair: + +1. **Read the session JSONL file** line by line +2. **Parse each line** as JSON; lines that fail parsing are counted as `droppedLines` +3. **Validate session header** -- first entry must have `type: "session"` with a valid `id` +4. If malformed lines exist: + - **Create a backup** at `{sessionFile}.bak-{pid}-{timestamp}` + - **Write cleaned file** via atomic rename (write to `.tmp`, then `fs.rename`) + - Preserve file permissions from original +5. Return a `RepairReport` with `repaired`, `droppedLines`, `backupPath` + +This runs before each embedded agent attempt via `repairSessionFileIfNeeded` called from the attempt runner. + +### 2.2 Transcript Repair (`session-transcript-repair.ts`) + +The transcript repair system handles structural inconsistencies in the message history: + +**Tool Call Input Repair** (`repairToolCallInputs`): +- Drops tool call blocks missing `input`, `id`, or valid `name` +- Validates tool names against a max length (64 chars) and regex `/^[A-Za-z0-9_:.-]+$/` +- Optionally filters against an `allowedToolNames` set +- Redacts `sessions_spawn` attachment content to prevent transcript bloat +- If all tool calls in an assistant message are dropped, the entire message is removed + +**Tool Use/Result Pairing Repair** (`repairToolUseResultPairing`): +- **Moves displaced toolResult** messages directly after their matching assistant toolCall turn +- **Inserts synthetic error toolResults** for missing IDs (with text: `[openclaw] missing tool result in session history; inserted synthetic error result for transcript repair.`) +- **Drops duplicate toolResults** for the same ID anywhere in the transcript +- **Drops orphan toolResults** that appear outside assistant 
context +- **Skips synthesis for aborted/errored assistant turns** -- when `stopReason === "error" || "aborted"`, incomplete tool_use blocks are left alone to avoid API 400 errors + +### 2.3 Session Tool Result Guard (`session-tool-result-guard.ts`) + +A live guard installed on `sessionManager.appendMessage` that: +- **Tracks pending tool calls** -- when an assistant message with tool calls is written, their IDs are registered +- **Matches incoming toolResults** to pending IDs and normalizes tool names +- **Caps tool result size** via `truncateToolResultMessage` with `DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS` +- **Flushes synthetic results** for orphaned pending tool calls when new non-tool-result messages arrive +- **Sanitizes tool call inputs** against an allowlist before persistence +- Supports a `beforeMessageWriteHook` that can block or modify messages before persistence + +### 2.4 Session Write Lock (`session-write-lock.ts`) + +Prevents concurrent writes to the same session file: +- **File-based locking** via `fs.open(lockPath, "wx")` (exclusive create) +- **Lock payload** includes `pid`, `createdAt`, and process `starttime` (from `/proc/pid/stat`) +- **Stale lock detection**: + - Dead PID (process no longer alive) + - Recycled PID (start time mismatch -- detects OS PID reuse) + - Age exceeds `DEFAULT_STALE_MS` (30 minutes) + - Orphan self-lock (same PID but no in-memory record) +- **Watchdog timer** runs every 60s, forcibly releasing locks held beyond `maxHoldMs` (default 5 minutes) +- **Signal handlers** (SIGINT, SIGTERM, etc.) release all locks synchronously on process exit +- **Reentrant locking** supported -- same session can re-acquire without deadlock + +### 2.5 Gateway Disconnect Recovery + +The ACP translator handles gateway disconnects with a grace window (`ACP_GATEWAY_DISCONNECT_GRACE_MS = 5000ms`): + +1. On disconnect: start a 5-second grace timer for each pending prompt +2. 
On reconnect within grace period: + - Call `agent.wait({ runId, timeoutMs: 0 })` to check if the run completed during disconnect + - If `status: "ok"` -- resolve with `end_turn` + - If `status: "timeout"` -- keep pending, schedule another check at the deadline +3. On grace period expiry without reconnect: reject with disconnect error +4. Prompts started during disconnect are queued and reconciled on reconnect + +### 2.6 Subagent Orphan Recovery (`subagent-orphan-recovery.ts`) + +After a gateway restart (SIGUSR1), OpenClaw recovers orphaned subagent sessions: + +1. **Detection**: Scans the subagent run registry for active runs where the session store has `abortedLastRun: true` +2. **Resume message construction**: Builds a synthetic system message: + ``` + [System] Your previous turn was interrupted by a gateway reload. + Your original task was: {task} + The last message from the user before the interruption was: {lastHumanMessage} + Please continue where you left off. + ``` +3. **Config change detection**: Scans transcript for config-related mentions to add a hint preventing duplicate config modifications +4. **Idempotent recovery**: Tracks `resumedSessionKeys` to prevent duplicate resumptions +5. **Retry with exponential backoff**: Up to 3 retries with 2x backoff (starting at 5s delay) +6. **Flag persistence**: `abortedLastRun` flag is only cleared after confirmed successful resume + +--- + +## 3. 
Loop/Recursion Guards + +### 3.1 Tool Loop Detection (`tool-loop-detection.ts`) + +A sophisticated multi-detector system with configurable thresholds: + +**Configuration** (disabled by default): +```json +{ + "enabled": false, + "historySize": 30, + "warningThreshold": 10, + "criticalThreshold": 20, + "globalCircuitBreakerThreshold": 30, + "detectors": { + "genericRepeat": true, + "knownPollNoProgress": true, + "pingPong": true + } +} +``` + +**Four Detector Types** (`LoopDetectorKind`): + +| Detector | What It Detects | Warning At | Critical At | +|----------|----------------|------------|-------------| +| `generic_repeat` | Same tool + same args repeated | 10 calls | Never (warn only) | +| `known_poll_no_progress` | Polling tools (`command_status`, `process:poll/log`) with identical results | 10 calls | 20 calls | +| `global_circuit_breaker` | Any tool with identical no-progress outcomes | N/A | 30 calls | +| `ping_pong` | Alternating A-B-A-B tool call patterns | 10 alternations | 20 alternations (with no-progress evidence) | + +**How it works**: + +1. **Recording**: Each tool call is hashed (`toolName:sha256(stableStringify(params))`) and stored in a sliding window of the last 30 calls +2. **Outcome tracking**: After each tool call completes, the result is hashed and stored alongside the call record for no-progress detection +3. **No-progress detection**: A "no-progress streak" counts consecutive identical outcomes for the same tool+args combination +4. **Ping-pong detection**: Checks if the tail of the history alternates between exactly two distinct tool signatures, with optional no-progress evidence on both sides +5. **Two severity levels**: + - `warning`: Injected as a system message telling the agent to stop retrying + - `critical`: Blocks session execution entirely + +**Warning key deduplication**: Each detection result includes a `warningKey` to prevent duplicate warnings for the same pattern. 
+ +**Known poll tool identification**: `command_status` and `process:poll/log` are recognized as polling tools with specialized no-progress detection that considers structural result fields (`status`, `exitCode`, `totalLines`, etc.) rather than raw text. + +### 3.2 Run Loop Iteration Guard + +The main agent run loop (`run.ts`) has a hard iteration cap: + +```typescript +const BASE_RUN_RETRY_ITERATIONS = 24; +const RUN_RETRY_ITERATIONS_PER_PROFILE = 8; +const MIN_RUN_RETRY_ITERATIONS = 32; +const MAX_RUN_RETRY_ITERATIONS = 160; + +function resolveMaxRunRetryIterations(profileCandidateCount: number): number { + const scaled = BASE_RUN_RETRY_ITERATIONS + + Math.max(1, profileCandidateCount) * RUN_RETRY_ITERATIONS_PER_PROFILE; + return Math.min(MAX_RUN_RETRY_ITERATIONS, Math.max(MIN_RUN_RETRY_ITERATIONS, scaled)); +} +``` + +When exceeded: +- Logs `[run-retry-limit]` with session key, provider, and attempt count +- Evaluates failover policy: either escalates to fallback model or returns error payload +- Error message: "Request failed after repeated internal retries. Please try again, or use /new to start a fresh session." 
+ +### 3.3 Subagent Announce Loop Guard + +Prevents infinite retry loops when announcing subagent completion (issue #18264): + +- **Announce retry count**: Each `SubagentRunRecord` tracks `announceRetryCount` and `lastAnnounceRetryAt` +- **Max retry budget**: Entries over the retry budget are marked completed without announcing +- **Expiry check**: Entries that ended more than 5 minutes ago with high retry counts are skipped +- **Rejection handling**: When `runSubagentAnnounceFlow` rejects, `cleanupHandled` is reset to allow future retries, but the retry counter increments + +### 3.4 Context Overflow / Compaction Guards + +Multiple compaction guards prevent infinite compaction loops: + +```typescript +const MAX_TIMEOUT_COMPACTION_ATTEMPTS = 2; +const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3; +``` + +- **Timeout compaction**: Max 2 attempts to compact after timeout errors +- **Overflow compaction**: Max 3 attempts to compact after context overflow +- **Preemptive compaction**: Before prompting, checks if context is near overflow and compacts proactively +- **Tool result truncation**: Before retrying after overflow, truncates oversized tool results in the session + +### 3.5 Auth Profile Rotation Guards + +Rate limit and overload profile rotation have caps: + +- `overloadProfileRotationLimit`: Configurable cap on profile rotations for overloaded providers +- `rateLimitProfileRotationLimit`: Cap before escalating to model fallback +- Exponential backoff before overload failover via `overloadFailoverBackoffMs` + +### 3.6 Planning-Only Retry Guard + +Detects when the LLM only describes a plan without executing (`incomplete-turn.ts`): + +- Regex-based detection: `/\b(?:i(?:'ll| will)|let me|going to|...)\b/i` +- Completion detection to avoid false positives: `/\b(?:done|finished|implemented|...)\b/i` +- Max 700 chars, no code blocks, specific to OpenAI GPT-5 models +- Injects instruction: "Do not restate the plan. Act now: take the first concrete tool action you can." 
+ +### 3.7 Fetch Recursion Guard + +`src/infra/net/fetch-guard.ts` prevents recursive fetch loops in network operations with depth tracking. + +--- + +## 4. Key Code References + +| Component | File | Key Lines/Exports | +|-----------|------|-------------------| +| ACP StopReason mapping | `src/acp/translator.ts` | Lines 948-964 (state-to-stopReason mapping) | +| ACP disconnect grace | `src/acp/translator.ts` | `ACP_GATEWAY_DISCONNECT_GRACE_MS = 5_000` | +| FailoverReason enum | `src/agents/pi-embedded-helpers/types.ts` | Full type definition | +| FailoverError class | `src/agents/failover-error.ts` | `FailoverError`, `resolveFailoverStatus()` | +| Tool loop detection | `src/agents/tool-loop-detection.ts` | `detectToolCallLoop()`, `recordToolCall()`, `recordToolCallOutcome()` | +| Loop detection config | `src/config/types.tools.ts` | `ToolLoopDetectionConfig` type | +| Loop detection docs | `docs/tools/loop-detection.md` | User-facing configuration guide | +| Session file repair | `src/agents/session-file-repair.ts` | `repairSessionFileIfNeeded()` | +| Transcript repair | `src/agents/session-transcript-repair.ts` | `repairToolCallInputs()`, `repairToolUseResultPairing()` | +| Tool result guard | `src/agents/session-tool-result-guard.ts` | `installSessionToolResultGuard()` | +| Session write lock | `src/agents/session-write-lock.ts` | `acquireSessionWriteLock()`, stale lock detection | +| Orphan recovery | `src/agents/subagent-orphan-recovery.ts` | `recoverOrphanedSubagentSessions()`, `scheduleOrphanRecovery()` | +| Run loop guard | `src/agents/pi-embedded-runner/run/helpers.ts` | `resolveMaxRunRetryIterations()`, `MAX_RUN_RETRY_ITERATIONS = 160` | +| Retry limit handler | `src/agents/pi-embedded-runner/run/retry-limit.ts` | `handleRetryLimitExhaustion()` | +| Failover policy | `src/agents/pi-embedded-runner/run/failover-policy.ts` | `resolveRunFailoverDecision()` | +| Stop reason recovery | `src/agents/pi-embedded-runner/run/attempt.stop-reason-recovery.ts` | 
`wrapStreamFnHandleSensitiveStopReason()` | +| Incomplete turn guard | `src/agents/pi-embedded-runner/run/incomplete-turn.ts` | `resolvePlanningOnlyRetryInstruction()` | +| Announce loop guard | `src/agents/subagent-registry.announce-loop-guard.test.ts` | Regression test for issue #18264 | +| Session entry type | `src/config/sessions/types.ts` | `SessionEntry` with `abortedLastRun`, `status` fields | +| Before-tool-call hook | `src/agents/pi-tools.before-tool-call.ts` | `runBeforeToolCallHook()`, `wrapToolWithBeforeToolCallHook()` | + +--- + +## 5. Patterns Worth Adopting + +### 5.1 Two-Layer Stop Reason Architecture +OpenClaw separates **protocol-facing** stop reasons (simple: `end_turn`, `max_tokens`, `cancelled`) from **internal** failover reasons (rich: `auth`, `rate_limit`, `overloaded`, `billing`, `timeout`, etc.). AGH should adopt this: keep the ACP/external surface simple while using a richer internal taxonomy for retry/recovery decisions. The mapping from internal to external is where policy lives. + +### 5.2 Tool Loop Detection as a Configurable Subsystem +The multi-detector approach with configurable thresholds per agent is excellent. Key ideas for AGH: +- **Sliding window history** (30 calls) with content hashing for pattern detection +- **No-progress detection** via result outcome hashing, not just call counting +- **Ping-pong detection** for alternating A-B-A-B patterns +- **Two severity levels** (warning = inject hint, critical = block execution) +- **Disabled by default** with per-agent opt-in + +### 5.3 Atomic Session File Repair on Resume +The backup-then-atomic-rename pattern for JSONL repair is safe and production-proven: +1. Read the file, drop unparseable lines +2. Write backup with original content +3. Write cleaned content to `.tmp` file +4. Atomic `rename()` to replace original +AGH should adopt this for SQLite WAL recovery and session event store integrity. 
+ +### 5.4 Synthetic Tool Result Injection +When tool calls are found without matching results (crash mid-execution), OpenClaw synthesizes error results. This prevents strict providers from rejecting the entire transcript. AGH's event store should support synthetic event injection for the same reason. + +### 5.5 Session Write Lock with PID Recycling Detection +The lock system's use of `/proc/pid/stat` start time to detect PID recycling is clever and prevents stale lock false-positives. AGH should use a similar approach for its SQLite-based session locking, especially since the daemon runs as a long-lived process. + +### 5.6 Subagent Orphan Recovery with Idempotent Resume +The `abortedLastRun` flag pattern is elegant: +- Flag is set when a run is interrupted +- Flag is only cleared after confirmed successful resume +- If resume fails, the flag persists for the next restart attempt +- `resumedSessionKeys` set prevents duplicate resumptions within a single recovery cycle +AGH should adopt this for its subprocess agent sessions. + +### 5.7 Run Loop Hard Cap with Failover Escalation +The escalating retry strategy (rotate auth profile -> fallback model -> error payload) with a hard iteration cap prevents infinite retry loops while maximizing recovery chances. AGH should implement a similar escalation ladder for its session retry logic. + +### 5.8 Grace Window for Transient Disconnects +The 5-second grace window for gateway disconnects, with `agent.wait` reconciliation on reconnect, prevents unnecessary session failures during transient network issues. AGH should adopt a similar pattern for its HTTP/SSE and UDS connections. + +### 5.9 Before-Tool-Call Hook for Loop Detection Integration +OpenClaw integrates loop detection into the tool execution pipeline via a `before_tool_call` hook that wraps each tool. This is cleaner than checking at the orchestration level because it catches all tool calls regardless of how they were initiated. 
AGH's hooks system could provide a similar injection point. + +### 5.10 Unhandled Stop Reason Recovery +Wrapping LLM streams to catch and normalize unknown stop reasons prevents crashes from provider-specific behaviors. AGH's ACP client should implement similar defensive normalization when parsing agent subprocess responses. diff --git a/.compozy/tasks/session-resilience/analysis_openfang.md b/.compozy/tasks/session-resilience/analysis_openfang.md new file mode 100644 index 000000000..a5b5b715c --- /dev/null +++ b/.compozy/tasks/session-resilience/analysis_openfang.md @@ -0,0 +1,422 @@ +# OpenFang: Session Resilience Analysis + +## 1. Stop Reason Taxonomy + +### LLM-Level Stop Reasons + +OpenFang defines a minimal 4-variant `StopReason` enum at the LLM protocol level (`crates/openfang-types/src/message.rs:207-216`): + +```rust +pub enum StopReason { + EndTurn, // Model finished its turn naturally + ToolUse, // Model wants to call a tool + MaxTokens, // Model hit the output token limit + StopSequence, // Model hit a configured stop sequence +} +``` + +This is a **wire-level enum** that maps 1:1 to LLM API responses. It intentionally does NOT encode application-level stop reasons like "budget exceeded" or "user cancelled" -- those are expressed at higher layers. + +### Application-Level Stop Reasons (Implicit Taxonomy) + +OpenFang does NOT have an explicit `SessionStopReason` enum. Instead, stop reasons are distributed across error types, hook metadata, and loop exit paths. 
The full taxonomy, reconstructed from the code:
skipped | `agent_loop.rs:745-757` | +| **Approval denied** | Tool requires human approval, was denied -- tool skipped with guidance | `agent_loop.rs:850-863` | +| **Max restarts exceeded** | `Supervisor.record_agent_restart()` returns `Err(count)` | `supervisor.rs:79-95` | +| **Unresponsive (heartbeat)** | Agent inactive > 2x heartbeat interval -- flagged for recovery | `heartbeat.rs:186` | + +### Hook Metadata for Stop Reasons + +The `AgentLoopEnd` hook fires with structured JSON `data` containing a `reason` field. Known values: +- `"circuit_break"` -- loop guard circuit breaker fired +- `"max_continuations"` -- hit MAX_CONTINUATIONS limit +- `"max_iterations_exceeded"` -- hit max_iterations limit +- (absent/normal) -- completed successfully + +### Agent Lifecycle States + +The `AgentState` enum (`agent.rs:173-186`) tracks macro-level agent health: + +```rust +pub enum AgentState { + Created, // Not yet started + Running, // Active + Suspended, // Paused + Terminated, // Permanently stopped + Crashed, // Awaiting recovery +} +``` + +### LLM Error Classification + +OpenFang has a sophisticated 8-category error classifier (`llm_errors.rs`) that parses raw LLM API errors using pattern matching against 19+ provider error formats: + +```rust +pub enum LlmErrorCategory { + RateLimit, // 429, quota exceeded + Overloaded, // 503, high demand + Timeout, // Network failures + Billing, // 402, insufficient credits + Auth, // 401/403, invalid key + ContextOverflow, // Context window exceeded + Format, // Malformed request + ModelNotFound, // Unknown model +} +``` + +Each category has `is_retryable` and `is_billing` flags. RateLimit, Overloaded, and Timeout are retryable; the rest are not. + +--- + +## 2. Session Repair on Resume + +OpenFang has a **dedicated session repair module** (`crates/openfang-runtime/src/session_repair.rs`) that validates and fixes message history before every LLM call -- not just on resume. This is the primary consistency mechanism. 
+ +### When Repair Runs + +Session repair runs at **three distinct points** in every agent loop iteration: + +1. **Before the initial LLM call** (`agent_loop.rs:319`): `validate_and_repair(&llm_messages)` -- cleans the full message history +2. **After context overflow recovery** (`agent_loop.rs:389`): Re-validates after draining old messages, which may have split ToolUse/ToolResult pairs +3. **After silent failure retry** (`agent_loop.rs:519`): Re-validates if the LLM returned 0 input tokens (indicating broken tool pairing) + +### Repair Phases (Ordered Pipeline) + +The repair pipeline runs 5 phases in strict order (`session_repair.rs:52-197`): + +**Phase 1 -- Collect ToolUse IDs**: Builds a `HashSet` of all `tool_use_id` values from assistant messages. + +**Phase 2 -- Filter orphans and empties**: +- Drops `ToolResult` blocks whose `tool_use_id` has no matching `ToolUse` anywhere in history +- Drops empty messages (empty text or all blocks filtered out) +- Tracks stats: `orphaned_results_removed`, `empty_messages_removed` + +**Phase 2b -- Reorder misplaced ToolResults** (`session_repair.rs:204-339`): +- Builds a `tool_use_id -> assistant_msg_index` map +- For each user message containing ToolResults, checks if it immediately follows the correct assistant message +- If misplaced, moves the ToolResult to the correct position (insert after the assistant message containing the matching ToolUse) +- Handles edge cases: appending to existing user messages, creating new user messages + +**Phase 2c -- Deduplicate ToolResults** (`session_repair.rs:449-476`): +- Keeps only the first `ToolResult` for each `tool_use_id` +- Critical ordering: dedup runs BEFORE synthetic insertion (regression fix for issue #1013 -- Moonshot provider reuses `tool_use_id` values like `memory_store:0` across turns) + +**Phase 2d -- Synthetic error results** (`session_repair.rs:352-438`): +- Counts ToolUse vs ToolResult occurrences per ID (not just presence -- handles providers that reuse IDs) +- 
For any orphaned ToolUse (no matching ToolResult), inserts a synthetic error result: `"[Tool execution was interrupted or lost]"` with `is_error: true` +- Inserts immediately after the assistant message containing the orphaned ToolUse + +**Phase 2e -- Remove aborted assistant messages** (`session_repair.rs:483-519`): +- Detects assistant messages with empty content (blank text or no blocks) that indicate interrupted tool-use +- Removes these to prevent broken state from propagating + +**Phase 3 -- Merge consecutive same-role messages** (`session_repair.rs:164-176`): +- The Anthropic API requires strict user/assistant alternation +- Merges consecutive messages with the same role by appending content blocks + +### Repair Statistics + +The repair returns a `RepairStats` struct tracking every fix applied: + +```rust +pub struct RepairStats { + pub orphaned_results_removed: usize, + pub empty_messages_removed: usize, + pub messages_merged: usize, + pub results_reordered: usize, + pub synthetic_results_inserted: usize, + pub duplicates_removed: usize, +} +``` + +This is logged as a structured warning whenever any repair was needed. + +### Additional Repair: Tool Result Sanitization + +`strip_tool_result_details()` (`session_repair.rs:542-561`) sanitizes tool output before feeding it back to the LLM: +- Truncates to 10K chars max +- Strips base64 blobs >1000 chars (replaces with placeholder) +- Removes prompt injection markers (`<|im_start|>`, `<<SYS>>`, `IGNORE PREVIOUS INSTRUCTIONS`, etc.) + +### Heartbeat Pruning + +`prune_heartbeat_turns()` (`session_repair.rs:650-696`) removes `NO_REPLY` / `[no reply needed]` heartbeat turns from session history to save context budget. Keeps the last `keep_recent` messages intact. 
+ +### Context Overflow Recovery Pipeline + +A separate 4-stage recovery pipeline (`context_overflow.rs`) handles sessions that grow too large: + +| Stage | Trigger | Action | +|---|---|---| +| 1 | 70-90% of context window | Moderate trim: keep last 10 messages | +| 2 | >90% of context window | Aggressive trim: keep last 4 messages + summary marker | +| 3 | Still over after stage 2 | Truncate all historical tool results to 2K chars | +| 4 | Still over after stage 3 | Return `FinalError` -- suggest `/reset` or `/compact` | + +The `safe_drain_boundary()` function ensures draining doesn't split ToolUse/ToolResult pairs across the boundary. + +### Interim Saves (Crash Protection) + +The agent loop performs **interim saves after every tool execution round** (`agent_loop.rs:893`): +```rust +// Interim save after tool execution to prevent data loss on crash +if let Err(e) = memory.save_session_async(session).await { + warn!("Failed to interim-save session: {e}"); +} +``` + +It also saves before returning on max iterations exceeded (`agent_loop.rs:950`), max continuations (`agent_loop.rs:908`), and circuit breaker (`agent_loop.rs:688`). + +### Graceful Shutdown Sequence + +The `ShutdownCoordinator` (`graceful_shutdown.rs`) enforces an ordered 10-phase shutdown: + +1. Running -> Draining (stop new requests) +2. Broadcasting shutdown to WebSocket clients +3. Waiting for in-flight agent loops (with `agent_timeout`: 60s default) +4. Closing browser sessions +5. Closing MCP connections +6. Stopping background tasks +7. Flushing audit log +8. Closing database connections +9. Complete + +Each phase has configurable timeouts: `drain_timeout` (30s), `agent_timeout` (60s), `total_timeout` (120s). 
+ +### Crash Recovery via Heartbeat + +The heartbeat monitor (`heartbeat.rs`) detects crashed/unresponsive agents: +- Checks every 30s (configurable) +- Agent considered unresponsive after 2x its heartbeat interval (default: 180s timeout) +- Crashed agents get auto-recovery attempts up to `max_recovery_attempts` (default: 3) +- Recovery has a cooldown between attempts (default: 60s) +- After exhausting recovery attempts, agent is marked `Terminated` +- Idle agents (never processed a message) are skipped to prevent false crash-recover loops + +The `Supervisor` (`supervisor.rs`) tracks per-agent restart counts and enforces `max_restarts` limits (default: 10 from `AutonomousConfig`). + +--- + +## 3. Loop/Recursion Guards + +### LoopGuard (Primary Loop Detection) + +The `LoopGuard` (`crates/openfang-runtime/src/loop_guard.rs`) is the most sophisticated loop detection system I've found in any agent harness. It tracks tool calls within a single agent loop execution using SHA-256 hashes. + +**Configuration defaults** (`LoopGuardConfig`, line 56-68): + +| Parameter | Default | Purpose | +|---|---|---| +| `warn_threshold` | 3 | Identical calls before warning appended to result | +| `block_threshold` | 5 | Identical calls before call is blocked (skipped) | +| `global_circuit_breaker` | 30 | Total tool calls before entire loop is killed | +| `poll_multiplier` | 3 | Multiplier for poll tool thresholds (e.g., effective block = 15) | +| `outcome_warn_threshold` | 2 | Identical call+result pairs before warning | +| `outcome_block_threshold` | 3 | Identical call+result pairs before auto-block | +| `ping_pong_min_repeats` | 3 | Pattern repeats before ping-pong blocking | +| `max_warnings_per_call` | 3 | Warnings per call hash before upgrading to Block | + +**Four verdict levels** (`LoopGuardVerdict`): + +```rust +pub enum LoopGuardVerdict { + Allow, // Proceed normally + Warn(String), // Proceed, but append warning to tool result + Block(String), // Skip this tool call + 
CircuitBreak(String), // Kill the entire agent loop +} +``` + +### Detection Strategies + +**1. Simple repetition detection** (lines 146-218): +- SHA-256 hash of `(tool_name, serialized_params)` -- deterministic because serde_json sorts object keys +- Per-hash count tracked in `HashMap` +- Graduated response: Allow -> Warn (at threshold 3) -> Block (at threshold 5) + +**2. Outcome-aware detection** (lines 251-281): +- After tool execution, hashes `(tool_name | params_json | result_truncated_1000)` -- the result is truncated to 1000 chars +- If the same call produces the same result 2 times: warning +- If 3 times: the call hash is added to a `blocked_outcomes` set, auto-blocking the NEXT `check()` call +- This catches loops where the agent retries the same failing operation + +**3. Ping-pong detection** (lines 362-498): +- Maintains a ring buffer of last 30 call hashes +- Detects A-B-A-B alternating patterns (checks last 6 entries for 3 repeats of length 2) +- Detects A-B-C-A-B-C cycling patterns (checks last 9 entries for 3 repeats of length 3) +- Below `ping_pong_min_repeats`: warns. At or above: blocks +- Uses separate warning bucket key (`pingpong_{hash}`) to track ping-pong warnings independently + +**4. Warning bucket / escalation** (lines 206-214): +- Tracks how many warnings have been emitted per call hash +- After `max_warnings_per_call` (3) warnings for the same call, upgrades to Block +- Prevents the agent from ignoring repeated warnings + +**5. Poll tool handling** (lines 334-360): +- `POLL_TOOLS` list: `["shell_exec"]` +- A call is considered "polling" if the tool is in POLL_TOOLS AND the params contain status/poll/wait/watch/tail/ps/docker/kubectl keywords +- Generic poll detection: params JSON containing "status", "poll", or "wait" +- Poll calls get relaxed thresholds: effective_warn = 9, effective_block = 15 + +**6. 
Backoff suggestions** (lines 287-304): +- For poll calls, suggests increasing delays: 5s, 10s, 30s, 60s (capped at 60s) +- Returns `Option` in milliseconds; no backoff on first call + +### Max Iterations Guard + +Defined at the agent loop level (`agent_loop.rs:35`): +- `MAX_ITERATIONS = 50` (constant default) +- Overridable per-agent via `AutonomousConfig.max_iterations` (default: 50, checked at line 355) +- The loop guard's `global_circuit_breaker` is scaled up to `max_iterations * 3` for autonomous agents (line 363-367) +- When exceeded: session is saved, `AgentLoopEnd` hook fires with `reason: "max_iterations_exceeded"`, returns `OpenFangError::MaxIterationsExceeded` + +### Max Continuations Guard + +For `StopReason::MaxTokens` responses (`agent_loop.rs:898-945`): +- `MAX_CONTINUATIONS = 5` +- Consecutive MaxTokens responses are counted +- Under the limit: partial response added, "Please continue." appended, loop continues +- At the limit: returns partial response, fires hook with `reason: "max_continuations"` +- Counter resets on any ToolUse response (line 658) + +### Context Window Guard + +- `MAX_HISTORY_MESSAGES = 20` (`agent_loop.rs:66`) -- hard safety valve for message count +- Context budget system (`context_budget.rs`) for dynamic tool result truncation +- 4-stage context overflow recovery pipeline (see Section 2) + +### Phantom Action Detection + +`phantom_action_detected()` (`agent_loop.rs:71-87`) catches when the LLM claims to have performed an action (sent, posted, emailed) without actually calling any tools. This prevents hallucinated completions where the model fabricates task completion. + +Detection: text contains action verbs ("sent", "posted", "emailed") AND channel references ("telegram", "slack", "discord"). If detected on iteration 0 with no tools executed, the agent is re-prompted: + +``` +[System: You claimed to perform an action but did not call any tools. +You must use the appropriate tool to actually perform the action.] 
+``` + +### Tool Error Fabrication Prevention + +After tool errors, two guidance injections prevent the agent from fabricating results: + +1. **TOOL_ERROR_GUIDANCE** (`agent_loop.rs:97-98`): Injected when any tool returns `is_error: true`. Tells the agent NOT to invent missing results or pretend failed tools succeeded. + +2. **Non-denial error guidance** (`agent_loop.rs:872-882`): Separate guidance for non-approval-related errors, instructing the agent to report errors honestly. + +### Approval Denial Loop Prevention + +When tools are denied by approval policy (`agent_loop.rs:850-863`), the agent receives guidance to NOT retry denied tools, preventing an infinite retry loop where the agent keeps asking to execute denied operations. + +### Inter-Agent Recursion Limits + +- `AGENT_TOOL_TIMEOUT_SECS = 600` (10 minutes) for `agent_send` / `agent_spawn` tool calls +- Each agent has its own `max_iterations` limit +- The supervisor enforces `max_restarts` per agent (default: 10) +- No explicit recursion depth counter across nested agent calls + +### Provider-Level Circuit Breaker + +`ProviderCooldown` (`auth_cooldown.rs`) prevents request storms to failing providers: +- Three verdicts: `Allow`, `AllowProbe`, `Reject { reason, retry_after_secs }` +- Records successes and failures per provider +- After repeated failures, rejects requests with a cooldown period +- Periodically allows probe requests to test recovery + +--- + +## 4. 
Key Code References + +| Component | File | Key Lines | +|---|---|---| +| StopReason enum | `crates/openfang-types/src/message.rs` | 207-216 | +| AgentState enum | `crates/openfang-types/src/agent.rs` | 173-186 | +| Error taxonomy | `crates/openfang-types/src/error.rs` | 7-101 | +| LLM error classifier | `crates/openfang-runtime/src/llm_errors.rs` | 19-37 (categories), 241-392 (classifier) | +| Agent loop | `crates/openfang-runtime/src/agent_loop.rs` | 173-968 (main loop) | +| Loop guard | `crates/openfang-runtime/src/loop_guard.rs` | 1-949 (entire file) | +| Session repair | `crates/openfang-runtime/src/session_repair.rs` | 1-1409 (entire file) | +| Context overflow recovery | `crates/openfang-runtime/src/context_overflow.rs` | 117-222 (pipeline) | +| Graceful shutdown | `crates/openfang-runtime/src/graceful_shutdown.rs` | 1-443 (entire file) | +| Supervisor | `crates/openfang-kernel/src/supervisor.rs` | 1-228 | +| Heartbeat monitor | `crates/openfang-kernel/src/heartbeat.rs` | 1-546 | +| Metering/quotas | `crates/openfang-kernel/src/metering.rs` | 1-810 | +| Resource quotas | `crates/openfang-types/src/agent.rs` | 248-282 | +| Autonomous config | `crates/openfang-types/src/agent.rs` | 70-95 | + +--- + +## 5. Patterns Worth Adopting + +### High Priority + +**1. Session repair as a pre-flight check, not just a recovery mechanism** +OpenFang runs `validate_and_repair()` before EVERY LLM call, not just on resume. This is a defensive programming pattern that catches corruption from any source (compaction bugs, provider quirks, crash recovery). AGH should do the same -- validate event replay output before sending to the ACP agent. + +**2. Multi-strategy loop guard with graduated response** +The 4-verdict system (Allow/Warn/Block/CircuitBreak) is more nuanced than a simple iteration counter. 
Key innovations: +- **Outcome-aware detection**: tracking that identical calls produce identical results is far more useful than just counting call repetitions +- **Ping-pong detection**: A-B-A-B and A-B-C-A-B-C patterns evade simple per-call counting +- **Warning escalation**: warnings upgrade to blocks after `max_warnings_per_call`, preventing agents from ignoring warnings indefinitely +- **Poll tool exemptions**: status-checking tools get relaxed thresholds with backoff suggestions + +**3. Interim saves after every tool execution round** +This is critical for crash resilience. If the process dies mid-loop, the session history up to the last completed tool round is preserved. AGH should persist events after each tool execution, not just at loop end. + +**4. Synthetic error results for interrupted tool calls** +When a ToolUse has no matching ToolResult (crash/interrupt), OpenFang inserts `[Tool execution was interrupted or lost]` with `is_error: true`. This prevents LLM API validation errors and gives the model a signal that something went wrong. AGH needs this for ACP event replay. + +### Medium Priority + +**5. 8-category LLM error classification** +The pattern-matching classifier across 19+ providers is reusable. AGH should classify ACP agent errors into retryable vs. non-retryable categories, with provider-specific pattern tables for Claude Code, Codex, Gemini CLI, etc. + +**6. Ordered shutdown phases with observability** +The 10-phase `ShutdownCoordinator` with timing logs and WS broadcast is a clean pattern. AGH's daemon shutdown should follow a similar sequence: stop accepting -> drain in-flight sessions -> save state -> close stores -> exit. + +**7. Phantom action detection** +Detecting when the LLM claims to have performed an action without tool calls is a clever anti-hallucination guard. AGH could track "claimed completions" vs "actual tool invocations" and flag discrepancies. + +**8. 
Tool result sanitization (injection prevention)** +Stripping prompt injection markers from tool output before feeding back to the LLM is important for security. AGH should sanitize ACP event content, especially from agents that execute shell commands. + +### Lower Priority (But Worth Noting) + +**9. Context overflow recovery pipeline** +The 4-stage progressive recovery (moderate trim -> aggressive trim -> truncate tool results -> error) is better than a single emergency action. AGH should implement similar staged recovery for sessions approaching context limits. + +**10. Provider circuit breaker** +The `ProviderCooldown` with probe requests prevents request storms to failing LLM providers. AGH could use this pattern for ACP agent subprocess management -- if an agent keeps crashing, back off before respawning. + +**11. Heartbeat-driven crash recovery with idle agent detection** +The `never_active` grace period (agents that were spawned but never received a message are NOT flagged as unresponsive) prevents false crash-recovery loops. AGH should implement similar logic for session health monitoring. + +### What's Missing in OpenFang (Gaps AGH Can Fill) + +1. **No explicit `SessionStopReason` enum** -- stop reasons are scattered across error types, hook metadata, and code paths. AGH should have a single, canonical enum. + +2. **No cross-agent recursion depth tracking** -- if Agent A calls Agent B calls Agent A, only timeouts prevent infinite recursion. AGH should track delegation depth. + +3. **No session-level budget tracking** -- metering is per-agent-per-time-window. There's no "this session has spent $X" limit. AGH should support per-session cost caps. + +4. **No structured resume protocol** -- session repair is purely message-level. There's no "last known good state" checkpoint or resumption protocol. AGH can design a proper checkpoint system with the event store. + +5. 
**No user cancellation handling** -- there's no explicit `UserCancelled` stop reason or graceful cancellation of an in-flight agent loop (only SIGTERM-level shutdown). AGH should support per-session cancellation via context. diff --git a/.compozy/tasks/session-resilience/analysis_pi_mono.md b/.compozy/tasks/session-resilience/analysis_pi_mono.md new file mode 100644 index 000000000..7d2432f90 --- /dev/null +++ b/.compozy/tasks/session-resilience/analysis_pi_mono.md @@ -0,0 +1,375 @@ +# Pi-Mono: Session Resilience Analysis + +Pi-Mono (authored by Mario Zechner, aka "badlogic") is a TypeScript monorepo containing `pi-agent-core` (generic agent framework), `pi-ai` (LLM streaming library), and `pi-coding-agent` (the coding agent CLI). This analysis covers session resilience across all three layers. + +--- + +## 1. Stop Reason Taxonomy + +### Core StopReason Type (pi-ai layer) + +Defined in `packages/ai/src/types.ts:182`: + +```typescript +export type StopReason = "stop" | "length" | "toolUse" | "error" | "aborted"; +``` + +| StopReason | Meaning | When Produced | +|---|---|---| +| `"stop"` | Natural completion -- the model finished its response | LLM returns a normal stop signal | +| `"length"` | Max tokens reached -- output was truncated | Model hit `maxTokens` limit | +| `"toolUse"` | Tool call requested -- assistant wants to invoke a tool | Model emits one or more `toolCall` content blocks | +| `"error"` | Runtime/API error -- request failed | Network errors, rate limits, overloaded servers, context overflow, auth failures | +| `"aborted"` | User/system cancellation -- the stream was aborted | `AbortSignal` triggered (Ctrl+C, abort button, programmatic cancel) | + +### Stream Event Protocol (pi-ai layer) + +The stream event discriminator in `packages/ai/src/types.ts:237-249` splits terminal events into two categories: + +```typescript +| { type: "done"; reason: Extract<StopReason, "stop" | "length" | "toolUse">; message: AssistantMessage } +| { type: "error"; reason: Extract<StopReason, "aborted" | "error">; error: AssistantMessage } +``` + 
+This means the stream protocol already separates "success with variants" from "failure" at the type level. A `done` event with `reason: "length"` is still considered a success (truncated but usable). An `error` event always carries an `AssistantMessage` with `errorMessage` populated. + +### Agent-Level Stop Semantics (pi-agent-core layer) + +In `packages/agent/src/agent-loop.ts:194`, the agent loop checks stop reasons to decide whether to continue: + +```typescript +if (message.stopReason === "error" || message.stopReason === "aborted") { + await emit({ type: "turn_end", message, toolResults: [] }); + await emit({ type: "agent_end", messages: newMessages }); + return; // Terminate the loop +} +``` + +The loop only continues if `stopReason` is `"stop"`, `"length"`, or `"toolUse"`. For `"toolUse"`, it enters tool execution. For `"stop"` or `"length"` without tool calls, it checks for steering/follow-up messages before exiting. + +When the agent loop throws an unhandled error (not from the stream), the `Agent` class synthesizes a failure message in `packages/agent/src/agent.ts:459-474`: + +```typescript +const failureMessage = { + role: "assistant", + stopReason: aborted ? "aborted" : "error", + errorMessage: error instanceof Error ? error.message : String(error), + // ... +}; +``` + +### Session-Level Error Classification (pi-coding-agent layer) + +`AgentSession` in `packages/coding-agent/src/core/agent-session.ts` adds a higher-level classification on top of stop reasons: + +1. **Retryable errors** (`_isRetryableError`, line 2381): Errors matching patterns like `overloaded`, `rate_limit`, `429`, `500-504`, `timeout`, `connection_error`, etc. are automatically retried. + +2. **Context overflow errors** (`isContextOverflow` in `packages/ai/src/utils/overflow.ts`): A dedicated subsystem detects context window exceeded errors across 18+ provider-specific patterns. These are NOT retried -- instead they trigger automatic compaction. + +3. 
**Non-retryable errors**: All other errors (auth failures, malformed requests, etc.) are surfaced to the user. + +4. **User cancellation** (`"aborted"`): Skipped by both retry and compaction logic. + +### Print Mode Exit Codes + +In `packages/coding-agent/src/modes/print-mode.ts:111-123`, the exit code is derived from the stop reason: + +```typescript +if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") { + exitCode = 1; +} else { + // Success: exitCode = 0 +} +``` + +### What Pi-Mono Does NOT Have + +- **No "budget exceeded" stop reason**: There is no cost/budget limit enforcement. The system tracks `Usage.cost` per message but never enforces a cap. +- **No "max iterations" stop reason**: The agent loop has no iteration counter or max-turns limit in production code (only in tests). +- **No "loop detected" stop reason**: There is no cycle detection or loop guard at the framework level. +- **No "completed" vs "paused" distinction**: A `"stop"` reason means the model finished, but there is no semantic encoding of "task complete" vs "gave up" vs "waiting for input". + +--- + +## 2. Session Repair on Resume + +### Session Storage Format + +Sessions are stored as JSONL files (one JSON object per line). Each file begins with a `SessionHeader`: + +```typescript +interface SessionHeader { + type: "session"; + version?: number; // Currently version 3 + id: string; + timestamp: string; + cwd: string; + parentSession?: string; +} +``` + +Every subsequent line is a `SessionEntry` with `id` and `parentId` forming a tree structure (not a flat list). The `leafId` pointer tracks the current position in the tree. 
+ +### Resume Flow: `setSessionFile()` + +When resuming a session (`SessionManager.open()` or `SessionManager.continueRecent()`), the key repair logic is in `packages/coding-agent/src/core/session-manager.ts:695-723`: + +```typescript +setSessionFile(sessionFile: string): void { + this.sessionFile = resolve(sessionFile); + if (existsSync(this.sessionFile)) { + this.fileEntries = loadEntriesFromFile(this.sessionFile); + + // REPAIR: If file was empty or corrupted (no valid header), + // truncate and start fresh + if (this.fileEntries.length === 0) { + const explicitPath = this.sessionFile; + this.newSession(); + this.sessionFile = explicitPath; + this._rewriteFile(); + this.flushed = true; + return; + } + + // Extract header, apply migrations + const header = this.fileEntries.find(e => e.type === "session"); + this.sessionId = header?.id ?? randomUUID(); + + if (migrateToCurrentVersion(this.fileEntries)) { + this._rewriteFile(); // Persist migration results + } + + this._buildIndex(); + this.flushed = true; + } else { + // File doesn't exist -- create new session + const explicitPath = this.sessionFile; + this.newSession(); + this.sessionFile = explicitPath; + } +} +``` + +### Repair Checks Performed + +1. **Malformed line recovery** (`loadEntriesFromFile`, line 433-458): + - Parses each line independently with `JSON.parse()` inside a try/catch + - Malformed lines are silently skipped, so a line left truncated by a crash mid-write is dropped without preventing the rest of the session from loading + - Validates the first entry is a `SessionHeader` with a valid `id` field + - Returns empty array if header is missing/invalid + +2. **Empty/corrupted file recovery** (`setSessionFile`, line 699-706): + - If `loadEntriesFromFile` returns no entries, the file is treated as corrupted + - A fresh session is created and the file is overwritten + - The original file path is preserved (user's `--session` flag is honored) + +3. 
**Version migration** (`migrateToCurrentVersion`, line 261-271): + - v1 -> v2: Adds `id`/`parentId` tree structure to flat entries + - v2 -> v3: Renames `hookMessage` role to `custom` + - Migrations mutate entries in-place, then the file is rewritten + +4. **Orphan handling** (`getTree`, line 1070-1108): + - When building the tree, entries with broken `parentId` chains are treated as roots + - This handles partial writes where a child was written but the parent was not + +5. **Model/state restoration** (`createAgentSession` in `packages/coding-agent/src/core/sdk.ts:190-342`): + - Reads existing session context via `sessionManager.buildSessionContext()` + - Attempts to restore the model from the session's `model_change` entries + - If the model is no longer available (removed provider, expired auth), falls back to `findInitialModel()` + - Thinking level is restored from `thinking_level_change` entries, or defaults + - Messages are replayed into the agent via `agent.state.messages = existingSession.messages` + +6. **Lazy file creation** (`_persist`, line 796-814): + - Session files are not written until the first assistant message arrives + - This prevents clutter from sessions that never got a response (crash during first prompt) + - On resume after crash, this means partially-initialized sessions leave no trace + +### What Pi-Mono Does NOT Do on Resume + +- **No WAL or journaling**: The JSONL is append-only but has no write-ahead log. A crash mid-write can leave a partially-written last line, which is handled by the per-line try/catch but means the last entry may be lost. +- **No lock file for concurrent access**: No check for whether another process has the session open. +- **No integrity checksums**: No CRC or hash verification of entries. +- **No "dirty state" detection**: No mechanism to detect if tools were mid-execution when the crash occurred. Tool results that were never written are simply missing from the resumed session. 
+- **No pending-tool-call recovery**: If the agent crashed while executing a tool call, the resumed session will have the assistant message with `toolCall` blocks but no corresponding `toolResult` entries. The LLM simply sees missing tool results and works around it. + +--- + +## 3. Loop/Recursion Guards + +### The Notable Absence + +**Pi-Mono has no built-in loop detection, iteration limits, or recursion depth guards in production code.** The agent loop (`packages/agent/src/agent-loop.ts`) is an unbounded `while(true)` loop: + +```typescript +// Outer loop: continues when queued follow-up messages arrive +while (true) { + let hasMoreToolCalls = true; + + // Inner loop: process tool calls and steering messages + while (hasMoreToolCalls || pendingMessages.length > 0) { + // ... stream assistant, execute tools, check steering + } + + // Check for follow-up messages + const followUpMessages = (await config.getFollowUpMessages?.()) || []; + if (followUpMessages.length > 0) { + pendingMessages = followUpMessages; + continue; + } + break; +} +``` + +The only exits from this loop are: +1. **Error/abort**: `stopReason === "error" || stopReason === "aborted"` terminates immediately +2. **No tool calls + no steering + no follow-ups**: Natural exit when the model says "stop" and no queued work remains +3. **AbortSignal**: User cancellation via Ctrl+C + +### Guards That DO Exist (Indirect) + +1. **Context overflow as implicit iteration limit** (`_checkCompaction` in agent-session.ts:1739-1817): + - When context usage exceeds `contextWindow - reserveTokens` (default: context window minus 16,384 tokens), auto-compaction is triggered + - If compaction + retry fails once, `_overflowRecoveryAttempted` is set to `true` and a second overflow terminates the loop + - This effectively caps session length but not iteration count + +2. 
**Auto-retry cap** (agent-session.ts:2396-2472): + - Retryable errors have exponential backoff: `baseDelayMs * 2^(attempt-1)` (default: 2s, 4s, 8s) + - Max retries: 3 (configurable via `settings.retry.maxRetries`) + - Max delay cap: 60,000ms per retry + - After max retries, the error is surfaced to the user + +3. **`beforeToolCall` hook** (agent/src/types.ts:42-49): + ```typescript + interface BeforeToolCallResult { + block?: boolean; + reason?: string; + } + ``` + Extensions can block individual tool executions. This is the ONLY hook point for implementing custom loop guards -- an extension could count tool calls per session and block after a threshold. + +4. **Tool validation** (agent-loop.ts:479-522): + - Unknown tools produce an immediate error result (not a loop terminator) + - Schema validation failures produce error results + - These feed back into the LLM as error tool results, which may cause the model to retry the same tool -- potentially creating a loop + +5. **Abort mechanism** (agent.ts:285-287): + - `Agent.abort()` triggers the abort controller + - The signal is threaded through to stream functions and tool execution + - This is the user's manual circuit breaker + +### What Is NOT Guarded + +- **No max-turns/max-iterations limit**: An agent can loop indefinitely through tool calls as long as the context window holds +- **No tool-call cycle detection**: If the model calls `read -> edit -> read -> edit` in a cycle, nothing detects or breaks it +- **No cost budget enforcement**: Token costs accumulate without any cap +- **No wall-clock timeout**: No maximum runtime for a session or prompt +- **No recursion depth tracking**: Subagent spawning (via extensions) has no depth limit +- **No repeated-failure detection**: If the same tool fails 100 times with the same error, the loop continues (the model will eventually run into context overflow) + +### Test-Only Guards + +In test files, manual limits are used to prevent infinite loops: + +```typescript +// 
packages/ai/test/stream.test.ts:286 +const maxTurns = 5; // Prevent infinite loops + +// packages/coding-agent/test/sdk-codex-cache-probe-tool-loop.ts:44 +const MAX_TURNS = 50; +``` + +These are NOT present in production code. + +--- + +## 4. Key Code References + +### Stop Reason / Error Handling + +| File | Lines | What | +|---|---|---| +| `packages/ai/src/types.ts` | 182 | `StopReason` type definition: `"stop" \| "length" \| "toolUse" \| "error" \| "aborted"` | +| `packages/ai/src/types.ts` | 237-249 | `AssistantMessageEvent` stream protocol with `done`/`error` discriminator | +| `packages/ai/src/utils/overflow.ts` | 28-131 | `isContextOverflow()` with 18 provider-specific regex patterns | +| `packages/agent/src/agent-loop.ts` | 155-232 | `runLoop()` -- the unbounded while(true) agent loop | +| `packages/agent/src/agent-loop.ts` | 194-198 | Error/abort termination check | +| `packages/agent/src/agent.ts` | 459-474 | Synthetic failure message creation for unhandled errors | +| `packages/coding-agent/src/core/agent-session.ts` | 112-129 | `AgentSessionEvent` extensions (compaction, retry events) | +| `packages/coding-agent/src/core/agent-session.ts` | 2381-2393 | `_isRetryableError()` with regex pattern matching | +| `packages/coding-agent/src/core/agent-session.ts` | 2396-2472 | `_handleRetryableError()` with exponential backoff | +| `packages/coding-agent/src/modes/print-mode.ts` | 111-123 | Exit code derivation from stop reason | + +### Session Persistence / Resume + +| File | Lines | What | +|---|---|---| +| `packages/coding-agent/src/core/session-manager.ts` | 29 | `CURRENT_SESSION_VERSION = 3` | +| `packages/coding-agent/src/core/session-manager.ts` | 433-458 | `loadEntriesFromFile()` with malformed-line recovery | +| `packages/coding-agent/src/core/session-manager.ts` | 695-723 | `setSessionFile()` -- repair logic for corrupted/empty files | +| `packages/coding-agent/src/core/session-manager.ts` | 210-271 | Migration pipeline (v1->v2->v3) | +| 
`packages/coding-agent/src/core/session-manager.ts` | 308-417 | `buildSessionContext()` -- tree traversal for context reconstruction | +| `packages/coding-agent/src/core/session-manager.ts` | 796-814 | `_persist()` -- lazy write with deferred-until-assistant-message guard | +| `packages/coding-agent/src/core/sdk.ts` | 169-364 | `createAgentSession()` factory with model/state restoration | + +### Auto-Compaction (Implicit Loop Guard) + +| File | Lines | What | +|---|---|---| +| `packages/coding-agent/src/core/agent-session.ts` | 1739-1817 | `_checkCompaction()` -- overflow and threshold detection | +| `packages/coding-agent/src/core/agent-session.ts` | 1822-1900 | `_runAutoCompaction()` -- compaction execution with extension hooks | +| `packages/coding-agent/src/core/compaction/compaction.ts` | 219-222 | `shouldCompact()` -- threshold calculation | +| `packages/coding-agent/src/core/settings-manager.ts` | 7-11 | `CompactionSettings` interface (enabled, reserveTokens, keepRecentTokens) | +| `packages/coding-agent/src/core/settings-manager.ts` | 18-23 | `RetrySettings` interface (enabled, maxRetries, baseDelayMs, maxDelayMs) | + +### Tool Blocking (Extension Hook Point) + +| File | Lines | What | +|---|---|---| +| `packages/agent/src/types.ts` | 42-49 | `BeforeToolCallResult` with `block` flag | +| `packages/agent/src/agent-loop.ts` | 491-508 | `beforeToolCall` hook invocation in `prepareToolCall()` | + +--- + +## 5. Patterns Worth Adopting + +### Adopt: Stream-Level Error Encoding + +Pi-Mono's principle that **failures must be encoded in the stream, not thrown** (`packages/ai/src/types.ts:120-128`) is excellent. This ensures that every agent loop iteration produces a well-formed `AssistantMessage` with a classifiable `stopReason`, regardless of whether the LLM call succeeded. AGH should ensure that ACP driver failures always produce a structured event rather than raw errors. 
+ +### Adopt: Provider-Agnostic Overflow Detection + +The `isContextOverflow()` function with 18 provider-specific regex patterns is battle-tested infrastructure. AGH can port this pattern to detect context overflow across different ACP-compatible agents, translating provider-specific error strings into a canonical `stop_reason_overflow` enum value. + +### Adopt: Retryable Error Classification via Regex + +The single regex in `_isRetryableError()` that matches `overloaded|rate_limit|429|500|502|503|504|timeout|connection_error|...` is pragmatic and effective. AGH should have a similar classifier in its session state machine, but should make the patterns configurable per agent driver. + +### Adopt: JSONL Append-Only with Malformed-Line Recovery + +The JSONL format with per-line try/catch parsing is crash-resilient by design. A partial write corrupts at most one entry. AGH's SQLite approach is more robust overall, but AGH should ensure its event store handles partial-write corruption gracefully (SQLite's WAL already provides this, but the application should verify it). + +### Adopt: Deferred File Creation Until First Assistant Message + +Pi-Mono's pattern of not writing the session file until the first assistant message arrives prevents file clutter from failed/cancelled sessions. AGH should adopt this for session DB files -- don't create the per-session SQLite database until the first ACP event is received. + +### Adopt with Enhancement: Auto-Compaction Overflow Guard + +The `_overflowRecoveryAttempted` boolean that prevents infinite compact-and-retry loops is good but primitive. AGH should adopt the concept (compaction as implicit loop guard) but make it more explicit with a configurable max-compaction-retries setting. + +### DO NOT Adopt: Unbounded Agent Loop + +Pi-Mono's unbounded `while(true)` agent loop with no iteration limit is its most significant resilience gap. AGH MUST add: + +1. 
**Max turns per prompt** -- a hard cap on the number of LLM calls per `session.prompt()` invocation (e.g., 200 turns). Configurable per-session. +2. **Max tool calls per turn** -- a cap on tool calls within a single assistant response (most models already limit this, but a harness-side guard adds defense-in-depth). +3. **Wall-clock timeout per prompt** -- a maximum runtime for a single prompt execution (e.g., 30 minutes). +4. **Cost budget per session** -- a dollar-amount cap that terminates the session when exceeded. +5. **Repeated-failure circuit breaker** -- if the same tool fails N times consecutively with the same error pattern, break the loop. + +### DO NOT Adopt: Missing Tool Result on Resume + +Pi-Mono's resume behavior silently drops tool results that were mid-execution at crash time. The resumed session has assistant messages with `toolCall` blocks but no `toolResult` entries, leaving the LLM to infer what happened. AGH should instead inject synthetic "tool execution was interrupted by session crash" tool results on resume. + +### Consider: Tree-Based Session History + +Pi-Mono's tree structure (id/parentId on every entry) enables branching without duplicating files. This is elegant for interactive use but may be overengineered for AGH's daemon model. However, the concept of branching from a past point in the conversation (branch summarization, `branchWithSummary()`) is worth considering for AGH's session fork feature. 
diff --git a/.compozy/tasks/session-resilience/task_01.md b/.compozy/tasks/session-resilience/task_01.md new file mode 100644 index 000000000..4b362b9d2 --- /dev/null +++ b/.compozy/tasks/session-resilience/task_01.md @@ -0,0 +1,86 @@ +--- +status: pending +title: StopReason + StopCause types +type: backend +complexity: medium +dependencies: [] +--- + +# Task 01: StopReason + StopCause types + +## Overview + +Define the foundational types for session resilience: a `StopReason` enum in `internal/store` (co-located with `SessionMeta` to avoid import cycles) and a `StopCause` enum in `internal/session` that explicitly signals why a stop was requested. Extend `SessionMeta`, `Session`, and `SessionInfo` with stop reason fields. This task creates the data model that all subsequent tasks build on. + + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST define `StopReason` as a string type with 10 constants in `internal/store/types.go` +- MUST define `ValidStopReason()` function that returns true for valid enum members +- MUST add `StopReason *StopReason` and `StopDetail string` fields to `SessionMeta` with JSON tags +- MUST update `SessionMeta.Validate()` to reject invalid StopReason values (when non-nil) +- MUST define `StopCause` as an int type with 6 constants in `internal/session/stop_cause.go` +- MUST add `stopCause`, `stopReason`, `stopDetail` fields to `Session` struct +- MUST add `StopReason` and `StopDetail` fields to `session.SessionInfo` +- MUST update `Session.Info()` to include stop reason fields in the snapshot +- MUST update `Session.Meta()` to include stop reason fields in the meta output +- MUST update `ReadSessionMeta()` and `WriteSessionMeta()` to handle 
new fields + + +## Subtasks +- [ ] 1.1 Define `StopReason` type, 10 constants, and `ValidStopReason()` in `internal/store/types.go` +- [ ] 1.2 Add `StopReason`/`StopDetail` fields to `SessionMeta`, update `Validate()` +- [ ] 1.3 Define `StopCause` type and 6 constants in new file `internal/session/stop_cause.go` +- [ ] 1.4 Add stop fields to `Session` struct and update `Info()` and `Meta()` methods +- [ ] 1.5 Verify `ReadSessionMeta`/`WriteSessionMeta` round-trip with new fields +- [ ] 1.6 Write unit tests for all new types and validation + +## Implementation Details + +See TechSpec "Core Interfaces" and "Data Models" sections for exact type definitions and constant values. + +### Relevant Files +- `internal/store/types.go` — `SessionMeta` struct (line 287), `SessionInfo` struct (line 82), `Validate()` methods +- `internal/store/meta.go` — `ReadSessionMeta()`, `WriteSessionMeta()` for JSON persistence +- `internal/session/session.go` — `Session` struct (line 59), `SessionInfo` (line 45), `Info()` (line 86), `Meta()` (line 354) + +### Dependent Files +- `internal/store/globaldb/global_db_session.go` — will need schema updates (task 03) +- `internal/session/manager_lifecycle.go` — will use StopCause for classification (task 02) +- `internal/session/query.go` — will map StopReason from meta (task 03) + +### Related ADRs +- [ADR-001: Canonical StopReason Enum on SessionMeta](adrs/adr-001.md) — Type ownership in `internal/store`, explicit StopCause mechanism + +## Deliverables +- `StopReason` type with 10 constants and `ValidStopReason()` in `internal/store/types.go` +- `StopCause` type with 6 constants in `internal/session/stop_cause.go` +- Extended `SessionMeta`, `Session`, `SessionInfo` with stop reason fields +- Updated `Info()`, `Meta()`, `Validate()` methods +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] All 10 StopReason constants pass `ValidStopReason()` + - [ ] Empty string and arbitrary strings fail `ValidStopReason()` + - [ 
] `SessionMeta.Validate()` passes when StopReason is nil + - [ ] `SessionMeta.Validate()` passes when StopReason is valid + - [ ] `SessionMeta.Validate()` fails when StopReason is an invalid string + - [ ] `Session.Info()` includes StopReason and StopDetail in snapshot + - [ ] `Session.Meta()` includes StopReason and StopDetail in output + - [ ] `ReadSessionMeta`/`WriteSessionMeta` round-trip preserves StopReason and StopDetail + - [ ] `ReadSessionMeta` of legacy meta without StopReason fields succeeds (nil StopReason) +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `make verify` passes +- `StopReason` type usable from both `internal/store` and `internal/session` without import cycles diff --git a/.compozy/tasks/session-resilience/task_02.md b/.compozy/tasks/session-resilience/task_02.md new file mode 100644 index 000000000..1eb7128e6 --- /dev/null +++ b/.compozy/tasks/session-resilience/task_02.md @@ -0,0 +1,97 @@ +--- +status: pending +title: Stop classification + cause propagation +type: backend +complexity: medium +dependencies: + - task_01 +--- + +# Task 02: Stop classification + cause propagation + +## Overview + +Implement the stop reason classification logic in `finalizeStopped()` and propagate `StopCause` through all stop initiation points: `Stop()`, `handleProcessExit()`, and daemon shutdown. After this task, every session stop produces a classified `StopReason` persisted to meta.json and available in `SessionInfo`. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST implement `classifyStopReason(cause StopCause, waitErr error, detail string) (store.StopReason, string)` in `internal/session` +- MUST wire `classifyStopReason()` into `finalizeStopped()` before event recording +- MUST set `StopCause` explicitly at each stop initiation point — no `ctx.Err()` inference +- MUST propagate `CauseUserRequested` from `Manager.Stop()` +- MUST propagate `CauseShutdown` from `daemon.stopSessions()` +- MUST propagate `CauseCompleted` from `handleProcessExit()` when process exits cleanly without stop request +- MUST propagate `CauseProcessExited` from `handleProcessExit()` when process exits unexpectedly +- MUST persist classified StopReason to SessionMeta via `writeMeta()` +- MUST include StopReason in the `session_stopped` event payload +- MUST add `stopWasRequested()` or equivalent to Session for clean exit vs crash distinction + + +## Subtasks +- [ ] 2.1 Implement `classifyStopReason()` function with deterministic mapping from StopCause + waitErr +- [ ] 2.2 Wire classification into `finalizeStopped()` — set session.stopReason/stopDetail, write meta, include in stop event +- [ ] 2.3 Modify `Stop()` to set `CauseUserRequested` on the session before proceeding +- [ ] 2.4 Modify `handleProcessExit()` to set `CauseCompleted` or `CauseProcessExited` based on waitErr and stop-request state +- [ ] 2.5 Modify `daemon.stopSessions()` to set `CauseShutdown` on each session before calling Stop +- [ ] 2.6 Write unit and integration tests for classification and cause propagation + +## Implementation Details + +See TechSpec "Stop Reason Classification Logic" and "Stop Cause Propagation" sections for 
the classification switch and propagation table. + +The key design principle: `StopCause` is set by the code path that initiates the stop, BEFORE `finalizeStopped()` runs. `finalizeStopped()` reads the cause and maps it deterministically. No ambiguity. + +### Relevant Files +- `internal/session/manager_lifecycle.go` — `finalizeStopped()` (line 317), `Stop()` (line 128), `handleProcessExit()` (line 304), `watchProcess()` (line 285) +- `internal/session/session.go` — `Session` struct, `prepareStop()` (line 285) +- `internal/daemon/daemon.go` — `stopSessions()` (line 465), shutdown sequence (line 369) + +### Dependent Files +- `internal/store/meta.go` — `WriteSessionMeta()` persists the classified reason +- `internal/observe/observer.go` — `OnSessionStopped()` will read StopReason (task 03) +- `internal/session/manager_lifecycle.go` — Resume repair will use classified StopReason (task 04) + +### Related ADRs +- [ADR-001: Canonical StopReason Enum on SessionMeta](adrs/adr-001.md) — Classification uses explicit StopCause, not ctx.Err() inference + +## Deliverables +- `classifyStopReason()` function in `internal/session` +- StopCause propagation in Stop(), handleProcessExit(), daemon.stopSessions() +- StopReason persisted in meta.json and session_stopped event +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for stop flows **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `classifyStopReason(CauseShutdown, nil, "")` → `StopShutdown` + - [ ] `classifyStopReason(CauseShutdown, someErr, "")` → `StopShutdown` (shutdown wins) + - [ ] `classifyStopReason(CauseUserRequested, nil, "")` → `StopUserCanceled` + - [ ] `classifyStopReason(CauseUserRequested, nil, "max_iterations")` → `StopMaxIterations` + - [ ] `classifyStopReason(CauseUserRequested, nil, "loop_detected")` → `StopLoopDetected` + - [ ] `classifyStopReason(CauseUserRequested, nil, "budget_exceeded")` → `StopBudgetExceeded` + - [ ] `classifyStopReason(CauseProcessExited, waitErr, "")` → 
`StopAgentCrashed` + - [ ] `classifyStopReason(CauseProcessExited, nil, "")` → `StopError` + - [ ] `classifyStopReason(CauseCompleted, nil, "")` → `StopCompleted` + - [ ] `classifyStopReason(CauseHookDenied, nil, "reason")` → `StopHookStopped` + - [ ] `classifyStopReason(CauseNone, waitErr, "")` → `StopError` (fallback) + - [ ] `classifyStopReason(CauseNone, nil, "")` → `StopCompleted` (fallback) +- Integration tests: + - [ ] Create session → Stop() → verify meta.json has `stop_reason: "user_canceled"` + - [ ] Create session → kill subprocess → verify meta.json has `stop_reason: "agent_crashed"` + - [ ] Create session → daemon shutdown → verify meta.json has `stop_reason: "shutdown"` +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `make verify` passes +- Every session stop path produces a classified StopReason +- StopReason persisted in meta.json for all stop scenarios diff --git a/.compozy/tasks/session-resilience/task_03.md b/.compozy/tasks/session-resilience/task_03.md new file mode 100644 index 000000000..d0e1a39fd --- /dev/null +++ b/.compozy/tasks/session-resilience/task_03.md @@ -0,0 +1,101 @@ +--- +status: pending +title: Data layer propagation +type: backend +complexity: medium +dependencies: + - task_01 +--- + +# Task 03: Data layer propagation + +## Overview + +Propagate `StopReason` and `StopDetail` through the entire data stack: global DB schema, query functions, API contract types, conversions, and observer. After this task, stop reasons are stored in SQLite, queryable via API, and visible in session list/detail responses. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST add `stop_reason TEXT` and `stop_detail TEXT` columns to the `sessions` table +- MUST update `RegisterSession()` to include stop_reason/stop_detail in UPSERT +- MUST update `UpdateSessionState()` to conditionally include stop_reason/stop_detail in UPDATE +- MUST update `ReconcileSessions()` to handle new columns in upsert +- MUST update `scanSessionInfo()` to scan the 2 new columns (11 columns total) +- MUST add StopReason/StopDetail to `store.SessionInfo` (global DB row type, distinct from `session.SessionInfo`) +- MUST update `sessionInfoFromMeta()` in `query.go` to map StopReason from meta +- MUST add `StopReason` and `StopDetail` fields to `contract.SessionPayload` +- MUST update `SessionPayloadFromInfo()` in `conversions.go` to include stop reason fields +- MUST update `Observer.OnSessionStopped()` to pass StopReason in `SessionStateUpdate` +- MUST update `SessionStateUpdate` to include StopReason/StopDetail fields +- MUST clarify that `contract.SessionPayload.StopReason` is session-level (distinct from existing `AgentEventPayload.StopReason` which is ACP event-level) + + +## Subtasks +- [ ] 3.1 Add columns to sessions table schema and write migration SQL +- [ ] 3.2 Update `store.SessionInfo` with StopReason/StopDetail, update `SessionStateUpdate` +- [ ] 3.3 Update `RegisterSession`, `UpdateSessionState`, `ReconcileSessions`, `scanSessionInfo` +- [ ] 3.4 Update `sessionInfoFromMeta()` in `query.go` to map stop reason fields +- [ ] 3.5 Add fields to `contract.SessionPayload`, update `SessionPayloadFromInfo()` +- [ ] 3.6 Update `Observer.OnSessionStopped()` to pass stop reason in state update +- [ ] 3.7 
Write unit tests for all DB operations, conversions, and observer updates + +## Implementation Details + +See TechSpec "Data Models" section for field definitions and "API Endpoints" section for response format. + +Note: `contract.go` already has a `StopReason` field on `AgentEventPayload` (line 95) — this is the ACP-level stop reason from agent events, NOT the session-level one. The new `StopReason` on `SessionPayload` is a different field representing why the session stopped. + +### Relevant Files +- `internal/store/types.go` — `SessionInfo` struct (line 82), `SessionStateUpdate` (line 124) +- `internal/store/globaldb/global_db_session.go` — `RegisterSession` (line 12), `UpdateSessionState` (line 35), `ReconcileSessions` (line 125), `scanSessionInfo` (line 252) +- `internal/session/query.go` — `sessionInfoFromMeta()` (line 212) +- `internal/api/contract/contract.go` — `SessionPayload` (line 25) +- `internal/api/core/conversions.go` — `SessionPayloadFromInfo()` (line 18) +- `internal/observe/observer.go` — `OnSessionStopped()` (line 233) + +### Dependent Files +- `internal/store/globaldb/global_db_session_test.go` — test updates for new columns +- `internal/observe/observer_test.go` — test updates for stop reason propagation +- HTTP/UDS handlers that return session data — will automatically include new fields via contract types + +### Related ADRs +- [ADR-001: Canonical StopReason Enum on SessionMeta](adrs/adr-001.md) — Type lives in `internal/store` + +## Deliverables +- Migration SQL adding `stop_reason` and `stop_detail` columns +- Updated global DB functions handling new columns +- Updated query, contract, conversion, and observer code +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `RegisterSession` with StopReason=nil stores NULL + - [ ] `RegisterSession` with valid StopReason stores the value + - [ ] `UpdateSessionState` with StopReason updates the column + - [ ] `UpdateSessionState` without StopReason leaves column 
unchanged + - [ ] `scanSessionInfo` correctly reads 11 columns including stop_reason/stop_detail + - [ ] `scanSessionInfo` handles NULL stop_reason gracefully + - [ ] `ReconcileSessions` upserts sessions with stop_reason + - [ ] `sessionInfoFromMeta()` maps StopReason and StopDetail from meta + - [ ] `sessionInfoFromMeta()` handles nil StopReason (legacy meta) + - [ ] `SessionPayloadFromInfo()` includes stop_reason and stop_detail in output + - [ ] `SessionPayloadFromInfo()` omits stop_reason when empty + - [ ] Observer.OnSessionStopped passes StopReason in SessionStateUpdate +- Integration tests: + - [ ] Create session → stop → query global DB → verify stop_reason column value + - [ ] GET /api/sessions/:id returns stop_reason in JSON response +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `make verify` passes +- Stop reasons visible in API responses for stopped sessions +- Global DB stores and queries stop reasons correctly diff --git a/.compozy/tasks/session-resilience/task_04.md b/.compozy/tasks/session-resilience/task_04.md new file mode 100644 index 000000000..eadbbb0a0 --- /dev/null +++ b/.compozy/tasks/session-resilience/task_04.md @@ -0,0 +1,112 @@ +--- +status: pending +title: Resume repair + config + verification +type: backend +complexity: high +dependencies: + - task_02 + - task_03 +--- + +# Task 04: Resume repair + config + verification + +## Overview + +Implement the infrastructure-level repair pipeline in `Resume()` that validates session state before starting the ACP agent. Add `SessionLimitsConfig` with a `timeout` field. Write end-to-end integration tests verifying the complete stop reason + resume repair flow. This is the final task that ties all pieces together and ensures the system works end-to-end. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST implement `classifyPreviousStop(meta)` that detects crashed sessions from meta state +- MUST implement `validateInfrastructure(meta)` returning independent errors per check +- MUST check: workspace dir exists, agent definition resolvable, event store file exists and non-zero, meta fields valid +- MUST insert repair pipeline into `Resume()` after `ReadSessionMeta()` and before `resolveResumeWorkspace()` +- MUST persist crash classification to meta.json (update StopReason/StopDetail) +- MUST prepare hook seams for `session.pre_resume` and `session.post_resume` as no-op function calls +- MUST add `SessionLimitsConfig` struct with `Timeout` field to `internal/config` +- MUST add TOML parsing and merge logic for `[session.limits]` config section +- MUST write end-to-end integration tests covering the full create → stop → resume flow +- SHOULD log structured events for crash classification and validation failures + + +## Subtasks +- [ ] 4.1 Implement `classifyPreviousStop(meta)` — map meta.State to StopReason for crashed sessions +- [ ] 4.2 Implement `validateInfrastructure(meta)` — 4 independent checks returning []error +- [ ] 4.3 Insert repair pipeline into `Resume()` flow with crash classification and meta persistence +- [ ] 4.4 Add hook seams as no-op functions (prepared for future `session.pre_resume`/`session.post_resume`) +- [ ] 4.5 Add `SessionLimitsConfig` with `Timeout` to config, TOML parsing, and merge logic +- [ ] 4.6 Write end-to-end integration tests for stop reason + resume repair flows +- [ ] 4.7 Run `make verify` and fix any issues + +## Implementation Details + +See TechSpec "Resume Repair 
Pipeline" section for the step-by-step validation flow and "Phase 2: Loop/Recursion Guards (Deferred)" for the hook seam design. + +The repair pipeline runs BEFORE the ACP agent starts. Each infrastructure check is independent — one failure does not block others from running. All errors are collected and returned as a combined diagnostic. + +Hook seams are plain function calls that do nothing in Phase 1. When the hooks platform is ready, they become typed dispatch calls. This avoids coupling Phase 1 to hooks availability. + +### Relevant Files +- `internal/session/manager_lifecycle.go` — `Resume()` (line 170), where the repair pipeline inserts +- `internal/store/meta.go` — `ReadSessionMeta()`, `WriteSessionMeta()` for crash classification persistence +- `internal/config/config.go` — existing config structs (LimitsConfig at line 41) +- `internal/config/merge.go` — overlay merge patterns for new config section + +### Dependent Files +- `internal/session/manager_lifecycle.go` — Resume() modified to include repair pipeline +- `internal/config/config.go` — new SessionLimitsConfig struct +- `internal/config/merge.go` — new merge logic for session limits + +### Related ADRs +- [ADR-003: Infrastructure-Level Repair on Resume](adrs/adr-003.md) — Scope of repair checks, hook seam design +- [ADR-005: Defer Loop Guards to Phase 2](adrs/adr-005.md) — Hook seams prepared but not wired + +## Deliverables +- `classifyPreviousStop()` and `validateInfrastructure()` functions +- Resume repair pipeline integrated into `Resume()` +- Hook seams for future session.pre_resume/post_resume +- `SessionLimitsConfig` with TOML parsing and merge +- Unit tests with 80%+ coverage **(REQUIRED)** +- End-to-end integration tests **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `classifyPreviousStop` with meta.State="active" → StopReason="agent_crashed" + - [ ] `classifyPreviousStop` with meta.State="stopping" → StopReason="agent_crashed" + - [ ] `classifyPreviousStop` with meta.State="starting" → 
StopReason="error" + - [ ] `classifyPreviousStop` with meta.State="stopped" + existing StopReason → preserved + - [ ] `classifyPreviousStop` with meta.State="stopped" + nil StopReason → no change + - [ ] `validateInfrastructure` with valid workspace/agent/store/meta → no errors + - [ ] `validateInfrastructure` with missing workspace dir → error with path + - [ ] `validateInfrastructure` with unresolvable agent → error with agent name + - [ ] `validateInfrastructure` with missing event store → error with DB path + - [ ] `validateInfrastructure` with zero-size event store → error + - [ ] `validateInfrastructure` with empty meta.ID → error + - [ ] `validateInfrastructure` with multiple failures → all errors collected + - [ ] SessionLimitsConfig TOML parsing with valid timeout + - [ ] SessionLimitsConfig merge with overlay +- Integration tests: + - [ ] Create → explicit Stop → verify StopReason="user_canceled" in meta + global DB + API + - [ ] Create → kill subprocess → verify StopReason="agent_crashed" in meta + global DB + API + - [ ] Create → write meta State="active" (simulate crash) → Resume → verify crash classified + - [ ] Create → delete workspace dir → Resume → verify descriptive error + - [ ] Create → remove agent from config → Resume → verify descriptive error + - [ ] Create → truncate event store → Resume → verify descriptive error + - [ ] Create → crash → Resume → verify session activates successfully after classification + - [ ] Full flow: create → stop → resume → stop → verify both stops have correct StopReasons +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `make verify` passes +- Crashed sessions are correctly classified on resume +- Infrastructure validation catches all 4 failure modes with descriptive errors +- Full create → stop → resume flow works end-to-end with correct StopReasons throughout diff --git a/AGENTS.md b/AGENTS.md index 88b28a6a4..96e1e516b 100644 --- 
a/AGENTS.md +++ b/AGENTS.md @@ -162,3 +162,5 @@ The `.old_project/` directory contains the previous AGH implementation (78K+ LOC - `TestMain` for expensive one-time setup/teardown - Use **real dependencies** (real SQLite via `t.TempDir()`, mock ACP server as subprocess) - Keep fast enough for CI (~30s max per package) + +NEVER COMMIT ai-docs/ TO THE REPO diff --git a/internal/api/contract/contract.go b/internal/api/contract/contract.go index faadc5a32..a8d92c4da 100644 --- a/internal/api/contract/contract.go +++ b/internal/api/contract/contract.go @@ -4,6 +4,8 @@ package contract import ( "encoding/json" "time" + + hookspkg "github.com/pedronauck/agh/internal/hooks" ) // CreateSessionRequest is the shared session creation request payload. @@ -138,6 +140,70 @@ type ObserveHealthPayload struct { Version string `json:"version"` } +// HookCatalogQuery captures the shared resolved-hook catalog filters. +type HookCatalogQuery struct { + Workspace string + Agent string + Event string + Source string + Mode string +} + +// HookRunsQuery captures the shared hook execution history filters. +type HookRunsQuery struct { + Session string + Event string + Outcome string + Since string + Last int +} + +// HookEventsQuery captures the shared hook taxonomy filters. +type HookEventsQuery struct { + Family string + SyncOnly bool +} + +// HookCatalogPayload is the shared resolved-hook catalog response payload. 
+type HookCatalogPayload struct { + Order int `json:"order"` + Name string `json:"name"` + Event string `json:"event"` + Source string `json:"source"` + SkillSource string `json:"skill_source,omitempty"` + Mode string `json:"mode"` + Required bool `json:"required"` + Priority int `json:"priority"` + TimeoutMS int64 `json:"timeout_ms,omitempty"` + ExecutorKind string `json:"executor_kind,omitempty"` + Matcher hookspkg.HookMatcher `json:"matcher,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +// HookRunPayload is the shared hook execution history response payload. +type HookRunPayload struct { + HookName string `json:"hook_name"` + Event string `json:"event"` + Source string `json:"source"` + Mode string `json:"mode"` + DurationMS int64 `json:"duration_ms"` + Outcome string `json:"outcome"` + DispatchDepth int `json:"dispatch_depth"` + PatchApplied json.RawMessage `json:"patch_applied,omitempty"` + Error string `json:"error,omitempty"` + Required bool `json:"required,omitempty"` + RecordedAt time.Time `json:"recorded_at"` +} + +// HookEventPayload is the shared hook taxonomy response payload. +type HookEventPayload struct { + Event string `json:"event"` + Family string `json:"family"` + SyncEligible bool `json:"sync_eligible"` + PayloadSchema string `json:"payload_schema"` + PatchSchema string `json:"patch_schema,omitempty"` +} + // DaemonStatusPayload is the shared daemon status response payload. type DaemonStatusPayload struct { Status string `json:"status"` diff --git a/internal/api/core/handlers.go b/internal/api/core/handlers.go index d103c2abd..3bbfe8449 100644 --- a/internal/api/core/handlers.go +++ b/internal/api/core/handlers.go @@ -468,6 +468,76 @@ func (h *BaseHandlers) GetAgent(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"agent": AgentPayloadFromDef(agent)}) } +// HookCatalog returns the resolved hook catalog for the supplied workspace and agent view. 
+func (h *BaseHandlers) HookCatalog(c *gin.Context) { + filter, err := ParseHookCatalogFilter(c) + if err != nil { + h.respondError(c, http.StatusBadRequest, err) + return + } + + if workspaceRef := strings.TrimSpace(c.Query("workspace")); workspaceRef != "" { + resolved, err := h.Workspaces.Resolve(c.Request.Context(), workspaceRef) + if err != nil { + h.respondError(c, StatusForWorkspaceError(err), err) + return + } + filter.WorkspaceID = strings.TrimSpace(resolved.ID) + filter.WorkspaceRoot = strings.TrimSpace(resolved.RootDir) + } + + entries, err := h.Observer.QueryHookCatalog(c.Request.Context(), filter) + if err != nil { + h.respondError(c, http.StatusInternalServerError, err) + return + } + + c.JSON(http.StatusOK, gin.H{"hooks": HookCatalogPayloadsFromEntries(entries)}) +} + +// HookRuns returns persisted hook execution history for a session. +func (h *BaseHandlers) HookRuns(c *gin.Context) { + query, err := ParseHookRunsQuery(c) + if err != nil { + h.respondError(c, http.StatusBadRequest, err) + return + } + if strings.TrimSpace(query.SessionID) == "" { + h.respondError(c, http.StatusBadRequest, fmt.Errorf("%s: session query is required", h.transportName())) + return + } + + if _, err := h.Sessions.Status(c.Request.Context(), query.SessionID); err != nil { + h.respondError(c, StatusForSessionError(err), err) + return + } + + records, err := h.Observer.QueryHookRuns(c.Request.Context(), query) + if err != nil { + h.respondError(c, http.StatusInternalServerError, err) + return + } + + c.JSON(http.StatusOK, gin.H{"runs": HookRunPayloadsFromRecords(records)}) +} + +// HookEvents returns the supported hook taxonomy metadata. 
+func (h *BaseHandlers) HookEvents(c *gin.Context) { + filter, err := ParseHookEventFilter(c) + if err != nil { + h.respondError(c, http.StatusBadRequest, err) + return + } + + events, err := h.Observer.QueryHookEvents(c.Request.Context(), filter) + if err != nil { + h.respondError(c, http.StatusInternalServerError, err) + return + } + + c.JSON(http.StatusOK, gin.H{"events": HookEventPayloadsFromDescriptors(events)}) +} + // ObserveEvents returns the filtered observe event list. func (h *BaseHandlers) ObserveEvents(c *gin.Context) { query, err := ParseObserveEventQuery(c) diff --git a/internal/api/core/interfaces.go b/internal/api/core/interfaces.go index d797ff39b..000559da0 100644 --- a/internal/api/core/interfaces.go +++ b/internal/api/core/interfaces.go @@ -7,6 +7,7 @@ import ( "github.com/pedronauck/agh/internal/acp" aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/observe" "github.com/pedronauck/agh/internal/session" "github.com/pedronauck/agh/internal/skills" @@ -38,6 +39,9 @@ type SessionManager interface { // Observer is the observability surface exposed by API transports. 
type Observer interface { QueryEvents(ctx context.Context, query store.EventSummaryQuery) ([]store.EventSummary, error) + QueryHookCatalog(ctx context.Context, filter hookspkg.CatalogFilter) ([]hookspkg.CatalogEntry, error) + QueryHookRuns(ctx context.Context, query store.HookRunQuery) ([]hookspkg.HookRunRecord, error) + QueryHookEvents(ctx context.Context, filter hookspkg.EventFilter) ([]hookspkg.EventDescriptor, error) Health(ctx context.Context) (observe.Health, error) } diff --git a/internal/api/core/parsers.go b/internal/api/core/parsers.go index 4bf989443..cbfc896d5 100644 --- a/internal/api/core/parsers.go +++ b/internal/api/core/parsers.go @@ -7,6 +7,7 @@ import ( "time" "github.com/gin-gonic/gin" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/store" ) @@ -55,6 +56,92 @@ func ParseObserveEventQuery(c *gin.Context) (store.EventSummaryQuery, error) { }, nil } +// ParseHookCatalogFilter parses the shared hook catalog query parameters. +func ParseHookCatalogFilter(c *gin.Context) (hookspkg.CatalogFilter, error) { + filter := hookspkg.CatalogFilter{ + AgentName: strings.TrimSpace(c.Query("agent")), + } + + if event := strings.TrimSpace(c.Query("event")); event != "" { + parsed := hookspkg.HookEvent(event) + if err := parsed.Validate(); err != nil { + return hookspkg.CatalogFilter{}, err + } + filter.Event = parsed + } + + if source := strings.TrimSpace(c.Query("source")); source != "" { + var parsed hookspkg.HookSource + if err := parsed.UnmarshalText([]byte(source)); err != nil { + return hookspkg.CatalogFilter{}, err + } + filter.Source = &parsed + } + + if mode := strings.TrimSpace(c.Query("mode")); mode != "" { + parsed := hookspkg.HookMode(mode) + if err := parsed.Validate(); err != nil { + return hookspkg.CatalogFilter{}, err + } + filter.Mode = parsed + } + + return filter, nil +} + +// ParseHookRunsQuery parses the shared hook execution history query parameters. 
+func ParseHookRunsQuery(c *gin.Context) (store.HookRunQuery, error) { + since, err := ParseOptionalTime(c.Query("since")) + if err != nil { + return store.HookRunQuery{}, err + } + last, err := ParseOptionalInt(c.Query("last")) + if err != nil { + return store.HookRunQuery{}, err + } + + query := store.HookRunQuery{ + SessionID: strings.TrimSpace(c.Query("session")), + Event: strings.TrimSpace(c.Query("event")), + Since: since, + Limit: last, + } + if outcome := strings.TrimSpace(c.Query("outcome")); outcome != "" { + query.Outcome = hookspkg.HookRunOutcome(outcome) + if err := query.Outcome.Validate(); err != nil { + return store.HookRunQuery{}, err + } + } + if event := query.Event; event != "" { + if err := hookspkg.HookEvent(event).Validate(); err != nil { + return store.HookRunQuery{}, err + } + } + if err := query.Validate(); err != nil { + return store.HookRunQuery{}, err + } + return query, nil +} + +// ParseHookEventFilter parses the shared hook taxonomy query parameters. +func ParseHookEventFilter(c *gin.Context) (hookspkg.EventFilter, error) { + syncOnly, err := ParseOptionalBool(c.Query("sync_only")) + if err != nil { + return hookspkg.EventFilter{}, err + } + + filter := hookspkg.EventFilter{ + SyncOnly: syncOnly, + } + if family := strings.TrimSpace(c.Query("family")); family != "" { + filter.Family = hookspkg.HookEventFamily(family) + if err := filter.Family.Validate(); err != nil { + return hookspkg.EventFilter{}, err + } + } + return filter, nil +} + // ParseObserveCursor parses a Last-Event-ID cursor for observe streaming. func ParseObserveCursor(raw string) (ObserveCursor, error) { value := strings.TrimSpace(raw) @@ -136,3 +223,17 @@ func ParseOptionalInt64(raw string) (int64, error) { } return parsed, nil } + +// ParseOptionalBool parses an optional boolean query value. 
+func ParseOptionalBool(raw string) (bool, error) { + value := strings.TrimSpace(raw) + if value == "" { + return false, nil + } + + parsed, err := strconv.ParseBool(value) + if err != nil { + return false, fmt.Errorf("invalid boolean %q: %w", value, err) + } + return parsed, nil +} diff --git a/internal/api/core/payloads.go b/internal/api/core/payloads.go index 5edb198d6..8926b7cbb 100644 --- a/internal/api/core/payloads.go +++ b/internal/api/core/payloads.go @@ -1,8 +1,12 @@ package core import ( + "encoding/json" "io" "time" + + "github.com/pedronauck/agh/internal/api/contract" + hookspkg "github.com/pedronauck/agh/internal/hooks" ) // SSEMessage is the shared SSE envelope. @@ -24,3 +28,84 @@ type ObserveCursor struct { Sequence int64 ID string } + +// HookCatalogPayloadsFromEntries converts resolved hook catalog entries into transport DTOs. +func HookCatalogPayloadsFromEntries(entries []hookspkg.CatalogEntry) []contract.HookCatalogPayload { + payloads := make([]contract.HookCatalogPayload, 0, len(entries)) + for _, entry := range entries { + payload := contract.HookCatalogPayload{ + Order: entry.Order, + Name: entry.Name, + Event: entry.Event.String(), + Source: entry.Source.String(), + Mode: string(entry.Mode), + Required: entry.Required, + Priority: entry.Priority, + ExecutorKind: string(entry.ExecutorKind), + Matcher: entry.Matcher, + Metadata: cloneCatalogMetadata(entry.Metadata), + } + if entry.SkillSource != "" { + payload.SkillSource = string(entry.SkillSource) + } + if entry.Timeout > 0 { + payload.TimeoutMS = entry.Timeout.Milliseconds() + } + payloads = append(payloads, payload) + } + return payloads +} + +// HookRunPayloadsFromRecords converts persisted hook audit records into transport DTOs. 
+func HookRunPayloadsFromRecords(records []hookspkg.HookRunRecord) []contract.HookRunPayload { + payloads := make([]contract.HookRunPayload, 0, len(records)) + for _, record := range records { + payloads = append(payloads, contract.HookRunPayload{ + HookName: record.HookName, + Event: record.Event.String(), + Source: record.Source.String(), + Mode: string(record.Mode), + DurationMS: record.Duration.Milliseconds(), + Outcome: string(record.Outcome), + DispatchDepth: record.DispatchDepth, + PatchApplied: cloneHookRunPatch(record.PatchApplied), + Error: record.Error, + Required: record.Required, + RecordedAt: record.RecordedAt, + }) + } + return payloads +} + +// HookEventPayloadsFromDescriptors converts hook taxonomy descriptors into transport DTOs. +func HookEventPayloadsFromDescriptors(events []hookspkg.EventDescriptor) []contract.HookEventPayload { + payloads := make([]contract.HookEventPayload, 0, len(events)) + for _, event := range events { + payloads = append(payloads, contract.HookEventPayload{ + Event: event.Event.String(), + Family: string(event.Family), + SyncEligible: event.SyncEligible, + PayloadSchema: event.PayloadSchema, + PatchSchema: event.PatchSchema, + }) + } + return payloads +} + +func cloneCatalogMetadata(src map[string]string) map[string]string { + if len(src) == 0 { + return nil + } + cloned := make(map[string]string, len(src)) + for key, value := range src { + cloned[key] = value + } + return cloned +} + +func cloneHookRunPatch(src json.RawMessage) json.RawMessage { + if len(src) == 0 { + return nil + } + return append(json.RawMessage(nil), src...) 
+} diff --git a/internal/api/httpapi/handlers_test.go b/internal/api/httpapi/handlers_test.go index 21ba73f6f..0bef9baa9 100644 --- a/internal/api/httpapi/handlers_test.go +++ b/internal/api/httpapi/handlers_test.go @@ -42,6 +42,9 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "GET /api/agents", "GET /api/agents/:name", "GET /api/daemon/status", + "GET /api/hooks/catalog", + "GET /api/hooks/events", + "GET /api/hooks/runs", "GET /api/memory", "GET /api/memory/:filename", "GET /api/observe/events", diff --git a/internal/api/httpapi/hooks_integration_test.go b/internal/api/httpapi/hooks_integration_test.go new file mode 100644 index 000000000..ad68246e9 --- /dev/null +++ b/internal/api/httpapi/hooks_integration_test.go @@ -0,0 +1,492 @@ +//go:build integration + +package httpapi + +import ( + "context" + "fmt" + "net/http" + "path/filepath" + "testing" + "time" + + "github.com/pedronauck/agh/internal/api/contract" + aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" + "github.com/pedronauck/agh/internal/observe" + "github.com/pedronauck/agh/internal/session" + "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/store/sessiondb" + testutilpkg "github.com/pedronauck/agh/internal/testutil" + workspacepkg "github.com/pedronauck/agh/internal/workspace" +) + +func TestHTTPHookCatalogEndpointReturnsResolvedHooksInPipelineOrder(t *testing.T) { + homePaths := newTestHomePaths(t) + observer := newHookIntegrationObserver(t, homePaths) + hooksRuntime := newHookIntegrationRuntime(t, + hookspkg.WithNativeDeclarations([]hookspkg.HookDecl{{ + Name: "native-first", + Event: hookspkg.HookSessionPostCreate, + Mode: hookspkg.HookModeSync, + ExecutorKind: hookspkg.HookExecutorNative, + }}), + hookspkg.WithConfigDeclarations([]hookspkg.HookDecl{{ + Name: "config-second", + Event: hookspkg.HookSessionPostCreate, + Mode: hookspkg.HookModeSync, + Command: "/bin/sh", + Args: []string{"-c", 
"printf '{}'"}, + }}), + hookspkg.WithExecutorResolver(hookIntegrationResolver(map[string]hookspkg.Executor{ + "native-first": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePatch, error) { + return hookspkg.SessionPostCreatePatch{}, nil + }), + })), + ) + observer.AttachHooks(hooksRuntime) + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, observer, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/catalog", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Hooks []contract.HookCatalogPayload `json:"hooks"` + } + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Hooks), 2; got != want { + t.Fatalf("len(hooks) = %d, want %d", got, want) + } + if response.Hooks[0].Name != "native-first" || response.Hooks[0].Order != 1 || response.Hooks[0].Source != "native" { + t.Fatalf("hooks[0] = %#v", response.Hooks[0]) + } + if response.Hooks[0].ExecutorKind != string(hookspkg.HookExecutorNative) { + t.Fatalf("hooks[0].ExecutorKind = %q, want %q", response.Hooks[0].ExecutorKind, hookspkg.HookExecutorNative) + } + if response.Hooks[1].Name != "config-second" || response.Hooks[1].Order != 2 || response.Hooks[1].Source != "config" { + t.Fatalf("hooks[1] = %#v", response.Hooks[1]) + } + if response.Hooks[1].ExecutorKind != string(hookspkg.HookExecutorSubprocess) { + t.Fatalf("hooks[1].ExecutorKind = %q, want %q", response.Hooks[1].ExecutorKind, hookspkg.HookExecutorSubprocess) + } +} + +func TestHTTPHookCatalogEndpointFiltersWorkspaceScopedHooks(t *testing.T) { + homePaths := newTestHomePaths(t) + observer := newHookIntegrationObserver(t, homePaths) + hooksRuntime := newHookIntegrationRuntime(t, + hookspkg.WithConfigDeclarations([]hookspkg.HookDecl{ + { + Name: "workspace-alpha", + Event: 
hookspkg.HookSessionPostCreate, + Mode: hookspkg.HookModeSync, + Matcher: hookspkg.HookMatcher{ + WorkspaceID: "ws-alpha", + WorkspaceRoot: "/workspace/alpha", + }, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + { + Name: "workspace-beta", + Event: hookspkg.HookSessionPostCreate, + Mode: hookspkg.HookModeSync, + Matcher: hookspkg.HookMatcher{ + WorkspaceID: "ws-beta", + WorkspaceRoot: "/workspace/beta", + }, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + }), + ) + observer.AttachHooks(hooksRuntime) + + workspaces := stubWorkspaceService{ + ResolveFn: func(_ context.Context, ref string) (workspacepkg.ResolvedWorkspace, error) { + if ref != "alpha" { + t.Fatalf("Resolve() ref = %q, want alpha", ref) + } + return workspacepkg.ResolvedWorkspace{ + Workspace: workspacepkg.Workspace{ + ID: "ws-alpha", + RootDir: "/workspace/alpha", + }, + }, nil + }, + } + + engine := newTestRouter(t, newTestHandlersWithWorkspace(t, stubSessionManager{}, observer, workspaces, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/catalog?workspace=alpha", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Hooks []contract.HookCatalogPayload `json:"hooks"` + } + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Hooks), 1; got != want { + t.Fatalf("len(hooks) = %d, want %d", got, want) + } + if response.Hooks[0].Name != "workspace-alpha" || response.Hooks[0].Source != "config" { + t.Fatalf("hooks[0] = %#v", response.Hooks[0]) + } +} + +func TestHTTPHookRunsEndpointReturnsExecutionHistoryWithPatchDiffs(t *testing.T) { + homePaths := newTestHomePaths(t) + observer := newHookIntegrationObserver(t, homePaths) + sessionID := "sess-history" + db := openHookRunSessionDB(t, homePaths, sessionID) + recordedAt := time.Date(2026, 4, 9, 18, 30, 0, 0, time.UTC) + if err := 
db.RecordHookRun(testutilpkg.Context(t), hookspkg.HookRunRecord{ + HookName: "permission-history", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Duration: 15 * time.Millisecond, + Outcome: hookspkg.HookRunOutcomeDenied, + DispatchDepth: 2, + PatchApplied: []byte(`{"decision":"deny","reason":"policy"}`), + Required: true, + RecordedAt: recordedAt, + }); err != nil { + t.Fatalf("RecordHookRun() error = %v", err) + } + closeHookRunSessionDB(t, db) + + manager := stubSessionManager{ + StatusFn: func(_ context.Context, id string) (*session.SessionInfo, error) { + return newSessionInfo(id), nil + }, + } + + engine := newTestRouter(t, newTestHandlers(t, manager, observer, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/runs?session="+sessionID, nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Runs []contract.HookRunPayload `json:"runs"` + } + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Runs), 1; got != want { + t.Fatalf("len(runs) = %d, want %d", got, want) + } + if response.Runs[0].HookName != "permission-history" || string(response.Runs[0].PatchApplied) != `{"decision":"deny","reason":"policy"}` { + t.Fatalf("runs[0] = %#v", response.Runs[0]) + } +} + +func TestHTTPHookEventsEndpointReturnsAllEventsWithSyncEligibility(t *testing.T) { + homePaths := newTestHomePaths(t) + observer := newHookIntegrationObserver(t, homePaths) + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, observer, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/events", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Events []contract.HookEventPayload `json:"events"` + } + 
decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Events), len(hookspkg.AllHookEvents()); got != want { + t.Fatalf("len(events) = %d, want %d", got, want) + } + + byEvent := make(map[string]contract.HookEventPayload, len(response.Events)) + for _, event := range response.Events { + byEvent[event.Event] = event + } + if event, ok := byEvent[hookspkg.HookMessageDelta.String()]; !ok || event.SyncEligible { + t.Fatalf("message.delta = %#v, want async-only", event) + } + if event, ok := byEvent[hookspkg.HookPermissionRequest.String()]; !ok || !event.SyncEligible { + t.Fatalf("permission.request = %#v, want sync-eligible", event) + } +} + +func TestHTTPHookCatalogEndpointFiltersByEventSourceAndMode(t *testing.T) { + homePaths := newTestHomePaths(t) + observer := newHookIntegrationObserver(t, homePaths) + hooksRuntime := newHookIntegrationRuntime(t, + hookspkg.WithNativeDeclarations([]hookspkg.HookDecl{{ + Name: "native-tool", + Event: hookspkg.HookToolPreCall, + Mode: hookspkg.HookModeSync, + ExecutorKind: hookspkg.HookExecutorNative, + }}), + hookspkg.WithConfigDeclarations([]hookspkg.HookDecl{ + { + Name: "config-tool-sync", + Event: hookspkg.HookToolPreCall, + Mode: hookspkg.HookModeSync, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + { + Name: "config-tool-async", + Event: hookspkg.HookToolPreCall, + Mode: hookspkg.HookModeAsync, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + }), + hookspkg.WithExecutorResolver(hookIntegrationResolver(map[string]hookspkg.Executor{ + "native-tool": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.ToolPreCallPayload) (hookspkg.ToolCallPatch, error) { + return hookspkg.ToolCallPatch{}, nil + }), + })), + ) + observer.AttachHooks(hooksRuntime) + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, observer, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, 
"/api/hooks/catalog?event=tool.pre_call&source=config&mode=sync", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Hooks []contract.HookCatalogPayload `json:"hooks"` + } + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Hooks), 1; got != want { + t.Fatalf("len(hooks) = %d, want %d", got, want) + } + if response.Hooks[0].Name != "config-tool-sync" || response.Hooks[0].ExecutorKind != string(hookspkg.HookExecutorSubprocess) { + t.Fatalf("hooks[0] = %#v, want filtered config sync hook", response.Hooks[0]) + } +} + +func TestHTTPHookRunsEndpointFiltersByOutcomeAndLast(t *testing.T) { + homePaths := newTestHomePaths(t) + observer := newHookIntegrationObserver(t, homePaths) + sessionID := "sess-history-filtered" + db := openHookRunSessionDB(t, homePaths, sessionID) + records := []hookspkg.HookRunRecord{ + { + HookName: "ignored-applied", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + RecordedAt: time.Date(2026, 4, 9, 18, 31, 0, 0, time.UTC), + }, + { + HookName: "denied-older", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeDenied, + RecordedAt: time.Date(2026, 4, 9, 18, 32, 0, 0, time.UTC), + }, + { + HookName: "denied-newer", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeDenied, + PatchApplied: []byte(`{"decision":"deny","reason":"policy"}`), + DispatchDepth: 1, + RecordedAt: time.Date(2026, 4, 9, 18, 33, 0, 0, time.UTC), + }, + } + for _, record := range records { + if err := db.RecordHookRun(testutilpkg.Context(t), record); err != nil { + t.Fatalf("RecordHookRun(%q) error = %v", record.HookName, err) + } + } + 
closeHookRunSessionDB(t, db) + + manager := stubSessionManager{ + StatusFn: func(_ context.Context, id string) (*session.SessionInfo, error) { + return newSessionInfo(id), nil + }, + } + + engine := newTestRouter(t, newTestHandlers(t, manager, observer, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/runs?session="+sessionID+"&event=permission.request&outcome=denied&last=1", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Runs []contract.HookRunPayload `json:"runs"` + } + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Runs), 1; got != want { + t.Fatalf("len(runs) = %d, want %d", got, want) + } + if response.Runs[0].HookName != "denied-newer" || response.Runs[0].Outcome != string(hookspkg.HookRunOutcomeDenied) { + t.Fatalf("runs[0] = %#v, want most recent denied run", response.Runs[0]) + } +} + +func TestHTTPHookEventsEndpointFiltersByFamilyAndSyncOnly(t *testing.T) { + homePaths := newTestHomePaths(t) + observer := newHookIntegrationObserver(t, homePaths) + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, observer, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/events?family=tool&sync_only=true", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Events []contract.HookEventPayload `json:"events"` + } + decodeJSONResponse(t, recorder, &response) + if len(response.Events) == 0 { + t.Fatal("len(events) = 0, want filtered tool events") + } + for _, event := range response.Events { + if event.Family != string(hookspkg.HookEventFamilyTool) { + t.Fatalf("event.Family = %q, want %q", event.Family, hookspkg.HookEventFamilyTool) + } + if !event.SyncEligible { + t.Fatalf("event.SyncEligible = false for %q, want true", 
event.Event) + } + } +} + +func TestHTTPHookRunsEndpointDispatchStoreQueryCycle(t *testing.T) { + homePaths := newTestHomePaths(t) + observer := newHookIntegrationObserver(t, homePaths) + sessionID := "sess-cycle" + closeHookRunSessionDB(t, openHookRunSessionDB(t, homePaths, sessionID)) + + hooksRuntime := newHookIntegrationRuntime(t, + hookspkg.WithTelemetrySink(observer), + hookspkg.WithNativeDeclarations([]hookspkg.HookDecl{{ + Name: "permission-audit", + Event: hookspkg.HookPermissionRequest, + Mode: hookspkg.HookModeSync, + ExecutorKind: hookspkg.HookExecutorNative, + }}), + hookspkg.WithExecutorResolver(hookIntegrationResolver(map[string]hookspkg.Executor{ + "permission-audit": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.PermissionRequestPayload) (hookspkg.PermissionRequestPatch, error) { + deny := "deny" + return hookspkg.PermissionRequestPatch{ + Decision: &deny, + Reason: hookStringPointer("policy"), + }, nil + }), + })), + ) + + _, err := hooksRuntime.DispatchPermissionRequest(testutilpkg.Context(t), hookspkg.PermissionRequestPayload{ + PayloadBase: hookspkg.PayloadBase{Event: hookspkg.HookPermissionRequest}, + SessionContext: hookspkg.SessionContext{ + SessionID: sessionID, + }, + Decision: "allow", + }) + if err != nil { + t.Fatalf("DispatchPermissionRequest() error = %v", err) + } + + manager := stubSessionManager{ + StatusFn: func(_ context.Context, id string) (*session.SessionInfo, error) { + return newSessionInfo(id), nil + }, + } + + engine := newTestRouter(t, newTestHandlers(t, manager, observer, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/runs?session="+sessionID+"&event=permission.request", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Runs []contract.HookRunPayload `json:"runs"` + } + decodeJSONResponse(t, recorder, &response) + if 
got, want := len(response.Runs), 1; got != want { + t.Fatalf("len(runs) = %d, want %d", got, want) + } + if response.Runs[0].HookName != "permission-audit" || response.Runs[0].Outcome != string(hookspkg.HookRunOutcomeDenied) { + t.Fatalf("runs[0] = %#v", response.Runs[0]) + } + if string(response.Runs[0].PatchApplied) != `{"decision":"deny","reason":"policy"}` { + t.Fatalf("runs[0].PatchApplied = %s, want deny patch", response.Runs[0].PatchApplied) + } +} + +func newHookIntegrationObserver(t *testing.T, homePaths aghconfig.HomePaths) *observe.Observer { + t.Helper() + + observer, err := observe.New(testutilpkg.Context(t), + observe.WithHomePaths(homePaths), + observe.WithLogger(discardLogger()), + ) + if err != nil { + t.Fatalf("observe.New() error = %v", err) + } + t.Cleanup(func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := observer.Close(ctx); err != nil { + t.Errorf("observer.Close() error = %v", err) + } + }) + return observer +} + +func newHookIntegrationRuntime(t *testing.T, opts ...hookspkg.Option) *hookspkg.Hooks { + t.Helper() + + runtime := hookspkg.NewHooks(append([]hookspkg.Option{ + hookspkg.WithLogger(discardLogger()), + }, opts...)...) 
+ if err := runtime.Rebuild(testutilpkg.Context(t)); err != nil { + t.Fatalf("Hooks.Rebuild() error = %v", err) + } + t.Cleanup(runtime.Close) + return runtime +} + +func hookIntegrationResolver(overrides map[string]hookspkg.Executor) hookspkg.ExecutorResolver { + return func(decl hookspkg.HookDecl) (hookspkg.Executor, error) { + if executor, ok := overrides[decl.Name]; ok { + return executor, nil + } + if decl.Command != "" { + opts := []hookspkg.SubprocessExecutorOption{} + if len(decl.Env) != 0 { + opts = append(opts, hookspkg.WithSubprocessEnv(decl.Env)) + } + return hookspkg.NewSubprocessExecutor(decl.Command, decl.Args, opts...), nil + } + return nil, fmt.Errorf("unexpected executor resolution for hook %q", decl.Name) + } +} + +func openHookRunSessionDB(t *testing.T, homePaths aghconfig.HomePaths, sessionID string) *sessiondb.SessionDB { + t.Helper() + + db, err := sessiondb.OpenSessionDB(testutilpkg.Context(t), sessionID, store.SessionDBFile(filepath.Join(homePaths.SessionsDir, sessionID))) + if err != nil { + t.Fatalf("OpenSessionDB(%q) error = %v", sessionID, err) + } + return db +} + +func closeHookRunSessionDB(t *testing.T, db *sessiondb.SessionDB) { + t.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := db.Close(ctx); err != nil { + t.Fatalf("SessionDB.Close() error = %v", err) + } +} + +func hookStringPointer(value string) *string { + return &value +} diff --git a/internal/api/httpapi/hooks_test.go b/internal/api/httpapi/hooks_test.go new file mode 100644 index 000000000..d74a6253e --- /dev/null +++ b/internal/api/httpapi/hooks_test.go @@ -0,0 +1,335 @@ +package httpapi + +import ( + "context" + "net/http" + "testing" + "time" + + "github.com/pedronauck/agh/internal/api/contract" + hookspkg "github.com/pedronauck/agh/internal/hooks" + "github.com/pedronauck/agh/internal/session" + "github.com/pedronauck/agh/internal/store" + workspacepkg "github.com/pedronauck/agh/internal/workspace" +) + 
+func TestHookCatalogHandlerReturnsResolvedHooksAndWorkspaceFilter(t *testing.T) { + t.Parallel() + + homePaths := newTestHomePaths(t) + source := hookspkg.HookSourceConfig + observer := stubObserver{ + QueryHookCatalogFn: func(_ context.Context, filter hookspkg.CatalogFilter) ([]hookspkg.CatalogEntry, error) { + if filter.WorkspaceID != "ws-alpha" { + t.Fatalf("filter.WorkspaceID = %q, want ws-alpha", filter.WorkspaceID) + } + if filter.WorkspaceRoot != "/workspace/alpha" { + t.Fatalf("filter.WorkspaceRoot = %q, want /workspace/alpha", filter.WorkspaceRoot) + } + if filter.AgentName != "coder" { + t.Fatalf("filter.AgentName = %q, want coder", filter.AgentName) + } + if filter.Event != hookspkg.HookSessionPostCreate { + t.Fatalf("filter.Event = %q, want %q", filter.Event, hookspkg.HookSessionPostCreate) + } + if filter.Source == nil || *filter.Source != source { + t.Fatalf("filter.Source = %#v, want %q", filter.Source, source) + } + if filter.Mode != hookspkg.HookModeSync { + t.Fatalf("filter.Mode = %q, want %q", filter.Mode, hookspkg.HookModeSync) + } + return []hookspkg.CatalogEntry{ + { + Order: 1, + Name: "native-first", + Event: hookspkg.HookSessionPostCreate, + Source: hookspkg.HookSourceNative, + Mode: hookspkg.HookModeSync, + Priority: 1000, + ExecutorKind: hookspkg.HookExecutorNative, + }, + { + Order: 2, + Name: "config-second", + Event: hookspkg.HookSessionPostCreate, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Priority: 0, + ExecutorKind: hookspkg.HookExecutorSubprocess, + }, + }, nil + }, + } + workspaces := stubWorkspaceService{ + ResolveFn: func(_ context.Context, ref string) (workspacepkg.ResolvedWorkspace, error) { + if ref != "alpha" { + t.Fatalf("Resolve() ref = %q, want alpha", ref) + } + return workspacepkg.ResolvedWorkspace{ + Workspace: workspacepkg.Workspace{ + ID: "ws-alpha", + RootDir: "/workspace/alpha", + }, + }, nil + }, + } + + engine := newTestRouter(t, newTestHandlersWithWorkspace(t, stubSessionManager{}, 
observer, workspaces, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/catalog?workspace=alpha&agent=coder&event=session.post_create&source=config&mode=sync", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Hooks []contract.HookCatalogPayload `json:"hooks"` + } + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Hooks), 2; got != want { + t.Fatalf("len(hooks) = %d, want %d", got, want) + } + if response.Hooks[0].Name != "native-first" || response.Hooks[0].Order != 1 || response.Hooks[0].Source != "native" { + t.Fatalf("hooks[0] = %#v", response.Hooks[0]) + } + if response.Hooks[0].ExecutorKind != string(hookspkg.HookExecutorNative) { + t.Fatalf("hooks[0].ExecutorKind = %q, want %q", response.Hooks[0].ExecutorKind, hookspkg.HookExecutorNative) + } + if response.Hooks[1].Name != "config-second" || response.Hooks[1].Order != 2 || response.Hooks[1].Source != "config" { + t.Fatalf("hooks[1] = %#v", response.Hooks[1]) + } + if response.Hooks[1].ExecutorKind != string(hookspkg.HookExecutorSubprocess) { + t.Fatalf("hooks[1].ExecutorKind = %q, want %q", response.Hooks[1].ExecutorKind, hookspkg.HookExecutorSubprocess) + } +} + +func TestHookRunsHandlerReturnsExecutionHistoryWithPatchDiffs(t *testing.T) { + t.Parallel() + + homePaths := newTestHomePaths(t) + since := time.Date(2026, 4, 9, 17, 59, 0, 0, time.UTC) + manager := stubSessionManager{ + StatusFn: func(_ context.Context, id string) (*session.SessionInfo, error) { + if id != "sess-hook" { + t.Fatalf("Status() id = %q, want sess-hook", id) + } + return newSessionInfo(id), nil + }, + } + observer := stubObserver{ + QueryHookRunsFn: func(_ context.Context, query store.HookRunQuery) ([]hookspkg.HookRunRecord, error) { + if query.SessionID != "sess-hook" { + t.Fatalf("query.SessionID = %q, want sess-hook", query.SessionID) + } + if query.Event != 
hookspkg.HookPermissionRequest.String() { + t.Fatalf("query.Event = %q, want %q", query.Event, hookspkg.HookPermissionRequest) + } + if query.Outcome != hookspkg.HookRunOutcomeDenied { + t.Fatalf("query.Outcome = %q, want %q", query.Outcome, hookspkg.HookRunOutcomeDenied) + } + if !query.Since.Equal(since) { + t.Fatalf("query.Since = %s, want %s", query.Since, since) + } + if query.Limit != 20 { + t.Fatalf("query.Limit = %d, want 20", query.Limit) + } + return []hookspkg.HookRunRecord{ + { + HookName: "permission-audit", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Duration: 25 * time.Millisecond, + Outcome: hookspkg.HookRunOutcomeDenied, + DispatchDepth: 2, + PatchApplied: []byte(`{"decision":"deny","reason":"policy"}`), + Error: "denied by policy", + Required: true, + RecordedAt: time.Date(2026, 4, 9, 18, 0, 0, 0, time.UTC), + }, + }, nil + }, + } + + engine := newTestRouter(t, newTestHandlers(t, manager, observer, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/runs?session=sess-hook&event=permission.request&outcome=denied&since=2026-04-09T17:59:00Z&last=20", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Runs []contract.HookRunPayload `json:"runs"` + } + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Runs), 1; got != want { + t.Fatalf("len(runs) = %d, want %d", got, want) + } + if response.Runs[0].HookName != "permission-audit" { + t.Fatalf("runs[0].HookName = %q, want permission-audit", response.Runs[0].HookName) + } + if string(response.Runs[0].PatchApplied) != `{"decision":"deny","reason":"policy"}` { + t.Fatalf("runs[0].PatchApplied = %s, want deny patch", response.Runs[0].PatchApplied) + } + if response.Runs[0].DispatchDepth != 2 || response.Runs[0].Outcome != string(hookspkg.HookRunOutcomeDenied) { + 
t.Fatalf("runs[0] = %#v", response.Runs[0]) + } +} + +func TestHookRunsHandlerRejectsMissingSession(t *testing.T) { + t.Parallel() + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, stubObserver{}, newTestHomePaths(t))) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/runs", nil) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusBadRequest, recorder.Body.String()) + } +} + +func TestHookRunsHandlerRejectsInvalidEvent(t *testing.T) { + t.Parallel() + + homePaths := newTestHomePaths(t) + manager := stubSessionManager{ + StatusFn: func(_ context.Context, id string) (*session.SessionInfo, error) { + return newSessionInfo(id), nil + }, + } + + engine := newTestRouter(t, newTestHandlers(t, manager, stubObserver{}, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/runs?session=sess-hook&event=not-a-hook", nil) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusBadRequest, recorder.Body.String()) + } +} + +func TestHookCatalogHandlerRejectsInvalidSource(t *testing.T) { + t.Parallel() + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, stubObserver{}, newTestHomePaths(t))) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/catalog?source=wrong", nil) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusBadRequest, recorder.Body.String()) + } +} + +func TestHookCatalogHandlerRejectsInvalidMode(t *testing.T) { + t.Parallel() + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, stubObserver{}, newTestHomePaths(t))) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/catalog?mode=wrong", nil) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusBadRequest, recorder.Body.String()) + } +} 
+ +func TestHookRunsHandlerRejectsInvalidOutcome(t *testing.T) { + t.Parallel() + + homePaths := newTestHomePaths(t) + manager := stubSessionManager{ + StatusFn: func(_ context.Context, id string) (*session.SessionInfo, error) { + return newSessionInfo(id), nil + }, + } + + engine := newTestRouter(t, newTestHandlers(t, manager, stubObserver{}, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/runs?session=sess-hook&outcome=nope", nil) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusBadRequest, recorder.Body.String()) + } +} + +func TestHookRunsHandlerRejectsInvalidSince(t *testing.T) { + t.Parallel() + + homePaths := newTestHomePaths(t) + manager := stubSessionManager{ + StatusFn: func(_ context.Context, id string) (*session.SessionInfo, error) { + return newSessionInfo(id), nil + }, + } + + engine := newTestRouter(t, newTestHandlers(t, manager, stubObserver{}, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/runs?session=sess-hook&since=not-a-time", nil) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusBadRequest, recorder.Body.String()) + } +} + +func TestHookRunsHandlerRejectsInvalidLast(t *testing.T) { + t.Parallel() + + homePaths := newTestHomePaths(t) + manager := stubSessionManager{ + StatusFn: func(_ context.Context, id string) (*session.SessionInfo, error) { + return newSessionInfo(id), nil + }, + } + + engine := newTestRouter(t, newTestHandlers(t, manager, stubObserver{}, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/runs?session=sess-hook&last=-1", nil) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusBadRequest, recorder.Body.String()) + } +} + +func TestHookEventsHandlerReturnsPayloads(t *testing.T) { + t.Parallel() + + homePaths := newTestHomePaths(t) + 
observer := stubObserver{ + QueryHookEventsFn: func(_ context.Context, filter hookspkg.EventFilter) ([]hookspkg.EventDescriptor, error) { + if filter.Family != hookspkg.HookEventFamilyTool { + t.Fatalf("filter.Family = %q, want %q", filter.Family, hookspkg.HookEventFamilyTool) + } + if !filter.SyncOnly { + t.Fatal("filter.SyncOnly = false, want true") + } + return []hookspkg.EventDescriptor{ + { + Event: hookspkg.HookToolPreCall, + Family: hookspkg.HookEventFamilyTool, + SyncEligible: true, + PayloadSchema: "ToolPreCallPayload", + PatchSchema: "ToolCallPatch", + }, + }, nil + }, + } + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, observer, homePaths)) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/events?family=tool&sync_only=true", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Events []contract.HookEventPayload `json:"events"` + } + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Events), 1; got != want { + t.Fatalf("len(events) = %d, want %d", got, want) + } + if response.Events[0].Event != hookspkg.HookToolPreCall.String() || !response.Events[0].SyncEligible { + t.Fatalf("events[0] = %#v", response.Events[0]) + } + +} + +func TestHookEventsHandlerRejectsInvalidFamily(t *testing.T) { + t.Parallel() + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, stubObserver{}, newTestHomePaths(t))) + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/events?family=nope", nil) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusBadRequest, recorder.Body.String()) + } +} + +func TestHookEventsHandlerRejectsInvalidSyncOnly(t *testing.T) { + t.Parallel() + + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, stubObserver{}, newTestHomePaths(t))) + recorder := 
performRequest(t, engine, http.MethodGet, "/api/hooks/events?sync_only=maybe", nil) + if recorder.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusBadRequest, recorder.Body.String()) + } +} diff --git a/internal/api/httpapi/httpapi_integration_test.go b/internal/api/httpapi/httpapi_integration_test.go index 991c73423..c235335b7 100644 --- a/internal/api/httpapi/httpapi_integration_test.go +++ b/internal/api/httpapi/httpapi_integration_test.go @@ -534,7 +534,7 @@ func (f *integrationNotifierFanout) OnSessionStopped(ctx context.Context, sess * } } -func (f *integrationNotifierFanout) OnAgentEvent(ctx context.Context, sessionID string, event acp.AgentEvent) { +func (f *integrationNotifierFanout) OnAgentEvent(ctx context.Context, sessionID string, event any) { for _, notifier := range f.notifiers { notifier.OnAgentEvent(ctx, sessionID, event) } diff --git a/internal/api/httpapi/server.go b/internal/api/httpapi/server.go index 78f4d9588..aec5cb7e7 100644 --- a/internal/api/httpapi/server.go +++ b/internal/api/httpapi/server.go @@ -460,6 +460,13 @@ func RegisterRoutes(router gin.IRouter, handlers *Handlers) { observeGroup.GET("/health", handlers.Health) } + hooksGroup := api.Group("/hooks") + { + hooksGroup.GET("/catalog", handlers.HookCatalog) + hooksGroup.GET("/runs", handlers.HookRuns) + hooksGroup.GET("/events", handlers.HookEvents) + } + skillsGroup := api.Group("/skills") { skillsGroup.GET("", handlers.ListSkills) diff --git a/internal/api/testutil/apitest.go b/internal/api/testutil/apitest.go index 0769d58de..f54230268 100644 --- a/internal/api/testutil/apitest.go +++ b/internal/api/testutil/apitest.go @@ -19,6 +19,7 @@ import ( "github.com/pedronauck/agh/internal/acp" core "github.com/pedronauck/agh/internal/api/core" aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/observe" 
"github.com/pedronauck/agh/internal/session" "github.com/pedronauck/agh/internal/skills" @@ -130,8 +131,11 @@ func (s StubSessionManager) ApprovePermission(ctx context.Context, id string, re } type StubObserver struct { - QueryEventsFn func(context.Context, store.EventSummaryQuery) ([]store.EventSummary, error) - HealthFn func(context.Context) (observe.Health, error) + QueryEventsFn func(context.Context, store.EventSummaryQuery) ([]store.EventSummary, error) + QueryHookCatalogFn func(context.Context, hookspkg.CatalogFilter) ([]hookspkg.CatalogEntry, error) + QueryHookRunsFn func(context.Context, store.HookRunQuery) ([]hookspkg.HookRunRecord, error) + QueryHookEventsFn func(context.Context, hookspkg.EventFilter) ([]hookspkg.EventDescriptor, error) + HealthFn func(context.Context) (observe.Health, error) } func (s StubObserver) QueryEvents(ctx context.Context, query store.EventSummaryQuery) ([]store.EventSummary, error) { @@ -148,6 +152,27 @@ func (s StubObserver) Health(ctx context.Context) (observe.Health, error) { return observe.Health{Status: "ok"}, nil } +func (s StubObserver) QueryHookCatalog(ctx context.Context, filter hookspkg.CatalogFilter) ([]hookspkg.CatalogEntry, error) { + if s.QueryHookCatalogFn != nil { + return s.QueryHookCatalogFn(ctx, filter) + } + return nil, nil +} + +func (s StubObserver) QueryHookRuns(ctx context.Context, query store.HookRunQuery) ([]hookspkg.HookRunRecord, error) { + if s.QueryHookRunsFn != nil { + return s.QueryHookRunsFn(ctx, query) + } + return nil, nil +} + +func (s StubObserver) QueryHookEvents(ctx context.Context, filter hookspkg.EventFilter) ([]hookspkg.EventDescriptor, error) { + if s.QueryHookEventsFn != nil { + return s.QueryHookEventsFn(ctx, filter) + } + return nil, nil +} + type StubWorkspaceService struct { RegisterFn func(context.Context, workspacepkg.RegisterOptions) (workspacepkg.Workspace, error) UnregisterFn func(context.Context, string) error diff --git a/internal/api/udsapi/handlers_test.go 
b/internal/api/udsapi/handlers_test.go index 4f1c8e95b..64aae9100 100644 --- a/internal/api/udsapi/handlers_test.go +++ b/internal/api/udsapi/handlers_test.go @@ -11,7 +11,9 @@ import ( "time" "github.com/pedronauck/agh/internal/acp" + "github.com/pedronauck/agh/internal/api/contract" aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/observe" "github.com/pedronauck/agh/internal/session" "github.com/pedronauck/agh/internal/store" @@ -38,6 +40,9 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "GET /api/agents", "GET /api/agents/:name", "GET /api/daemon/status", + "GET /api/hooks/catalog", + "GET /api/hooks/events", + "GET /api/hooks/runs", "GET /api/memory", "GET /api/memory/:filename", "GET /api/observe/events", @@ -192,6 +197,46 @@ func TestListSessionsHandlerFiltersByWorkspace(t *testing.T) { } } +func TestHookEventsHandlerAvailableOnUDSRouter(t *testing.T) { + t.Parallel() + + homePaths := newTestHomePaths(t) + observer := stubObserver{ + QueryHookEventsFn: func(_ context.Context, filter hookspkg.EventFilter) ([]hookspkg.EventDescriptor, error) { + if filter.Family != hookspkg.HookEventFamilyTool { + t.Fatalf("filter.Family = %q, want %q", filter.Family, hookspkg.HookEventFamilyTool) + } + if !filter.SyncOnly { + t.Fatal("filter.SyncOnly = false, want true") + } + return []hookspkg.EventDescriptor{{ + Event: hookspkg.HookToolPreCall, + Family: hookspkg.HookEventFamilyTool, + SyncEligible: true, + PayloadSchema: "ToolPreCallPayload", + PatchSchema: "ToolCallPatch", + }}, nil + }, + } + engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{}, observer, homePaths)) + + recorder := performRequest(t, engine, http.MethodGet, "/api/hooks/events?family=tool&sync_only=true", nil) + if recorder.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + + var response struct { + Events 
[]contract.HookEventPayload `json:"events"` + } + decodeJSONResponse(t, recorder, &response) + if got, want := len(response.Events), 1; got != want { + t.Fatalf("len(events) = %d, want %d", got, want) + } + if response.Events[0].Event != hookspkg.HookToolPreCall.String() { + t.Fatalf("events[0].Event = %q, want %q", response.Events[0].Event, hookspkg.HookToolPreCall) + } +} + func TestCreateWorkspaceHandlerRegistersWorkspace(t *testing.T) { homePaths := newTestHomePaths(t) rootDir := t.TempDir() diff --git a/internal/api/udsapi/routes.go b/internal/api/udsapi/routes.go index 9c4cba492..5279bb431 100644 --- a/internal/api/udsapi/routes.go +++ b/internal/api/udsapi/routes.go @@ -44,6 +44,13 @@ func RegisterRoutes(router gin.IRouter, handlers *Handlers) { observe.GET("/health", handlers.Health) } + hooksGroup := api.Group("/hooks") + { + hooksGroup.GET("/catalog", handlers.HookCatalog) + hooksGroup.GET("/runs", handlers.HookRuns) + hooksGroup.GET("/events", handlers.HookEvents) + } + skillsGroup := api.Group("/skills") { skillsGroup.GET("", handlers.ListSkills) diff --git a/internal/api/udsapi/udsapi_integration_test.go b/internal/api/udsapi/udsapi_integration_test.go index 85dd1eb3a..af7e30e62 100644 --- a/internal/api/udsapi/udsapi_integration_test.go +++ b/internal/api/udsapi/udsapi_integration_test.go @@ -331,7 +331,7 @@ func (f *integrationNotifierFanout) OnSessionStopped(ctx context.Context, sess * } } -func (f *integrationNotifierFanout) OnAgentEvent(ctx context.Context, sessionID string, event acp.AgentEvent) { +func (f *integrationNotifierFanout) OnAgentEvent(ctx context.Context, sessionID string, event any) { for _, notifier := range f.notifiers { notifier.OnAgentEvent(ctx, sessionID, event) } diff --git a/internal/cli/cli_integration_test.go b/internal/cli/cli_integration_test.go index fc52efce1..85732fb37 100644 --- a/internal/cli/cli_integration_test.go +++ b/internal/cli/cli_integration_test.go @@ -634,7 +634,7 @@ func (f *integrationNotifierFanout) 
OnSessionStopped(ctx context.Context, sess * } } -func (f *integrationNotifierFanout) OnAgentEvent(ctx context.Context, sessionID string, event acp.AgentEvent) { +func (f *integrationNotifierFanout) OnAgentEvent(ctx context.Context, sessionID string, event any) { for _, notifier := range f.notifiers { notifier.OnAgentEvent(ctx, sessionID, event) } diff --git a/internal/cli/client.go b/internal/cli/client.go index 345012814..301478a2b 100644 --- a/internal/cli/client.go +++ b/internal/cli/client.go @@ -44,6 +44,9 @@ type DaemonClient interface { DeleteWorkspace(ctx context.Context, ref string) error ListAgents(ctx context.Context) ([]AgentRecord, error) GetAgent(ctx context.Context, name string) (AgentRecord, error) + HookCatalog(ctx context.Context, query HookCatalogQuery) ([]HookCatalogRecord, error) + HookRuns(ctx context.Context, query HookRunsQuery) ([]HookRunRecord, error) + HookEvents(ctx context.Context, query HookEventsQuery) ([]HookEventRecord, error) ObserveEvents(ctx context.Context, query ObserveEventQuery) ([]ObserveEventRecord, error) StreamObserveEvents(ctx context.Context, query ObserveEventQuery, lastEventID string, handler SSEHandler) error ObserveHealth(ctx context.Context) (HealthStatus, error) @@ -111,6 +114,24 @@ type AgentEventRecord = contract.AgentEventPayload // TokenUsageRecord is the prompt usage payload returned by the daemon API. type TokenUsageRecord = contract.TokenUsagePayload +// HookCatalogQuery captures the CLI filters for resolved hook catalog queries. +type HookCatalogQuery = contract.HookCatalogQuery + +// HookCatalogRecord is one resolved hook returned by the daemon API. +type HookCatalogRecord = contract.HookCatalogPayload + +// HookRunsQuery captures the CLI filters for hook execution history queries. +type HookRunsQuery = contract.HookRunsQuery + +// HookRunRecord is one persisted hook execution audit record. 
+type HookRunRecord = contract.HookRunPayload + +// HookEventsQuery captures the CLI filters for hook taxonomy queries. +type HookEventsQuery = contract.HookEventsQuery + +// HookEventRecord is one supported hook taxonomy row returned by the daemon API. +type HookEventRecord = contract.HookEventPayload + // ObserveEventRecord is one cross-session observability event row. type ObserveEventRecord = contract.ObserveEventPayload @@ -353,6 +374,36 @@ func (c *unixSocketClient) GetAgent(ctx context.Context, name string) (AgentReco return response.Agent, nil } +func (c *unixSocketClient) HookCatalog(ctx context.Context, query HookCatalogQuery) ([]HookCatalogRecord, error) { + var response struct { + Hooks []HookCatalogRecord `json:"hooks"` + } + if err := c.doJSON(ctx, http.MethodGet, "/api/hooks/catalog", hookCatalogValues(query), nil, &response); err != nil { + return nil, err + } + return response.Hooks, nil +} + +func (c *unixSocketClient) HookRuns(ctx context.Context, query HookRunsQuery) ([]HookRunRecord, error) { + var response struct { + Runs []HookRunRecord `json:"runs"` + } + if err := c.doJSON(ctx, http.MethodGet, "/api/hooks/runs", hookRunsValues(query), nil, &response); err != nil { + return nil, err + } + return response.Runs, nil +} + +func (c *unixSocketClient) HookEvents(ctx context.Context, query HookEventsQuery) ([]HookEventRecord, error) { + var response struct { + Events []HookEventRecord `json:"events"` + } + if err := c.doJSON(ctx, http.MethodGet, "/api/hooks/events", hookEventsValues(query), nil, &response); err != nil { + return nil, err + } + return response.Events, nil +} + func (c *unixSocketClient) ObserveEvents(ctx context.Context, query ObserveEventQuery) ([]ObserveEventRecord, error) { var response struct { Events []ObserveEventRecord `json:"events"` @@ -614,6 +665,57 @@ func observeEventValues(query ObserveEventQuery) url.Values { return values } +func hookCatalogValues(query HookCatalogQuery) url.Values { + values := url.Values{} + if 
trimmed := strings.TrimSpace(query.Workspace); trimmed != "" { + values.Set("workspace", trimmed) + } + if trimmed := strings.TrimSpace(query.Agent); trimmed != "" { + values.Set("agent", trimmed) + } + if trimmed := strings.TrimSpace(query.Event); trimmed != "" { + values.Set("event", trimmed) + } + if trimmed := strings.TrimSpace(query.Source); trimmed != "" { + values.Set("source", trimmed) + } + if trimmed := strings.TrimSpace(query.Mode); trimmed != "" { + values.Set("mode", trimmed) + } + return values +} + +func hookRunsValues(query HookRunsQuery) url.Values { + values := url.Values{} + if trimmed := strings.TrimSpace(query.Session); trimmed != "" { + values.Set("session", trimmed) + } + if trimmed := strings.TrimSpace(query.Event); trimmed != "" { + values.Set("event", trimmed) + } + if trimmed := strings.TrimSpace(query.Outcome); trimmed != "" { + values.Set("outcome", trimmed) + } + if trimmed := strings.TrimSpace(query.Since); trimmed != "" { + values.Set("since", trimmed) + } + if query.Last > 0 { + values.Set("last", strconv.Itoa(query.Last)) + } + return values +} + +func hookEventsValues(query HookEventsQuery) url.Values { + values := url.Values{} + if trimmed := strings.TrimSpace(query.Family); trimmed != "" { + values.Set("family", trimmed) + } + if query.SyncOnly { + values.Set("sync_only", strconv.FormatBool(query.SyncOnly)) + } + return values +} + func memoryValues(scope memory.Scope, workspace string) url.Values { values := url.Values{} if trimmed := strings.TrimSpace(string(scope)); trimmed != "" { diff --git a/internal/cli/client_test.go b/internal/cli/client_test.go index 387e1adc7..18576831f 100644 --- a/internal/cli/client_test.go +++ b/internal/cli/client_test.go @@ -100,6 +100,48 @@ func TestUnixSocketClientMethods(t *testing.T) { return newHTTPResponse(http.StatusOK, `{"agents":[{"name":"coder","provider":"fake","prompt":"You are coder."}]}`), nil case req.Method == http.MethodGet && req.URL.Path == "/api/agents/coder": return 
newHTTPResponse(http.StatusOK, `{"agent":{"name":"coder","provider":"fake","prompt":"You are coder."}}`), nil + case req.Method == http.MethodGet && req.URL.Path == "/api/hooks/catalog": + if got := req.URL.Query().Get("workspace"); got != "alpha" { + t.Fatalf("hook catalog workspace query = %q, want %q", got, "alpha") + } + if got := req.URL.Query().Get("agent"); got != "coder" { + t.Fatalf("hook catalog agent query = %q, want %q", got, "coder") + } + if got := req.URL.Query().Get("event"); got != "tool.pre_call" { + t.Fatalf("hook catalog event query = %q, want %q", got, "tool.pre_call") + } + if got := req.URL.Query().Get("source"); got != "config" { + t.Fatalf("hook catalog source query = %q, want %q", got, "config") + } + if got := req.URL.Query().Get("mode"); got != "sync" { + t.Fatalf("hook catalog mode query = %q, want %q", got, "sync") + } + return newHTTPResponse(http.StatusOK, `{"hooks":[{"order":1,"name":"permission-guard","event":"tool.pre_call","source":"config","mode":"sync","priority":10,"executor_kind":"subprocess"}]}`), nil + case req.Method == http.MethodGet && req.URL.Path == "/api/hooks/runs": + if got := req.URL.Query().Get("session"); got != "sess-1" { + t.Fatalf("hook runs session query = %q, want %q", got, "sess-1") + } + if got := req.URL.Query().Get("event"); got != "permission.request" { + t.Fatalf("hook runs event query = %q, want %q", got, "permission.request") + } + if got := req.URL.Query().Get("outcome"); got != "failed" { + t.Fatalf("hook runs outcome query = %q, want %q", got, "failed") + } + if got := req.URL.Query().Get("since"); got != "2026-04-03T11:00:00Z" { + t.Fatalf("hook runs since query = %q, want %q", got, "2026-04-03T11:00:00Z") + } + if got := req.URL.Query().Get("last"); got != "2" { + t.Fatalf("hook runs last query = %q, want %q", got, "2") + } + return newHTTPResponse(http.StatusOK, 
`{"runs":[{"hook_name":"permission-guard","event":"permission.request","source":"config","mode":"sync","duration_ms":12,"outcome":"failed","error":"boom","recorded_at":"2026-04-03T12:00:00Z"}]}`), nil + case req.Method == http.MethodGet && req.URL.Path == "/api/hooks/events": + if got := req.URL.Query().Get("family"); got != "tool" { + t.Fatalf("hook events family query = %q, want %q", got, "tool") + } + if got := req.URL.Query().Get("sync_only"); got != "true" { + t.Fatalf("hook events sync_only query = %q, want %q", got, "true") + } + return newHTTPResponse(http.StatusOK, `{"events":[{"event":"tool.pre_call","family":"tool","sync_eligible":true,"payload_schema":"ToolPreCallPayload","patch_schema":"ToolCallPatch"}]}`), nil case req.Method == http.MethodGet && req.URL.Path == "/api/observe/events": if got := req.URL.Query().Get("session_id"); got != "sess-1" { t.Fatalf("observe session_id query = %q, want %q", got, "sess-1") @@ -201,6 +243,36 @@ func TestUnixSocketClientMethods(t *testing.T) { t.Fatalf("GetAgent() = %#v, %v", agent, err) } + hookCatalog, err := client.HookCatalog(ctx, HookCatalogQuery{ + Workspace: "alpha", + Agent: "coder", + Event: "tool.pre_call", + Source: "config", + Mode: "sync", + }) + if err != nil || len(hookCatalog) != 1 || hookCatalog[0].ExecutorKind != "subprocess" { + t.Fatalf("HookCatalog() = %#v, %v", hookCatalog, err) + } + + hookRuns, err := client.HookRuns(ctx, HookRunsQuery{ + Session: "sess-1", + Event: "permission.request", + Outcome: "failed", + Since: "2026-04-03T11:00:00Z", + Last: 2, + }) + if err != nil || len(hookRuns) != 1 || hookRuns[0].Outcome != "failed" { + t.Fatalf("HookRuns() = %#v, %v", hookRuns, err) + } + + hookEvents, err := client.HookEvents(ctx, HookEventsQuery{ + Family: "tool", + SyncOnly: true, + }) + if err != nil || len(hookEvents) != 1 || hookEvents[0].Family != "tool" { + t.Fatalf("HookEvents() = %#v, %v", hookEvents, err) + } + createdWorkspace, err := client.CreateWorkspace(ctx, 
WorkspaceCreateRequest{RootDir: "/workspace/project"}) if err != nil || createdWorkspace.ID != "ws-1" { t.Fatalf("CreateWorkspace() = %#v, %v", createdWorkspace, err) @@ -306,6 +378,30 @@ func TestReadAPIErrorAndHelpers(t *testing.T) { t.Fatalf("observeEventValues() = %v, want session_id/limit", got) } + if got := hookCatalogValues(HookCatalogQuery{ + Workspace: "alpha", + Agent: "coder", + Event: "tool.pre_call", + Source: "config", + Mode: "sync", + }); got.Get("workspace") != "alpha" || got.Get("source") != "config" || got.Get("mode") != "sync" { + t.Fatalf("hookCatalogValues() = %v, want all hook catalog filters", got) + } + + if got := hookRunsValues(HookRunsQuery{ + Session: "sess-1", + Event: "permission.request", + Outcome: "failed", + Since: "2026-04-03T11:00:00Z", + Last: 2, + }); got.Get("outcome") != "failed" || got.Get("last") != "2" || got.Get("since") != "2026-04-03T11:00:00Z" { + t.Fatalf("hookRunsValues() = %v, want all hook runs filters", got) + } + + if got := hookEventsValues(HookEventsQuery{Family: "tool", SyncOnly: true}); got.Get("family") != "tool" || got.Get("sync_only") != "true" { + t.Fatalf("hookEventsValues() = %v, want family/sync_only", got) + } + if got := memoryValues(memory.ScopeWorkspace, "/workspace/project"); got.Get("scope") != "workspace" || got.Get("workspace") != "/workspace/project" { t.Fatalf("memoryValues() = %v, want scope/workspace", got) } @@ -435,6 +531,12 @@ func TestCLIUsesSharedContractAliases(t *testing.T) { {name: "Should alias TurnHistoryRecord to the shared contract", cliType: TurnHistoryRecord{}, want: contract.TurnHistoryPayload{}}, {name: "Should alias AgentRecord to the shared contract", cliType: AgentRecord{}, want: contract.AgentPayload{}}, {name: "Should alias AgentEventRecord to the shared contract", cliType: AgentEventRecord{}, want: contract.AgentEventPayload{}}, + {name: "Should alias HookCatalogQuery to the shared contract", cliType: HookCatalogQuery{}, want: contract.HookCatalogQuery{}}, + {name: 
"Should alias HookCatalogRecord to the shared contract", cliType: HookCatalogRecord{}, want: contract.HookCatalogPayload{}}, + {name: "Should alias HookRunsQuery to the shared contract", cliType: HookRunsQuery{}, want: contract.HookRunsQuery{}}, + {name: "Should alias HookRunRecord to the shared contract", cliType: HookRunRecord{}, want: contract.HookRunPayload{}}, + {name: "Should alias HookEventsQuery to the shared contract", cliType: HookEventsQuery{}, want: contract.HookEventsQuery{}}, + {name: "Should alias HookEventRecord to the shared contract", cliType: HookEventRecord{}, want: contract.HookEventPayload{}}, {name: "Should alias ObserveEventRecord to the shared contract", cliType: ObserveEventRecord{}, want: contract.ObserveEventPayload{}}, {name: "Should alias WorkspaceCreateRequest to the shared contract", cliType: WorkspaceCreateRequest{}, want: contract.CreateWorkspaceRequest{}}, {name: "Should alias WorkspaceUpdateRequest to the shared contract", cliType: WorkspaceUpdateRequest{}, want: contract.UpdateWorkspaceRequest{}}, @@ -517,6 +619,57 @@ func TestSharedContractJSONParity(t *testing.T) { t.Fatalf("observe decode = %#v, want %#v", cliObserve, sharedObserve) } + hookCatalogResponse := `{"hooks":[{"order":1,"name":"permission-guard","event":"tool.pre_call","source":"config","mode":"sync","priority":10,"executor_kind":"subprocess","matcher":{"tool_name":"shell"},"metadata":{"origin":"config"}}]}` + var cliHookCatalog struct { + Hooks []HookCatalogRecord `json:"hooks"` + } + if err := json.Unmarshal([]byte(hookCatalogResponse), &cliHookCatalog); err != nil { + t.Fatalf("json.Unmarshal(cli hook catalog response) error = %v", err) + } + var sharedHookCatalog struct { + Hooks []contract.HookCatalogPayload `json:"hooks"` + } + if err := json.Unmarshal([]byte(hookCatalogResponse), &sharedHookCatalog); err != nil { + t.Fatalf("json.Unmarshal(shared hook catalog response) error = %v", err) + } + if !reflect.DeepEqual(cliHookCatalog, sharedHookCatalog) { + 
t.Fatalf("hook catalog decode = %#v, want %#v", cliHookCatalog, sharedHookCatalog) + } + + hookRunsResponse := `{"runs":[{"hook_name":"permission-guard","event":"permission.request","source":"config","mode":"sync","duration_ms":12,"outcome":"failed","error":"boom","recorded_at":"2026-04-03T12:00:00Z"}]}` + var cliHookRuns struct { + Runs []HookRunRecord `json:"runs"` + } + if err := json.Unmarshal([]byte(hookRunsResponse), &cliHookRuns); err != nil { + t.Fatalf("json.Unmarshal(cli hook runs response) error = %v", err) + } + var sharedHookRuns struct { + Runs []contract.HookRunPayload `json:"runs"` + } + if err := json.Unmarshal([]byte(hookRunsResponse), &sharedHookRuns); err != nil { + t.Fatalf("json.Unmarshal(shared hook runs response) error = %v", err) + } + if !reflect.DeepEqual(cliHookRuns, sharedHookRuns) { + t.Fatalf("hook runs decode = %#v, want %#v", cliHookRuns, sharedHookRuns) + } + + hookEventsResponse := `{"events":[{"event":"tool.pre_call","family":"tool","sync_eligible":true,"payload_schema":"ToolPreCallPayload","patch_schema":"ToolCallPatch"}]}` + var cliHookEvents struct { + Events []HookEventRecord `json:"events"` + } + if err := json.Unmarshal([]byte(hookEventsResponse), &cliHookEvents); err != nil { + t.Fatalf("json.Unmarshal(cli hook events response) error = %v", err) + } + var sharedHookEvents struct { + Events []contract.HookEventPayload `json:"events"` + } + if err := json.Unmarshal([]byte(hookEventsResponse), &sharedHookEvents); err != nil { + t.Fatalf("json.Unmarshal(shared hook events response) error = %v", err) + } + if !reflect.DeepEqual(cliHookEvents, sharedHookEvents) { + t.Fatalf("hook events decode = %#v, want %#v", cliHookEvents, sharedHookEvents) + } + daemonResponse := `{"daemon":{"status":"running","pid":10,"started_at":"2026-04-03T12:00:00Z","socket":"/tmp/agh.sock","http_host":"localhost","http_port":2123,"active_sessions":1,"total_sessions":2,"version":"dev"}}` var cliDaemon struct { Daemon DaemonStatus `json:"daemon"` diff 
--git a/internal/cli/helpers_test.go b/internal/cli/helpers_test.go index f306ded16..79cc2bac5 100644 --- a/internal/cli/helpers_test.go +++ b/internal/cli/helpers_test.go @@ -34,6 +34,9 @@ type stubClient struct { deleteWorkspaceFn func(context.Context, string) error listAgentsFn func(context.Context) ([]AgentRecord, error) getAgentFn func(context.Context, string) (AgentRecord, error) + hookCatalogFn func(context.Context, HookCatalogQuery) ([]HookCatalogRecord, error) + hookRunsFn func(context.Context, HookRunsQuery) ([]HookRunRecord, error) + hookEventsFn func(context.Context, HookEventsQuery) ([]HookEventRecord, error) observeEventsFn func(context.Context, ObserveEventQuery) ([]ObserveEventRecord, error) streamObserveEventsFn func(context.Context, ObserveEventQuery, string, SSEHandler) error observeHealthFn func(context.Context) (HealthStatus, error) @@ -163,6 +166,27 @@ func (s stubClient) GetAgent(ctx context.Context, name string) (AgentRecord, err return AgentRecord{}, errors.New("unexpected GetAgent call") } +func (s stubClient) HookCatalog(ctx context.Context, query HookCatalogQuery) ([]HookCatalogRecord, error) { + if s.hookCatalogFn != nil { + return s.hookCatalogFn(ctx, query) + } + return nil, errors.New("unexpected HookCatalog call") +} + +func (s stubClient) HookRuns(ctx context.Context, query HookRunsQuery) ([]HookRunRecord, error) { + if s.hookRunsFn != nil { + return s.hookRunsFn(ctx, query) + } + return nil, errors.New("unexpected HookRuns call") +} + +func (s stubClient) HookEvents(ctx context.Context, query HookEventsQuery) ([]HookEventRecord, error) { + if s.hookEventsFn != nil { + return s.hookEventsFn(ctx, query) + } + return nil, errors.New("unexpected HookEvents call") +} + func (s stubClient) ObserveEvents(ctx context.Context, query ObserveEventQuery) ([]ObserveEventRecord, error) { if s.observeEventsFn != nil { return s.observeEventsFn(ctx, query) diff --git a/internal/cli/hooks.go b/internal/cli/hooks.go new file mode 100644 index 
000000000..18f565d23 --- /dev/null +++ b/internal/cli/hooks.go @@ -0,0 +1,366 @@ +package cli + +import ( + "errors" + "fmt" + "sort" + "strconv" + "strings" + "time" + + hookspkg "github.com/pedronauck/agh/internal/hooks" + "github.com/spf13/cobra" +) + +func newHooksCommand(deps commandDeps) *cobra.Command { + cmd := &cobra.Command{ + Use: "hooks", + Short: "Inspect configured and executed hooks", + } + + cmd.AddCommand(newHooksListCommand(deps)) + cmd.AddCommand(newHooksInfoCommand(deps)) + cmd.AddCommand(newHooksEventsCommand(deps)) + cmd.AddCommand(newHooksRunsCommand(deps)) + + return cmd +} + +func newHooksListCommand(deps commandDeps) *cobra.Command { + var query HookCatalogQuery + + cmd := &cobra.Command{ + Use: "list", + Short: "List resolved hooks in pipeline order", + RunE: func(cmd *cobra.Command, _ []string) error { + client, _, err := clientFromDeps(deps) + if err != nil { + return err + } + + hooks, err := client.HookCatalog(cmd.Context(), query) + if err != nil { + return err + } + return writeCommandOutput(cmd, hookListBundle(hooks)) + }, + } + + cmd.Flags().StringVar(&query.Workspace, "workspace", "", "Filter by workspace name or ID") + cmd.Flags().StringVar(&query.Agent, "agent", "", "Filter by agent name") + cmd.Flags().StringVar(&query.Event, "event", "", "Filter by hook event") + cmd.Flags().StringVar(&query.Source, "source", "", "Filter by hook source") + cmd.Flags().StringVar(&query.Mode, "mode", "", "Filter by hook mode") + return cmd +} + +func newHooksInfoCommand(deps commandDeps) *cobra.Command { + var workspace string + + cmd := &cobra.Command{ + Use: "info ", + Short: "Show detailed information for one or more hooks by name", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + client, _, err := clientFromDeps(deps) + if err != nil { + return err + } + + hooks, err := client.HookCatalog(cmd.Context(), HookCatalogQuery{Workspace: workspace}) + if err != nil { + return err + } + + name := 
strings.TrimSpace(args[0]) + matches := make([]HookCatalogRecord, 0) + for _, hook := range hooks { + if strings.TrimSpace(hook.Name) == name { + matches = append(matches, hook) + } + } + if len(matches) == 0 { + return fmt.Errorf("cli: no hooks named %q found", name) + } + + return writeCommandOutput(cmd, hookInfoBundle(matches)) + }, + } + + cmd.Flags().StringVar(&workspace, "workspace", "", "Resolve hooks in one workspace context") + return cmd +} + +func newHooksEventsCommand(deps commandDeps) *cobra.Command { + var query HookEventsQuery + + cmd := &cobra.Command{ + Use: "events", + Short: "List supported hook events", + RunE: func(cmd *cobra.Command, _ []string) error { + client, _, err := clientFromDeps(deps) + if err != nil { + return err + } + + events, err := client.HookEvents(cmd.Context(), query) + if err != nil { + return err + } + return writeCommandOutput(cmd, hookEventsBundle(events)) + }, + } + + cmd.Flags().StringVar(&query.Family, "family", "", "Filter by hook event family") + cmd.Flags().BoolVar(&query.SyncOnly, "sync-only", false, "Show only sync-eligible events") + return cmd +} + +func newHooksRunsCommand(deps commandDeps) *cobra.Command { + var ( + query HookRunsQuery + sinceRaw string + ) + + cmd := &cobra.Command{ + Use: "runs", + Short: "Show persisted hook execution history", + RunE: func(cmd *cobra.Command, _ []string) error { + client, _, err := clientFromDeps(deps) + if err != nil { + return err + } + if strings.TrimSpace(query.Session) == "" { + return errors.New("cli: --session is required") + } + + since, err := parseSinceFlag(sinceRaw, deps.now) + if err != nil { + return err + } + if !since.IsZero() { + query.Since = since.UTC().Format(time.RFC3339Nano) + } + + runs, err := client.HookRuns(cmd.Context(), query) + if err != nil { + return err + } + return writeCommandOutput(cmd, hookRunsBundle(runs)) + }, + } + + cmd.Flags().StringVar(&query.Session, "session", "", "Session ID") + cmd.Flags().StringVar(&query.Event, "event", "", 
"Filter by hook event") + cmd.Flags().StringVar(&query.Outcome, "outcome", "", "Filter by hook outcome") + cmd.Flags().StringVar(&sinceRaw, "since", "", "Show runs since an RFC3339 timestamp or relative duration") + cmd.Flags().IntVar(&query.Last, "last", 0, "Show only the most recent N runs") + return cmd +} + +func hookListBundle(hooks []HookCatalogRecord) outputBundle { + return listBundle( + hooks, + hooks, + "Hooks", + []string{"Order", "Name", "Event", "Source", "Mode", "Priority"}, + "hooks", + []string{"order", "name", "event", "source", "skill_source", "mode", "required", "priority"}, + func(item HookCatalogRecord) []string { + return []string{ + strconv.Itoa(item.Order), + stringOrDash(item.Name), + stringOrDash(item.Event), + stringOrDash(item.Source), + stringOrDash(item.Mode), + strconv.Itoa(item.Priority), + } + }, + func(item HookCatalogRecord) []string { + return []string{ + strconv.Itoa(item.Order), + item.Name, + item.Event, + item.Source, + item.SkillSource, + item.Mode, + strconv.FormatBool(item.Required), + strconv.Itoa(item.Priority), + } + }, + ) +} + +func hookInfoBundle(hooks []HookCatalogRecord) outputBundle { + return outputBundle{ + jsonValue: hooks, + human: func() (string, error) { + blocks := make([]string, 0, len(hooks)) + for _, item := range hooks { + blocks = append(blocks, renderHumanBlocks( + renderHumanSection("Hook", []keyValue{ + {Label: "Name", Value: stringOrDash(item.Name)}, + {Label: "Order", Value: strconv.Itoa(item.Order)}, + {Label: "Event", Value: stringOrDash(item.Event)}, + {Label: "Source", Value: stringOrDash(item.Source)}, + {Label: "Skill Source", Value: stringOrDash(item.SkillSource)}, + {Label: "Mode", Value: stringOrDash(item.Mode)}, + {Label: "Required", Value: strconv.FormatBool(item.Required)}, + {Label: "Priority", Value: strconv.Itoa(item.Priority)}, + {Label: "Timeout (ms)", Value: int64OrDash(item.TimeoutMS)}, + {Label: "Executor Kind", Value: stringOrDash(item.ExecutorKind)}, + }), + 
renderHumanTable("Matcher", []string{"Field", "Value"}, hookMatcherRows(item.Matcher)), + renderHumanTable("Metadata", []string{"Key", "Value"}, hookMetadataRows(item.Metadata)), + )) + } + return renderHumanBlocks(blocks...), nil + }, + toon: func() (string, error) { + rows := make([][]string, 0, len(hooks)) + for _, item := range hooks { + rows = append(rows, []string{ + item.Name, + strconv.Itoa(item.Order), + item.Event, + item.Source, + item.SkillSource, + item.Mode, + strconv.FormatBool(item.Required), + strconv.Itoa(item.Priority), + strconv.FormatInt(item.TimeoutMS, 10), + item.ExecutorKind, + }) + } + + blocks := []string{ + renderToonArray("hooks", []string{"name", "order", "event", "source", "skill_source", "mode", "required", "priority", "timeout_ms", "executor_kind"}, rows), + } + for _, item := range hooks { + blocks = append(blocks, renderHumanBlocks( + renderToonObject("hook", []string{"name", "order", "event"}, []string{ + item.Name, + strconv.Itoa(item.Order), + item.Event, + }), + renderToonArray("matcher", []string{"field", "value"}, hookMatcherRows(item.Matcher)), + renderToonArray("metadata", []string{"key", "value"}, hookMetadataRows(item.Metadata)), + )) + } + return renderHumanBlocks(blocks...), nil + }, + } +} + +func hookEventsBundle(events []HookEventRecord) outputBundle { + return listBundle( + events, + events, + "Hook Events", + []string{"Event", "Family", "Sync", "Payload", "Patch"}, + "events", + []string{"event", "family", "sync_eligible", "payload_schema", "patch_schema"}, + func(item HookEventRecord) []string { + return []string{ + stringOrDash(item.Event), + stringOrDash(item.Family), + strconv.FormatBool(item.SyncEligible), + stringOrDash(item.PayloadSchema), + stringOrDash(item.PatchSchema), + } + }, + func(item HookEventRecord) []string { + return []string{ + item.Event, + item.Family, + strconv.FormatBool(item.SyncEligible), + item.PayloadSchema, + item.PatchSchema, + } + }, + ) +} + +func hookRunsBundle(runs 
[]HookRunRecord) outputBundle { + return listBundle( + runs, + runs, + "Hook Runs", + []string{"Hook", "Event", "Outcome", "Duration", "Error"}, + "runs", + []string{"hook_name", "event", "outcome", "duration_ms", "error", "recorded_at"}, + func(item HookRunRecord) []string { + return []string{ + stringOrDash(item.HookName), + stringOrDash(item.Event), + stringOrDash(item.Outcome), + stringOrDash(formatHookDuration(item.DurationMS)), + stringOrDash(item.Error), + } + }, + func(item HookRunRecord) []string { + return []string{ + item.HookName, + item.Event, + item.Outcome, + strconv.FormatInt(item.DurationMS, 10), + item.Error, + formatTime(item.RecordedAt), + } + }, + ) +} + +func hookMatcherRows(matcher hookspkg.HookMatcher) [][]string { + rows := make([][]string, 0, 16) + appendRow := func(label string, value string) { + if trimmed := strings.TrimSpace(value); trimmed != "" { + rows = append(rows, []string{label, trimmed}) + } + } + + appendRow("agent_name", matcher.AgentName) + appendRow("agent_type", matcher.AgentType) + appendRow("workspace_id", matcher.WorkspaceID) + appendRow("workspace_root", matcher.WorkspaceRoot) + appendRow("session_type", matcher.SessionType) + appendRow("input_class", matcher.InputClass) + appendRow("acp_event_type", matcher.ACPEventType) + appendRow("turn_id", matcher.TurnID) + appendRow("tool_name", matcher.ToolName) + appendRow("tool_namespace", matcher.ToolNamespace) + if matcher.ToolReadOnly != nil { + rows = append(rows, []string{"tool_read_only", strconv.FormatBool(*matcher.ToolReadOnly)}) + } + appendRow("decision_class", matcher.DecisionClass) + appendRow("message_role", matcher.MessageRole) + appendRow("message_delta_type", matcher.MessageDeltaType) + appendRow("compaction_reason", matcher.CompactionReason) + appendRow("compaction_strategy", matcher.CompactionStrategy) + return rows +} + +func hookMetadataRows(metadata map[string]string) [][]string { + if len(metadata) == 0 { + return nil + } + + keys := make([]string, 0, 
// formatHookDuration renders a hook run duration, given in milliseconds, as
// "<n>ms" for the human-readable runs table.
//
// A negative value cannot be a real measurement, so it yields the empty
// string and lets the caller substitute its placeholder. Zero is a legitimate
// reading (the hook finished in under a millisecond) and renders as "0ms", so
// it is not confused with a missing duration.
func formatHookDuration(durationMS int64) string {
	if durationMS < 0 {
		return ""
	}
	return strconv.FormatInt(durationMS, 10) + "ms"
}
%d", got, want) + } + if decoded[0].Name != "permission-guard" || decoded[0].ExecutorKind != "subprocess" { + t.Fatalf("decoded[0] = %#v, want hook catalog payload", decoded[0]) + } +} + +func TestHooksListCommandRendersHumanAndToon(t *testing.T) { + t.Parallel() + + deps := newTestDeps(t, stubClient{ + hookCatalogFn: func(context.Context, HookCatalogQuery) ([]HookCatalogRecord, error) { + return []HookCatalogRecord{{ + Order: 1, + Name: "permission-guard", + Event: "tool.pre_call", + Source: "config", + SkillSource: "review-skill", + Mode: "sync", + Required: true, + Priority: 10, + }}, nil + }, + }) + + humanOut, _, err := executeRootCommand(t, deps, "hooks", "list", "-o", "human") + if err != nil { + t.Fatalf("executeRootCommand(hooks list human) error = %v", err) + } + if !strings.Contains(humanOut, "Hooks") || !strings.Contains(humanOut, "permission-guard") { + t.Fatalf("human output = %q, want hooks table", humanOut) + } + + toonOut, _, err := executeRootCommand(t, deps, "hooks", "list", "-o", "toon") + if err != nil { + t.Fatalf("executeRootCommand(hooks list toon) error = %v", err) + } + if !strings.Contains(toonOut, "hooks[1]{order,name,event,source,skill_source,mode,required,priority}:") { + t.Fatalf("toon output = %q, want TOON header", toonOut) + } +} + +func TestHooksInfoCommandReturnsAllMatchesAcrossFormats(t *testing.T) { + t.Parallel() + + var seenQuery HookCatalogQuery + deps := newTestDeps(t, stubClient{ + hookCatalogFn: func(_ context.Context, query HookCatalogQuery) ([]HookCatalogRecord, error) { + seenQuery = query + return []HookCatalogRecord{ + { + Order: 1, + Name: "permission-guard", + Event: "permission.request", + Source: "config", + Mode: "sync", + Required: true, + Priority: 10, + TimeoutMS: 500, + ExecutorKind: "subprocess", + Matcher: hookspkg.HookMatcher{ + ToolName: "shell", + }, + Metadata: map[string]string{"origin": "config"}, + }, + { + Order: 1, + Name: "permission-guard", + Event: "tool.pre_call", + Source: "skill", + 
SkillSource: "review-skill", + Mode: "async", + Priority: 20, + ExecutorKind: "native", + Matcher: hookspkg.HookMatcher{ + AgentName: "coder", + }, + Metadata: map[string]string{"owner": "team"}, + }, + { + Order: 1, + Name: "other-hook", + Event: "tool.pre_call", + Mode: "sync", + }, + }, nil + }, + }) + + jsonOut, _, err := executeRootCommand(t, deps, "hooks", "info", "permission-guard", "--workspace", "alpha", "-o", "json") + if err != nil { + t.Fatalf("executeRootCommand(hooks info json) error = %v", err) + } + if seenQuery.Workspace != "alpha" { + t.Fatalf("HookCatalog() workspace query = %q, want alpha", seenQuery.Workspace) + } + + var decoded []HookCatalogRecord + if err := json.Unmarshal([]byte(jsonOut), &decoded); err != nil { + t.Fatalf("json.Unmarshal(hooks info) error = %v", err) + } + if got, want := len(decoded), 2; got != want { + t.Fatalf("len(decoded) = %d, want %d", got, want) + } + + humanOut, _, err := executeRootCommand(t, deps, "hooks", "info", "permission-guard", "-o", "human") + if err != nil { + t.Fatalf("executeRootCommand(hooks info human) error = %v", err) + } + if !strings.Contains(humanOut, "Matcher") || !strings.Contains(humanOut, "Metadata") || !strings.Contains(humanOut, "Executor Kind") { + t.Fatalf("human output = %q, want detail sections", humanOut) + } + + toonOut, _, err := executeRootCommand(t, deps, "hooks", "info", "permission-guard", "-o", "toon") + if err != nil { + t.Fatalf("executeRootCommand(hooks info toon) error = %v", err) + } + if !strings.Contains(toonOut, "hooks[2]{name,order,event,source,skill_source,mode,required,priority,timeout_ms,executor_kind}:") { + t.Fatalf("toon output = %q, want hooks array", toonOut) + } + if !strings.Contains(toonOut, "matcher[1]{field,value}:") || !strings.Contains(toonOut, "metadata[1]{key,value}:") { + t.Fatalf("toon output = %q, want matcher/metadata blocks", toonOut) + } +} + +func TestHooksEventsCommandPassesFiltersAndRendersFormats(t *testing.T) { + t.Parallel() + + var 
seenQuery HookEventsQuery + deps := newTestDeps(t, stubClient{ + hookEventsFn: func(_ context.Context, query HookEventsQuery) ([]HookEventRecord, error) { + seenQuery = query + return []HookEventRecord{{ + Event: "tool.pre_call", + Family: "tool", + SyncEligible: true, + PayloadSchema: "ToolPreCallPayload", + PatchSchema: "ToolCallPatch", + }}, nil + }, + }) + + jsonOut, _, err := executeRootCommand(t, deps, "hooks", "events", "--family", "tool", "--sync-only", "-o", "json") + if err != nil { + t.Fatalf("executeRootCommand(hooks events json) error = %v", err) + } + if seenQuery != (HookEventsQuery{Family: "tool", SyncOnly: true}) { + t.Fatalf("HookEvents() query = %#v, want expected filters", seenQuery) + } + + var decoded []HookEventRecord + if err := json.Unmarshal([]byte(jsonOut), &decoded); err != nil { + t.Fatalf("json.Unmarshal(hooks events) error = %v", err) + } + if got, want := len(decoded), 1; got != want { + t.Fatalf("len(decoded) = %d, want %d", got, want) + } + + humanOut, _, err := executeRootCommand(t, deps, "hooks", "events", "-o", "human") + if err != nil { + t.Fatalf("executeRootCommand(hooks events human) error = %v", err) + } + if !strings.Contains(humanOut, "Hook Events") || !strings.Contains(humanOut, "tool.pre_call") { + t.Fatalf("human output = %q, want events table", humanOut) + } + + toonOut, _, err := executeRootCommand(t, deps, "hooks", "events", "-o", "toon") + if err != nil { + t.Fatalf("executeRootCommand(hooks events toon) error = %v", err) + } + if !strings.Contains(toonOut, "events[1]{event,family,sync_eligible,payload_schema,patch_schema}:") { + t.Fatalf("toon output = %q, want TOON header", toonOut) + } +} + +func TestHooksRunsCommandRequiresSession(t *testing.T) { + t.Parallel() + + code, _, stderr := executeRootCommandWithExit(t, newTestDeps(t, stubClient{}), "hooks", "runs") + if code != 1 { + t.Fatalf("executeRootCommandWithExit() code = %d, want 1", code) + } + if !strings.Contains(stderr, "--session is required") { + 
t.Fatalf("stderr = %q, want session validation message", stderr) + } +} + +func TestHooksRunsCommandParsesSinceAndRendersFormats(t *testing.T) { + t.Parallel() + + var seenQuery HookRunsQuery + deps := newTestDeps(t, stubClient{ + hookRunsFn: func(_ context.Context, query HookRunsQuery) ([]HookRunRecord, error) { + seenQuery = query + return []HookRunRecord{{ + HookName: "permission-guard", + Event: "permission.request", + Outcome: "failed", + DurationMS: 12, + Error: "boom", + RecordedAt: time.Date(2026, 4, 3, 11, 59, 0, 0, time.UTC), + }}, nil + }, + }) + + jsonOut, _, err := executeRootCommand(t, deps, + "hooks", "runs", + "--session", "sess-1", + "--event", "permission.request", + "--outcome", "failed", + "--since", "5m", + "--last", "2", + "-o", "json", + ) + if err != nil { + t.Fatalf("executeRootCommand(hooks runs json) error = %v", err) + } + if seenQuery.Session != "sess-1" || seenQuery.Event != "permission.request" || seenQuery.Outcome != "failed" || seenQuery.Last != 2 { + t.Fatalf("HookRuns() query = %#v, want session/event/outcome/last", seenQuery) + } + if want := fixedTestNow.Add(-5 * time.Minute).UTC().Format(time.RFC3339Nano); seenQuery.Since != want { + t.Fatalf("HookRuns() since = %q, want %q", seenQuery.Since, want) + } + + var decoded []HookRunRecord + if err := json.Unmarshal([]byte(jsonOut), &decoded); err != nil { + t.Fatalf("json.Unmarshal(hooks runs) error = %v", err) + } + if got, want := len(decoded), 1; got != want { + t.Fatalf("len(decoded) = %d, want %d", got, want) + } + + humanOut, _, err := executeRootCommand(t, deps, "hooks", "runs", "--session", "sess-1", "-o", "human") + if err != nil { + t.Fatalf("executeRootCommand(hooks runs human) error = %v", err) + } + if !strings.Contains(humanOut, "Hook Runs") || !strings.Contains(humanOut, "permission-guard") || !strings.Contains(humanOut, "12ms") { + t.Fatalf("human output = %q, want runs table", humanOut) + } + + toonOut, _, err := executeRootCommand(t, deps, "hooks", "runs", 
"--session", "sess-1", "-o", "toon") + if err != nil { + t.Fatalf("executeRootCommand(hooks runs toon) error = %v", err) + } + if !strings.Contains(toonOut, "runs[1]{hook_name,event,outcome,duration_ms,error,recorded_at}:") { + t.Fatalf("toon output = %q, want TOON header", toonOut) + } +} diff --git a/internal/cli/root.go b/internal/cli/root.go index 980f45b6c..629137d83 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -83,6 +83,7 @@ func newRootCommand(deps commandDeps) *cobra.Command { cmd.AddCommand(newSessionCommand(deps)) cmd.AddCommand(newWorkspaceCommand(deps)) cmd.AddCommand(newAgentCommand(deps)) + cmd.AddCommand(newHooksCommand(deps)) cmd.AddCommand(newSkillCommand(deps)) cmd.AddCommand(newMemoryCommand(deps)) cmd.AddCommand(newObserveCommand(deps)) diff --git a/internal/config/agent.go b/internal/config/agent.go index 181cddb52..00396e8af 100644 --- a/internal/config/agent.go +++ b/internal/config/agent.go @@ -7,20 +7,34 @@ import ( "path/filepath" "strings" + "github.com/BurntSushi/toml" "github.com/goccy/go-yaml" "github.com/pedronauck/agh/internal/frontmatter" + hookspkg "github.com/pedronauck/agh/internal/hooks" ) // AgentDef is the parsed representation of an AGENT.md file. 
// parsedAgentDef is the raw decode target for agent frontmatter. It carries
// both YAML and TOML tags and keeps hooks in their as-written
// parsedHookDeclaration form; ParseAgentDef trims the scalar fields and
// converts each hook into a hookspkg.HookDecl when building the AgentDef.
type parsedAgentDef struct {
	Name        string                  `yaml:"name" toml:"name"`
	Provider    string                  `yaml:"provider" toml:"provider"`
	Command     string                  `yaml:"command,omitempty" toml:"command,omitempty"`
	Model       string                  `yaml:"model,omitempty" toml:"model,omitempty"`
	Tools       []string                `yaml:"tools,omitempty" toml:"tools,omitempty"`
	Permissions string                  `yaml:"permissions,omitempty" toml:"permissions,omitempty"`
	MCPServers  []MCPServer             `yaml:"mcp_servers,omitempty" toml:"mcp_servers,omitempty"`
	// Hooks holds the undeclared-source hook entries exactly as written in
	// the frontmatter.
	Hooks []parsedHookDeclaration `yaml:"hooks,omitempty" toml:"hooks,omitempty"`
}
func ParseAgentDef(content []byte) (AgentDef, error) { - var agent AgentDef + var parsed parsedAgentDef body, err := frontmatter.Decode(content, func(data []byte) error { - if err := yaml.UnmarshalWithOptions(data, &agent, yaml.Strict()); err != nil { - return fmt.Errorf("decode YAML frontmatter: %w", err) - } - return nil + return decodeAgentFrontmatter(data, &parsed) }) if err != nil { return AgentDef{}, wrapFrontmatterError(err) } - agent.Name = strings.TrimSpace(agent.Name) - agent.Provider = strings.TrimSpace(agent.Provider) - agent.Command = strings.TrimSpace(agent.Command) - agent.Model = strings.TrimSpace(agent.Model) - agent.Permissions = strings.TrimSpace(agent.Permissions) - agent.Prompt = strings.TrimSpace(body) + agent := AgentDef{ + Name: strings.TrimSpace(parsed.Name), + Provider: strings.TrimSpace(parsed.Provider), + Command: strings.TrimSpace(parsed.Command), + Model: strings.TrimSpace(parsed.Model), + Tools: cloneStrings(parsed.Tools), + Permissions: strings.TrimSpace(parsed.Permissions), + MCPServers: cloneMCPServers(parsed.MCPServers), + Prompt: strings.TrimSpace(body), + } if len(agent.Tools) == 0 { agent.Tools = []string{"*"} } + if len(parsed.Hooks) > 0 { + agent.Hooks = make([]hookspkg.HookDecl, 0, len(parsed.Hooks)) + for idx, raw := range parsed.Hooks { + decl, err := raw.toHookDecl(hookspkg.HookSourceAgentDefinition, agent.Name) + if err != nil { + return AgentDef{}, fmt.Errorf("agent.hooks[%d]: %w", idx, err) + } + agent.Hooks = append(agent.Hooks, decl) + } + } if err := agent.Validate(); err != nil { return AgentDef{}, err @@ -227,6 +252,11 @@ func (a AgentDef) Validate() error { return err } } + for i, hook := range a.Hooks { + if err := hookspkg.ValidateHookDecl(hook); err != nil { + return fmt.Errorf("agent.hooks[%d]: %w", i, err) + } + } return nil } @@ -248,6 +278,23 @@ func wrapFrontmatterError(err error) error { } } +func decodeAgentFrontmatter(data []byte, parsed *parsedAgentDef) error { + if err := 
yaml.UnmarshalWithOptions(data, parsed, yaml.Strict()); err == nil { + return nil + } else { + var parsedTOML parsedAgentDef + meta, tomlErr := toml.Decode(string(data), &parsedTOML) + if tomlErr != nil { + return fmt.Errorf("decode agent frontmatter: yaml: %w; toml: %v", err, tomlErr) + } + if undecoded := meta.Undecoded(); len(undecoded) > 0 { + return fmt.Errorf("decode agent frontmatter: unknown field %q", undecoded[0].String()) + } + *parsed = parsedTOML + return nil + } +} + type mappedFrontmatterError struct { message string causes []error diff --git a/internal/config/bootstrap.go b/internal/config/bootstrap.go index eed052fd2..e9e422082 100644 --- a/internal/config/bootstrap.go +++ b/internal/config/bootstrap.go @@ -77,7 +77,9 @@ func SaveBootstrapConfig(homePaths HomePaths, provider string, model string) (Co overlay.Providers[selectedProvider] = providerOverlay finalCfg := DefaultWithHome(homePaths) - overlay.Apply(&finalCfg) + if err := overlay.Apply(&finalCfg); err != nil { + return Config{}, fmt.Errorf("apply bootstrap config overlay: %w", err) + } if err := normalizeConfigPaths(&finalCfg); err != nil { return Config{}, err } diff --git a/internal/config/config.go b/internal/config/config.go index 7d2a57de6..b079fc79a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -123,6 +123,7 @@ type Config struct { Log LogConfig `toml:"log"` Memory MemoryConfig `toml:"memory"` Skills SkillsConfig `toml:"skills"` + Hooks HooksConfig `toml:"hooks"` } type loadOptions struct { @@ -323,6 +324,9 @@ func (c Config) Validate() error { if err := c.Skills.Validate(); err != nil { return err } + if err := c.Hooks.Validate(); err != nil { + return fmt.Errorf("validate hooks config: %w", err) + } for name := range c.Providers { if _, err := c.ResolveProvider(name); err != nil { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 1e53a2c36..a80bbebf7 100644 --- a/internal/config/config_test.go +++ 
b/internal/config/config_test.go @@ -7,6 +7,8 @@ import ( "strings" "testing" "time" + + hookspkg "github.com/pedronauck/agh/internal/hooks" ) func TestLoadValidTOMLConfigWithAllSections(t *testing.T) { @@ -600,6 +602,30 @@ func TestValidateRejectsUnknownPermissionMode(t *testing.T) { } } +func TestValidateWrapsHooksConfigErrors(t *testing.T) { + t.Parallel() + + homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) + if err != nil { + t.Fatalf("ResolveHomePathsFrom() error = %v", err) + } + + cfg := DefaultWithHome(homePaths) + cfg.Hooks.Declarations = []hookspkg.HookDecl{{ + Name: "broken-hook", + Event: "bad.event", + Source: hookspkg.HookSourceConfig, + }} + + err = cfg.Validate() + if err == nil { + t.Fatal("Validate() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "validate hooks config") { + t.Fatalf("Validate() error = %q, want hooks config context", err) + } +} + func TestDreamConfigValidateRejectsNonPositiveThresholds(t *testing.T) { t.Parallel() diff --git a/internal/config/hooks.go b/internal/config/hooks.go new file mode 100644 index 000000000..147a40000 --- /dev/null +++ b/internal/config/hooks.go @@ -0,0 +1,222 @@ +package config + +import ( + "context" + "errors" + "fmt" + "strings" + "time" + + hookspkg "github.com/pedronauck/agh/internal/hooks" +) + +// HooksConfig holds config-defined hook declarations. 
// parsedHookDeclaration is the as-written form of one hook entry, decodable
// from YAML or TOML. toHookDecl converts it into a hookspkg.HookDecl.
//
// Executor settings may be written either at the top level (Command, Args,
// Env) or under Executor, but not in both places; resolveExecutor rejects a
// declaration that mixes the two. The executor kind is always read from
// Executor.Kind.
type parsedHookDeclaration struct {
	Name     string `yaml:"name" toml:"name"`
	Event    string `yaml:"event" toml:"event"`
	Mode     string `yaml:"mode,omitempty" toml:"mode,omitempty"`
	Required bool   `yaml:"required,omitempty" toml:"required,omitempty"`
	// Priority is a pointer so an omitted priority can be told apart from an
	// explicit zero; toHookDecl sets PrioritySet only when it is non-nil.
	Priority *int              `yaml:"priority,omitempty" toml:"priority,omitempty"`
	Timeout  time.Duration     `yaml:"timeout,omitempty" toml:"timeout,omitempty"`
	Matcher  parsedHookMatcher `yaml:"matcher,omitempty" toml:"matcher,omitempty"`
	// Command, Args and Env are the top-level spelling of the executor
	// settings.
	Command string            `yaml:"command,omitempty" toml:"command,omitempty"`
	Args    []string          `yaml:"args,omitempty" toml:"args,omitempty"`
	Env     map[string]string `yaml:"env,omitempty" toml:"env,omitempty"`
	// Executor is the nested spelling of the executor settings.
	Executor parsedHookExecutor `yaml:"executor,omitempty" toml:"executor,omitempty"`
}
// hookValidationExecutor is a placeholder hookspkg.Executor that only reports
// an executor kind. hookDeclarationResolver returns it so that
// hookspkg.NormalizeHookDecl can be called without a runnable executor.
type hookValidationExecutor struct {
	kind hookspkg.HookExecutorKind
}

// Compile-time check that hookValidationExecutor implements hookspkg.Executor.
var _ hookspkg.Executor = hookValidationExecutor{}

// Kind returns the executor kind this placeholder was created with.
func (e hookValidationExecutor) Kind() hookspkg.HookExecutorKind {
	return e.kind
}

// Execute always fails: the validation executor never runs a hook.
func (hookValidationExecutor) Execute(context.Context, hookspkg.RegisteredHook, []byte) ([]byte, error) {
	return nil, errors.New("config: validation executor cannot execute")
}
+ } + + if len(raw) == 0 { + return []hookspkg.HookDecl{}, nil + } + + normalized := make([]hookspkg.HookDecl, 0, len(raw)) + for idx, decl := range raw { + resolved, err := hookspkg.NormalizeHookDecl(decl, hookDeclarationResolver) + if err != nil { + return nil, fmt.Errorf("config: normalize hook declaration %d (%q): %w", idx, strings.TrimSpace(decl.Name), err) + } + normalized = append(normalized, resolved.Decl) + } + + return normalized, nil +} + +// Validate ensures the hook declarations are internally consistent. +func (c HooksConfig) Validate() error { + if len(c.Declarations) == 0 { + return nil + } + if err := hookspkg.ValidateHookDecls(c.Declarations); err != nil { + return fmt.Errorf("hooks.declarations: %w", err) + } + return nil +} + +func (d parsedHookDeclaration) toHookDecl(source hookspkg.HookSource, scopeAgentName string) (hookspkg.HookDecl, error) { + command, args, env, kind, err := d.resolveExecutor() + if err != nil { + return hookspkg.HookDecl{}, err + } + + matcher, err := d.Matcher.toHookMatcher(scopeAgentName) + if err != nil { + return hookspkg.HookDecl{}, err + } + + decl := hookspkg.HookDecl{ + Name: strings.TrimSpace(d.Name), + Event: hookspkg.HookEvent(strings.TrimSpace(d.Event)), + Source: source, + Mode: hookspkg.HookMode(strings.TrimSpace(d.Mode)), + Required: d.Required, + Timeout: d.Timeout, + Matcher: matcher, + ExecutorKind: kind, + Command: command, + Args: args, + Env: env, + } + if d.Priority != nil { + decl.Priority = *d.Priority + decl.PrioritySet = true + } + + return decl, nil +} + +func (d parsedHookDeclaration) resolveExecutor() (string, []string, map[string]string, hookspkg.HookExecutorKind, error) { + rootSpecified := strings.TrimSpace(d.Command) != "" || len(d.Args) > 0 || len(d.Env) > 0 + nestedSpecified := strings.TrimSpace(d.Executor.Command) != "" || len(d.Executor.Args) > 0 || len(d.Executor.Env) > 0 + if rootSpecified && nestedSpecified { + return "", nil, nil, "", errors.New("hook executor fields must be 
declared either at the top level or under executor, not both") + } + + command := strings.TrimSpace(d.Command) + args := cloneStrings(d.Args) + env := mergeStringMaps(nil, d.Env) + if nestedSpecified { + command = strings.TrimSpace(d.Executor.Command) + args = cloneStrings(d.Executor.Args) + env = mergeStringMaps(nil, d.Executor.Env) + } + + return command, args, env, hookspkg.HookExecutorKind(strings.TrimSpace(d.Executor.Kind)), nil +} + +func (m parsedHookMatcher) toHookMatcher(scopeAgentName string) (hookspkg.HookMatcher, error) { + matcher := hookspkg.HookMatcher{ + AgentName: strings.TrimSpace(m.AgentName), + AgentType: strings.TrimSpace(m.AgentType), + WorkspaceID: strings.TrimSpace(m.WorkspaceID), + WorkspaceRoot: strings.TrimSpace(m.WorkspaceRoot), + SessionType: strings.TrimSpace(m.SessionType), + InputClass: strings.TrimSpace(m.InputClass), + ACPEventType: strings.TrimSpace(m.ACPEventType), + TurnID: strings.TrimSpace(m.TurnID), + ToolName: strings.TrimSpace(m.ToolName), + ToolNamespace: strings.TrimSpace(m.ToolNamespace), + DecisionClass: strings.TrimSpace(m.DecisionClass), + MessageRole: strings.TrimSpace(m.MessageRole), + MessageDeltaType: strings.TrimSpace(m.MessageDeltaType), + CompactionReason: strings.TrimSpace(m.CompactionReason), + CompactionStrategy: strings.TrimSpace(m.CompactionStrategy), + } + if m.ToolReadOnly != nil { + value := *m.ToolReadOnly + matcher.ToolReadOnly = &value + } + + if scopeAgentName == "" { + return matcher, nil + } + if matcher.AgentName != "" && matcher.AgentName != scopeAgentName { + return hookspkg.HookMatcher{}, fmt.Errorf("matcher.agent_name must match agent name %q", scopeAgentName) + } + matcher.AgentName = scopeAgentName + return matcher, nil +} + +func hookDeclarationResolver(decl hookspkg.HookDecl) (hookspkg.Executor, error) { + return hookValidationExecutor{kind: decl.ExecutorKind}, nil +} + +func cloneHookDecls(src []hookspkg.HookDecl) []hookspkg.HookDecl { + if len(src) == 0 { + return nil + } + + cloned := 
make([]hookspkg.HookDecl, 0, len(src)) + for _, decl := range src { + cloned = append(cloned, cloneHookDecl(decl)) + } + + return cloned +} + +func cloneHookDecl(src hookspkg.HookDecl) hookspkg.HookDecl { + cloned := src + cloned.Args = cloneStrings(src.Args) + cloned.Env = mergeStringMaps(nil, src.Env) + cloned.Metadata = mergeStringMaps(nil, src.Metadata) + if src.Matcher.ToolReadOnly != nil { + value := *src.Matcher.ToolReadOnly + cloned.Matcher.ToolReadOnly = &value + } + return cloned +} diff --git a/internal/config/hooks_test.go b/internal/config/hooks_test.go new file mode 100644 index 000000000..d80f19515 --- /dev/null +++ b/internal/config/hooks_test.go @@ -0,0 +1,400 @@ +package config + +import ( + "path/filepath" + "strings" + "testing" + "time" + + hookspkg "github.com/pedronauck/agh/internal/hooks" +) + +func TestLoadParsesConfigHookDeclarationWithAllFields(t *testing.T) { + workspaceRoot, homePaths := prepareHookConfigTestEnv(t) + writeFile(t, homePaths.ConfigFile, ` +[[hooks.declarations]] +name = "audit-tool" +event = "tool.pre_call" +mode = "sync" +required = false +priority = 640 +timeout = "7s" + +[hooks.declarations.matcher] +tool_name = "read_file" +tool_namespace = "fs" +tool_read_only = true + +[hooks.declarations.executor] +command = "/bin/echo" +args = ["audit"] +env = { PHASE = "pre" } +`) + + cfg, err := Load(WithWorkspaceRoot(workspaceRoot)) + if err != nil { + t.Fatalf("Load() error = %v", err) + } + + decls, err := HookDeclarations(cfg, nil) + if err != nil { + t.Fatalf("HookDeclarations() error = %v", err) + } + if got, want := len(decls), 1; got != want { + t.Fatalf("len(HookDeclarations()) = %d, want %d", got, want) + } + + hook := decls[0] + if got, want := hook.Name, "audit-tool"; got != want { + t.Fatalf("hook.Name = %q, want %q", got, want) + } + if got, want := hook.Event, hookspkg.HookToolPreCall; got != want { + t.Fatalf("hook.Event = %q, want %q", got, want) + } + if got, want := hook.Source, hookspkg.HookSourceConfig; got 
!= want { + t.Fatalf("hook.Source = %q, want %q", got, want) + } + if got, want := hook.Mode, hookspkg.HookModeSync; got != want { + t.Fatalf("hook.Mode = %q, want %q", got, want) + } + if got, want := hook.Priority, 640; got != want { + t.Fatalf("hook.Priority = %d, want %d", got, want) + } + if got, want := hook.Timeout, 7*time.Second; got != want { + t.Fatalf("hook.Timeout = %s, want %s", got, want) + } + if got, want := hook.ExecutorKind, hookspkg.HookExecutorSubprocess; got != want { + t.Fatalf("hook.ExecutorKind = %q, want %q", got, want) + } + if got, want := hook.Command, "/bin/echo"; got != want { + t.Fatalf("hook.Command = %q, want %q", got, want) + } + if got, want := strings.Join(hook.Args, ","), "audit"; got != want { + t.Fatalf("hook.Args = %#v, want %q", hook.Args, want) + } + if got, want := hook.Env["PHASE"], "pre"; got != want { + t.Fatalf("hook.Env[PHASE] = %q, want %q", got, want) + } + if got, want := hook.Matcher.ToolName, "read_file"; got != want { + t.Fatalf("hook.Matcher.ToolName = %q, want %q", got, want) + } + if got, want := hook.Matcher.ToolNamespace, "fs"; got != want { + t.Fatalf("hook.Matcher.ToolNamespace = %q, want %q", got, want) + } + if hook.Matcher.ToolReadOnly == nil || !*hook.Matcher.ToolReadOnly { + t.Fatalf("hook.Matcher.ToolReadOnly = %#v, want true", hook.Matcher.ToolReadOnly) + } +} + +func TestLoadParsesMinimalConfigHookAndAppliesDefaults(t *testing.T) { + workspaceRoot, homePaths := prepareHookConfigTestEnv(t) + writeFile(t, homePaths.ConfigFile, ` +[[hooks.declarations]] +name = "workspace-ready" +event = "session.post_create" +command = "/bin/echo" +`) + + cfg, err := Load(WithWorkspaceRoot(workspaceRoot)) + if err != nil { + t.Fatalf("Load() error = %v", err) + } + + decls, err := HookDeclarations(cfg, nil) + if err != nil { + t.Fatalf("HookDeclarations() error = %v", err) + } + if got, want := len(decls), 1; got != want { + t.Fatalf("len(HookDeclarations()) = %d, want %d", got, want) + } + + hook := decls[0] + if 
got, want := hook.Mode, hookspkg.HookModeAsync; got != want { + t.Fatalf("hook.Mode = %q, want %q", got, want) + } + if got, want := hook.Priority, 500; got != want { + t.Fatalf("hook.Priority = %d, want %d", got, want) + } + if hook.PrioritySet { + t.Fatal("hook.PrioritySet = true, want false for default priority") + } + if got, want := hook.ExecutorKind, hookspkg.HookExecutorSubprocess; got != want { + t.Fatalf("hook.ExecutorKind = %q, want %q", got, want) + } +} + +func TestLoadRejectsInvalidConfigHookEvent(t *testing.T) { + workspaceRoot, homePaths := prepareHookConfigTestEnv(t) + writeFile(t, homePaths.ConfigFile, ` +[[hooks.declarations]] +name = "bad-event" +event = "bad.event" +command = "/bin/echo" +`) + + _, err := Load(WithWorkspaceRoot(workspaceRoot)) + if err == nil { + t.Fatal("Load() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "hooks.declarations") || !strings.Contains(err.Error(), "bad.event") { + t.Fatalf("Load() error = %v, want hooks.declarations invalid event detail", err) + } +} + +func TestLoadRejectsRequiredAsyncConfigHook(t *testing.T) { + workspaceRoot, homePaths := prepareHookConfigTestEnv(t) + writeFile(t, homePaths.ConfigFile, ` +[[hooks.declarations]] +name = "must-not-async" +event = "session.post_create" +mode = "async" +required = true +command = "/bin/echo" +`) + + _, err := Load(WithWorkspaceRoot(workspaceRoot)) + if err == nil { + t.Fatal("Load() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "must-not-async") || !strings.Contains(err.Error(), "async") { + t.Fatalf("Load() error = %v, want required async detail", err) + } +} + +func TestLoadMergesConfigHooksAcrossPrecedenceLevels(t *testing.T) { + workspaceRoot, homePaths := prepareHookConfigTestEnv(t) + writeFile(t, homePaths.ConfigFile, ` +[[hooks.declarations]] +name = "global-only" +event = "session.post_create" +command = "/bin/global" + +[[hooks.declarations]] +name = "shared" +event = "session.post_stop" +command = 
"/bin/global-shared" +`) + writeFile(t, filepath.Join(workspaceRoot, DirName, ConfigName), ` +[[hooks.declarations]] +name = "workspace-only" +event = "input.pre_submit" +command = "/bin/workspace" + +[[hooks.declarations]] +name = "shared" +event = "session.post_stop" +command = "/bin/workspace-shared" +`) + + cfg, err := Load(WithWorkspaceRoot(workspaceRoot)) + if err != nil { + t.Fatalf("Load() error = %v", err) + } + + decls, err := HookDeclarations(cfg, nil) + if err != nil { + t.Fatalf("HookDeclarations() error = %v", err) + } + if got, want := len(decls), 3; got != want { + t.Fatalf("len(HookDeclarations()) = %d, want %d", got, want) + } + + got := map[string]hookspkg.HookDecl{} + for _, decl := range decls { + got[decl.Name] = decl + } + if _, ok := got["global-only"]; !ok { + t.Fatalf("HookDeclarations() missing global-only: %#v", decls) + } + if _, ok := got["workspace-only"]; !ok { + t.Fatalf("HookDeclarations() missing workspace-only: %#v", decls) + } + if got["shared"].Command != "/bin/workspace-shared" { + t.Fatalf("shared command = %q, want %q", got["shared"].Command, "/bin/workspace-shared") + } +} + +func TestParseAgentDefParsesHookAndScopesMatcherToAgent(t *testing.T) { + t.Parallel() + + agent, err := ParseAgentDef([]byte(`--- +name: coder +provider: claude +hooks: + - name: prompt-sanitizer + event: prompt.post_assemble + mode: sync + command: /bin/echo + args: ["sanitize"] +--- + +Keep prompts tight. 
+`)) + if err != nil { + t.Fatalf("ParseAgentDef() error = %v", err) + } + if got, want := len(agent.Hooks), 1; got != want { + t.Fatalf("len(agent.Hooks) = %d, want %d", got, want) + } + + hook := agent.Hooks[0] + if got, want := hook.Name, "prompt-sanitizer"; got != want { + t.Fatalf("hook.Name = %q, want %q", got, want) + } + if got, want := hook.Source, hookspkg.HookSourceAgentDefinition; got != want { + t.Fatalf("hook.Source = %q, want %q", got, want) + } + if got, want := hook.Matcher.AgentName, "coder"; got != want { + t.Fatalf("hook.Matcher.AgentName = %q, want %q", got, want) + } +} + +func TestParseAgentDefParsesTOMLHookAndScopesMatcherToAgent(t *testing.T) { + t.Parallel() + + agent, err := ParseAgentDef([]byte(`--- +name = "reviewer" +provider = "codex" + +[[hooks]] +name = "review-gate" +event = "input.pre_submit" +command = "/bin/echo" +--- + +Review carefully. +`)) + if err != nil { + t.Fatalf("ParseAgentDef() error = %v", err) + } + if got, want := len(agent.Hooks), 1; got != want { + t.Fatalf("len(agent.Hooks) = %d, want %d", got, want) + } + + hook := agent.Hooks[0] + if got, want := hook.Name, "review-gate"; got != want { + t.Fatalf("hook.Name = %q, want %q", got, want) + } + if got, want := hook.Source, hookspkg.HookSourceAgentDefinition; got != want { + t.Fatalf("hook.Source = %q, want %q", got, want) + } + if got, want := hook.Matcher.AgentName, "reviewer"; got != want { + t.Fatalf("hook.Matcher.AgentName = %q, want %q", got, want) + } +} + +func TestParseAgentDefRejectsHookScopedToDifferentAgent(t *testing.T) { + t.Parallel() + + _, err := ParseAgentDef([]byte(`--- +name: coder +provider: claude +hooks: + - name: prompt-sanitizer + event: prompt.post_assemble + command: /bin/echo + matcher: + agent_name: reviewer +--- + +Keep prompts tight. 
+`)) + if err == nil { + t.Fatal("ParseAgentDef() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "matcher.agent_name") || !strings.Contains(err.Error(), "coder") { + t.Fatalf("ParseAgentDef() error = %v, want scoped agent detail", err) + } +} + +func TestHookDeclarationsReturnsCombinedConfigAndAgentHooks(t *testing.T) { + workspaceRoot, homePaths := prepareHookConfigTestEnv(t) + writeFile(t, homePaths.ConfigFile, ` +[[hooks.declarations]] +name = "global-create" +event = "session.post_create" +command = "/bin/global" +`) + + cfg, err := Load(WithWorkspaceRoot(workspaceRoot)) + if err != nil { + t.Fatalf("Load() error = %v", err) + } + agent, err := ParseAgentDef([]byte(`--- +name: coder +provider: claude +hooks: + - name: agent-input + event: input.pre_submit + command: /bin/echo +--- + +Prompt. +`)) + if err != nil { + t.Fatalf("ParseAgentDef() error = %v", err) + } + + decls, err := HookDeclarations(cfg, []AgentDef{agent}) + if err != nil { + t.Fatalf("HookDeclarations() error = %v", err) + } + if got, want := len(decls), 2; got != want { + t.Fatalf("len(HookDeclarations()) = %d, want %d", got, want) + } + + got := map[string]hookspkg.HookDecl{} + for _, decl := range decls { + got[decl.Name] = decl + } + if got["global-create"].Priority != 500 { + t.Fatalf("global-create priority = %d, want 500", got["global-create"].Priority) + } + if got["agent-input"].Priority != 100 { + t.Fatalf("agent-input priority = %d, want 100", got["agent-input"].Priority) + } + if got["agent-input"].Matcher.AgentName != "coder" { + t.Fatalf("agent-input matcher.agent_name = %q, want %q", got["agent-input"].Matcher.AgentName, "coder") + } +} + +func TestHookDeclarationsReturnsEmptySliceForEmptyHooksSection(t *testing.T) { + workspaceRoot, homePaths := prepareHookConfigTestEnv(t) + writeFile(t, homePaths.ConfigFile, ` +[hooks] +`) + + cfg, err := Load(WithWorkspaceRoot(workspaceRoot)) + if err != nil { + t.Fatalf("Load() error = %v", err) + } + + decls, err := 
HookDeclarations(cfg, nil) + if err != nil { + t.Fatalf("HookDeclarations() error = %v", err) + } + if decls == nil { + t.Fatal("HookDeclarations() = nil, want empty slice") + } + if got := len(decls); got != 0 { + t.Fatalf("len(HookDeclarations()) = %d, want 0", got) + } +} + +func prepareHookConfigTestEnv(t *testing.T) (string, HomePaths) { + t.Helper() + + workspaceRoot := t.TempDir() + homeRoot := filepath.Join(t.TempDir(), "home") + t.Setenv("AGH_HOME", homeRoot) + + homePaths, err := ResolveHomePaths() + if err != nil { + t.Fatalf("ResolveHomePaths() error = %v", err) + } + if err := EnsureHomeLayout(homePaths); err != nil { + t.Fatalf("EnsureHomeLayout() error = %v", err) + } + + return workspaceRoot, homePaths +} diff --git a/internal/config/merge.go b/internal/config/merge.go index 0e12e254a..dacad4f33 100644 --- a/internal/config/merge.go +++ b/internal/config/merge.go @@ -9,6 +9,7 @@ import ( "time" "github.com/BurntSushi/toml" + hookspkg "github.com/pedronauck/agh/internal/hooks" ) type configOverlay struct { @@ -22,6 +23,7 @@ type configOverlay struct { Log logOverlay `toml:"log"` Memory memoryOverlay `toml:"memory"` Skills skillsOverlay `toml:"skills"` + Hooks hooksOverlay `toml:"hooks"` } type daemonOverlay struct { @@ -99,6 +101,10 @@ type marketplaceOverlay struct { BaseURL *string `toml:"base_url"` } +type hooksOverlay struct { + Declarations []parsedHookDeclaration `toml:"declarations"` +} + type mcpServerOverlay struct { Name *string `toml:"name"` Command *string `toml:"command"` @@ -117,8 +123,7 @@ func ApplyConfigOverlayFile(path string, dst *Config) error { return err } - overlay.Apply(dst) - return nil + return overlay.Apply(dst) } func loadConfigOverlayFile(path string) (configOverlay, error) { @@ -144,7 +149,7 @@ func loadConfigOverlayFile(path string) (configOverlay, error) { return overlay, nil } -func (o configOverlay) Apply(dst *Config) { +func (o configOverlay) Apply(dst *Config) error { o.Daemon.Apply(&dst.Daemon) 
o.HTTP.Apply(&dst.HTTP) o.Defaults.Apply(&dst.Defaults) @@ -155,6 +160,7 @@ func (o configOverlay) Apply(dst *Config) { o.Log.Apply(&dst.Log) o.Memory.Apply(&dst.Memory) o.Skills.Apply(&dst.Skills) + return o.Hooks.Apply(&dst.Hooks) } func (o daemonOverlay) Apply(dst *DaemonConfig) { @@ -298,6 +304,41 @@ func (o marketplaceOverlay) Apply(dst *MarketplaceConfig) { } } +func (o hooksOverlay) Apply(dst *HooksConfig) error { + if len(o.Declarations) == 0 { + return nil + } + + merged := cloneHookDecls(dst.Declarations) + index := make(map[string]int, len(merged)) + for i, decl := range merged { + if name := strings.TrimSpace(decl.Name); name != "" { + index[name] = i + } + } + + for idx, raw := range o.Declarations { + decl, err := raw.toHookDecl(hookspkg.HookSourceConfig, "") + if err != nil { + return fmt.Errorf("hooks.declarations[%d]: %w", idx, err) + } + + name := strings.TrimSpace(decl.Name) + if existingIdx, ok := index[name]; ok && name != "" { + merged[existingIdx] = decl + continue + } + + merged = append(merged, decl) + if name != "" { + index[name] = len(merged) - 1 + } + } + + dst.Declarations = merged + return nil +} + func (o mcpServerOverlay) Apply(dst *MCPServer) { if o.Name != nil { dst.Name = *o.Name diff --git a/internal/daemon/boot.go b/internal/daemon/boot.go index a203377e1..cb88f563c 100644 --- a/internal/daemon/boot.go +++ b/internal/daemon/boot.go @@ -10,9 +10,11 @@ import ( core "github.com/pedronauck/agh/internal/api/core" aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" aghlogger "github.com/pedronauck/agh/internal/logger" "github.com/pedronauck/agh/internal/memory" "github.com/pedronauck/agh/internal/memory/consolidation" + "github.com/pedronauck/agh/internal/observe" "github.com/pedronauck/agh/internal/session" "github.com/pedronauck/agh/internal/skills" "github.com/pedronauck/agh/internal/skills/bundled" @@ -70,7 +72,6 @@ func (d *Daemon) boot(ctx context.Context) (err error) { 
memoryStore *memory.Store skillsRegistry *skills.Registry mcpResolver *skills.MCPResolver - hookRunner *skills.HookRunner dreamSvc consolidation.Service dreamRuntime *consolidation.Runtime globalMemoryDir string @@ -119,13 +120,6 @@ func (d *Daemon) boot(ctx context.Context) (err error) { return fmt.Errorf("daemon: load skills registry: %w", err) } mcpResolver = skills.NewMCPResolver(cfg.Skills, logger) - hookRunner = skills.NewHookRunner(cfg.Skills, logger) - - skillsCancel, skillsDone = startSkillsWatcher(ctx, skillsRegistry, cfg.Skills.PollInterval) - cleanupFns = append(cleanupFns, func(context.Context) error { - stopSkillsWatcher(skillsCancel, skillsDone) - return nil - }) appendProviders = append(appendProviders, skills.NewCatalogProvider(skillsRegistry)) } @@ -195,7 +189,7 @@ func (d *Daemon) boot(ctx context.Context) (err error) { } startedAt := d.now().UTC() - fanout := notifierFanout{} + notifier := newHooksNotifier(logger, d.now) var skillRegistryDep session.SkillRegistry if skillsRegistry != nil { skillRegistryDep = skillsRegistry @@ -207,7 +201,8 @@ func (d *Daemon) boot(ctx context.Context) (err error) { sessions, err := d.newSessionManager(ctx, SessionManagerDeps{ HomePaths: d.homePaths, Logger: logger, - Notifier: &fanout, + Notifier: notifier, + Hooks: notifier, PromptAssembler: promptAssembler, SkillRegistry: skillRegistryDep, MCPResolver: mcpResolverDep, @@ -257,18 +252,45 @@ func (d *Daemon) boot(ctx context.Context) (err error) { if err != nil { return fmt.Errorf("daemon: create observer: %w", err) } - fanout.notifiers = append(fanout.notifiers, observer) - if skillsRegistry != nil && hookRunner != nil { - fanout.hookPhase = newSkillsHookDispatcher(skillsRegistry, hookRunner, workspaceResolver, logger) - } deps.Observer = observer - if dreamSvc != nil { - fanout.postSessionStopped = append(fanout.postSessionStopped, func(_ context.Context, sess *session.Session) { - info := sess.Info() - if info == nil || info.Type == session.SessionTypeDream 
|| strings.TrimSpace(info.WorkspaceID) == "" { - return - } - dreamRuntime.EnqueueCheck("session_stop", info.WorkspaceID) + + nativeDecls, nativeExecutors := daemonNativeHooks(observer, dreamRuntime) + hookOptions := []hookspkg.Option{ + hookspkg.WithLogger(logger), + hookspkg.WithNow(d.now), + hookspkg.WithDebugPatchAudit(strings.EqualFold(cfg.Log.Level, "debug")), + hookspkg.WithExecutorResolver(daemonExecutorResolver(nativeExecutors)), + hookspkg.WithNativeDeclarations(nativeDecls), + hookspkg.WithConfigDeclarationProvider(configDeclarationProvider(registry, workspaceResolver, logger)), + hookspkg.WithAgentDeclarationProvider(agentDeclarationProvider(registry, workspaceResolver, logger)), + hookspkg.WithSkillDeclarationProvider(skillDeclarationProvider(skillsRegistry, registry, workspaceResolver, cfg.Skills.AllowedMarketplaceHooks, logger)), + } + if sink, ok := observer.(hookspkg.TelemetrySink); ok { + hookOptions = append(hookOptions, hookspkg.WithTelemetrySink(sink)) + } + hooks := hookspkg.NewHooks(hookOptions...) 
+ if err := hooks.Rebuild(ctx); err != nil { + hooks.Close() + return fmt.Errorf("daemon: rebuild hooks: %w", err) + } + if hookAwareObserver, ok := observer.(interface { + AttachHooks(observe.HookCatalogSource) + }); ok { + hookAwareObserver.AttachHooks(hooks) + } + notifier.setRuntime(hooks, observer) + cleanupFns = append(cleanupFns, func(context.Context) error { + hooks.Close() + return nil + }) + + if skillsRegistry != nil { + skillsCancel, skillsDone = startSkillsWatcher(ctx, skillsRegistry, cfg.Skills.PollInterval, func(refreshCtx context.Context) error { + return hooks.Rebuild(refreshCtx) + }) + cleanupFns = append(cleanupFns, func(context.Context) error { + stopSkillsWatcher(skillsCancel, skillsDone) + return nil }) } @@ -331,6 +353,7 @@ func (d *Daemon) boot(ctx context.Context) (err error) { d.registry = registry d.memoryStore = memoryStore d.sessions = sessions + d.hooks = hooks d.observer = observer d.httpServer = httpServer d.udsServer = udsServer @@ -364,7 +387,7 @@ func (d *Daemon) skillsRegistryConfig(cfg aghconfig.Config) (skills.RegistryConf }, nil } -func startSkillsWatcher(ctx context.Context, registry *skills.Registry, interval time.Duration) (context.CancelFunc, chan struct{}) { +func startSkillsWatcher(ctx context.Context, registry *skills.Registry, interval time.Duration, afterRefresh func(context.Context) error) (context.CancelFunc, chan struct{}) { if registry == nil { return nil, nil } @@ -372,6 +395,7 @@ func startSkillsWatcher(ctx context.Context, registry *skills.Registry, interval watcherCtx, cancel := context.WithCancel(ctx) done := make(chan struct{}) watcher := skills.NewWatcher(registry, interval) + watcher.SetAfterRefresh(afterRefresh) go func() { defer close(done) watcher.Start(watcherCtx) diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 86d52809c..91c0e81eb 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -87,6 +87,7 @@ type SessionManagerDeps struct { HomePaths 
aghconfig.HomePaths Logger *slog.Logger Notifier session.Notifier + Hooks session.HookDispatcher PromptAssembler session.PromptAssembler SkillRegistry session.SkillRegistry MCPResolver session.MCPResolver @@ -129,6 +130,7 @@ type Daemon struct { registry Registry memoryStore *memory.Store sessions SessionManager + hooks hookRuntime observer Observer httpServer Server udsServer Server @@ -245,6 +247,7 @@ func New(opts ...Option) (*Daemon, error) { session.WithLifecycleContext(ctx), session.WithLogger(deps.Logger), session.WithNotifier(deps.Notifier), + session.WithHookDispatcher(deps.Hooks), session.WithPromptAssembler(deps.PromptAssembler), session.WithSkillRegistry(deps.SkillRegistry), session.WithMCPResolver(deps.MCPResolver), @@ -373,6 +376,7 @@ func (d *Daemon) Shutdown(ctx context.Context) error { d.mu.Lock() sessions := d.sessions + hooks := d.hooks httpServer := d.httpServer udsServer := d.udsServer registry := d.registry @@ -384,6 +388,7 @@ func (d *Daemon) Shutdown(ctx context.Context) error { skillsDone := d.skillsDone d.sessions = nil + d.hooks = nil d.httpServer = nil d.udsServer = nil d.observer = nil @@ -409,6 +414,9 @@ func (d *Daemon) Shutdown(ctx context.Context) error { if err := d.stopSessions(ctx, sessions); err != nil { errs = append(errs, err) } + if hooks != nil { + hooks.Close() + } if httpServer != nil { if err := httpServer.Shutdown(ctx); err != nil { errs = append(errs, fmt.Errorf("daemon: shutdown http server: %w", err)) diff --git a/internal/daemon/daemon_integration_test.go b/internal/daemon/daemon_integration_test.go index 31356e0ff..c9ef73b04 100644 --- a/internal/daemon/daemon_integration_test.go +++ b/internal/daemon/daemon_integration_test.go @@ -4,6 +4,7 @@ package daemon import ( "context" + "encoding/json" "errors" "os" "path/filepath" @@ -12,6 +13,7 @@ import ( "time" aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/memory" 
"github.com/pedronauck/agh/internal/memory/consolidation" "github.com/pedronauck/agh/internal/session" @@ -313,6 +315,215 @@ func TestBootLeavesSkillDependenciesNilWhenSkillsDisabled(t *testing.T) { } } +func TestBootBuildsHooksFromWorkspaceConfigAgentAndSkills(t *testing.T) { + homePaths := integrationHomePaths(t) + cfg := testConfig(t, homePaths) + cfg.Memory.Enabled = false + cfg.Skills.Enabled = true + + workspaceRoot := filepath.Join(t.TempDir(), "workspace") + if err := os.MkdirAll(filepath.Join(workspaceRoot, aghconfig.DirName), 0o755); err != nil { + t.Fatalf("os.MkdirAll(%q) error = %v", filepath.Join(workspaceRoot, aghconfig.DirName), err) + } + + scriptPath := writeDaemonHookScript(t, t.TempDir(), "capture.sh", "#!/bin/sh\ncat > \"$1\"\n") + configOutput := filepath.Join(t.TempDir(), "config-create.json") + agentOutput := filepath.Join(t.TempDir(), "agent-stop.json") + skillOutput := filepath.Join(t.TempDir(), "skill-create.json") + + writeDaemonFile(t, filepath.Join(workspaceRoot, aghconfig.DirName, "config.toml"), ` +[[hooks.declarations]] +name = "config-create" +event = "session.post_create" +mode = "sync" +command = "`+scriptPath+`" +args = ["`+configOutput+`"] +`) + writeDaemonFile(t, filepath.Join(workspaceRoot, aghconfig.DirName, "agents", "coder", "AGENT.md"), `--- +name: coder +provider: claude +hooks: + - name: agent-stop + event: session.post_stop + mode: sync + command: `+scriptPath+` + args: ["`+agentOutput+`"] +--- + +Prompt. 
+`) + writeDaemonFile(t, filepath.Join(workspaceRoot, aghconfig.DirName, "skills", "local-hook", "SKILL.md"), `--- +name: local-hook +description: workspace lifecycle hook +metadata: + agh: + hooks: + - event: session.post_create + mode: sync + command: `+scriptPath+` + args: + - `+skillOutput+` +--- + +body +`) + + resolvedWorkspace := seedDaemonWorkspace(t, homePaths, workspaceRoot) + + var capturedDeps SessionManagerDeps + d, err := New( + WithHomePaths(homePaths), + WithConfig(cfg), + WithLogger(discardLogger()), + ) + if err != nil { + t.Fatalf("New() error = %v", err) + } + d.newSessionManager = func(_ context.Context, deps SessionManagerDeps) (SessionManager, error) { + capturedDeps = deps + return &fakeSessionManager{}, nil + } + d.newObserver = func(context.Context, RuntimeDeps) (Observer, error) { + return &fakeObserver{}, nil + } + d.httpFactory = func(context.Context, RuntimeDeps) (Server, error) { + return &fakeServer{name: "http"}, nil + } + d.udsFactory = func(context.Context, RuntimeDeps) (Server, error) { + return &fakeServer{name: "uds"}, nil + } + + if err := d.boot(testutil.Context(t)); err != nil { + t.Fatalf("boot() error = %v", err) + } + t.Cleanup(func() { + if err := d.Shutdown(testutil.Context(t)); err != nil { + t.Fatalf("Shutdown() error = %v", err) + } + }) + + if d.hooks == nil { + t.Fatal("boot() did not initialize hooks runtime") + } + if capturedDeps.Notifier == nil { + t.Fatal("boot() did not inject the hooks notifier") + } + if capturedDeps.Hooks == nil { + t.Fatal("boot() did not inject the hooks dispatcher") + } + + sess := &session.Session{ + ID: "sess-1", + Name: "demo", + AgentName: "coder", + WorkspaceID: resolvedWorkspace.ID, + Workspace: resolvedWorkspace.RootDir, + Type: session.SessionTypeUser, + State: session.StateStopped, + CreatedAt: time.Date(2026, 4, 9, 10, 0, 0, 0, time.UTC), + UpdatedAt: time.Date(2026, 4, 9, 11, 0, 0, 0, time.UTC), + } + + if _, err := 
capturedDeps.Hooks.DispatchSessionPostCreate(testutil.Context(t), hookspkg.SessionPostCreatePayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostCreate, time.Now().UTC()))); err != nil { + t.Fatalf("DispatchSessionPostCreate() error = %v", err) + } + if _, err := capturedDeps.Hooks.DispatchSessionPostStop(testutil.Context(t), hookspkg.SessionPostStopPayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostStop, time.Now().UTC()))); err != nil { + t.Fatalf("DispatchSessionPostStop() error = %v", err) + } + + assertLifecycleHookPayload(t, configOutput, hookspkg.HookSessionPostCreate, resolvedWorkspace) + assertLifecycleHookPayload(t, skillOutput, hookspkg.HookSessionPostCreate, resolvedWorkspace) + assertLifecycleHookPayload(t, agentOutput, hookspkg.HookSessionPostStop, resolvedWorkspace) +} + +func TestBootSkillsWatcherRebuildsHooksBeforeNextDispatch(t *testing.T) { + homePaths := integrationHomePaths(t) + cfg := testConfig(t, homePaths) + cfg.Memory.Enabled = false + cfg.Skills.Enabled = true + cfg.Skills.PollInterval = 10 * time.Millisecond + + workspaceRoot := filepath.Join(t.TempDir(), "workspace") + resolvedWorkspace := seedDaemonWorkspace(t, homePaths, workspaceRoot) + outputPath := filepath.Join(t.TempDir(), "watched-create.json") + scriptPath := writeDaemonHookScript(t, t.TempDir(), "capture.sh", "#!/bin/sh\ncat > \"$1\"\n") + + var capturedDeps SessionManagerDeps + d, err := New( + WithHomePaths(homePaths), + WithConfig(cfg), + WithLogger(discardLogger()), + ) + if err != nil { + t.Fatalf("New() error = %v", err) + } + d.newSessionManager = func(_ context.Context, deps SessionManagerDeps) (SessionManager, error) { + capturedDeps = deps + return &fakeSessionManager{}, nil + } + d.newObserver = func(context.Context, RuntimeDeps) (Observer, error) { + return &fakeObserver{}, nil + } + d.httpFactory = func(context.Context, RuntimeDeps) (Server, error) { + return &fakeServer{name: "http"}, nil + } + d.udsFactory = func(context.Context, 
RuntimeDeps) (Server, error) { + return &fakeServer{name: "uds"}, nil + } + + if err := d.boot(testutil.Context(t)); err != nil { + t.Fatalf("boot() error = %v", err) + } + t.Cleanup(func() { + if err := d.Shutdown(testutil.Context(t)); err != nil { + t.Fatalf("Shutdown() error = %v", err) + } + }) + if capturedDeps.Hooks == nil { + t.Fatal("boot() did not inject the hooks dispatcher") + } + + initialVersion := d.hooks.Version() + writeDaemonFile(t, filepath.Join(homePaths.SkillsDir, "watched-hook", "SKILL.md"), `--- +name: watched-hook +description: reloaded hook +metadata: + agh: + hooks: + - event: session.post_create + mode: sync + command: `+scriptPath+` + args: + - `+outputPath+` +--- + +body +`) + + waitForCondition(t, "hooks rebuild after watcher refresh", func() bool { + if _, ok := d.skillsRegistry.Get("watched-hook"); !ok { + return false + } + return d.hooks.Version() > initialVersion + }) + + sess := &session.Session{ + ID: "sess-watch", + AgentName: "general", + WorkspaceID: resolvedWorkspace.ID, + Workspace: resolvedWorkspace.RootDir, + Type: session.SessionTypeUser, + State: session.StateActive, + CreatedAt: time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC), + UpdatedAt: time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC), + } + + if _, err := capturedDeps.Hooks.DispatchSessionPostCreate(testutil.Context(t), hookspkg.SessionPostCreatePayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostCreate, time.Now().UTC()))); err != nil { + t.Fatalf("DispatchSessionPostCreate() error = %v", err) + } + assertLifecycleHookPayload(t, outputPath, hookspkg.HookSessionPostCreate, resolvedWorkspace) +} + func TestRunDreamTickerAndSpawnerIntegration(t *testing.T) { homePaths := integrationHomePaths(t) cfg := testConfig(t, homePaths) @@ -444,3 +655,70 @@ func seedDaemonWorkspace(t *testing.T, homePaths aghconfig.HomePaths, root strin } return resolved } + +func writeDaemonHookScript(t *testing.T, dir string, name string, contents string) string { + t.Helper() + + path 
:= filepath.Join(dir, name) + if err := os.WriteFile(path, []byte(contents), 0o755); err != nil { + t.Fatalf("os.WriteFile(%q) error = %v", path, err) + } + return path +} + +func assertLifecycleHookPayload(t *testing.T, path string, wantEvent hookspkg.HookEvent, wantWorkspace workspacepkg.ResolvedWorkspace) { + t.Helper() + + var ( + payloadBytes []byte + payload hookspkg.SessionLifecyclePayload + readOK bool + unmarshalOK bool + ) + + t.Run("read file", func(t *testing.T) { + var err error + payloadBytes, err = os.ReadFile(path) + if err != nil { + t.Fatalf("os.ReadFile(%q) error = %v", path, err) + } + readOK = true + }) + + t.Run("unmarshal", func(t *testing.T) { + if !readOK { + t.Skip("payload unavailable after read failure") + } + if err := json.Unmarshal(payloadBytes, &payload); err != nil { + t.Fatalf("json.Unmarshal(%q) error = %v", path, err) + } + unmarshalOK = true + }) + + t.Run("event", func(t *testing.T) { + if !unmarshalOK { + t.Skip("payload unavailable after unmarshal failure") + } + if payload.Event != wantEvent { + t.Fatalf("payload.Event = %q, want %q", payload.Event, wantEvent) + } + }) + + t.Run("workspace id", func(t *testing.T) { + if !unmarshalOK { + t.Skip("payload unavailable after unmarshal failure") + } + if payload.WorkspaceID != wantWorkspace.ID { + t.Fatalf("payload.WorkspaceID = %q, want %q", payload.WorkspaceID, wantWorkspace.ID) + } + }) + + t.Run("workspace path", func(t *testing.T) { + if !unmarshalOK { + t.Skip("payload unavailable after unmarshal failure") + } + if payload.Workspace != wantWorkspace.RootDir { + t.Fatalf("payload.Workspace = %q, want %q", payload.Workspace, wantWorkspace.RootDir) + } + }) +} diff --git a/internal/daemon/daemon_test.go b/internal/daemon/daemon_test.go index fabb1aa48..9c700da50 100644 --- a/internal/daemon/daemon_test.go +++ b/internal/daemon/daemon_test.go @@ -19,6 +19,7 @@ import ( "github.com/gofrs/flock" "github.com/pedronauck/agh/internal/acp" aghconfig 
"github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/memory" "github.com/pedronauck/agh/internal/memory/consolidation" "github.com/pedronauck/agh/internal/observe" @@ -299,6 +300,11 @@ func TestShutdownTearsDownInRequiredOrder(t *testing.T) { events = append(events, "db") }, } + d.hooks = &fakeHookRuntime{ + onClose: func() { + events = append(events, "hooks") + }, + } d.lock = &Lock{ path: homePaths.DaemonLock, releaseFn: func() error { @@ -315,12 +321,104 @@ func TestShutdownTearsDownInRequiredOrder(t *testing.T) { t.Fatalf("Shutdown() error = %v", err) } - want := []string{"session:sess-a", "session:sess-b", "http", "uds", "db", "lock", "logger"} + want := []string{"session:sess-a", "session:sess-b", "hooks", "http", "uds", "db", "lock", "logger"} if !testutil.EqualStringSlices(events, want) { t.Fatalf("Shutdown() order = %#v, want %#v", events, want) } } +func TestShutdownDrainsHooksBeforeClosingDatabase(t *testing.T) { + t.Parallel() + + homePaths := testHomePaths(t) + cfg := testConfig(t, homePaths) + d := newTestDaemon(t, homePaths, cfg) + + asyncStarted := make(chan struct{}, 1) + asyncRelease := make(chan struct{}) + dbClosed := make(chan struct{}, 1) + + hooks := hookspkg.NewHooks( + hookspkg.WithLogger(discardLogger()), + hookspkg.WithNativeDeclarations([]hookspkg.HookDecl{ + { + Name: "async-stop", + Event: hookspkg.HookSessionPostStop, + Mode: hookspkg.HookModeAsync, + ExecutorKind: hookspkg.HookExecutorNative, + }, + }), + hookspkg.WithExecutorResolver(testHookExecutorResolver(map[string]hookspkg.Executor{ + "async-stop": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.SessionLifecyclePayload) (hookspkg.SessionPostStopPatch, error) { + asyncStarted <- struct{}{} + <-asyncRelease + return hookspkg.SessionPostStopPatch{}, nil + }), + })), + ) + t.Cleanup(hooks.Close) + if err := hooks.Rebuild(testutil.Context(t)); err != nil { + 
t.Fatalf("Rebuild() error = %v", err) + } + + notifier := newHooksNotifier(discardLogger(), func() time.Time { return time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC) }) + notifier.setRuntime(hooks, nil) + + d.sessions = &fakeSessionManager{ + infos: []*session.SessionInfo{{ID: "sess-a"}}, + onStop: func(string) { + if _, err := notifier.DispatchSessionPostStop(context.Background(), hookspkg.SessionPostStopPayload(hookSessionLifecyclePayload(&session.Session{ + ID: "sess-a", + AgentName: "codex", + WorkspaceID: "ws-1", + Workspace: "/tmp/ws-1", + Type: session.SessionTypeUser, + State: session.StateStopped, + CreatedAt: time.Date(2026, 4, 9, 11, 0, 0, 0, time.UTC), + UpdatedAt: time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC), + }, hookspkg.HookSessionPostStop, time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC)))); err != nil { + t.Fatalf("DispatchSessionPostStop() error = %v", err) + } + }, + } + d.hooks = hooks + d.registry = &recordingRegistry{ + path: homePaths.DatabaseFile, + onClose: func() { + dbClosed <- struct{}{} + }, + } + d.closeLogger = func() error { return nil } + + errCh := make(chan error, 1) + go func() { + errCh <- d.Shutdown(testutil.Context(t)) + }() + + select { + case <-asyncStarted: + case <-time.After(time.Second): + t.Fatal("async hook did not start before shutdown blocked") + } + + select { + case <-dbClosed: + t.Fatal("database closed before hooks drained") + default: + } + + close(asyncRelease) + if err := <-errCh; err != nil { + t.Fatalf("Shutdown() error = %v", err) + } + + select { + case <-dbClosed: + case <-time.After(time.Second): + t.Fatal("database was not closed after hooks drained") + } +} + func TestBootFailureCleansUpStartedResourcesInReverseOrder(t *testing.T) { homePaths := testHomePaths(t) cfg := testConfig(t, homePaths) @@ -782,23 +880,6 @@ func TestSignalSourceDefaultsToOSSignalRegistration(t *testing.T) { stop() } -func TestNotifierFanoutDispatchesEvents(t *testing.T) { - first := &recordingNotifier{} - second := 
&recordingNotifier{} - fanout := notifierFanout{notifiers: []session.Notifier{first, second}} - - fanout.OnSessionCreated(testutil.Context(t), &session.Session{ID: "sess-1"}) - fanout.OnSessionStopped(testutil.Context(t), &session.Session{ID: "sess-2"}) - fanout.OnAgentEvent(testutil.Context(t), "sess-3", acp.AgentEvent{Type: "message"}) - - if got, want := first.events, []string{"created", "stopped", "agent"}; !testutil.EqualStringSlices(got, want) { - t.Fatalf("first notifier events = %#v, want %#v", got, want) - } - if got, want := second.events, []string{"created", "stopped", "agent"}; !testutil.EqualStringSlices(got, want) { - t.Fatalf("second notifier events = %#v, want %#v", got, want) - } -} - func TestBootInjectsComposedAssemblerForFeatureFlagCombinations(t *testing.T) { t.Parallel() @@ -1237,11 +1318,11 @@ func TestSessionStopNotifierQueuesDreamCheck(t *testing.T) { return spawn(ctx, "memory-consolidation", "session-stop prompt", workspace) }, } - var notifier session.Notifier + var dispatcher session.HookDispatcher d := newTestDaemon(t, homePaths, cfg) d.newSessionManager = func(_ context.Context, deps SessionManagerDeps) (SessionManager, error) { - notifier = deps.Notifier + dispatcher = deps.Hooks return sessions, nil } d.newObserver = func(context.Context, RuntimeDeps) (Observer, error) { @@ -1269,12 +1350,25 @@ func TestSessionStopNotifierQueuesDreamCheck(t *testing.T) { defer d.mu.Unlock() return d.dreamRuntime != nil }) - if notifier == nil { - t.Fatal("session manager notifier = nil") + if dispatcher == nil { + t.Fatal("session manager hook dispatcher = nil") } resolved := resolveDaemonWorkspace(t, d.workspaceResolver, workspace) - notifier.OnSessionStopped(context.Background(), &session.Session{ID: "sess-user", WorkspaceID: resolved.ID, Type: session.SessionTypeUser}) + if _, err := dispatcher.DispatchSessionPostStop(context.Background(), hookspkg.SessionPostStopPayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookSessionPostStop, 
+ Timestamp: time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC), + }, + SessionContext: hookspkg.SessionContext{ + SessionID: "sess-user", + WorkspaceID: resolved.ID, + SessionType: string(session.SessionTypeUser), + State: string(session.StateStopped), + }, + }); err != nil { + t.Fatalf("DispatchSessionPostStop() error = %v", err) + } waitForCondition(t, "dream run from session stop", func() bool { return dream.runCount() == 1 }) @@ -1288,7 +1382,19 @@ func TestSessionStopNotifierQueuesDreamCheck(t *testing.T) { t.Fatalf("Create() workspace_path = %q, want empty", got) } - notifier.OnSessionStopped(context.Background(), &session.Session{ID: "sess-dream", Type: session.SessionTypeDream}) + if _, err := dispatcher.DispatchSessionPostStop(context.Background(), hookspkg.SessionPostStopPayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookSessionPostStop, + Timestamp: time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC), + }, + SessionContext: hookspkg.SessionContext{ + SessionID: "sess-dream", + SessionType: string(session.SessionTypeDream), + State: string(session.StateStopped), + }, + }); err != nil { + t.Fatalf("DispatchSessionPostStop(dream) error = %v", err) + } time.Sleep(20 * time.Millisecond) if got := dream.runCount(); got != 1 { t.Fatalf("dream run count after dream-session stop = %d, want 1", got) @@ -1799,6 +1905,18 @@ func (f *fakeObserver) QueryEvents(context.Context, store.EventSummaryQuery) ([] return nil, nil } +func (f *fakeObserver) QueryHookCatalog(context.Context, hookspkg.CatalogFilter) ([]hookspkg.CatalogEntry, error) { + return nil, nil +} + +func (f *fakeObserver) QueryHookRuns(context.Context, store.HookRunQuery) ([]hookspkg.HookRunRecord, error) { + return nil, nil +} + +func (f *fakeObserver) QueryHookEvents(context.Context, hookspkg.EventFilter) ([]hookspkg.EventDescriptor, error) { + return nil, nil +} + func (f *fakeObserver) Health(context.Context) (observe.Health, error) { return observe.Health{Status: "ok"}, nil } @@ -1812,7 +1930,7 
@@ func (f *fakeObserver) OnSessionCreated(context.Context, *session.Session) {} func (f *fakeObserver) OnSessionStopped(context.Context, *session.Session) {} -func (f *fakeObserver) OnAgentEvent(context.Context, string, acp.AgentEvent) {} +func (f *fakeObserver) OnAgentEvent(context.Context, string, any) {} type fakeServer struct { name string @@ -1926,10 +2044,173 @@ func (n *recordingNotifier) OnSessionStopped(context.Context, *session.Session) n.events = append(n.events, "stopped") } -func (n *recordingNotifier) OnAgentEvent(context.Context, string, acp.AgentEvent) { +func (n *recordingNotifier) OnAgentEvent(context.Context, string, any) { n.events = append(n.events, "agent") } +type fakeHookRuntime struct { + version int64 + onRebuild func(context.Context) error + onClose func() + onDispatchCreate func(context.Context, hookspkg.SessionPostCreatePayload) error + onDispatchStop func(context.Context, hookspkg.SessionPostStopPayload) error + onTurnStart func(context.Context, hookspkg.TurnStartPayload) error + onTurnEnd func(context.Context, hookspkg.TurnEndPayload) error + onMessageStart func(context.Context, hookspkg.MessageStartPayload) error + onMessageDelta func(context.Context, hookspkg.MessageDeltaPayload) error + onMessageEnd func(context.Context, hookspkg.MessageEndPayload) error + onPreCompact func(context.Context, hookspkg.ContextPreCompactPayload) error + onPostCompact func(context.Context, hookspkg.ContextPostCompactPayload) error + onAgentEvent func(context.Context, string, any) +} + +func (f *fakeHookRuntime) Rebuild(ctx context.Context) error { + if f.onRebuild != nil { + return f.onRebuild(ctx) + } + return nil +} + +func (f *fakeHookRuntime) Close() { + if f.onClose != nil { + f.onClose() + } +} + +func (f *fakeHookRuntime) Version() int64 { + return f.version +} + +func (f *fakeHookRuntime) DispatchSessionPreCreate(_ context.Context, payload hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) { + return payload, nil +} + 
+func (f *fakeHookRuntime) DispatchSessionPostCreate(ctx context.Context, payload hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) { + if f.onDispatchCreate != nil { + return payload, f.onDispatchCreate(ctx, payload) + } + return payload, nil +} + +func (f *fakeHookRuntime) DispatchSessionPreResume(_ context.Context, payload hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchSessionPostResume(_ context.Context, payload hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchSessionPreStop(_ context.Context, payload hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchSessionPostStop(ctx context.Context, payload hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) { + if f.onDispatchStop != nil { + return payload, f.onDispatchStop(ctx, payload) + } + return payload, nil +} + +func (f *fakeHookRuntime) DispatchInputPreSubmit(_ context.Context, payload hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchPromptPostAssemble(_ context.Context, payload hookspkg.PromptPayload) (hookspkg.PromptPayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchEventPreRecord(_ context.Context, payload hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchEventPostRecord(_ context.Context, payload hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchAgentPreStart(_ context.Context, payload hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) 
DispatchAgentSpawned(_ context.Context, payload hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchAgentCrashed(_ context.Context, payload hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchAgentStopped(_ context.Context, payload hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) { + return payload, nil +} + +func (f *fakeHookRuntime) DispatchTurnStart(ctx context.Context, payload hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) { + if f.onTurnStart != nil { + return payload, f.onTurnStart(ctx, payload) + } + return payload, nil +} + +func (f *fakeHookRuntime) DispatchTurnEnd(ctx context.Context, payload hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) { + if f.onTurnEnd != nil { + return payload, f.onTurnEnd(ctx, payload) + } + return payload, nil +} + +func (f *fakeHookRuntime) DispatchMessageStart(ctx context.Context, payload hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) { + if f.onMessageStart != nil { + return payload, f.onMessageStart(ctx, payload) + } + return payload, nil +} + +func (f *fakeHookRuntime) DispatchMessageDelta(ctx context.Context, payload hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) { + if f.onMessageDelta != nil { + return payload, f.onMessageDelta(ctx, payload) + } + return payload, nil +} + +func (f *fakeHookRuntime) DispatchMessageEnd(ctx context.Context, payload hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) { + if f.onMessageEnd != nil { + return payload, f.onMessageEnd(ctx, payload) + } + return payload, nil +} + +func (f *fakeHookRuntime) DispatchContextPreCompact(ctx context.Context, payload hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) { + if f.onPreCompact != nil { + return payload, f.onPreCompact(ctx, payload) + } + return payload, nil +} + 
+func (f *fakeHookRuntime) DispatchContextPostCompact(ctx context.Context, payload hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) { + if f.onPostCompact != nil { + return payload, f.onPostCompact(ctx, payload) + } + return payload, nil +} + +func (f *fakeHookRuntime) OnAgentEvent(ctx context.Context, sessionID string, event any) { + if f.onAgentEvent != nil { + f.onAgentEvent(ctx, sessionID, event) + } +} + +func testHookExecutorResolver(native map[string]hookspkg.Executor) hookspkg.ExecutorResolver { + return func(decl hookspkg.HookDecl) (hookspkg.Executor, error) { + if decl.ExecutorKind == hookspkg.HookExecutorNative { + executor := native[strings.TrimSpace(decl.Name)] + if executor == nil { + return nil, errors.New("missing native executor") + } + return executor, nil + } + return defaultDaemonExecutorResolver(decl) + } +} + type fakeDreamService struct { mu sync.Mutex shouldRun bool diff --git a/internal/daemon/hooks_bridge.go b/internal/daemon/hooks_bridge.go new file mode 100644 index 000000000..3e06eb8de --- /dev/null +++ b/internal/daemon/hooks_bridge.go @@ -0,0 +1,636 @@ +package daemon + +import ( + "context" + "errors" + "fmt" + "log/slog" + "slices" + "strings" + "sync" + "time" + + aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" + "github.com/pedronauck/agh/internal/session" + "github.com/pedronauck/agh/internal/skills" + workspacepkg "github.com/pedronauck/agh/internal/workspace" +) + +type hookRuntime interface { + Rebuild(context.Context) error + Close() + Version() int64 + DispatchSessionPreCreate(context.Context, hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) + DispatchSessionPostCreate(context.Context, hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) + DispatchSessionPreResume(context.Context, hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) + 
DispatchSessionPostResume(context.Context, hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error) + DispatchSessionPreStop(context.Context, hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) + DispatchSessionPostStop(context.Context, hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) + DispatchInputPreSubmit(context.Context, hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) + DispatchPromptPostAssemble(context.Context, hookspkg.PromptPayload) (hookspkg.PromptPayload, error) + DispatchEventPreRecord(context.Context, hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) + DispatchEventPostRecord(context.Context, hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) + DispatchAgentPreStart(context.Context, hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) + DispatchAgentSpawned(context.Context, hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) + DispatchAgentCrashed(context.Context, hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error) + DispatchAgentStopped(context.Context, hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) + DispatchTurnStart(context.Context, hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) + DispatchTurnEnd(context.Context, hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) + DispatchMessageStart(context.Context, hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) + DispatchMessageDelta(context.Context, hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) + DispatchMessageEnd(context.Context, hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) + DispatchContextPreCompact(context.Context, hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) + DispatchContextPostCompact(context.Context, hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) + 
OnAgentEvent(context.Context, string, any) +} + +type sessionLifecycleObserver interface { + OnSessionCreated(context.Context, *session.Session) + OnSessionStopped(context.Context, *session.Session) +} + +type dreamCheckEnqueuer interface { + EnqueueCheck(reason string, workspaceRef string) +} + +type hooksNotifier struct { + mu sync.RWMutex + + logger *slog.Logger + now func() time.Time + hooks hookRuntime + agentEventNotify session.Notifier +} + +var _ session.Notifier = (*hooksNotifier)(nil) +var _ session.HookDispatcher = (*hooksNotifier)(nil) + +func newHooksNotifier(logger *slog.Logger, now func() time.Time) *hooksNotifier { + if logger == nil { + logger = slog.Default() + } + if now == nil { + now = func() time.Time { return time.Now().UTC() } + } + + return &hooksNotifier{ + logger: logger, + now: now, + } +} + +func (n *hooksNotifier) setRuntime(hooks hookRuntime, agentEventNotify session.Notifier) { + n.mu.Lock() + defer n.mu.Unlock() + + n.hooks = hooks + n.agentEventNotify = agentEventNotify +} + +// OnSessionCreated is a no-op; lifecycle observation is handled via hook dispatch. +func (n *hooksNotifier) OnSessionCreated(ctx context.Context, sess *session.Session) { +} + +// OnSessionStopped is a no-op; lifecycle observation is handled via hook dispatch. 
+func (n *hooksNotifier) OnSessionStopped(ctx context.Context, sess *session.Session) { +} + +func (n *hooksNotifier) DispatchSessionPreCreate(ctx context.Context, payload hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookSessionPreCreate, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) { + return hooks.DispatchSessionPreCreate(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchSessionPostCreate(ctx context.Context, payload hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookSessionPostCreate, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) { + return hooks.DispatchSessionPostCreate(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchSessionPreResume(ctx context.Context, payload hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookSessionPreResume, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) { + return hooks.DispatchSessionPreResume(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchSessionPostResume(ctx context.Context, payload hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookSessionPostResume, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error) { + return hooks.DispatchSessionPostResume(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchSessionPreStop(ctx context.Context, payload hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) { + return 
dispatchRuntime(n, ctx, hookspkg.HookSessionPreStop, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) { + return hooks.DispatchSessionPreStop(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchSessionPostStop(ctx context.Context, payload hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookSessionPostStop, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) { + return hooks.DispatchSessionPostStop(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchInputPreSubmit(ctx context.Context, payload hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookInputPreSubmit, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) { + return hooks.DispatchInputPreSubmit(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchPromptPostAssemble(ctx context.Context, payload hookspkg.PromptPayload) (hookspkg.PromptPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookPromptPostAssemble, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.PromptPayload) (hookspkg.PromptPayload, error) { + return hooks.DispatchPromptPostAssemble(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchEventPreRecord(ctx context.Context, payload hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookEventPreRecord, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) { + return hooks.DispatchEventPreRecord(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchEventPostRecord(ctx context.Context, 
payload hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookEventPostRecord, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) { + return hooks.DispatchEventPostRecord(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchAgentPreStart(ctx context.Context, payload hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookAgentPreStart, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) { + return hooks.DispatchAgentPreStart(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchAgentSpawned(ctx context.Context, payload hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookAgentSpawned, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) { + return hooks.DispatchAgentSpawned(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchAgentCrashed(ctx context.Context, payload hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookAgentCrashed, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error) { + return hooks.DispatchAgentCrashed(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchAgentStopped(ctx context.Context, payload hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookAgentStopped, payload, true, func(hooks hookRuntime, callCtx context.Context, item hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) { + return hooks.DispatchAgentStopped(callCtx, item) + }) +} + +func (n 
*hooksNotifier) DispatchTurnStart(ctx context.Context, payload hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookTurnStart, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) { + return hooks.DispatchTurnStart(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchTurnEnd(ctx context.Context, payload hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookTurnEnd, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) { + return hooks.DispatchTurnEnd(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchMessageStart(ctx context.Context, payload hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookMessageStart, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) { + return hooks.DispatchMessageStart(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchMessageDelta(ctx context.Context, payload hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookMessageDelta, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) { + return hooks.DispatchMessageDelta(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchMessageEnd(ctx context.Context, payload hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookMessageEnd, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) { + return hooks.DispatchMessageEnd(callCtx, item) + }) +} + +func (n *hooksNotifier) 
DispatchContextPreCompact(ctx context.Context, payload hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookContextPreCompact, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) { + return hooks.DispatchContextPreCompact(callCtx, item) + }) +} + +func (n *hooksNotifier) DispatchContextPostCompact(ctx context.Context, payload hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) { + return dispatchRuntime(n, ctx, hookspkg.HookContextPostCompact, payload, false, func(hooks hookRuntime, callCtx context.Context, item hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) { + return hooks.DispatchContextPostCompact(callCtx, item) + }) +} + +func (n *hooksNotifier) OnAgentEvent(ctx context.Context, sessionID string, event any) { + hooks, agentEventNotify := n.runtime() + if agentEventNotify != nil { + agentEventNotify.OnAgentEvent(ctx, sessionID, event) + } + if hooks != nil { + hooks.OnAgentEvent(ctx, sessionID, event) + } +} + +func (n *hooksNotifier) runtime() (hookRuntime, session.Notifier) { + n.mu.RLock() + defer n.mu.RUnlock() + + return n.hooks, n.agentEventNotify +} + +func (n *hooksNotifier) timestamp() time.Time { + if n == nil || n.now == nil { + return time.Now().UTC() + } + return n.now().UTC() +} + +type runtimeDispatchFunc[P any] func(hookRuntime, context.Context, P) (P, error) + +func dispatchRuntime[P any](n *hooksNotifier, ctx context.Context, event hookspkg.HookEvent, payload P, rebuild bool, dispatch runtimeDispatchFunc[P]) (P, error) { + hooks, _ := n.runtime() + if hooks == nil { + return payload, nil + } + if ctx == nil { + return payload, fmt.Errorf("daemon: dispatch %s requires a non-nil context", event) + } + if rebuild { + if err := hooks.Rebuild(ctx); err != nil { + n.logger.WarnContext( + ctx, + "daemon: rebuild hooks before 
dispatch failed", + "event", event.String(), + "error", err, + ) + } + } + return dispatch(hooks, ctx, payload) +} + +func hookSessionLifecyclePayload(sess *session.Session, event hookspkg.HookEvent, timestamp time.Time) hookspkg.SessionLifecyclePayload { + return hookspkg.SessionLifecyclePayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: event, + Timestamp: timestamp, + }, + SessionContext: hookSessionContext(sess), + } +} + +func hookSessionContext(sess *session.Session) hookspkg.SessionContext { + if sess == nil { + return hookspkg.SessionContext{} + } + + info := sess.Info() + if info == nil { + return hookspkg.SessionContext{} + } + + return hookspkg.SessionContext{ + SessionID: strings.TrimSpace(info.ID), + SessionName: strings.TrimSpace(info.Name), + SessionType: string(info.Type), + AgentName: strings.TrimSpace(info.AgentName), + WorkspaceID: strings.TrimSpace(info.WorkspaceID), + Workspace: strings.TrimSpace(info.Workspace), + ACPSessionID: strings.TrimSpace(info.ACPSessionID), + State: string(info.State), + CreatedAt: info.CreatedAt, + UpdatedAt: info.UpdatedAt, + } +} + +func sessionFromHookPayload(payload hookspkg.SessionLifecyclePayload) *session.Session { + return &session.Session{ + ID: strings.TrimSpace(payload.SessionID), + Name: strings.TrimSpace(payload.SessionName), + AgentName: strings.TrimSpace(payload.AgentName), + WorkspaceID: strings.TrimSpace(payload.WorkspaceID), + Workspace: strings.TrimSpace(payload.Workspace), + Type: session.SessionType(strings.TrimSpace(payload.SessionType)), + State: session.SessionState(strings.TrimSpace(payload.State)), + ACPSessionID: strings.TrimSpace(payload.ACPSessionID), + CreatedAt: payload.CreatedAt, + UpdatedAt: payload.UpdatedAt, + } +} + +func daemonNativeHooks(observer sessionLifecycleObserver, dreamRuntime dreamCheckEnqueuer) ([]hookspkg.HookDecl, map[string]hookspkg.Executor) { + decls := make([]hookspkg.HookDecl, 0, 3) + executors := make(map[string]hookspkg.Executor, 3) + + if observer != nil { 
+ const ( + createName = "daemon.observe.session_post_create" + stopName = "daemon.observe.session_post_stop" + ) + + decls = append(decls, + hookspkg.HookDecl{ + Name: createName, + Event: hookspkg.HookSessionPostCreate, + Mode: hookspkg.HookModeSync, + Priority: 1000, + PrioritySet: true, + ExecutorKind: hookspkg.HookExecutorNative, + }, + hookspkg.HookDecl{ + Name: stopName, + Event: hookspkg.HookSessionPostStop, + Mode: hookspkg.HookModeSync, + Priority: 1000, + PrioritySet: true, + ExecutorKind: hookspkg.HookExecutorNative, + }, + ) + executors[createName] = hookspkg.NewTypedNativeExecutor(func(ctx context.Context, _ hookspkg.RegisteredHook, payload hookspkg.SessionLifecyclePayload) (hookspkg.SessionPostCreatePatch, error) { + observer.OnSessionCreated(ctx, sessionFromHookPayload(payload)) + return hookspkg.SessionPostCreatePatch{}, nil + }) + executors[stopName] = hookspkg.NewTypedNativeExecutor(func(ctx context.Context, _ hookspkg.RegisteredHook, payload hookspkg.SessionLifecyclePayload) (hookspkg.SessionPostStopPatch, error) { + observer.OnSessionStopped(ctx, sessionFromHookPayload(payload)) + return hookspkg.SessionPostStopPatch{}, nil + }) + } + + if dreamRuntime != nil { + const dreamName = "daemon.dream.session_post_stop" + + decls = append(decls, hookspkg.HookDecl{ + Name: dreamName, + Event: hookspkg.HookSessionPostStop, + Mode: hookspkg.HookModeSync, + Priority: 900, + PrioritySet: true, + ExecutorKind: hookspkg.HookExecutorNative, + }) + executors[dreamName] = hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, payload hookspkg.SessionLifecyclePayload) (hookspkg.SessionPostStopPatch, error) { + if strings.TrimSpace(payload.WorkspaceID) == "" || session.SessionType(strings.TrimSpace(payload.SessionType)) == session.SessionTypeDream { + return hookspkg.SessionPostStopPatch{}, nil + } + + dreamRuntime.EnqueueCheck("session_stop", strings.TrimSpace(payload.WorkspaceID)) + return hookspkg.SessionPostStopPatch{}, nil + }) + 
} + + return decls, executors +} + +func daemonExecutorResolver(nativeExecutors map[string]hookspkg.Executor) hookspkg.ExecutorResolver { + return func(decl hookspkg.HookDecl) (hookspkg.Executor, error) { + if decl.ExecutorKind == hookspkg.HookExecutorNative { + executor := nativeExecutors[strings.TrimSpace(decl.Name)] + if executor == nil { + return nil, fmt.Errorf("daemon: missing native hook executor for %q", decl.Name) + } + return executor, nil + } + return defaultDaemonExecutorResolver(decl) + } +} + +func defaultDaemonExecutorResolver(decl hookspkg.HookDecl) (hookspkg.Executor, error) { + switch decl.ExecutorKind { + case hookspkg.HookExecutorSubprocess: + opts := []hookspkg.SubprocessExecutorOption{ + hookspkg.WithSubprocessEnv(decl.Env), + } + if root := strings.TrimSpace(decl.Matcher.WorkspaceRoot); root != "" { + opts = append(opts, hookspkg.WithSubprocessDir(root)) + } + return hookspkg.NewSubprocessExecutor( + decl.Command, + decl.Args, + opts..., + ), nil + case hookspkg.HookExecutorWASM: + return &hookspkg.WasmExecutor{}, nil + case hookspkg.HookExecutorNative: + return nil, fmt.Errorf("daemon: native executor for hook %q requires an explicit binding", decl.Name) + default: + return nil, fmt.Errorf("daemon: unsupported executor kind %q for hook %q", decl.ExecutorKind, decl.Name) + } +} + +func configDeclarationProvider(registry Registry, workspaceResolver workspacepkg.WorkspaceResolver, logger *slog.Logger) hookspkg.DeclarationProvider { + if logger == nil { + logger = slog.Default() + } + return func(ctx context.Context) ([]hookspkg.HookDecl, error) { + decls, err := workspaceHookDeclarations(ctx, registry, workspaceResolver, logger) + if err != nil { + return nil, err + } + return filterHookDeclsBySource(decls, hookspkg.HookSourceConfig), nil + } +} + +func agentDeclarationProvider(registry Registry, workspaceResolver workspacepkg.WorkspaceResolver, logger *slog.Logger) hookspkg.DeclarationProvider { + if logger == nil { + logger = slog.Default() + 
} + return func(ctx context.Context) ([]hookspkg.HookDecl, error) { + decls, err := workspaceHookDeclarations(ctx, registry, workspaceResolver, logger) + if err != nil { + return nil, err + } + return filterHookDeclsBySource(decls, hookspkg.HookSourceAgentDefinition), nil + } +} + +func skillDeclarationProvider(skillsRegistry *skills.Registry, registry Registry, workspaceResolver workspacepkg.WorkspaceResolver, allowedMarketplaceHooks []string, logger *slog.Logger) hookspkg.DeclarationProvider { + if logger == nil { + logger = slog.Default() + } + allowed := marketplaceHookAllowlist(allowedMarketplaceHooks) + + return func(ctx context.Context) ([]hookspkg.HookDecl, error) { + if skillsRegistry == nil || registry == nil || workspaceResolver == nil { + return nil, nil + } + + workspaces, err := registeredWorkspaces(ctx, registry, workspaceResolver, logger) + if err != nil { + return nil, err + } + + decls := make([]hookspkg.HookDecl, 0, len(workspaces)) + for _, resolved := range workspaces { + activeSkills, err := skillsRegistry.ForWorkspace(ctx, resolved) + if err != nil { + return nil, fmt.Errorf("daemon: resolve active skills for workspace %q: %w", resolved.ID, err) + } + + for _, skill := range activeSkills { + if !marketplaceHookAllowed(skill, allowed) { + logger.Warn( + "daemon: blocked hook", + "skill_name", skill.Meta.Name, + "workspace_id", resolved.ID, + "source", skills.SkillSourceName(skill.Source), + ) + continue + } + decls = append(decls, scopeWorkspaceHookDecls(skill.Hooks, resolved)...) 
+ } + } + + return decls, nil + } +} + +func workspaceHookDeclarations(ctx context.Context, registry Registry, workspaceResolver workspacepkg.WorkspaceResolver, logger *slog.Logger) ([]hookspkg.HookDecl, error) { + workspaces, err := registeredWorkspaces(ctx, registry, workspaceResolver, logger) + if err != nil { + return nil, err + } + + decls := make([]hookspkg.HookDecl, 0, len(workspaces)) + for _, resolved := range workspaces { + workspaceDecls, err := aghconfig.HookDeclarations(resolved.Config, resolved.Agents) + if err != nil { + return nil, fmt.Errorf("daemon: load hook declarations for workspace %q: %w", resolved.ID, err) + } + decls = append(decls, scopeWorkspaceHookDecls(workspaceDecls, resolved)...) + } + + return decls, nil +} + +func registeredWorkspaces(ctx context.Context, registry Registry, workspaceResolver workspacepkg.WorkspaceResolver, logger *slog.Logger) ([]workspacepkg.ResolvedWorkspace, error) { + if registry == nil || workspaceResolver == nil { + return nil, nil + } + + workspaces, err := registry.ListWorkspaces(ctx) + if err != nil { + return nil, fmt.Errorf("daemon: list workspaces for hooks rebuild: %w", err) + } + slices.SortFunc(workspaces, func(left, right workspacepkg.Workspace) int { + return strings.Compare(strings.TrimSpace(left.ID), strings.TrimSpace(right.ID)) + }) + + resolvedWorkspaces := make([]workspacepkg.ResolvedWorkspace, 0, len(workspaces)) + for _, workspace := range workspaces { + resolved, err := workspaceResolver.Resolve(ctx, workspace.ID) + switch { + case err == nil: + resolvedWorkspaces = append(resolvedWorkspaces, resolved) + case errors.Is(err, workspacepkg.ErrWorkspaceNotFound), errors.Is(err, workspacepkg.ErrWorkspaceRootMissing): + if logger != nil { + logger.Warn( + "daemon: skipped workspace while rebuilding hooks", + "workspace_id", workspace.ID, + "workspace_root", workspace.RootDir, + "error", err, + ) + } + default: + return nil, fmt.Errorf("daemon: resolve workspace %q for hooks rebuild: %w", 
workspace.ID, err) + } + } + + return resolvedWorkspaces, nil +} + +func filterHookDeclsBySource(decls []hookspkg.HookDecl, source hookspkg.HookSource) []hookspkg.HookDecl { + filtered := make([]hookspkg.HookDecl, 0, len(decls)) + for _, decl := range decls { + if decl.Source != source { + continue + } + filtered = append(filtered, cloneDaemonHookDecl(decl)) + } + return filtered +} + +func scopeWorkspaceHookDecls(decls []hookspkg.HookDecl, resolved workspacepkg.ResolvedWorkspace) []hookspkg.HookDecl { + scoped := make([]hookspkg.HookDecl, 0, len(decls)) + for _, decl := range decls { + cloned := cloneDaemonHookDecl(decl) + cloned.Matcher.WorkspaceID = strings.TrimSpace(resolved.ID) + cloned.Matcher.WorkspaceRoot = strings.TrimSpace(resolved.RootDir) + scoped = append(scoped, cloned) + } + return scoped +} + +func cloneDaemonHookDecl(src hookspkg.HookDecl) hookspkg.HookDecl { + cloned := src + cloned.Args = append([]string(nil), src.Args...) + cloned.Env = cloneStringMap(src.Env) + cloned.Metadata = cloneStringMap(src.Metadata) + if src.Matcher.ToolReadOnly != nil { + value := *src.Matcher.ToolReadOnly + cloned.Matcher.ToolReadOnly = &value + } + return cloned +} + +func cloneStringMap(src map[string]string) map[string]string { + if len(src) == 0 { + return nil + } + + cloned := make(map[string]string, len(src)) + for key, value := range src { + cloned[key] = value + } + return cloned +} + +func marketplaceHookAllowlist(values []string) map[string]struct{} { + allowed := make(map[string]struct{}, len(values)) + for _, value := range values { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + continue + } + allowed[trimmed] = struct{}{} + } + + return allowed +} + +func marketplaceHookAllowed(skill *skills.Skill, allowedMarketplaceHooks map[string]struct{}) bool { + if skill == nil { + return false + } + + switch skill.Source { + case skills.SourceBundled, skills.SourceUser, skills.SourceAdditional, skills.SourceWorkspace: + return true + case 
skills.SourceMarketplace: + for _, key := range marketplaceHookConsentKeys(skill) { + if _, ok := allowedMarketplaceHooks[key]; ok { + return true + } + } + return false + default: + return false + } +} + +func marketplaceHookConsentKeys(skill *skills.Skill) []string { + if skill == nil || skill.Provenance == nil { + return nil + } + + keys := make([]string, 0, 3) + if slug := strings.TrimSpace(skill.Provenance.Slug); slug != "" { + keys = append(keys, slug) + if registry := strings.TrimSpace(skill.Provenance.Registry); registry != "" { + keys = append(keys, registry+":"+slug) + } + } + if hash := strings.TrimSpace(skill.Provenance.Hash); hash != "" { + keys = append(keys, hash) + } + + return keys +} diff --git a/internal/daemon/notifier.go b/internal/daemon/notifier.go deleted file mode 100644 index ac2133fa4..000000000 --- a/internal/daemon/notifier.go +++ /dev/null @@ -1,147 +0,0 @@ -package daemon - -import ( - "context" - "log/slog" - "strings" - - "github.com/pedronauck/agh/internal/acp" - "github.com/pedronauck/agh/internal/session" - "github.com/pedronauck/agh/internal/skills" - workspacepkg "github.com/pedronauck/agh/internal/workspace" -) - -type sessionLifecycleCallback func(context.Context, *session.Session) - -type sessionHookPhase interface { - OnSessionCreated(ctx context.Context, session *session.Session) - OnSessionStopped(ctx context.Context, session *session.Session) -} - -type notifierFanout struct { - notifiers []session.Notifier - postSessionCreated []sessionLifecycleCallback - postSessionStopped []sessionLifecycleCallback - hookPhase sessionHookPhase -} - -var _ session.Notifier = (*notifierFanout)(nil) - -func (f *notifierFanout) OnSessionCreated(ctx context.Context, sess *session.Session) { - for _, notifier := range f.notifiers { - if notifier == nil { - continue - } - notifier.OnSessionCreated(ctx, sess) - } - for _, callback := range f.postSessionCreated { - if callback == nil { - continue - } - callback(ctx, sess) - } - if f.hookPhase 
!= nil { - f.hookPhase.OnSessionCreated(ctx, sess) - } -} - -func (f *notifierFanout) OnSessionStopped(ctx context.Context, sess *session.Session) { - for _, notifier := range f.notifiers { - if notifier == nil { - continue - } - notifier.OnSessionStopped(ctx, sess) - } - for _, callback := range f.postSessionStopped { - if callback == nil { - continue - } - callback(ctx, sess) - } - if f.hookPhase != nil { - f.hookPhase.OnSessionStopped(ctx, sess) - } -} - -func (f *notifierFanout) OnAgentEvent(ctx context.Context, sessionID string, event acp.AgentEvent) { - for _, notifier := range f.notifiers { - if notifier == nil { - continue - } - notifier.OnAgentEvent(ctx, sessionID, event) - } -} - -type skillsHookDispatcher struct { - registry session.SkillRegistry - runner *skills.HookRunner - workspaceResolver workspacepkg.WorkspaceResolver - logger *slog.Logger -} - -var _ sessionHookPhase = (*skillsHookDispatcher)(nil) - -func newSkillsHookDispatcher(registry session.SkillRegistry, runner *skills.HookRunner, workspaceResolver workspacepkg.WorkspaceResolver, logger *slog.Logger) *skillsHookDispatcher { - if logger == nil { - logger = slog.Default() - } - - return &skillsHookDispatcher{ - registry: registry, - runner: runner, - workspaceResolver: workspaceResolver, - logger: logger, - } -} - -func (d *skillsHookDispatcher) OnSessionCreated(ctx context.Context, sess *session.Session) { - d.dispatch(ctx, skills.HookSessionCreated, sess) -} - -func (d *skillsHookDispatcher) OnSessionStopped(ctx context.Context, sess *session.Session) { - d.dispatch(ctx, skills.HookSessionStopped, sess) -} - -func (d *skillsHookDispatcher) dispatch(ctx context.Context, event skills.HookEvent, sess *session.Session) { - if d == nil || sess == nil || d.registry == nil || d.runner == nil || d.workspaceResolver == nil { - return - } - - workspaceRef := strings.TrimSpace(sess.WorkspaceID) - if workspaceRef == "" { - workspaceRef = strings.TrimSpace(sess.Workspace) - } - if workspaceRef == "" { - 
return - } - - resolved, err := d.workspaceResolver.Resolve(ctx, workspaceRef) - if err != nil { - d.logger.Warn( - "daemon: resolve workspace for hook dispatch failed", - "session_id", sess.ID, - "workspace_ref", workspaceRef, - "event", event, - "error", err, - ) - return - } - - activeSkills, err := d.registry.ForWorkspace(ctx, resolved) - if err != nil { - d.logger.Warn( - "daemon: resolve active skills for hook dispatch failed", - "session_id", sess.ID, - "workspace_id", resolved.ID, - "event", event, - "error", err, - ) - return - } - - d.runner.RunHooks(ctx, event, activeSkills, skills.HookPayload{ - SessionID: sess.ID, - AgentName: sess.AgentName, - Workspace: resolved.RootDir, - }) -} diff --git a/internal/daemon/notifier_integration_test.go b/internal/daemon/notifier_integration_test.go deleted file mode 100644 index 2447fb03d..000000000 --- a/internal/daemon/notifier_integration_test.go +++ /dev/null @@ -1,182 +0,0 @@ -//go:build integration - -package daemon - -import ( - "context" - "encoding/json" - "os" - "path/filepath" - "testing" - - aghconfig "github.com/pedronauck/agh/internal/config" - "github.com/pedronauck/agh/internal/session" - skillspkg "github.com/pedronauck/agh/internal/skills" - "github.com/pedronauck/agh/internal/testutil" - workspacepkg "github.com/pedronauck/agh/internal/workspace" -) - -func TestNotifierFanoutExecutesCreatedAndStoppedHooks(t *testing.T) { - workDir := t.TempDir() - rootDir := filepath.Join(workDir, "workspace") - if err := os.MkdirAll(rootDir, 0o755); err != nil { - t.Fatalf("os.MkdirAll(workspace) error = %v", err) - } - - scriptPath := writeIntegrationHookScript(t, workDir, "capture.sh", "#!/bin/sh\ncat > \"$1\"\n") - createdOutput := filepath.Join(workDir, "created.json") - stoppedOutput := filepath.Join(workDir, "stopped.json") - - registry := &integrationHookRegistry{ - skills: []*skillspkg.Skill{ - { - Source: skillspkg.SourceWorkspace, - Meta: skillspkg.SkillMeta{Name: "hook-skill"}, - Hooks: 
[]skillspkg.HookDecl{ - { - Event: skillspkg.HookSessionCreated, - Command: scriptPath, - Args: []string{createdOutput}, - }, - { - Event: skillspkg.HookSessionStopped, - Command: scriptPath, - Args: []string{stoppedOutput}, - }, - }, - }, - }, - } - resolver := &integrationHookWorkspaceResolver{ - resolved: workspacepkg.ResolvedWorkspace{ - Workspace: workspacepkg.Workspace{ - ID: "ws-1", - RootDir: rootDir, - Name: "workspace", - }, - }, - } - - fanout := notifierFanout{ - notifiers: []session.Notifier{&recordingNotifier{}}, - hookPhase: newSkillsHookDispatcher(registry, skillspkg.NewHookRunner(aghconfig.SkillsConfig{}, discardLogger()), resolver, discardLogger()), - } - sess := &session.Session{ - ID: "sess-1", - AgentName: "coder", - WorkspaceID: "ws-1", - Workspace: filepath.Join(workDir, "non-canonical"), - } - - fanout.OnSessionCreated(testutil.Context(t), sess) - fanout.OnSessionStopped(testutil.Context(t), sess) - - assertHookPayload(t, createdOutput, skillspkg.HookPayload{ - SessionID: "sess-1", - AgentName: "coder", - Workspace: rootDir, - Event: string(skillspkg.HookSessionCreated), - }) - assertHookPayload(t, stoppedOutput, skillspkg.HookPayload{ - SessionID: "sess-1", - AgentName: "coder", - Workspace: rootDir, - Event: string(skillspkg.HookSessionStopped), - }) -} - -func TestNotifierFanoutHookFailureDoesNotBlockLifecycle(t *testing.T) { - workDir := t.TempDir() - rootDir := filepath.Join(workDir, "workspace") - if err := os.MkdirAll(rootDir, 0o755); err != nil { - t.Fatalf("os.MkdirAll(workspace) error = %v", err) - } - - scriptPath := writeIntegrationHookScript(t, workDir, "fail.sh", "#!/bin/sh\nexit 7\n") - registry := &integrationHookRegistry{ - skills: []*skillspkg.Skill{ - { - Source: skillspkg.SourceWorkspace, - Meta: skillspkg.SkillMeta{Name: "failing-hook-skill"}, - Hooks: []skillspkg.HookDecl{ - { - Event: skillspkg.HookSessionCreated, - Command: scriptPath, - }, - { - Event: skillspkg.HookSessionStopped, - Command: scriptPath, - }, - }, - 
}, - }, - } - resolver := &integrationHookWorkspaceResolver{ - resolved: workspacepkg.ResolvedWorkspace{ - Workspace: workspacepkg.Workspace{ - ID: "ws-1", - RootDir: rootDir, - Name: "workspace", - }, - }, - } - notifier := &recordingNotifier{} - fanout := notifierFanout{ - notifiers: []session.Notifier{notifier}, - hookPhase: newSkillsHookDispatcher(registry, skillspkg.NewHookRunner(aghconfig.SkillsConfig{}, discardLogger()), resolver, discardLogger()), - } - - sess := &session.Session{ID: "sess-1", AgentName: "coder", WorkspaceID: "ws-1"} - fanout.OnSessionCreated(testutil.Context(t), sess) - fanout.OnSessionStopped(testutil.Context(t), sess) - - if got, want := notifier.events, []string{"created", "stopped"}; !testutil.EqualStringSlices(got, want) { - t.Fatalf("built-in notifier events = %#v, want %#v", got, want) - } -} - -type integrationHookRegistry struct { - skills []*skillspkg.Skill -} - -func (r *integrationHookRegistry) ForWorkspace(context.Context, workspacepkg.ResolvedWorkspace) ([]*skillspkg.Skill, error) { - return append([]*skillspkg.Skill(nil), r.skills...), nil -} - -type integrationHookWorkspaceResolver struct { - resolved workspacepkg.ResolvedWorkspace -} - -func (r *integrationHookWorkspaceResolver) Resolve(context.Context, string) (workspacepkg.ResolvedWorkspace, error) { - return r.resolved, nil -} - -func (r *integrationHookWorkspaceResolver) ResolveOrRegister(context.Context, string) (workspacepkg.ResolvedWorkspace, error) { - return workspacepkg.ResolvedWorkspace{}, nil -} - -func writeIntegrationHookScript(t *testing.T, dir string, name string, contents string) string { - t.Helper() - - path := filepath.Join(dir, name) - if err := os.WriteFile(path, []byte(contents), 0o755); err != nil { - t.Fatalf("os.WriteFile(%q) error = %v", path, err) - } - return path -} - -func assertHookPayload(t *testing.T, path string, want skillspkg.HookPayload) { - t.Helper() - - payloadBytes, err := os.ReadFile(path) - if err != nil { - 
t.Fatalf("os.ReadFile(%q) error = %v", path, err) - } - var got skillspkg.HookPayload - if err := json.Unmarshal(payloadBytes, &got); err != nil { - t.Fatalf("json.Unmarshal(%q) error = %v", path, err) - } - if got != want { - t.Fatalf("hook payload = %#v, want %#v", got, want) - } -} diff --git a/internal/daemon/notifier_test.go b/internal/daemon/notifier_test.go index 67ea925f2..a36b7ba5d 100644 --- a/internal/daemon/notifier_test.go +++ b/internal/daemon/notifier_test.go @@ -2,273 +2,507 @@ package daemon import ( "context" - "encoding/json" "errors" - "os" "path/filepath" + "strings" "testing" + "time" - "github.com/pedronauck/agh/internal/acp" - aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/session" - skillspkg "github.com/pedronauck/agh/internal/skills" + "github.com/pedronauck/agh/internal/skills" "github.com/pedronauck/agh/internal/testutil" workspacepkg "github.com/pedronauck/agh/internal/workspace" ) -func TestNotifierFanoutRunsHookPhaseAfterBuiltInNotifiers(t *testing.T) { +func TestHooksNotifierDispatchesLifecycleAgentAndStreamEvents(t *testing.T) { t.Parallel() - order := make([]string, 0, 4) - fanout := notifierFanout{ - notifiers: []session.Notifier{ - notifierFunc{ - onCreated: func(context.Context, *session.Session) { - order = append(order, "notifier-created") - }, - onStopped: func(context.Context, *session.Session) { - order = append(order, "notifier-stopped") - }, - }, + fixedNow := time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC) + var order []string + runtime := &fakeHookRuntime{ + onRebuild: func(context.Context) error { + order = append(order, "rebuild") + return nil }, - hookPhase: hookPhaseRecorder{ - onCreated: func(context.Context, *session.Session) { - order = append(order, "hook-created") - }, - onStopped: func(context.Context, *session.Session) { - order = append(order, "hook-stopped") - }, + onDispatchCreate: func(_ context.Context, payload 
hookspkg.SessionPostCreatePayload) error { + order = append(order, "create") + if payload.Event != hookspkg.HookSessionPostCreate { + t.Fatalf("payload.Event = %q, want %q", payload.Event, hookspkg.HookSessionPostCreate) + } + if payload.Timestamp != fixedNow { + t.Fatalf("payload.Timestamp = %s, want %s", payload.Timestamp, fixedNow) + } + if payload.SessionID != "sess-created" || payload.WorkspaceID != "ws-1" { + t.Fatalf("payload = %#v, want session metadata", payload) + } + return nil + }, + onDispatchStop: func(_ context.Context, payload hookspkg.SessionPostStopPayload) error { + order = append(order, "stop") + if payload.Event != hookspkg.HookSessionPostStop { + t.Fatalf("payload.Event = %q, want %q", payload.Event, hookspkg.HookSessionPostStop) + } + if payload.CreatedAt.IsZero() || payload.UpdatedAt.IsZero() { + t.Fatalf("payload timestamps = %#v, want created/updated timestamps", payload) + } + return nil + }, + onTurnStart: func(_ context.Context, payload hookspkg.TurnStartPayload) error { + order = append(order, "turn-start") + if payload.Event != hookspkg.HookTurnStart { + t.Fatalf("payload.Event = %q, want %q", payload.Event, hookspkg.HookTurnStart) + } + return nil + }, + onTurnEnd: func(_ context.Context, payload hookspkg.TurnEndPayload) error { + order = append(order, "turn-end") + if payload.Event != hookspkg.HookTurnEnd { + t.Fatalf("payload.Event = %q, want %q", payload.Event, hookspkg.HookTurnEnd) + } + return nil + }, + onMessageStart: func(_ context.Context, payload hookspkg.MessageStartPayload) error { + order = append(order, "message-start") + if payload.Event != hookspkg.HookMessageStart { + t.Fatalf("payload.Event = %q, want %q", payload.Event, hookspkg.HookMessageStart) + } + return nil + }, + onMessageDelta: func(_ context.Context, payload hookspkg.MessageDeltaPayload) error { + order = append(order, "message-delta") + if payload.Event != hookspkg.HookMessageDelta { + t.Fatalf("payload.Event = %q, want %q", payload.Event, 
hookspkg.HookMessageDelta) + } + return nil + }, + onMessageEnd: func(_ context.Context, payload hookspkg.MessageEndPayload) error { + order = append(order, "message-end") + if payload.Event != hookspkg.HookMessageEnd { + t.Fatalf("payload.Event = %q, want %q", payload.Event, hookspkg.HookMessageEnd) + } + return nil + }, + onPreCompact: func(_ context.Context, payload hookspkg.ContextPreCompactPayload) error { + order = append(order, "context-pre") + if payload.Event != hookspkg.HookContextPreCompact { + t.Fatalf("payload.Event = %q, want %q", payload.Event, hookspkg.HookContextPreCompact) + } + return nil + }, + onPostCompact: func(_ context.Context, payload hookspkg.ContextPostCompactPayload) error { + order = append(order, "context-post") + if payload.Event != hookspkg.HookContextPostCompact { + t.Fatalf("payload.Event = %q, want %q", payload.Event, hookspkg.HookContextPostCompact) + } + return nil + }, + onAgentEvent: func(context.Context, string, any) { + order = append(order, "hook-agent") }, } + agentEvents := &recordingNotifier{} + notifier := newHooksNotifier(discardLogger(), func() time.Time { return fixedNow }) + notifier.setRuntime(runtime, agentEvents) + + sess := &session.Session{ + ID: "sess-created", + Name: "demo", + AgentName: "codex", + WorkspaceID: "ws-1", + Workspace: "/tmp/ws-1", + Type: session.SessionTypeUser, + State: session.StateActive, + CreatedAt: fixedNow.Add(-time.Minute), + UpdatedAt: fixedNow, + } - fanout.OnSessionCreated(testutil.Context(t), &session.Session{ID: "sess-created"}) - fanout.OnSessionStopped(testutil.Context(t), &session.Session{ID: "sess-stopped"}) - - want := []string{"notifier-created", "hook-created", "notifier-stopped", "hook-stopped"} - if !testutil.EqualStringSlices(order, want) { - t.Fatalf("fanout order = %#v, want %#v", order, want) + if _, err := notifier.DispatchSessionPostCreate(testutil.Context(t), hookspkg.SessionPostCreatePayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostCreate, 
fixedNow))); err != nil { + t.Fatalf("DispatchSessionPostCreate() error = %v", err) + } + if _, err := notifier.DispatchSessionPostStop(testutil.Context(t), hookspkg.SessionPostStopPayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostStop, fixedNow))); err != nil { + t.Fatalf("DispatchSessionPostStop() error = %v", err) + } + if _, err := notifier.DispatchTurnStart(testutil.Context(t), hookspkg.TurnStartPayload{ + PayloadBase: hookspkg.PayloadBase{Event: hookspkg.HookTurnStart, Timestamp: fixedNow}, + SessionContext: hookspkg.SessionContext{SessionID: "sess-created"}, + TurnContext: hookspkg.TurnContext{TurnID: "turn-1"}, + }); err != nil { + t.Fatalf("DispatchTurnStart() error = %v", err) + } + if _, err := notifier.DispatchMessageStart(testutil.Context(t), hookspkg.MessageStartPayload{ + PayloadBase: hookspkg.PayloadBase{Event: hookspkg.HookMessageStart, Timestamp: fixedNow}, + SessionContext: hookspkg.SessionContext{SessionID: "sess-created"}, + TurnContext: hookspkg.TurnContext{TurnID: "turn-1"}, + MessageID: "msg-1", + }); err != nil { + t.Fatalf("DispatchMessageStart() error = %v", err) + } + if _, err := notifier.DispatchMessageDelta(testutil.Context(t), hookspkg.MessageDeltaPayload{ + PayloadBase: hookspkg.PayloadBase{Event: hookspkg.HookMessageDelta, Timestamp: fixedNow}, + SessionContext: hookspkg.SessionContext{SessionID: "sess-created"}, + TurnContext: hookspkg.TurnContext{TurnID: "turn-1"}, + MessageID: "msg-1", + }); err != nil { + t.Fatalf("DispatchMessageDelta() error = %v", err) + } + if _, err := notifier.DispatchMessageEnd(testutil.Context(t), hookspkg.MessageEndPayload{ + PayloadBase: hookspkg.PayloadBase{Event: hookspkg.HookMessageEnd, Timestamp: fixedNow}, + SessionContext: hookspkg.SessionContext{SessionID: "sess-created"}, + TurnContext: hookspkg.TurnContext{TurnID: "turn-1"}, + MessageID: "msg-1", + }); err != nil { + t.Fatalf("DispatchMessageEnd() error = %v", err) + } + if _, err := 
notifier.DispatchTurnEnd(testutil.Context(t), hookspkg.TurnEndPayload{ + PayloadBase: hookspkg.PayloadBase{Event: hookspkg.HookTurnEnd, Timestamp: fixedNow}, + SessionContext: hookspkg.SessionContext{SessionID: "sess-created"}, + TurnContext: hookspkg.TurnContext{TurnID: "turn-1"}, + }); err != nil { + t.Fatalf("DispatchTurnEnd() error = %v", err) + } + if _, err := notifier.DispatchContextPreCompact(testutil.Context(t), hookspkg.ContextPreCompactPayload{ + PayloadBase: hookspkg.PayloadBase{Event: hookspkg.HookContextPreCompact, Timestamp: fixedNow}, + SessionContext: hookspkg.SessionContext{SessionID: "sess-created"}, + TurnContext: hookspkg.TurnContext{TurnID: "turn-1"}, + }); err != nil { + t.Fatalf("DispatchContextPreCompact() error = %v", err) + } + if _, err := notifier.DispatchContextPostCompact(testutil.Context(t), hookspkg.ContextPostCompactPayload{ + PayloadBase: hookspkg.PayloadBase{Event: hookspkg.HookContextPostCompact, Timestamp: fixedNow}, + SessionContext: hookspkg.SessionContext{SessionID: "sess-created"}, + TurnContext: hookspkg.TurnContext{TurnID: "turn-1"}, + }); err != nil { + t.Fatalf("DispatchContextPostCompact() error = %v", err) + } + notifier.OnAgentEvent(testutil.Context(t), "sess-created", struct{ Type string }{Type: "done"}) + + wantOrder := []string{ + "rebuild", + "create", + "rebuild", + "stop", + "turn-start", + "message-start", + "message-delta", + "message-end", + "turn-end", + "context-pre", + "context-post", + "hook-agent", + } + if !testutil.EqualStringSlices(order, wantOrder) { + t.Fatalf("dispatch order = %#v, want %#v", order, wantOrder) + } + if got, want := agentEvents.events, []string{"agent"}; !testutil.EqualStringSlices(got, want) { + t.Fatalf("agent event notifier events = %#v, want %#v", got, want) } } -func TestSkillsHookDispatcherUsesResolvedWorkspaceForLookupAndPayload(t *testing.T) { +func TestDaemonNativeHooksDriveObserverAndDreamCallbacks(t *testing.T) { t.Parallel() - workDir := t.TempDir() - rootDir := 
filepath.Join(workDir, "workspace") - if err := os.MkdirAll(rootDir, 0o755); err != nil { - t.Fatalf("os.MkdirAll(workspace) error = %v", err) - } - - scriptPath := writeHookScript(t, workDir, "capture.sh", "#!/bin/sh\ncat > \"$1\"\n") - outputPath := filepath.Join(workDir, "created.json") - - registry := &hookDispatcherRegistry{ - skills: []*skillspkg.Skill{ - { - Source: skillspkg.SourceWorkspace, - Meta: skillspkg.SkillMeta{Name: "hook-skill"}, - Hooks: []skillspkg.HookDecl{ - { - Event: skillspkg.HookSessionCreated, - Command: scriptPath, - Args: []string{outputPath}, - }, - }, - }, - }, - } - resolver := &hookDispatcherWorkspaceResolver{ - resolved: workspacepkg.ResolvedWorkspace{ - Workspace: workspacepkg.Workspace{ - ID: "ws-1", - RootDir: rootDir, - Name: "workspace", - }, - }, + observer := &spyLifecycleObserver{} + dream := &spyDreamRuntime{} + decls, executors := daemonNativeHooks(observer, dream) + hooks := hookspkg.NewHooks( + hookspkg.WithLogger(discardLogger()), + hookspkg.WithNativeDeclarations(decls), + hookspkg.WithExecutorResolver(daemonExecutorResolver(executors)), + ) + t.Cleanup(hooks.Close) + + if err := hooks.Rebuild(testutil.Context(t)); err != nil { + t.Fatalf("Rebuild() error = %v", err) } - dispatcher := newSkillsHookDispatcher(registry, skillspkg.NewHookRunner(aghconfig.SkillsConfig{}, discardLogger()), resolver, discardLogger()) - dispatcher.OnSessionCreated(testutil.Context(t), &session.Session{ - ID: "sess-1", - AgentName: "coder", + fixedNow := time.Date(2026, 4, 9, 15, 0, 0, 0, time.UTC) + sess := &session.Session{ + ID: "sess-user", + Name: "demo", + AgentName: "codex", WorkspaceID: "ws-1", - Workspace: filepath.Join(workDir, "non-canonical"), - }) + Workspace: "/tmp/ws-1", + Type: session.SessionTypeUser, + State: session.StateStopped, + CreatedAt: fixedNow.Add(-time.Hour), + UpdatedAt: fixedNow, + } - if got := resolver.callCount(); got != 1 { - t.Fatalf("workspace resolver call count = %d, want 1", got) + if _, err := 
hooks.DispatchSessionPostCreate(testutil.Context(t), hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostCreate, fixedNow)); err != nil { + t.Fatalf("DispatchSessionPostCreate() error = %v", err) } - if got := resolver.call(0); got != "ws-1" { - t.Fatalf("workspace resolver call = %q, want %q", got, "ws-1") + if _, err := hooks.DispatchSessionPostStop(testutil.Context(t), hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostStop, fixedNow)); err != nil { + t.Fatalf("DispatchSessionPostStop() error = %v", err) } - if got := registry.callCount(); got != 1 { - t.Fatalf("registry call count = %d, want 1", got) + + if got := len(observer.created); got != 1 { + t.Fatalf("len(observer.created) = %d, want 1", got) } - if got := registry.call(0).RootDir; got != rootDir { - t.Fatalf("registry workspace root = %q, want %q", got, rootDir) + if got := len(observer.stopped); got != 1 { + t.Fatalf("len(observer.stopped) = %d, want 1", got) } - - payloadBytes, err := os.ReadFile(outputPath) - if err != nil { - t.Fatalf("os.ReadFile(%q) error = %v", outputPath, err) + if observer.created[0].Info().CreatedAt != sess.CreatedAt { + t.Fatalf("observer created CreatedAt = %s, want %s", observer.created[0].Info().CreatedAt, sess.CreatedAt) } - var payload skillspkg.HookPayload - if err := json.Unmarshal(payloadBytes, &payload); err != nil { - t.Fatalf("json.Unmarshal(payload) error = %v", err) + if got, want := dream.calls, []string{"session_stop:ws-1"}; !testutil.EqualStringSlices(got, want) { + t.Fatalf("dream calls = %#v, want %#v", got, want) } - if payload.SessionID != "sess-1" { - t.Fatalf("payload.SessionID = %q, want %q", payload.SessionID, "sess-1") +} + +func TestMarketplaceHookAllowedHonorsConsentKeys(t *testing.T) { + t.Parallel() + + marketplaceSkill := marketplaceSkillForTest("registry.example", "@registry/hook-a", "hash-123") + if marketplaceHookAllowed(marketplaceSkill, nil) { + t.Fatal("marketplaceHookAllowed() = true, want false without consent") } - if 
payload.AgentName != "coder" { - t.Fatalf("payload.AgentName = %q, want %q", payload.AgentName, "coder") + + allowed := marketplaceHookAllowlist([]string{"@registry/hook-a"}) + if !marketplaceHookAllowed(marketplaceSkill, allowed) { + t.Fatal("marketplaceHookAllowed() = false, want true for allowed slug") } - if payload.Workspace != rootDir { - t.Fatalf("payload.Workspace = %q, want %q", payload.Workspace, rootDir) + + allowed = marketplaceHookAllowlist([]string{"registry.example:@registry/hook-a"}) + if !marketplaceHookAllowed(marketplaceSkill, allowed) { + t.Fatal("marketplaceHookAllowed() = false, want true for allowed registry slug") } - if payload.Event != string(skillspkg.HookSessionCreated) { - t.Fatalf("payload.Event = %q, want %q", payload.Event, skillspkg.HookSessionCreated) + + allowed = marketplaceHookAllowlist([]string{"hash-123"}) + if !marketplaceHookAllowed(marketplaceSkill, allowed) { + t.Fatal("marketplaceHookAllowed() = false, want true for allowed hash") } } -func TestSkillsHookDispatcherSkipsUntrustedMarketplaceHooks(t *testing.T) { +func TestHooksBridgeHelperCloningAndTimestamp(t *testing.T) { t.Parallel() - workDir := t.TempDir() - rootDir := filepath.Join(workDir, "workspace") - if err := os.MkdirAll(rootDir, 0o755); err != nil { - t.Fatalf("os.MkdirAll(workspace) error = %v", err) - } - - scriptPath := writeHookScript(t, workDir, "capture.sh", "#!/bin/sh\ncat > \"$1\"\n") - outputPath := filepath.Join(workDir, "blocked.json") - - registry := &hookDispatcherRegistry{ - skills: []*skillspkg.Skill{ - { - Source: skillspkg.SourceMarketplace, - Meta: skillspkg.SkillMeta{Name: "marketplace-skill"}, - Hooks: []skillspkg.HookDecl{ - { - Event: skillspkg.HookSessionCreated, - Command: scriptPath, - Args: []string{outputPath}, - }, - }, - }, - }, + notifier := newHooksNotifier(discardLogger(), nil) + before := time.Now().UTC().Add(-time.Second) + got := notifier.timestamp() + after := time.Now().UTC().Add(time.Second) + if got.Before(before) || 
got.After(after) { + t.Fatalf("timestamp() = %s, want current time between %s and %s", got, before, after) } - resolver := &hookDispatcherWorkspaceResolver{ - resolved: workspacepkg.ResolvedWorkspace{ - Workspace: workspacepkg.Workspace{ - ID: "ws-1", - RootDir: rootDir, - Name: "workspace", - }, + + readOnly := true + original := hookspkg.HookDecl{ + Name: "config-hook", + Source: hookspkg.HookSourceConfig, + Args: []string{"one"}, + Env: map[string]string{"KEY": "value"}, + Metadata: map[string]string{"note": "keep"}, + Matcher: hookspkg.HookMatcher{ + ToolReadOnly: &readOnly, }, } - dispatcher := newSkillsHookDispatcher(registry, skillspkg.NewHookRunner(aghconfig.SkillsConfig{}, discardLogger()), resolver, discardLogger()) - dispatcher.OnSessionCreated(testutil.Context(t), &session.Session{ - ID: "sess-1", - AgentName: "coder", - WorkspaceID: "ws-1", - }) + filtered := filterHookDeclsBySource([]hookspkg.HookDecl{original}, hookspkg.HookSourceConfig) + if len(filtered) != 1 { + t.Fatalf("len(filtered) = %d, want 1", len(filtered)) + } + filtered[0].Args[0] = "changed" + filtered[0].Env["KEY"] = "changed" + filtered[0].Metadata["note"] = "changed" + *filtered[0].Matcher.ToolReadOnly = false - if _, err := os.Stat(outputPath); !errors.Is(err, os.ErrNotExist) { - t.Fatalf("os.Stat(%q) error = %v, want os.ErrNotExist", outputPath, err) + if original.Args[0] != "one" { + t.Fatalf("original.Args = %#v, want unchanged", original.Args) + } + if original.Env["KEY"] != "value" { + t.Fatalf("original.Env = %#v, want unchanged", original.Env) + } + if original.Metadata["note"] != "keep" { + t.Fatalf("original.Metadata = %#v, want unchanged", original.Metadata) + } + if !*original.Matcher.ToolReadOnly { + t.Fatal("original matcher ToolReadOnly was mutated") } -} -type notifierFunc struct { - onCreated func(context.Context, *session.Session) - onStopped func(context.Context, *session.Session) -} + resolved := workspaceResolvedForTest("ws-1", "/tmp/ws-1") + scoped := 
scopeWorkspaceHookDecls([]hookspkg.HookDecl{original}, resolved) + if len(scoped) != 1 { + t.Fatalf("len(scoped) = %d, want 1", len(scoped)) + } + if scoped[0].Matcher.WorkspaceID != resolved.ID { + t.Fatalf("scoped WorkspaceID = %q, want %q", scoped[0].Matcher.WorkspaceID, resolved.ID) + } + if scoped[0].Matcher.WorkspaceRoot != resolved.RootDir { + t.Fatalf("scoped WorkspaceRoot = %q, want %q", scoped[0].Matcher.WorkspaceRoot, resolved.RootDir) + } + if original.Matcher.WorkspaceID != "" || original.Matcher.WorkspaceRoot != "" { + t.Fatalf("original matcher workspace fields were mutated: %#v", original.Matcher) + } -func (n notifierFunc) OnSessionCreated(ctx context.Context, sess *session.Session) { - if n.onCreated != nil { - n.onCreated(ctx, sess) + if got := cloneStringMap(nil); got != nil { + t.Fatalf("cloneStringMap(nil) = %#v, want nil", got) } } -func (n notifierFunc) OnSessionStopped(ctx context.Context, sess *session.Session) { - if n.onStopped != nil { - n.onStopped(ctx, sess) +func TestDispatchRuntimeAndExecutorResolvers(t *testing.T) { + t.Parallel() + + notifier := newHooksNotifier(discardLogger(), func() time.Time { return time.Date(2026, 4, 9, 16, 0, 0, 0, time.UTC) }) + payload, err := dispatchRuntime(notifier, nil, hookspkg.HookSessionPostCreate, "seed", false, func(_ hookRuntime, _ context.Context, item string) (string, error) { + return item + "-unused", nil + }) + if err != nil { + t.Fatalf("dispatchRuntime(nil runtime) error = %v, want nil", err) + } + if payload != "seed" { + t.Fatalf("dispatchRuntime(nil runtime) payload = %q, want %q", payload, "seed") } -} -func (n notifierFunc) OnAgentEvent(context.Context, string, acp.AgentEvent) {} + var rebuildCalls int + runtime := &fakeHookRuntime{ + onRebuild: func(context.Context) error { + rebuildCalls++ + return errors.New("rebuild failed") + }, + } + notifier.setRuntime(runtime, nil) -type hookPhaseRecorder struct { - onCreated func(context.Context, *session.Session) - onStopped 
func(context.Context, *session.Session) -} + result, err := dispatchRuntime(notifier, context.Background(), hookspkg.HookEventPreRecord, "seed", false, func(_ hookRuntime, _ context.Context, item string) (string, error) { + return item + "-ok", nil + }) + if err != nil { + t.Fatalf("dispatchRuntime(rebuild false) error = %v, want nil", err) + } + if result != "seed-ok" { + t.Fatalf("dispatchRuntime(rebuild false) result = %q, want %q", result, "seed-ok") + } + if rebuildCalls != 0 { + t.Fatalf("rebuildCalls = %d, want 0 when rebuild=false", rebuildCalls) + } -func (h hookPhaseRecorder) OnSessionCreated(ctx context.Context, sess *session.Session) { - if h.onCreated != nil { - h.onCreated(ctx, sess) + result, err = dispatchRuntime(notifier, context.Background(), hookspkg.HookSessionPostCreate, "seed", true, func(_ hookRuntime, _ context.Context, item string) (string, error) { + return item + "-after-rebuild", nil + }) + if err != nil { + t.Fatalf("dispatchRuntime(rebuild true) error = %v, want nil", err) + } + if result != "seed-after-rebuild" { + t.Fatalf("dispatchRuntime(rebuild true) result = %q, want %q", result, "seed-after-rebuild") + } + if rebuildCalls != 1 { + t.Fatalf("rebuildCalls = %d, want 1", rebuildCalls) } -} -func (h hookPhaseRecorder) OnSessionStopped(ctx context.Context, sess *session.Session) { - if h.onStopped != nil { - h.onStopped(ctx, sess) + _, err = dispatchRuntime(notifier, nil, hookspkg.HookSessionPostCreate, "seed", false, func(_ hookRuntime, _ context.Context, item string) (string, error) { + return item, nil + }) + if err == nil || !strings.Contains(err.Error(), "requires a non-nil context") { + t.Fatalf("dispatchRuntime(nil context) error = %v, want non-nil context detail", err) } -} -type hookDispatcherRegistry struct { - skills []*skillspkg.Skill - calls []workspacepkg.ResolvedWorkspace - err error -} + workspaceRoot := t.TempDir() + subprocessExecutor, err := defaultDaemonExecutorResolver(hookspkg.HookDecl{ + Name: "subprocess", + 
ExecutorKind: hookspkg.HookExecutorSubprocess, + Command: "/bin/sh", + Args: []string{"-c", "printf '%s|' \"$HOOK_SCOPE_ENV\"; pwd"}, + Env: map[string]string{"HOOK_SCOPE_ENV": "kept"}, + Matcher: hookspkg.HookMatcher{WorkspaceRoot: workspaceRoot}, + }) + if err != nil { + t.Fatalf("defaultDaemonExecutorResolver(subprocess) error = %v, want nil", err) + } + if subprocessExecutor.Kind() != hookspkg.HookExecutorSubprocess { + t.Fatalf("subprocess executor kind = %q, want %q", subprocessExecutor.Kind(), hookspkg.HookExecutorSubprocess) + } + output, err := subprocessExecutor.Execute(t.Context(), hookspkg.RegisteredHook{Name: "subprocess"}, nil) + if err != nil { + t.Fatalf("subprocess executor.Execute() error = %v, want nil", err) + } + resolvedWorkspaceRoot, err := filepath.EvalSymlinks(workspaceRoot) + if err != nil { + t.Fatalf("EvalSymlinks(workspaceRoot) error = %v, want nil", err) + } + if got := strings.TrimSpace(string(output)); got != "kept|"+resolvedWorkspaceRoot { + t.Fatalf("subprocess executor output = %q, want %q", got, "kept|"+resolvedWorkspaceRoot) + } -func (r *hookDispatcherRegistry) ForWorkspace(_ context.Context, resolved workspacepkg.ResolvedWorkspace) ([]*skillspkg.Skill, error) { - r.calls = append(r.calls, resolved) - if r.err != nil { - return nil, r.err + wasmExecutor, err := defaultDaemonExecutorResolver(hookspkg.HookDecl{ + Name: "wasm", + ExecutorKind: hookspkg.HookExecutorWASM, + }) + if err != nil { + t.Fatalf("defaultDaemonExecutorResolver(wasm) error = %v, want nil", err) + } + if wasmExecutor.Kind() != hookspkg.HookExecutorWASM { + t.Fatalf("wasm executor kind = %q, want %q", wasmExecutor.Kind(), hookspkg.HookExecutorWASM) } - return append([]*skillspkg.Skill(nil), r.skills...), nil -} -func (r *hookDispatcherRegistry) callCount() int { - return len(r.calls) -} + if _, err := defaultDaemonExecutorResolver(hookspkg.HookDecl{ + Name: "native", + ExecutorKind: hookspkg.HookExecutorNative, + }); err == nil || 
!strings.Contains(err.Error(), "requires an explicit binding") { + t.Fatalf("defaultDaemonExecutorResolver(native) error = %v, want explicit binding error", err) + } -func (r *hookDispatcherRegistry) call(index int) workspacepkg.ResolvedWorkspace { - return r.calls[index] + if _, err := defaultDaemonExecutorResolver(hookspkg.HookDecl{ + Name: "unknown", + ExecutorKind: hookspkg.HookExecutorKind("mystery"), + }); err == nil || !strings.Contains(err.Error(), "unsupported executor kind") { + t.Fatalf("defaultDaemonExecutorResolver(unknown) error = %v, want unsupported kind error", err) + } + + resolver := daemonExecutorResolver(map[string]hookspkg.Executor{ + "bound": hookspkg.NewTypedNativeExecutor(func(context.Context, hookspkg.RegisteredHook, hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePatch, error) { + return hookspkg.SessionPostCreatePatch{}, nil + }), + }) + nativeExecutor, err := resolver(hookspkg.HookDecl{Name: "bound", ExecutorKind: hookspkg.HookExecutorNative}) + if err != nil { + t.Fatalf("daemonExecutorResolver(bound native) error = %v, want nil", err) + } + if nativeExecutor.Kind() != hookspkg.HookExecutorNative { + t.Fatalf("native executor kind = %q, want %q", nativeExecutor.Kind(), hookspkg.HookExecutorNative) + } + + if _, err := resolver(hookspkg.HookDecl{Name: "missing", ExecutorKind: hookspkg.HookExecutorNative}); err == nil || !strings.Contains(err.Error(), "missing native hook executor") { + t.Fatalf("daemonExecutorResolver(missing native) error = %v, want missing native executor error", err) + } } -type hookDispatcherWorkspaceResolver struct { - resolved workspacepkg.ResolvedWorkspace - calls []string - err error +type spyLifecycleObserver struct { + created []*session.Session + stopped []*session.Session } -func (r *hookDispatcherWorkspaceResolver) Resolve(_ context.Context, idOrPath string) (workspacepkg.ResolvedWorkspace, error) { - r.calls = append(r.calls, idOrPath) - if r.err != nil { - return 
workspacepkg.ResolvedWorkspace{}, r.err - } - return r.resolved, nil +func (s *spyLifecycleObserver) OnSessionCreated(_ context.Context, sess *session.Session) { + s.created = append(s.created, sess) } -func (r *hookDispatcherWorkspaceResolver) ResolveOrRegister(context.Context, string) (workspacepkg.ResolvedWorkspace, error) { - return workspacepkg.ResolvedWorkspace{}, errors.New("unexpected ResolveOrRegister call") +func (s *spyLifecycleObserver) OnSessionStopped(_ context.Context, sess *session.Session) { + s.stopped = append(s.stopped, sess) } -func (r *hookDispatcherWorkspaceResolver) callCount() int { - return len(r.calls) +type spyDreamRuntime struct { + calls []string } -func (r *hookDispatcherWorkspaceResolver) call(index int) string { - return r.calls[index] +func (s *spyDreamRuntime) EnqueueCheck(reason string, workspaceRef string) { + s.calls = append(s.calls, reason+":"+workspaceRef) } -func writeHookScript(t *testing.T, dir string, name string, contents string) string { - t.Helper() +func marketplaceSkillForTest(registry string, slug string, hash string) *skills.Skill { + return &skills.Skill{ + Source: skills.SourceMarketplace, + Meta: skills.SkillMeta{Name: "marketplace-hook"}, + Provenance: &skills.Provenance{ + Registry: registry, + Slug: slug, + Hash: hash, + }, + } +} - path := filepath.Join(dir, name) - if err := os.WriteFile(path, []byte(contents), 0o755); err != nil { - t.Fatalf("os.WriteFile(%q) error = %v", path, err) +func workspaceResolvedForTest(id string, root string) workspacepkg.ResolvedWorkspace { + return workspacepkg.ResolvedWorkspace{ + Workspace: workspacepkg.Workspace{ + ID: id, + RootDir: root, + }, } - return path } diff --git a/internal/hooks/agent_event.go b/internal/hooks/agent_event.go new file mode 100644 index 000000000..00cd229cf --- /dev/null +++ b/internal/hooks/agent_event.go @@ -0,0 +1,7 @@ +package hooks + +import "context" + +// OnAgentEvent remains a no-op until the richer direct runtime integrations +// land in 
the daemon/session wiring tasks. +func (h *Hooks) OnAgentEvent(_ context.Context, _ string, _ any) {} diff --git a/internal/hooks/depth.go b/internal/hooks/depth.go new file mode 100644 index 000000000..611ef85e6 --- /dev/null +++ b/internal/hooks/depth.go @@ -0,0 +1,56 @@ +package hooks + +import ( + "context" + "errors" + "fmt" +) + +const maxDispatchDepth = 3 + +type dispatchDepthContextKey struct{} +type dispatchChainContextKey struct{} + +var ErrDispatchDepthExceeded = errors.New("hooks: dispatch depth exceeded") + +func enterDispatch(ctx context.Context, event HookEvent) (context.Context, int, error) { + depth := currentDispatchDepth(ctx) + 1 + if depth > maxDispatchDepth { + return ctx, currentDispatchDepth(ctx), fmt.Errorf( + "%w for event %q: depth %d exceeds max %d", + ErrDispatchDepthExceeded, + event, + depth, + maxDispatchDepth, + ) + } + + nextChain := append(currentDispatchChain(ctx), event) + nextCtx := context.WithValue(ctx, dispatchDepthContextKey{}, depth) + nextCtx = context.WithValue(nextCtx, dispatchChainContextKey{}, nextChain) + return nextCtx, depth, nil +} + +func currentDispatchDepth(ctx context.Context) int { + if ctx == nil { + return 0 + } + + depth, _ := ctx.Value(dispatchDepthContextKey{}).(int) + return depth +} + +func currentDispatchChain(ctx context.Context) []HookEvent { + if ctx == nil { + return nil + } + + chain, _ := ctx.Value(dispatchChainContextKey{}).([]HookEvent) + if len(chain) == 0 { + return nil + } + + cloned := make([]HookEvent, len(chain)) + copy(cloned, chain) + return cloned +} diff --git a/internal/hooks/dispatch.go b/internal/hooks/dispatch.go new file mode 100644 index 000000000..bd1b516ba --- /dev/null +++ b/internal/hooks/dispatch.go @@ -0,0 +1,868 @@ +package hooks + +import ( + "context" + "errors" + "fmt" + "time" +) + +type matcherFunc[P any] func(HookMatcher, P) bool + +type dispatchConfig[P any, R any] struct { + match matcherFunc[P] + apply func(P, R) P + denied denyDetector[R] + denyErr func(P) error 
+ guard patchGuard[P, R] +} + +// DispatchSessionPreCreate runs the session.pre_create hook pipeline. +func (h *Hooks) DispatchSessionPreCreate(ctx context.Context, payload SessionPreCreatePayload) (SessionPreCreatePayload, error) { + return executeDispatch( + h, + ctx, + HookSessionPreCreate, + payload, + dispatchConfig[SessionPreCreatePayload, SessionCreatePatch]{ + match: matchSessionPreCreate, + apply: applySessionCreatePatch, + denied: sessionCreatePatchDenied, + denyErr: func(SessionPreCreatePayload) error { + return fmt.Errorf("hooks: event %q denied", HookSessionPreCreate) + }, + }, + ) +} + +// DispatchSessionPostCreate runs the session.post_create hook pipeline. +func (h *Hooks) DispatchSessionPostCreate(ctx context.Context, payload SessionPostCreatePayload) (SessionPostCreatePayload, error) { + return executeDispatch( + h, + ctx, + HookSessionPostCreate, + payload, + dispatchConfig[SessionPostCreatePayload, SessionPostCreatePatch]{ + match: matchSessionLifecycle, + apply: applySessionLifecyclePatch, + denied: sessionCreatePatchDenied, + }, + ) +} + +// DispatchSessionPreResume runs the session.pre_resume hook pipeline. +func (h *Hooks) DispatchSessionPreResume(ctx context.Context, payload SessionPreResumePayload) (SessionPreResumePayload, error) { + return executeDispatch( + h, + ctx, + HookSessionPreResume, + payload, + dispatchConfig[SessionPreResumePayload, SessionPreResumePatch]{ + match: matchSessionLifecycle, + apply: applySessionLifecyclePatch, + denied: sessionCreatePatchDenied, + denyErr: func(SessionPreResumePayload) error { + return fmt.Errorf("hooks: event %q denied", HookSessionPreResume) + }, + }, + ) +} + +// DispatchSessionPostResume runs the session.post_resume hook pipeline. 
+func (h *Hooks) DispatchSessionPostResume(ctx context.Context, payload SessionPostResumePayload) (SessionPostResumePayload, error) { + return executeDispatch( + h, + ctx, + HookSessionPostResume, + payload, + dispatchConfig[SessionPostResumePayload, SessionPostResumePatch]{ + match: matchSessionLifecycle, + apply: applySessionLifecyclePatch, + denied: sessionCreatePatchDenied, + }, + ) +} + +// DispatchSessionPreStop runs the session.pre_stop hook pipeline. +func (h *Hooks) DispatchSessionPreStop(ctx context.Context, payload SessionPreStopPayload) (SessionPreStopPayload, error) { + return executeDispatch( + h, + ctx, + HookSessionPreStop, + payload, + dispatchConfig[SessionPreStopPayload, SessionPreStopPatch]{ + match: matchSessionLifecycle, + apply: applySessionLifecyclePatch, + denied: sessionCreatePatchDenied, + denyErr: func(SessionPreStopPayload) error { + return fmt.Errorf("hooks: event %q denied", HookSessionPreStop) + }, + }, + ) +} + +// DispatchSessionPostStop runs the session.post_stop hook pipeline. +func (h *Hooks) DispatchSessionPostStop(ctx context.Context, payload SessionPostStopPayload) (SessionPostStopPayload, error) { + return executeDispatch( + h, + ctx, + HookSessionPostStop, + payload, + dispatchConfig[SessionPostStopPayload, SessionPostStopPatch]{ + match: matchSessionLifecycle, + apply: applySessionLifecyclePatch, + denied: sessionCreatePatchDenied, + }, + ) +} + +// DispatchInputPreSubmit runs the input.pre_submit hook pipeline. 
+func (h *Hooks) DispatchInputPreSubmit(ctx context.Context, payload InputPreSubmitPayload) (InputPreSubmitPayload, error) { + return executeDispatch( + h, + ctx, + HookInputPreSubmit, + payload, + dispatchConfig[InputPreSubmitPayload, InputPreSubmitPatch]{ + match: matchInputPreSubmit, + apply: applyInputPreSubmitPatch, + denied: inputPreSubmitPatchDenied, + denyErr: func(InputPreSubmitPayload) error { + return fmt.Errorf("hooks: event %q denied", HookInputPreSubmit) + }, + }, + ) +} + +// DispatchPromptPostAssemble runs the prompt.post_assemble hook pipeline. +func (h *Hooks) DispatchPromptPostAssemble(ctx context.Context, payload PromptPayload) (PromptPayload, error) { + return executeDispatch( + h, + ctx, + HookPromptPostAssemble, + payload, + dispatchConfig[PromptPayload, PromptPatch]{ + match: matchPrompt, + apply: applyPromptPatch, + denied: promptPatchDenied, + denyErr: func(PromptPayload) error { + return fmt.Errorf("hooks: event %q denied", HookPromptPostAssemble) + }, + }, + ) +} + +// DispatchEventPreRecord runs the event.pre_record hook dispatch. +func (h *Hooks) DispatchEventPreRecord(ctx context.Context, payload EventPreRecordPayload) (EventPreRecordPayload, error) { + return executeDispatch( + h, + ctx, + HookEventPreRecord, + payload, + dispatchConfig[EventPreRecordPayload, EventPreRecordPatch]{ + match: matchEventRecord, + apply: applyNoop[EventPreRecordPayload, EventPreRecordPatch], + }, + ) +} + +// DispatchEventPostRecord runs the event.post_record hook dispatch. +func (h *Hooks) DispatchEventPostRecord(ctx context.Context, payload EventPostRecordPayload) (EventPostRecordPayload, error) { + return executeDispatch( + h, + ctx, + HookEventPostRecord, + payload, + dispatchConfig[EventPostRecordPayload, EventPostRecordPatch]{ + match: matchEventRecord, + apply: applyNoop[EventPostRecordPayload, EventPostRecordPatch], + }, + ) +} + +// DispatchAgentPreStart runs the agent.pre_start hook pipeline. 
+func (h *Hooks) DispatchAgentPreStart(ctx context.Context, payload AgentPreStartPayload) (AgentPreStartPayload, error) { + return executeDispatch( + h, + ctx, + HookAgentPreStart, + payload, + dispatchConfig[AgentPreStartPayload, AgentStartPatch]{ + match: matchAgentPreStart, + apply: applyAgentStartPatch, + denied: agentStartPatchDenied, + denyErr: func(AgentPreStartPayload) error { + return fmt.Errorf("hooks: event %q denied", HookAgentPreStart) + }, + }, + ) +} + +// DispatchAgentSpawned runs the agent.spawned hook pipeline. +func (h *Hooks) DispatchAgentSpawned(ctx context.Context, payload AgentSpawnedPayload) (AgentSpawnedPayload, error) { + return executeDispatch( + h, + ctx, + HookAgentSpawned, + payload, + dispatchConfig[AgentSpawnedPayload, AgentSpawnedPatch]{ + match: matchAgentLifecycle, + apply: applyNoop[AgentSpawnedPayload, AgentSpawnedPatch], + }, + ) +} + +// DispatchAgentCrashed runs the agent.crashed hook pipeline. +func (h *Hooks) DispatchAgentCrashed(ctx context.Context, payload AgentCrashedPayload) (AgentCrashedPayload, error) { + return executeDispatch( + h, + ctx, + HookAgentCrashed, + payload, + dispatchConfig[AgentCrashedPayload, AgentCrashedPatch]{ + match: matchAgentLifecycle, + apply: applyNoop[AgentCrashedPayload, AgentCrashedPatch], + }, + ) +} + +// DispatchAgentStopped runs the agent.stopped hook pipeline. +func (h *Hooks) DispatchAgentStopped(ctx context.Context, payload AgentStoppedPayload) (AgentStoppedPayload, error) { + return executeDispatch( + h, + ctx, + HookAgentStopped, + payload, + dispatchConfig[AgentStoppedPayload, AgentStoppedPatch]{ + match: matchAgentLifecycle, + apply: applyNoop[AgentStoppedPayload, AgentStoppedPatch], + }, + ) +} + +// DispatchTurnStart runs the turn.start hook pipeline. 
+func (h *Hooks) DispatchTurnStart(ctx context.Context, payload TurnStartPayload) (TurnStartPayload, error) { + return executeDispatch( + h, + ctx, + HookTurnStart, + payload, + dispatchConfig[TurnStartPayload, TurnStartPatch]{ + match: matchTurn, + apply: applyNoop[TurnStartPayload, TurnStartPatch], + denied: turnPatchDenied, + }, + ) +} + +// DispatchTurnEnd runs the turn.end hook pipeline. +func (h *Hooks) DispatchTurnEnd(ctx context.Context, payload TurnEndPayload) (TurnEndPayload, error) { + return executeDispatch( + h, + ctx, + HookTurnEnd, + payload, + dispatchConfig[TurnEndPayload, TurnEndPatch]{ + match: matchTurn, + apply: applyNoop[TurnEndPayload, TurnEndPatch], + denied: turnPatchDenied, + }, + ) +} + +// DispatchMessageStart runs the message.start hook pipeline. +func (h *Hooks) DispatchMessageStart(ctx context.Context, payload MessageStartPayload) (MessageStartPayload, error) { + return executeDispatch( + h, + ctx, + HookMessageStart, + payload, + dispatchConfig[MessageStartPayload, MessageStartPatch]{ + match: matchMessage, + apply: applyMessagePatch, + denied: messagePatchDenied, + }, + ) +} + +// DispatchMessageDelta runs the message.delta hook dispatch. +func (h *Hooks) DispatchMessageDelta(ctx context.Context, payload MessageDeltaPayload) (MessageDeltaPayload, error) { + return executeDispatch( + h, + ctx, + HookMessageDelta, + payload, + dispatchConfig[MessageDeltaPayload, MessageDeltaPatch]{ + match: matchMessage, + apply: applyMessagePatch, + denied: messagePatchDenied, + }, + ) +} + +// DispatchMessageEnd runs the message.end hook pipeline. +func (h *Hooks) DispatchMessageEnd(ctx context.Context, payload MessageEndPayload) (MessageEndPayload, error) { + return executeDispatch( + h, + ctx, + HookMessageEnd, + payload, + dispatchConfig[MessageEndPayload, MessageEndPatch]{ + match: matchMessage, + apply: applyMessagePatch, + denied: messagePatchDenied, + }, + ) +} + +// DispatchToolPreCall runs the tool.pre_call hook pipeline. 
+func (h *Hooks) DispatchToolPreCall(ctx context.Context, payload ToolPreCallPayload) (ToolPreCallPayload, error) { + return executeDispatch( + h, + ctx, + HookToolPreCall, + payload, + dispatchConfig[ToolPreCallPayload, ToolCallPatch]{ + match: matchToolPreCall, + apply: applyToolCallPatch, + denied: toolCallPatchDenied, + }, + ) +} + +// DispatchToolPostCall runs the tool.post_call hook pipeline. +func (h *Hooks) DispatchToolPostCall(ctx context.Context, payload ToolPostCallPayload) (ToolPostCallPayload, error) { + return executeDispatch( + h, + ctx, + HookToolPostCall, + payload, + dispatchConfig[ToolPostCallPayload, ToolResultPatch]{ + match: matchToolPostCall, + apply: applyToolResultPatch, + denied: toolResultPatchDenied, + }, + ) +} + +// DispatchToolPostError runs the tool.post_error hook pipeline. +func (h *Hooks) DispatchToolPostError(ctx context.Context, payload ToolPostErrorPayload) (ToolPostErrorPayload, error) { + return executeDispatch( + h, + ctx, + HookToolPostError, + payload, + dispatchConfig[ToolPostErrorPayload, ToolPostErrorPatch]{ + match: matchToolPostError, + apply: applyToolPostErrorPatch, + denied: toolResultPatchDenied, + }, + ) +} + +// DispatchPermissionRequest runs the permission.request hook pipeline. +func (h *Hooks) DispatchPermissionRequest(ctx context.Context, payload PermissionRequestPayload) (PermissionRequestPayload, error) { + return executeDispatch( + h, + ctx, + HookPermissionRequest, + payload, + dispatchConfig[PermissionRequestPayload, PermissionRequestPatch]{ + match: matchPermissionRequest, + apply: mergePermissionRequestPatch, + denied: permissionPatchDenies, + guard: newPermissionRequestGuard(h.logger, h.metrics), + }, + ) +} + +// DispatchPermissionResolved runs the permission.resolved hook dispatch. 
+func (h *Hooks) DispatchPermissionResolved(ctx context.Context, payload PermissionResolvedPayload) (PermissionResolvedPayload, error) { + return executeDispatch( + h, + ctx, + HookPermissionResolved, + payload, + dispatchConfig[PermissionResolvedPayload, PermissionResolvedPatch]{ + match: matchPermissionResolution, + apply: applyNoop[PermissionResolvedPayload, PermissionResolvedPatch], + }, + ) +} + +// DispatchPermissionDenied runs the permission.denied hook dispatch. +func (h *Hooks) DispatchPermissionDenied(ctx context.Context, payload PermissionDeniedPayload) (PermissionDeniedPayload, error) { + return executeDispatch( + h, + ctx, + HookPermissionDenied, + payload, + dispatchConfig[PermissionDeniedPayload, PermissionDeniedPatch]{ + match: matchPermissionResolution, + apply: applyNoop[PermissionDeniedPayload, PermissionDeniedPatch], + }, + ) +} + +// DispatchContextPreCompact runs the context.pre_compact hook pipeline. +func (h *Hooks) DispatchContextPreCompact(ctx context.Context, payload ContextPreCompactPayload) (ContextPreCompactPayload, error) { + return executeDispatch( + h, + ctx, + HookContextPreCompact, + payload, + dispatchConfig[ContextPreCompactPayload, ContextPreCompactPatch]{ + match: matchContextCompact, + apply: applyContextCompactionPatch, + denied: contextCompactionPatchDenied, + }, + ) +} + +// DispatchContextPostCompact runs the context.post_compact hook pipeline. 
+func (h *Hooks) DispatchContextPostCompact(ctx context.Context, payload ContextPostCompactPayload) (ContextPostCompactPayload, error) { + return executeDispatch( + h, + ctx, + HookContextPostCompact, + payload, + dispatchConfig[ContextPostCompactPayload, ContextPostCompactPatch]{ + match: matchContextCompact, + apply: applyContextCompactionPatch, + denied: contextCompactionPatchDenied, + }, + ) +} + +func executeDispatch[P any, R any]( + h *Hooks, + ctx context.Context, + event HookEvent, + payload P, + cfg dispatchConfig[P, R], +) (P, error) { + if h == nil { + return payload, errors.New("hooks: dispatcher is nil") + } + if ctx == nil { + return payload, errors.New("hooks: dispatch context is nil") + } + + snapshot, err := h.hookSnapshot(event) + if err != nil { + return payload, err + } + + syncHooks, asyncHooks := selectMatchingHooks(snapshot, payload, cfg.match) + if len(syncHooks) == 0 && len(asyncHooks) == 0 { + return payload, nil + } + + dispatchDepth := currentDispatchDepth(ctx) + 1 + dispatchStarted := time.Now() + h.logger.Info( + "hook.dispatch.started", + "event", event.String(), + "dispatch_depth", dispatchDepth, + "sync_hooks", len(syncHooks), + "async_hooks", len(asyncHooks), + ) + + result := payload + var dispatchErr error + pipe := pipeline[P, R]{ + event: event, + hooksRuntime: h, + hooks: func(P) []*ResolvedHook { return syncHooks }, + apply: cfg.apply, + encode: encodeJSON[P], + decode: decodeJSON[R], + denied: cfg.denied, + guard: cfg.guard, + enter: h.enterDispatch, + } + var report dispatchReport + if len(syncHooks) > 0 { + result, report, dispatchErr = pipe.executeWithDisposition(ctx, payload) + if dispatchErr == nil && report.Denied && cfg.denyErr != nil { + dispatchErr = cfg.denyErr(result) + } + } + + if dispatchErr == nil && !report.Denied && len(asyncHooks) > 0 { + submitAsyncHooks(h, ctx, result, asyncHooks, pipe) + } + + pipelineDuration := time.Since(dispatchStarted) + h.metrics.observePipeline(event, pipelineDuration) + switch { 
+ case report.Denied: + h.logger.Warn( + "hook.dispatch.blocked", + "event", event.String(), + "dispatch_depth", dispatchDepth, + "deny_source", report.DenySource, + "pipeline_trace", traceStrings(report.Trace), + ) + case dispatchErr != nil: + h.logger.Warn( + "hook.dispatch.failed", + "event", event.String(), + "dispatch_depth", dispatchDepth, + "error", dispatchErr, + "failed_hook", report.FailedHook, + "required", report.FailedRequired, + "pipeline_trace", traceStrings(report.Trace), + ) + default: + h.logger.Info( + "hook.dispatch.completed", + "event", event.String(), + "dispatch_depth", dispatchDepth, + "duration_ms", pipelineDuration.Milliseconds(), + "pipeline_trace", traceStrings(report.Trace), + "sync_hooks", len(syncHooks), + "async_hooks", len(asyncHooks), + ) + } + + return result, dispatchErr +} + +func selectMatchingHooks[P any]( + snapshot []*ResolvedHook, + payload P, + match matcherFunc[P], +) ([]*ResolvedHook, []*ResolvedHook) { + syncHooks := make([]*ResolvedHook, 0, len(snapshot)) + asyncHooks := make([]*ResolvedHook, 0, len(snapshot)) + + for _, hook := range snapshot { + if hook == nil { + continue + } + if match != nil && !match(hook.Matcher, payload) { + continue + } + switch hook.Mode { + case HookModeAsync: + asyncHooks = append(asyncHooks, hook) + case HookModeSync: + syncHooks = append(syncHooks, hook) + } + } + + return syncHooks, asyncHooks +} + +func submitAsyncHooks[P any, R any](h *Hooks, parent context.Context, payload P, hooks []*ResolvedHook, pipe pipeline[P, R]) { + if h == nil || h.pool == nil { + return + } + + parentDepth := currentDispatchDepth(parent) + for _, hook := range hooks { + if hook == nil { + continue + } + + asyncHook := *hook + asyncPayload := payload + h.pool.Submit(asyncTask{ + hook: asyncHook.RegisteredHook, + run: func(poolCtx context.Context) { + baseCtx := context.WithValue(poolCtx, dispatchDepthContextKey{}, parentDepth) + baseCtx = context.WithValue(baseCtx, dispatchChainContextKey{}, 
currentDispatchChain(parent)) + hookCtx, depth, err := h.enterDispatch(baseCtx, asyncHook.Event) + if err != nil { + h.emitHookRun(poolCtx, asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeSkipped, 0, nil, err, parentDepth) + return + } + + cancel := func() {} + if asyncHook.Timeout > 0 { + hookCtx, cancel = context.WithTimeout(hookCtx, asyncHook.Timeout) + } + defer cancel() + + started := time.Now() + _, rawPatch, err := pipe.runHook(hookCtx, asyncHook.RegisteredHook, asyncPayload) + duration := time.Since(started) + if err != nil { + h.emitHookRun(hookCtx, asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeFailed, duration, rawPatch, err, depth) + h.logger.WarnContext( + hookCtx, + "hook.dispatch.async_failed", + "hook", asyncHook.Name, + "event", asyncHook.Event.String(), + "source", asyncHook.Source.String(), + "error", err, + ) + return + } + h.emitHookRun(hookCtx, asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeApplied, duration, rawPatch, nil, depth) + }, + }) + } +} + +func applyNoop[P any, R any](payload P, _ R) P { + return payload +} + +func matchSessionPreCreate(matcher HookMatcher, payload SessionPreCreatePayload) bool { + return matcher.MatchesSession(payload.SessionContext) +} + +func matchSessionLifecycle(matcher HookMatcher, payload SessionLifecyclePayload) bool { + return matcher.MatchesSession(payload.SessionContext) +} + +func matchInputPreSubmit(matcher HookMatcher, payload InputPreSubmitPayload) bool { + return matcher.MatchesInput(payload) +} + +func matchPrompt(matcher HookMatcher, payload PromptPayload) bool { + return matcher.MatchesPrompt(payload) +} + +func matchEventRecord(matcher HookMatcher, payload EventRecordPayload) bool { + return matcher.MatchesEvent(payload) +} + +func matchAgentPreStart(matcher HookMatcher, payload AgentPreStartPayload) bool { + return matcher.MatchesAgentPreStart(payload) +} + +func matchAgentLifecycle(matcher HookMatcher, payload AgentLifecyclePayload) bool { + return 
matcher.MatchesAgentLifecycle(payload) +} + +func matchTurn(matcher HookMatcher, payload TurnPayload) bool { + return matcher.MatchesTurn(payload) +} + +func matchMessage(matcher HookMatcher, payload MessagePayload) bool { + return matcher.MatchesMessage(payload) +} + +func matchToolPreCall(matcher HookMatcher, payload ToolPreCallPayload) bool { + return matcher.MatchesToolPreCall(payload) +} + +func matchToolPostCall(matcher HookMatcher, payload ToolPostCallPayload) bool { + return matcher.MatchesToolPostCall(payload) +} + +func matchToolPostError(matcher HookMatcher, payload ToolPostErrorPayload) bool { + return matcher.MatchesToolPostError(payload) +} + +func matchPermissionRequest(matcher HookMatcher, payload PermissionRequestPayload) bool { + return matcher.MatchesPermissionRequest(payload) +} + +func matchPermissionResolution(matcher HookMatcher, payload PermissionResolutionPayload) bool { + return matcher.MatchesPermissionResolution(payload) +} + +func matchContextCompact(matcher HookMatcher, payload ContextCompactPayload) bool { + return matcher.MatchesContextCompact(payload) +} + +func applySessionContextPatch(payload SessionContext, patch SessionCreatePatch) SessionContext { + if patch.SessionName != nil { + payload.SessionName = *patch.SessionName + } + if patch.SessionType != nil { + payload.SessionType = *patch.SessionType + } + if patch.AgentName != nil { + payload.AgentName = *patch.AgentName + } + if patch.WorkspaceID != nil { + payload.WorkspaceID = *patch.WorkspaceID + } + if patch.Workspace != nil { + payload.Workspace = *patch.Workspace + } + return payload +} + +func applySessionCreatePatch(payload SessionPreCreatePayload, patch SessionCreatePatch) SessionPreCreatePayload { + payload.SessionContext = applySessionContextPatch(payload.SessionContext, patch) + return payload +} + +func applySessionLifecyclePatch(payload SessionLifecyclePayload, patch SessionCreatePatch) SessionLifecyclePayload { + payload.SessionContext = 
applySessionContextPatch(payload.SessionContext, patch) + return payload +} + +func applyInputPreSubmitPatch(payload InputPreSubmitPayload, patch InputPreSubmitPatch) InputPreSubmitPayload { + if patch.Message != nil { + payload.Message = *patch.Message + } + if patch.ContextBlocks != nil { + payload.ContextBlocks = cloneContextBlocks(patch.ContextBlocks) + } + return payload +} + +func applyPromptPatch(payload PromptPayload, patch PromptPatch) PromptPayload { + if patch.Prompt != nil { + payload.Prompt = *patch.Prompt + } + if patch.ContextBlocks != nil { + payload.ContextBlocks = cloneContextBlocks(patch.ContextBlocks) + } + return payload +} + +func applyAgentStartPatch(payload AgentPreStartPayload, patch AgentStartPatch) AgentPreStartPayload { + if patch.Command != nil { + payload.Command = *patch.Command + } + if patch.Args != nil { + payload.Args = append([]string(nil), patch.Args...) + } + if patch.Cwd != nil { + payload.Cwd = *patch.Cwd + } + return payload +} + +func applyMessagePatch(payload MessagePayload, patch MessagePatch) MessagePayload { + if patch.Role != nil { + payload.Role = *patch.Role + } + if patch.DeltaType != nil { + payload.DeltaType = *patch.DeltaType + } + if patch.Text != nil { + payload.Text = *patch.Text + } + return payload +} + +func applyToolCallPatch(payload ToolPreCallPayload, patch ToolCallPatch) ToolPreCallPayload { + if patch.ToolName != nil { + payload.ToolName = *patch.ToolName + } + if patch.ToolNamespace != nil { + payload.ToolNamespace = *patch.ToolNamespace + } + if patch.ReadOnly != nil { + payload.ReadOnly = *patch.ReadOnly + } + if patch.ToolInput != nil { + payload.ToolInput = cloneRawMessage(patch.ToolInput) + } + return payload +} + +func applyToolResultPatch(payload ToolPostCallPayload, patch ToolResultPatch) ToolPostCallPayload { + if patch.Title != nil { + payload.Title = *patch.Title + } + if patch.ToolResult != nil { + payload.ToolResult = cloneRawMessage(patch.ToolResult) + } + return payload +} + +func 
applyToolPostErrorPatch(payload ToolPostErrorPayload, patch ToolPostErrorPatch) ToolPostErrorPayload { + if patch.Title != nil { + payload.Title = *patch.Title + } + if patch.Error != nil { + payload.Error = *patch.Error + } + return payload +} + +func mergePermissionRequestPatch(payload PermissionRequestPayload, patch PermissionRequestPatch) PermissionRequestPayload { + if patch.Decision != nil { + payload.Decision = *patch.Decision + } + if patch.Deny { + payload.Decision = "deny" + } + if patch.DecisionClass != nil { + payload.DecisionClass = *patch.DecisionClass + } + return payload +} + +func applyContextCompactionPatch(payload ContextCompactPayload, patch ContextCompactionPatch) ContextCompactPayload { + if patch.Reason != nil { + payload.Reason = *patch.Reason + } + if patch.Strategy != nil { + payload.Strategy = *patch.Strategy + } + if patch.ContextBlocks != nil { + payload.ContextBlocks = cloneContextBlocks(patch.ContextBlocks) + } + return payload +} + +func cloneContextBlocks(blocks []ContextBlock) []ContextBlock { + if blocks == nil { + return nil + } + + cloned := make([]ContextBlock, 0, len(blocks)) + for _, block := range blocks { + cloned = append(cloned, ContextBlock{ + Kind: block.Kind, + Text: block.Text, + Metadata: cloneStringMap(block.Metadata), + }) + } + return cloned +} + +func cloneRawMessage(payload []byte) []byte { + if payload == nil { + return nil + } + + return append([]byte(nil), payload...) 
+} + +func sessionCreatePatchDenied(patch SessionCreatePatch) bool { + return patch.Deny +} + +func inputPreSubmitPatchDenied(patch InputPreSubmitPatch) bool { + return patch.Deny +} + +func promptPatchDenied(patch PromptPatch) bool { + return patch.Deny +} + +func agentStartPatchDenied(patch AgentStartPatch) bool { + return patch.Deny +} + +func turnPatchDenied(patch TurnPatch) bool { + return patch.Deny +} + +func messagePatchDenied(patch MessagePatch) bool { + return patch.Deny +} + +func toolCallPatchDenied(patch ToolCallPatch) bool { + return patch.Deny +} + +func toolResultPatchDenied(patch ToolResultPatch) bool { + return patch.Deny +} + +func contextCompactionPatchDenied(patch ContextCompactionPatch) bool { + return patch.Deny +} diff --git a/internal/hooks/dispatch_integration_test.go b/internal/hooks/dispatch_integration_test.go new file mode 100644 index 000000000..dea993f91 --- /dev/null +++ b/internal/hooks/dispatch_integration_test.go @@ -0,0 +1,89 @@ +//go:build integration + +package hooks + +import ( + "context" + "testing" +) + +func TestDispatchInputPreSubmitOrdersNativeBeforeSubprocess(t *testing.T) { + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "native-prefix", + Event: HookInputPreSubmit, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }, + }), + WithSkillDeclarations([]HookDecl{ + { + Name: "skill-shell", + Event: HookInputPreSubmit, + Mode: HookModeSync, + Command: "/bin/sh", + Args: []string{"-c", "payload=$(cat); if printf '%s' \"$payload\" | grep -q 'native'; then printf '{\"message\":\"native-shell\"}'; else printf '{\"message\":\"wrong-order\"}'; fi"}, + SkillSource: HookSkillSourceUser, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "native-prefix": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, payload InputPreSubmitPayload) (InputPreSubmitPatch, error) { + msg := payload.Message + "native" + return InputPreSubmitPatch{Message: &msg}, nil + 
}), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + result, err := hooks.DispatchInputPreSubmit(t.Context(), InputPreSubmitPayload{ + PayloadBase: PayloadBase{Event: HookInputPreSubmit}, + Message: "", + }) + if err != nil { + t.Fatalf("DispatchInputPreSubmit() error = %v, want nil", err) + } + if result.Message != "native-shell" { + t.Fatalf("result.Message = %q, want %q", result.Message, "native-shell") + } +} + +func TestDispatchPermissionRequestBlocksEscalationFromSubprocess(t *testing.T) { + hooks := newTestHooks( + t, + WithSkillDeclarations([]HookDecl{{ + Name: "permission-escalation", + Event: HookPermissionRequest, + Mode: HookModeSync, + Command: "/bin/sh", + Args: []string{"-c", "printf '{\"decision\":\"allow-once\",\"decision_class\":\"patched\"}'"}, + SkillSource: HookSkillSourceUser, + }}), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + result, err := hooks.DispatchPermissionRequest(t.Context(), PermissionRequestPayload{ + PayloadBase: PayloadBase{Event: HookPermissionRequest}, + RequestID: "req-1", + Action: "session/request_permission", + Resource: "/tmp/secret.txt", + Decision: "reject-once", + DecisionClass: "interactive", + }) + if err != nil { + t.Fatalf("DispatchPermissionRequest() error = %v, want nil", err) + } + if result.Decision != "reject-once" { + t.Fatalf("result.Decision = %q, want %q", result.Decision, "reject-once") + } + if result.DecisionClass != "interactive" { + t.Fatalf("result.DecisionClass = %q, want %q", result.DecisionClass, "interactive") + } +} diff --git a/internal/hooks/doc.go b/internal/hooks/doc.go new file mode 100644 index 000000000..64d5b709e --- /dev/null +++ b/internal/hooks/doc.go @@ -0,0 +1,3 @@ +// Package hooks defines the dependency-free core types for the AGH lifecycle +// hooks platform. 
+package hooks diff --git a/internal/hooks/events.go b/internal/hooks/events.go new file mode 100644 index 000000000..e02d10c66 --- /dev/null +++ b/internal/hooks/events.go @@ -0,0 +1,214 @@ +package hooks + +import "fmt" + +// HookEventFamily groups hook events into the documented taxonomy families. +type HookEventFamily string + +const ( + HookEventFamilySession HookEventFamily = "session" + HookEventFamilyInput HookEventFamily = "input" + HookEventFamilyPrompt HookEventFamily = "prompt" + HookEventFamilyEvent HookEventFamily = "event" + HookEventFamilyAgent HookEventFamily = "agent" + HookEventFamilyTurn HookEventFamily = "turn" + HookEventFamilyMessage HookEventFamily = "message" + HookEventFamilyTool HookEventFamily = "tool" + HookEventFamilyPermission HookEventFamily = "permission" + HookEventFamilyContext HookEventFamily = "context" +) + +// Validate ensures the event family is part of the supported taxonomy. +func (f HookEventFamily) Validate() error { + switch f { + case HookEventFamilySession, + HookEventFamilyInput, + HookEventFamilyPrompt, + HookEventFamilyEvent, + HookEventFamilyAgent, + HookEventFamilyTurn, + HookEventFamilyMessage, + HookEventFamilyTool, + HookEventFamilyPermission, + HookEventFamilyContext: + return nil + default: + return fmt.Errorf("hooks: invalid hook event family %q", f) + } +} + +// HookEvent identifies when a hook fires. 
+type HookEvent string + +const ( + HookSessionPreCreate HookEvent = "session.pre_create" + HookSessionPostCreate HookEvent = "session.post_create" + HookSessionPreResume HookEvent = "session.pre_resume" + HookSessionPostResume HookEvent = "session.post_resume" + HookSessionPreStop HookEvent = "session.pre_stop" + HookSessionPostStop HookEvent = "session.post_stop" + + HookInputPreSubmit HookEvent = "input.pre_submit" + + HookPromptPostAssemble HookEvent = "prompt.post_assemble" + + HookEventPreRecord HookEvent = "event.pre_record" + HookEventPostRecord HookEvent = "event.post_record" + + HookAgentPreStart HookEvent = "agent.pre_start" + HookAgentSpawned HookEvent = "agent.spawned" + HookAgentCrashed HookEvent = "agent.crashed" + HookAgentStopped HookEvent = "agent.stopped" + + HookTurnStart HookEvent = "turn.start" + HookTurnEnd HookEvent = "turn.end" + + HookMessageStart HookEvent = "message.start" + HookMessageDelta HookEvent = "message.delta" + HookMessageEnd HookEvent = "message.end" + + HookToolPreCall HookEvent = "tool.pre_call" + HookToolPostCall HookEvent = "tool.post_call" + HookToolPostError HookEvent = "tool.post_error" + + HookPermissionRequest HookEvent = "permission.request" + HookPermissionResolved HookEvent = "permission.resolved" + HookPermissionDenied HookEvent = "permission.denied" + + HookContextPreCompact HookEvent = "context.pre_compact" + HookContextPostCompact HookEvent = "context.post_compact" +) + +type hookEventSpec struct { + family HookEventFamily + syncEligible bool +} + +var hookEventSpecs = map[HookEvent]hookEventSpec{ + HookSessionPreCreate: {family: HookEventFamilySession, syncEligible: true}, + HookSessionPostCreate: {family: HookEventFamilySession, syncEligible: true}, + HookSessionPreResume: {family: HookEventFamilySession, syncEligible: true}, + HookSessionPostResume: {family: HookEventFamilySession, syncEligible: true}, + HookSessionPreStop: {family: HookEventFamilySession, syncEligible: true}, + HookSessionPostStop: {family: 
HookEventFamilySession, syncEligible: true}, + HookInputPreSubmit: {family: HookEventFamilyInput, syncEligible: true}, + HookPromptPostAssemble: { + family: HookEventFamilyPrompt, + syncEligible: true, + }, + HookEventPreRecord: {family: HookEventFamilyEvent, syncEligible: false}, + HookEventPostRecord: {family: HookEventFamilyEvent, syncEligible: false}, + HookAgentPreStart: {family: HookEventFamilyAgent, syncEligible: true}, + HookAgentSpawned: {family: HookEventFamilyAgent, syncEligible: true}, + HookAgentCrashed: {family: HookEventFamilyAgent, syncEligible: true}, + HookAgentStopped: {family: HookEventFamilyAgent, syncEligible: true}, + HookTurnStart: {family: HookEventFamilyTurn, syncEligible: true}, + HookTurnEnd: {family: HookEventFamilyTurn, syncEligible: true}, + HookMessageStart: {family: HookEventFamilyMessage, syncEligible: true}, + HookMessageDelta: {family: HookEventFamilyMessage, syncEligible: false}, + HookMessageEnd: {family: HookEventFamilyMessage, syncEligible: true}, + HookToolPreCall: {family: HookEventFamilyTool, syncEligible: true}, + HookToolPostCall: {family: HookEventFamilyTool, syncEligible: true}, + HookToolPostError: {family: HookEventFamilyTool, syncEligible: true}, + HookPermissionRequest: { + family: HookEventFamilyPermission, + syncEligible: true, + }, + HookPermissionResolved: { + family: HookEventFamilyPermission, + syncEligible: false, + }, + HookPermissionDenied: { + family: HookEventFamilyPermission, + syncEligible: false, + }, + HookContextPreCompact: {family: HookEventFamilyContext, syncEligible: true}, + HookContextPostCompact: {family: HookEventFamilyContext, syncEligible: true}, +} + +var allHookEvents = []HookEvent{ + HookSessionPreCreate, + HookSessionPostCreate, + HookSessionPreResume, + HookSessionPostResume, + HookSessionPreStop, + HookSessionPostStop, + HookInputPreSubmit, + HookPromptPostAssemble, + HookEventPreRecord, + HookEventPostRecord, + HookAgentPreStart, + HookAgentSpawned, + HookAgentCrashed, + 
HookAgentStopped, + HookTurnStart, + HookTurnEnd, + HookMessageStart, + HookMessageDelta, + HookMessageEnd, + HookToolPreCall, + HookToolPostCall, + HookToolPostError, + HookPermissionRequest, + HookPermissionResolved, + HookPermissionDenied, + HookContextPreCompact, + HookContextPostCompact, +} + +func init() { + if err := validateHookEventSpecsConsistency(); err != nil { + panic(err) + } +} + +// AllHookEvents returns the full taxonomy in deterministic order. +func AllHookEvents() []HookEvent { + events := make([]HookEvent, len(allHookEvents)) + copy(events, allHookEvents) + return events +} + +// String returns the literal hook event value. +func (e HookEvent) String() string { + return string(e) +} + +// Family reports the taxonomy family for the event. +func (e HookEvent) Family() HookEventFamily { + spec, ok := hookEventSpecs[e] + if !ok { + return "" + } + return spec.family +} + +// SyncEligible reports whether the event accepts sync hooks. +func (e HookEvent) SyncEligible() bool { + spec, ok := hookEventSpecs[e] + return ok && spec.syncEligible +} + +// Validate ensures the event is part of the supported taxonomy. 
+func (e HookEvent) Validate() error { + if _, ok := hookEventSpecs[e]; !ok { + return fmt.Errorf("hooks: invalid hook event %q", e) + } + return nil +} + +func validateHookEventSpecsConsistency() error { + eventsFromList := make(map[HookEvent]struct{}, len(allHookEvents)) + for _, event := range allHookEvents { + eventsFromList[event] = struct{}{} + if _, ok := hookEventSpecs[event]; !ok { + return fmt.Errorf("hooks: event %q exists in allHookEvents but is missing from hookEventSpecs", event) + } + } + for event := range hookEventSpecs { + if _, ok := eventsFromList[event]; !ok { + return fmt.Errorf("hooks: event %q exists in hookEventSpecs but is missing from allHookEvents", event) + } + } + return nil +} diff --git a/internal/hooks/events_test.go b/internal/hooks/events_test.go new file mode 100644 index 000000000..b903a3626 --- /dev/null +++ b/internal/hooks/events_test.go @@ -0,0 +1,75 @@ +package hooks + +import "testing" + +const expectedHookEventCount = 27 + +func TestAllHookEvents(t *testing.T) { + t.Parallel() + + events := AllHookEvents() + // Assert the exact count so accidental taxonomy additions/removals are caught explicitly. 
+ if len(events) != expectedHookEventCount { + t.Fatalf("len(AllHookEvents()) = %d, want %d", len(events), expectedHookEventCount) + } + + seen := make(map[HookEvent]struct{}, len(events)) + for _, event := range events { + if event == "" { + t.Fatal("AllHookEvents() contains an empty event") + } + if err := event.Validate(); err != nil { + t.Fatalf("event.Validate() error = %v", err) + } + if _, ok := seen[event]; ok { + t.Fatalf("AllHookEvents() contains duplicate event %q", event) + } + seen[event] = struct{}{} + } +} + +func TestSyncEligibleClassification(t *testing.T) { + t.Parallel() + + asyncOnly := map[HookEvent]struct{}{ + HookMessageDelta: {}, + HookEventPreRecord: {}, + HookEventPostRecord: {}, + HookPermissionResolved: {}, + HookPermissionDenied: {}, + } + + if !HookSessionPreCreate.SyncEligible() { + t.Fatal("HookSessionPreCreate.SyncEligible() = false, want true") + } + if HookMessageDelta.SyncEligible() { + t.Fatal("HookMessageDelta.SyncEligible() = true, want false") + } + + for _, event := range AllHookEvents() { + _, wantAsyncOnly := asyncOnly[event] + got := event.SyncEligible() + if wantAsyncOnly && got { + t.Fatalf("%s.SyncEligible() = true, want false", event) + } + if !wantAsyncOnly && !got { + t.Fatalf("%s.SyncEligible() = false, want true", event) + } + } +} + +func TestHookEventFamilyAndInvalidValidation(t *testing.T) { + t.Parallel() + + if got := HookToolPostCall.Family(); got != HookEventFamilyTool { + t.Fatalf("HookToolPostCall.Family() = %q, want %q", got, HookEventFamilyTool) + } + + var invalid HookEvent = "nope.invalid" + if got := invalid.Family(); got != "" { + t.Fatalf("invalid.Family() = %q, want empty string", got) + } + if err := invalid.Validate(); err == nil { + t.Fatal("invalid.Validate() error = nil, want non-nil") + } +} diff --git a/internal/hooks/executor.go b/internal/hooks/executor.go new file mode 100644 index 000000000..30b3419d5 --- /dev/null +++ b/internal/hooks/executor.go @@ -0,0 +1,45 @@ +package hooks + 
+import ( + "context" + "errors" + "fmt" +) + +var ( + // ErrNotImplemented reports that the requested executor seam is reserved for + // future work. + ErrNotImplemented = errors.New("hooks: executor not implemented") + // ErrNativeCallbackRequired reports that a native executor lacks a callback. + ErrNativeCallbackRequired = errors.New("hooks: native executor callback is required") + // ErrSubprocessCommandRequired reports that a subprocess executor lacks a + // command to start. + ErrSubprocessCommandRequired = errors.New("hooks: subprocess executor command is required") + // ErrInvalidHookExecutorKind reports an unsupported executor kind. + ErrInvalidHookExecutorKind = errors.New("hooks: invalid hook executor kind") +) + +// HookExecutorKind identifies the execution boundary for a hook. +type HookExecutorKind string + +const ( + HookExecutorNative HookExecutorKind = "native" + HookExecutorSubprocess HookExecutorKind = "subprocess" + HookExecutorWASM HookExecutorKind = "wasm" +) + +// Validate ensures the executor kind is supported. +func (k HookExecutorKind) Validate() error { + switch k { + case HookExecutorNative, HookExecutorSubprocess, HookExecutorWASM: + return nil + default: + return fmt.Errorf("hooks: invalid hook executor kind %q: %w", k, ErrInvalidHookExecutorKind) + } +} + +// Executor is the execution seam for hook implementations. +type Executor interface { + Kind() HookExecutorKind + Execute(ctx context.Context, hook RegisteredHook, payload []byte) ([]byte, error) +} diff --git a/internal/hooks/executor_native.go b/internal/hooks/executor_native.go new file mode 100644 index 000000000..f47dab15c --- /dev/null +++ b/internal/hooks/executor_native.go @@ -0,0 +1,87 @@ +package hooks + +import ( + "context" + "fmt" +) + +// NativeHookFunc executes an in-process hook without crossing a subprocess +// boundary. 
+type NativeHookFunc func(ctx context.Context, hook RegisteredHook, payload []byte) ([]byte, error) + +// NativeExecutor runs hooks as direct Go callbacks. +type NativeExecutor struct { + callback NativeHookFunc +} + +// TypedNativeHookFunc executes a typed in-process hook without crossing a +// serialization boundary. +type TypedNativeHookFunc[P any, R any] func(ctx context.Context, hook RegisteredHook, payload P) (R, error) + +// TypedNativeExecutor runs hooks as direct Go callbacks on typed payloads. +type TypedNativeExecutor[P any, R any] struct { + callback TypedNativeHookFunc[P, R] +} + +// NewNativeExecutor constructs a NativeExecutor bound to one callback. +func NewNativeExecutor(callback NativeHookFunc) *NativeExecutor { + return &NativeExecutor{callback: callback} +} + +// NewTypedNativeExecutor constructs a typed native executor for pipeline use. +func NewTypedNativeExecutor[P any, R any](callback TypedNativeHookFunc[P, R]) *TypedNativeExecutor[P, R] { + return &TypedNativeExecutor[P, R]{callback: callback} +} + +// Kind returns the executor type. +func (*NativeExecutor) Kind() HookExecutorKind { + return HookExecutorNative +} + +// Kind returns the executor type. +func (*TypedNativeExecutor[P, R]) Kind() HookExecutorKind { + return HookExecutorNative +} + +// Execute invokes the configured Go callback directly. +func (e *NativeExecutor) Execute(ctx context.Context, hook RegisteredHook, payload []byte) (result []byte, err error) { + if e == nil || e.callback == nil { + return nil, fmt.Errorf("hooks: hook %q: %w", hook.Name, ErrNativeCallbackRequired) + } + + defer func() { + if recovered := recover(); recovered != nil { + err = fmt.Errorf("hooks: hook %q native callback panic: %v", hook.Name, recovered) + result = nil + } + }() + + return e.callback(ctx, hook, payload) +} + +// Execute preserves the Executor contract but the typed native path is expected +// to be invoked through pipeline.execute. 
+func (e *TypedNativeExecutor[P, R]) Execute(_ context.Context, hook RegisteredHook, _ []byte) ([]byte, error) { + if e == nil || e.callback == nil { + return nil, fmt.Errorf("hooks: hook %q: %w", hook.Name, ErrNativeCallbackRequired) + } + + return nil, fmt.Errorf("hooks: hook %q typed native executor must be invoked through pipeline", hook.Name) +} + +// ExecuteTyped invokes the configured typed Go callback directly. +func (e *TypedNativeExecutor[P, R]) ExecuteTyped(ctx context.Context, hook RegisteredHook, payload P) (result R, err error) { + if e == nil || e.callback == nil { + return result, fmt.Errorf("hooks: hook %q: %w", hook.Name, ErrNativeCallbackRequired) + } + + defer func() { + if recovered := recover(); recovered != nil { + err = fmt.Errorf("hooks: hook %q native typed callback panic: %v", hook.Name, recovered) + var zero R + result = zero + } + }() + + return e.callback(ctx, hook, payload) +} diff --git a/internal/hooks/executor_subprocess.go b/internal/hooks/executor_subprocess.go new file mode 100644 index 000000000..d3d3528a3 --- /dev/null +++ b/internal/hooks/executor_subprocess.go @@ -0,0 +1,255 @@ +package hooks + +import ( + "bytes" + "context" + "errors" + "fmt" + "os" + "os/exec" + "sort" + "strings" + "time" +) + +const ( + defaultSubprocessHookTimeout = 5 * time.Second + subprocessCaptureLimitBytes = 8 * 1024 + subprocessCaptureTruncate = "...[truncated]" + subprocessShutdownGrace = 250 * time.Millisecond +) + +var subprocessEnvAllowlist = []string{ + "COMSPEC", + "HOME", + "LANG", + "LC_ALL", + "LC_CTYPE", + "LOGNAME", + "PATH", + "PATHEXT", + "SHELL", + "SYSTEMROOT", + "TEMP", + "TERM", + "TMP", + "TMPDIR", + "USER", + "USERPROFILE", +} + +// SubprocessExecutorOption mutates a subprocess executor during construction. +type SubprocessExecutorOption func(*SubprocessExecutor) + +// WithSubprocessDir configures the working directory for a subprocess hook. 
+func WithSubprocessDir(dir string) SubprocessExecutorOption { + return func(executor *SubprocessExecutor) { + executor.dir = strings.TrimSpace(dir) + } +} + +// WithSubprocessEnv configures the explicit environment overrides for a hook. +func WithSubprocessEnv(env map[string]string) SubprocessExecutorOption { + return func(executor *SubprocessExecutor) { + executor.env = cloneStringMap(env) + } +} + +// SubprocessExecutor runs hooks through a local shell command boundary. +type SubprocessExecutor struct { + command string + args []string + dir string + env map[string]string +} + +var _ Executor = (*SubprocessExecutor)(nil) + +// NewSubprocessExecutor constructs a subprocess-backed executor. +func NewSubprocessExecutor(command string, args []string, opts ...SubprocessExecutorOption) *SubprocessExecutor { + executor := &SubprocessExecutor{ + command: strings.TrimSpace(command), + args: append([]string(nil), args...), + } + for _, opt := range opts { + if opt != nil { + opt(executor) + } + } + return executor +} + +// Kind returns the executor type. +func (*SubprocessExecutor) Kind() HookExecutorKind { + return HookExecutorSubprocess +} + +// Execute runs the configured command with the JSON payload on stdin and +// returns captured stdout. +func (e *SubprocessExecutor) Execute(ctx context.Context, hook RegisteredHook, payload []byte) ([]byte, error) { + if e == nil || e.command == "" { + return nil, fmt.Errorf("hooks: hook %q: %w", hook.Name, ErrSubprocessCommandRequired) + } + + timeout := subprocessHookTimeout(hook.Timeout) + hookCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + cmd := exec.Command(e.command, e.args...) 
+ configureSubprocessCommand(cmd) + cmd.Dir = e.dir + cmd.Env = subprocessProcessEnv(e.env) + cmd.Stdin = bytes.NewReader(payload) + + stdout := newLimitedSubprocessCapture() + stderr := newLimitedSubprocessCapture() + cmd.Stdout = stdout + cmd.Stderr = stderr + + err := runSubprocessCommand(hookCtx, cmd) + output := []byte(stdout.String()) + if err != nil { + return output, subprocessRunError(hookCtx, timeout, err, stderr) + } + + return output, nil +} + +func subprocessHookTimeout(timeout time.Duration) time.Duration { + if timeout > 0 { + return timeout + } + + return defaultSubprocessHookTimeout +} + +func runSubprocessCommand(ctx context.Context, cmd *exec.Cmd) error { + if err := cmd.Start(); err != nil { + return err + } + + waitCh := make(chan error, 1) + go func() { + waitCh <- cmd.Wait() + }() + + select { + case err := <-waitCh: + return err + case <-ctx.Done(): + _ = terminateSubprocessCommand(cmd) // best-effort cleanup; the process may already be gone + timer := time.NewTimer(subprocessShutdownGrace) + defer timer.Stop() + + select { + case err := <-waitCh: + return err + case <-timer.C: + _ = killSubprocessCommand(cmd) // best-effort cleanup; the process may already be gone + return <-waitCh + } + } +} + +func subprocessProcessEnv(env map[string]string) []string { + merged := make(map[string]string, len(subprocessEnvAllowlist)+len(env)) + for _, key := range subprocessEnvAllowlist { + if value, ok := os.LookupEnv(key); ok { + merged[key] = value + } + } + for key, value := range env { + merged[key] = value + } + + keys := make([]string, 0, len(merged)) + for key := range merged { + keys = append(keys, key) + } + sort.Strings(keys) + + values := make([]string, 0, len(keys)) + for _, key := range keys { + values = append(values, key+"="+merged[key]) + } + + return values +} + +func subprocessRunError(ctx context.Context, timeout time.Duration, err error, stderr *limitedSubprocessCapture) error { + if errors.Is(ctx.Err(), context.DeadlineExceeded) { + 
return fmt.Errorf("hook timed out after %s: %w", timeout, ctx.Err()) + } + if errors.Is(ctx.Err(), context.Canceled) { + return fmt.Errorf("hook canceled: %w", ctx.Err()) + } + if stderr == nil || stderr.Len() == 0 { + return fmt.Errorf("hook command failed: %w", err) + } + + return fmt.Errorf("hook command failed: %w (%s)", err, subprocessCaptureSummary(stderr)) +} + +type limitedSubprocessCapture struct { + buf bytes.Buffer + truncated bool +} + +func newLimitedSubprocessCapture() *limitedSubprocessCapture { + return &limitedSubprocessCapture{} +} + +func (c *limitedSubprocessCapture) Write(payload []byte) (int, error) { + if c == nil { + return len(payload), nil + } + + remaining := subprocessCaptureLimitBytes - c.buf.Len() + switch { + case remaining <= 0: + c.truncated = true + case len(payload) > remaining: + _, _ = c.buf.Write(payload[:remaining]) + c.truncated = true + default: + _, _ = c.buf.Write(payload) + } + + return len(payload), nil +} + +func (c *limitedSubprocessCapture) String() string { + if c == nil { + return "" + } + + value := c.buf.String() + if !c.truncated { + return value + } + + return value + subprocessCaptureTruncate +} + +func (c *limitedSubprocessCapture) Len() int { + if c == nil { + return 0 + } + + return c.buf.Len() +} + +func (c *limitedSubprocessCapture) Truncated() bool { + return c != nil && c.truncated +} + +func subprocessCaptureSummary(capture *limitedSubprocessCapture) string { + if capture == nil || capture.Len() == 0 { + return "redacted output (0 bytes)" + } + if capture.Truncated() { + return fmt.Sprintf("redacted output (%d+ bytes, truncated)", capture.Len()) + } + + return fmt.Sprintf("redacted output (%d bytes)", capture.Len()) +} diff --git a/internal/hooks/executor_subprocess_unix.go b/internal/hooks/executor_subprocess_unix.go new file mode 100644 index 000000000..79c599f8e --- /dev/null +++ b/internal/hooks/executor_subprocess_unix.go @@ -0,0 +1,41 @@ +//go:build !windows + +package hooks + +import ( + "errors" 
+ "fmt" + "os/exec" + "syscall" +) + +func configureSubprocessCommand(cmd *exec.Cmd) { + if cmd == nil { + return + } + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.SysProcAttr.Setpgid = true +} + +func terminateSubprocessCommand(cmd *exec.Cmd) error { + return signalSubprocessCommand(cmd, syscall.SIGTERM) +} + +func killSubprocessCommand(cmd *exec.Cmd) error { + return signalSubprocessCommand(cmd, syscall.SIGKILL) +} + +func signalSubprocessCommand(cmd *exec.Cmd, sig syscall.Signal) error { + if cmd == nil || cmd.Process == nil || cmd.Process.Pid <= 0 { + return nil + } + if err := syscall.Kill(-cmd.Process.Pid, sig); err != nil { + if errors.Is(err, syscall.ESRCH) { + return nil + } + return fmt.Errorf("kill process group (pid %d, sig %v): %w", cmd.Process.Pid, sig, err) + } + return nil +} diff --git a/internal/hooks/executor_subprocess_unix_test.go b/internal/hooks/executor_subprocess_unix_test.go new file mode 100644 index 000000000..6e5c18e94 --- /dev/null +++ b/internal/hooks/executor_subprocess_unix_test.go @@ -0,0 +1,101 @@ +//go:build !windows + +package hooks + +import ( + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + "testing" + "time" +) + +func TestSubprocessExecutorExecuteGracefulShutdownSignalsBeforeKill(t *testing.T) { + t.Parallel() + + signalFile := filepath.Join(t.TempDir(), "signal.txt") + scriptPath := filepath.Join(t.TempDir(), "trap-term.sh") + script := strings.Join([]string{ + "#!/bin/sh", + "set -eu", + "trap 'printf term > \"$HOOK_SIGNAL_FILE\"; while :; do :; done' TERM", + "while :; do :; done", + }, "\n") + if err := os.WriteFile(scriptPath, []byte(script), 0o755); err != nil { + t.Fatalf("WriteFile(%q) error = %v", scriptPath, err) + } + + executor := NewSubprocessExecutor( + "/bin/sh", + []string{scriptPath}, + WithSubprocessEnv(map[string]string{"HOOK_SIGNAL_FILE": signalFile}), + ) + + _, err := executor.Execute(t.Context(), RegisteredHook{ + Name: "graceful-timeout-hook", + 
Timeout: 120 * time.Millisecond, + }, nil) + if err == nil { + t.Fatal("Execute() error = nil, want timeout error") + } + + signalBytes, readErr := os.ReadFile(signalFile) + if readErr != nil { + t.Fatalf("ReadFile(%q) error = %v", signalFile, readErr) + } + if got := string(signalBytes); got != "term" { + t.Fatalf("signal file = %q, want %q", got, "term") + } +} + +func TestSubprocessExecutorExecuteKillsDescendantProcessesOnTimeout(t *testing.T) { + skillDir := t.TempDir() + pidFile := filepath.Join(skillDir, "child.pid") + scriptPath := filepath.Join(skillDir, "spawn-child.sh") + script := strings.Join([]string{ + "#!/bin/sh", + "set -eu", + "/bin/sh -c 'while :; do :; done' &", + "child=$!", + "printf '%s' \"$child\" > \"$HOOK_CHILD_PID_FILE\"", + "while :; do :; done", + }, "\n") + if err := os.WriteFile(scriptPath, []byte(script), 0o755); err != nil { + t.Fatalf("WriteFile(%q) error = %v", scriptPath, err) + } + + executor := NewSubprocessExecutor( + "/bin/sh", + []string{scriptPath}, + WithSubprocessEnv(map[string]string{"HOOK_CHILD_PID_FILE": pidFile}), + ) + + _, err := executor.Execute(t.Context(), RegisteredHook{ + Name: "descendant-cleanup-hook", + Timeout: 120 * time.Millisecond, + }, nil) + if err == nil { + t.Fatal("Execute() error = nil, want timeout error") + } + + pidBytes, readErr := os.ReadFile(pidFile) + if readErr != nil { + t.Fatalf("ReadFile(%q) error = %v", pidFile, readErr) + } + pid, atoiErr := strconv.Atoi(strings.TrimSpace(string(pidBytes))) + if atoiErr != nil { + t.Fatalf("Atoi(%q) error = %v", strings.TrimSpace(string(pidBytes)), atoiErr) + } + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if syscall.Kill(pid, 0) != nil { + return + } + time.Sleep(10 * time.Millisecond) + } + + t.Fatalf("child process %d still alive after subprocess timeout cleanup", pid) +} diff --git a/internal/hooks/executor_subprocess_windows.go b/internal/hooks/executor_subprocess_windows.go new file mode 100644 index 
000000000..271dc025e --- /dev/null +++ b/internal/hooks/executor_subprocess_windows.go @@ -0,0 +1,32 @@ +//go:build windows + +package hooks + +import ( + "errors" + "os" + "os/exec" +) + +func configureSubprocessCommand(_ *exec.Cmd) {} + +func terminateSubprocessCommand(cmd *exec.Cmd) error { + return signalSubprocessCommand(cmd, os.Kill) +} + +func killSubprocessCommand(cmd *exec.Cmd) error { + return signalSubprocessCommand(cmd, os.Kill) +} + +func signalSubprocessCommand(cmd *exec.Cmd, sig os.Signal) error { + if cmd == nil || cmd.Process == nil { + return nil + } + if err := cmd.Process.Signal(sig); err != nil { + if errors.Is(err, os.ErrProcessDone) { + return nil + } + return err + } + return nil +} diff --git a/internal/hooks/executor_test.go b/internal/hooks/executor_test.go new file mode 100644 index 000000000..27fa9c459 --- /dev/null +++ b/internal/hooks/executor_test.go @@ -0,0 +1,202 @@ +package hooks + +import ( + "context" + "errors" + "runtime" + "strings" + "testing" + "time" +) + +func TestNativeExecutorExecuteCallsCallback(t *testing.T) { + t.Parallel() + + var called bool + executor := NewNativeExecutor(func(ctx context.Context, hook RegisteredHook, payload []byte) ([]byte, error) { + called = true + if hook.Name != "native-hook" { + t.Fatalf("hook.Name = %q, want %q", hook.Name, "native-hook") + } + if got := string(payload); got != `{"value":"demo"}` { + t.Fatalf("payload = %q, want %q", got, `{"value":"demo"}`) + } + if ctx == nil { + t.Fatal("ctx = nil, want non-nil") + } + return []byte(`{"ok":true}`), nil + }) + + output, err := executor.Execute(t.Context(), RegisteredHook{Name: "native-hook"}, []byte(`{"value":"demo"}`)) + if err != nil { + t.Fatalf("Execute() error = %v, want nil", err) + } + if !called { + t.Fatal("native callback was not called") + } + if got := string(output); got != `{"ok":true}` { + t.Fatalf("output = %q, want %q", got, `{"ok":true}`) + } +} + +func TestNativeExecutorExecuteRecoversPanic(t *testing.T) { + 
t.Parallel() + + executor := NewNativeExecutor(func(context.Context, RegisteredHook, []byte) ([]byte, error) { + panic("boom") + }) + + output, err := executor.Execute(t.Context(), RegisteredHook{Name: "panic-hook"}, nil) + if err == nil { + t.Fatal("Execute() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "panic-hook") || !strings.Contains(err.Error(), "boom") { + t.Fatalf("Execute() error = %q, want panic detail", err) + } + if output != nil { + t.Fatalf("output = %q, want nil", string(output)) + } +} + +func TestSubprocessExecutorExecuteCapturesStdout(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("subprocess shell test requires POSIX shell") + } + + executor := NewSubprocessExecutor("/bin/sh", []string{"-c", "printf 'hello-from-hook'"}) + + output, err := executor.Execute(t.Context(), RegisteredHook{Name: "stdout-hook"}, nil) + if err != nil { + t.Fatalf("Execute() error = %v, want nil", err) + } + if got := string(output); got != "hello-from-hook" { + t.Fatalf("output = %q, want %q", got, "hello-from-hook") + } +} + +func TestSubprocessExecutorExecutePassesPayloadViaStdin(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("subprocess shell test requires POSIX shell") + } + + executor := NewSubprocessExecutor("/bin/sh", []string{"-c", "payload=$(cat); printf '%s' \"$payload\""}) + payload := []byte(`{"event":"session.post_create","session_id":"session-123"}`) + + output, err := executor.Execute(t.Context(), RegisteredHook{Name: "stdin-hook"}, payload) + if err != nil { + t.Fatalf("Execute() error = %v, want nil", err) + } + if string(output) != string(payload) { + t.Fatalf("output = %q, want %q", string(output), string(payload)) + } +} + +func TestSubprocessExecutorExecuteTimesOut(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("subprocess shell test requires POSIX shell") + } + + executor := NewSubprocessExecutor("/bin/sh", []string{"-c", "while :; do :; done"}) 
+ + started := time.Now() + _, err := executor.Execute(t.Context(), RegisteredHook{ + Name: "timeout-hook", + Timeout: 120 * time.Millisecond, + }, nil) + elapsed := time.Since(started) + if err == nil { + t.Fatal("Execute() error = nil, want timeout error") + } + if !strings.Contains(err.Error(), "timed out") { + t.Fatalf("Execute() error = %q, want timeout detail", err) + } + if elapsed > 2*time.Second { + t.Fatalf("Execute() elapsed = %s, want prompt timeout handling", elapsed) + } +} + +func TestSubprocessExecutorExecuteFiltersEnvironment(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("subprocess shell test requires POSIX shell") + } + + t.Setenv("HOOK_TEST_AMBIENT_SECRET", "ambient-secret") + executor := NewSubprocessExecutor( + "/bin/sh", + []string{"-c", `printf '%s|%s|%s' "${HOOK_TEST_AMBIENT_SECRET:-}" "${PATH:+present}" "${HOOK_CUSTOM_ENV:-}"`}, + WithSubprocessEnv(map[string]string{"HOOK_CUSTOM_ENV": "custom-value"}), + ) + + output, err := executor.Execute(t.Context(), RegisteredHook{Name: "env-hook"}, nil) + if err != nil { + t.Fatalf("Execute() error = %v, want nil", err) + } + if got := string(output); got != "|present|custom-value" { + t.Fatalf("output = %q, want %q", got, "|present|custom-value") + } +} + +func TestSubprocessExecutorExecuteCapturesStderrOnFailure(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("subprocess shell test requires POSIX shell") + } + + executor := NewSubprocessExecutor("/bin/sh", []string{"-c", "printf 'partial-stdout'; printf 'problem' >&2; exit 7"}) + + output, err := executor.Execute(t.Context(), RegisteredHook{Name: "stderr-hook"}, nil) + if err == nil { + t.Fatal("Execute() error = nil, want non-nil") + } + if got := string(output); got != "partial-stdout" { + t.Fatalf("output = %q, want %q", got, "partial-stdout") + } + if !strings.Contains(err.Error(), "hook command failed") || !strings.Contains(err.Error(), "redacted output") { + t.Fatalf("Execute() error = %q, want stderr 
summary detail", err) + } +} + +func TestSubprocessExecutorExecuteCapsCapturedOutput(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("subprocess shell test requires POSIX shell") + } + + executor := NewSubprocessExecutor( + "/bin/sh", + []string{"-c", "yes x | tr -d '\\n' | head -c 9000; yes y | tr -d '\\n' | head -c 9000 >&2; exit 7"}, + ) + + output, err := executor.Execute(t.Context(), RegisteredHook{Name: "truncate-hook"}, nil) + if err == nil { + t.Fatal("Execute() error = nil, want non-nil") + } + if !strings.Contains(string(output), subprocessCaptureTruncate) { + t.Fatalf("output = %q, want truncation marker", string(output)) + } + if !strings.Contains(err.Error(), "truncated") { + t.Fatalf("Execute() error = %q, want truncated stderr summary", err) + } +} + +func TestWasmExecutorExecuteReturnsErrNotImplemented(t *testing.T) { + t.Parallel() + + executor := &WasmExecutor{} + + output, err := executor.Execute(t.Context(), RegisteredHook{Name: "wasm-hook"}, nil) + if !errors.Is(err, ErrNotImplemented) { + t.Fatalf("Execute() error = %v, want ErrNotImplemented", err) + } + if output != nil { + t.Fatalf("output = %q, want nil", string(output)) + } +} diff --git a/internal/hooks/executor_wasm_stub.go b/internal/hooks/executor_wasm_stub.go new file mode 100644 index 000000000..23b4b5270 --- /dev/null +++ b/internal/hooks/executor_wasm_stub.go @@ -0,0 +1,18 @@ +package hooks + +import "context" + +// WasmExecutor is the future execution seam for sandboxed hook runtimes. +type WasmExecutor struct{} + +var _ Executor = (*WasmExecutor)(nil) + +// Kind returns the executor type. +func (*WasmExecutor) Kind() HookExecutorKind { + return HookExecutorWASM +} + +// Execute reports that the Wasm executor seam is not implemented yet. 
+func (*WasmExecutor) Execute(_ context.Context, _ RegisteredHook, _ []byte) ([]byte, error) { + return nil, ErrNotImplemented +} diff --git a/internal/hooks/hooks.go b/internal/hooks/hooks.go new file mode 100644 index 000000000..4de70b447 --- /dev/null +++ b/internal/hooks/hooks.go @@ -0,0 +1,471 @@ +package hooks + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log/slog" + "sync" + "sync/atomic" + "time" +) + +// Option customizes a Hooks dispatcher during construction. +type Option func(*Hooks) + +// DeclarationProvider returns the hook declarations for one source class. +type DeclarationProvider func(context.Context) ([]HookDecl, error) + +// Hooks owns the hot-reloadable registry snapshot and typed dispatch surface. +type Hooks struct { + mu sync.RWMutex + snapshot map[HookEvent][]*ResolvedHook + + pool *asyncPool + + version atomic.Int64 + fingerprint string + + logger *slog.Logger + now func() time.Time + resolveExecutor ExecutorResolver + telemetrySink TelemetrySink + metrics *hookMetrics + debugPatchAudit bool + + nativeProvider DeclarationProvider + configProvider DeclarationProvider + agentProvider DeclarationProvider + skillProvider DeclarationProvider + + asyncWorkerCount int + asyncQueueCapacity int + asyncDrainTimeout time.Duration +} + +type snapshotFingerprint struct { + Event HookEvent `json:"event"` + Hooks []resolvedHookFingerprint `json:"hooks"` +} + +type resolvedHookFingerprint struct { + Name string `json:"name"` + Event HookEvent `json:"event"` + Source HookSource `json:"source"` + Mode HookMode `json:"mode"` + Required bool `json:"required"` + Priority int `json:"priority"` + Timeout time.Duration `json:"timeout"` + Matcher HookMatcher `json:"matcher"` + Metadata map[string]string `json:"metadata"` + ExecutorKind HookExecutorKind `json:"executor_kind"` + Command string `json:"command"` + Args []string `json:"args"` + Env map[string]string `json:"env"` + SkillSource HookSkillSource `json:"skill_source"` +} + +// WithLogger 
injects the logger used for hook diagnostics. +func WithLogger(logger *slog.Logger) Option { + return func(hooks *Hooks) { + hooks.logger = logger + } +} + +// WithNow injects the clock used by notifier payload construction. +func WithNow(now func() time.Time) Option { + return func(hooks *Hooks) { + hooks.now = now + } +} + +// WithExecutorResolver injects the resolver used to bind declarations to +// executors during rebuild. +func WithExecutorResolver(resolve ExecutorResolver) Option { + return func(hooks *Hooks) { + hooks.resolveExecutor = resolve + } +} + +// WithTelemetrySink injects the persistence sink used when no active +// session-scoped writer is attached to the dispatch context. +func WithTelemetrySink(sink TelemetrySink) Option { + return func(hooks *Hooks) { + hooks.telemetrySink = sink + } +} + +// WithDebugPatchAudit enables patch capture for non-security hook families. +func WithDebugPatchAudit(enabled bool) Option { + return func(hooks *Hooks) { + hooks.debugPatchAudit = enabled + } +} + +// WithAsyncWorkerCount configures the async worker pool size. +func WithAsyncWorkerCount(count int) Option { + return func(hooks *Hooks) { + hooks.asyncWorkerCount = count + } +} + +// WithAsyncQueueCapacity configures the async worker pool queue depth. +func WithAsyncQueueCapacity(capacity int) Option { + return func(hooks *Hooks) { + hooks.asyncQueueCapacity = capacity + } +} + +// WithAsyncDrainTimeout configures the async pool shutdown deadline. +func WithAsyncDrainTimeout(timeout time.Duration) Option { + return func(hooks *Hooks) { + hooks.asyncDrainTimeout = timeout + } +} + +// WithNativeDeclarationProvider injects the native-hook declaration source. +func WithNativeDeclarationProvider(provider DeclarationProvider) Option { + return func(hooks *Hooks) { + hooks.nativeProvider = provider + } +} + +// WithConfigDeclarationProvider injects the config-hook declaration source. 
+func WithConfigDeclarationProvider(provider DeclarationProvider) Option { + return func(hooks *Hooks) { + hooks.configProvider = provider + } +} + +// WithAgentDeclarationProvider injects the agent-definition declaration source. +func WithAgentDeclarationProvider(provider DeclarationProvider) Option { + return func(hooks *Hooks) { + hooks.agentProvider = provider + } +} + +// WithSkillDeclarationProvider injects the skill-hook declaration source. +func WithSkillDeclarationProvider(provider DeclarationProvider) Option { + return func(hooks *Hooks) { + hooks.skillProvider = provider + } +} + +// WithNativeDeclarations injects a static native declaration set. +func WithNativeDeclarations(decls []HookDecl) Option { + return WithNativeDeclarationProvider(staticDeclarationProvider(decls)) +} + +// WithConfigDeclarations injects a static config declaration set. +func WithConfigDeclarations(decls []HookDecl) Option { + return WithConfigDeclarationProvider(staticDeclarationProvider(decls)) +} + +// WithAgentDeclarations injects a static agent-definition declaration set. +func WithAgentDeclarations(decls []HookDecl) Option { + return WithAgentDeclarationProvider(staticDeclarationProvider(decls)) +} + +// WithSkillDeclarations injects a static skill declaration set. +func WithSkillDeclarations(decls []HookDecl) Option { + return WithSkillDeclarationProvider(staticDeclarationProvider(decls)) +} + +// NewHooks constructs a hook dispatcher with an empty registry snapshot and a +// started async pool. 
+func NewHooks(opts ...Option) *Hooks { + hooks := &Hooks{ + snapshot: make(map[HookEvent][]*ResolvedHook), + logger: slog.Default(), + now: time.Now, + resolveExecutor: defaultExecutorResolver, + metrics: newHookMetrics(), + nativeProvider: emptyDeclarationProvider, + configProvider: emptyDeclarationProvider, + agentProvider: emptyDeclarationProvider, + skillProvider: emptyDeclarationProvider, + asyncWorkerCount: defaultAsyncWorkerCount, + asyncQueueCapacity: defaultAsyncQueueCapacity, + asyncDrainTimeout: defaultAsyncDrainTimeout, + } + + for _, opt := range opts { + if opt != nil { + opt(hooks) + } + } + + if hooks.logger == nil { + hooks.logger = slog.Default() + } + if hooks.now == nil { + hooks.now = time.Now + } + if hooks.resolveExecutor == nil { + hooks.resolveExecutor = defaultExecutorResolver + } + if hooks.metrics == nil { + hooks.metrics = newHookMetrics() + } + if hooks.nativeProvider == nil { + hooks.nativeProvider = emptyDeclarationProvider + } + if hooks.configProvider == nil { + hooks.configProvider = emptyDeclarationProvider + } + if hooks.agentProvider == nil { + hooks.agentProvider = emptyDeclarationProvider + } + if hooks.skillProvider == nil { + hooks.skillProvider = emptyDeclarationProvider + } + + hooks.pool = newAsyncPool(asyncPoolConfig{ + WorkerCount: hooks.asyncWorkerCount, + QueueCapacity: hooks.asyncQueueCapacity, + DrainTimeout: hooks.asyncDrainTimeout, + Logger: hooks.logger, + Metrics: hooks.metrics, + }) + hooks.pool.Start(context.Background()) + + return hooks +} + +// Version returns the current registry snapshot version. +func (h *Hooks) Version() int64 { + if h == nil { + return 0 + } + + return h.version.Load() +} + +// Close drains the async worker pool. +func (h *Hooks) Close() { + if h == nil || h.pool == nil { + return + } + + h.pool.Close() +} + +// Rebuild reloads all declaration sources, validates the full snapshot, and +// swaps it atomically when the semantic contents changed. 
+func (h *Hooks) Rebuild(ctx context.Context) error { + if h == nil { + return errors.New("hooks: dispatcher is required") + } + if ctx == nil { + return errors.New("hooks: rebuild context is required") + } + + decls, err := h.collectDeclarations(ctx) + if err != nil { + return err + } + + resolved, err := NormalizeHookDecls(decls, h.resolveExecutor) + if err != nil { + return err + } + + snapshot := buildHookSnapshot(resolved) + fingerprint, err := fingerprintHookSnapshot(snapshot) + if err != nil { + return err + } + + reloadStarted := h.now() + newHookCount := countResolvedHooks(snapshot) + + h.mu.Lock() + defer h.mu.Unlock() + + if fingerprint == h.fingerprint { + return nil + } + + oldHookCount := countResolvedHooks(h.snapshot) + h.snapshot = snapshot + h.fingerprint = fingerprint + version := h.version.Add(1) + reloadDuration := h.now().Sub(reloadStarted) + h.metrics.observeRegistryReload(reloadDuration, newHookCount-oldHookCount) + h.logger.Info( + "hook.registry.reloaded", + "version", version, + "hook_count", newHookCount, + "hook_count_delta", newHookCount-oldHookCount, + "duration_ms", reloadDuration.Milliseconds(), + ) + + return nil +} + +func (h *Hooks) collectDeclarations(ctx context.Context) ([]HookDecl, error) { + collected := make([]HookDecl, 0, 16) + + sources := []struct { + name string + source HookSource + provider DeclarationProvider + }{ + {name: "native", source: HookSourceNative, provider: h.nativeProvider}, + {name: "config", source: HookSourceConfig, provider: h.configProvider}, + {name: "agent_definition", source: HookSourceAgentDefinition, provider: h.agentProvider}, + {name: "skill", source: HookSourceSkill, provider: h.skillProvider}, + } + + for _, source := range sources { + provider := source.provider + if provider == nil { + continue + } + + decls, err := provider(ctx) + if err != nil { + return nil, fmt.Errorf("hooks: load %s declarations: %w", source.name, err) + } + + for _, decl := range decls { + normalized := 
cloneHookDecl(decl) + normalized.Source = source.source + collected = append(collected, normalized) + } + } + + return collected, nil +} + +func (h *Hooks) hookSnapshot(event HookEvent) ([]*ResolvedHook, error) { + if h == nil { + return nil, errors.New("hooks: dispatcher is required") + } + if err := event.Validate(); err != nil { + return nil, err + } + + h.mu.RLock() + snapshot := h.snapshot[event] + h.mu.RUnlock() + + return snapshot, nil +} + +func buildHookSnapshot(resolved []ResolvedHook) map[HookEvent][]*ResolvedHook { + snapshot := make(map[HookEvent][]*ResolvedHook) + for idx := range resolved { + hook := resolved[idx] + snapshot[hook.Event] = append(snapshot[hook.Event], &hook) + } + + for _, event := range AllHookEvents() { + SortResolvedHooks(snapshot[event]) + } + + return snapshot +} + +func countResolvedHooks(snapshot map[HookEvent][]*ResolvedHook) int { + count := 0 + for _, hooks := range snapshot { + count += len(hooks) + } + return count +} + +func fingerprintHookSnapshot(snapshot map[HookEvent][]*ResolvedHook) (string, error) { + fingerprints := make([]snapshotFingerprint, 0, len(AllHookEvents())) + for _, event := range AllHookEvents() { + entry := snapshotFingerprint{ + Event: event, + Hooks: make([]resolvedHookFingerprint, 0, len(snapshot[event])), + } + + for _, hook := range snapshot[event] { + if hook == nil { + continue + } + + entry.Hooks = append(entry.Hooks, resolvedHookFingerprint{ + Name: hook.Name, + Event: hook.Event, + Source: hook.Source, + Mode: hook.Mode, + Required: hook.Required, + Priority: hook.Priority, + Timeout: hook.Timeout, + Matcher: hook.Matcher, + Metadata: cloneStringMap(hook.Metadata), + ExecutorKind: hook.Decl.ExecutorKind, + Command: hook.Decl.Command, + Args: append([]string(nil), hook.Decl.Args...), + Env: cloneStringMap(hook.Decl.Env), + SkillSource: hook.Decl.SkillSource, + }) + } + + fingerprints = append(fingerprints, entry) + } + + encoded, err := json.Marshal(fingerprints) + if err != nil { + return "", 
fmt.Errorf("hooks: fingerprint snapshot: %w", err) + } + + return string(encoded), nil +} + +func defaultExecutorResolver(decl HookDecl) (Executor, error) { + switch decl.ExecutorKind { + case HookExecutorNative: + return nil, fmt.Errorf("hooks: native executor for hook %q requires an explicit resolver", decl.Name) + case HookExecutorSubprocess: + return NewSubprocessExecutor( + decl.Command, + decl.Args, + WithSubprocessEnv(decl.Env), + ), nil + case HookExecutorWASM: + return &WasmExecutor{}, nil + default: + return nil, fmt.Errorf("hooks: unsupported executor kind %q for hook %q", decl.ExecutorKind, decl.Name) + } +} + +func emptyDeclarationProvider(context.Context) ([]HookDecl, error) { + return nil, nil +} + +func staticDeclarationProvider(decls []HookDecl) DeclarationProvider { + cloned := cloneHookDecls(decls) + return func(context.Context) ([]HookDecl, error) { + return cloneHookDecls(cloned), nil + } +} + +func cloneHookDecls(decls []HookDecl) []HookDecl { + if len(decls) == 0 { + return nil + } + + cloned := make([]HookDecl, 0, len(decls)) + for _, decl := range decls { + cloned = append(cloned, cloneHookDecl(decl)) + } + return cloned +} + +func cloneHookDecl(decl HookDecl) HookDecl { + cloned := decl + cloned.Args = append([]string(nil), decl.Args...) 
+ cloned.Env = cloneStringMap(decl.Env) + cloned.Metadata = cloneStringMap(decl.Metadata) + if decl.Matcher.ToolReadOnly != nil { + value := *decl.Matcher.ToolReadOnly + cloned.Matcher.ToolReadOnly = &value + } + return cloned +} diff --git a/internal/hooks/hooks_test.go b/internal/hooks/hooks_test.go new file mode 100644 index 000000000..ee441b7aa --- /dev/null +++ b/internal/hooks/hooks_test.go @@ -0,0 +1,1448 @@ +package hooks + +import ( + "context" + "errors" + "reflect" + "strings" + "sync" + "sync/atomic" + "testing" + "time" +) + +func TestNewHooksCreatesEmptyStartedRegistry(t *testing.T) { + t.Parallel() + + hooks := newTestHooks(t) + if hooks.snapshot == nil { + t.Fatal("snapshot = nil, want initialized map") + } + if len(hooks.snapshot) != 0 { + t.Fatalf("len(snapshot) = %d, want 0", len(hooks.snapshot)) + } + if hooks.pool == nil { + t.Fatal("pool = nil, want initialized pool") + } + if !hooks.pool.started { + t.Fatal("pool.started = false, want true") + } + if got := hooks.version.Load(); got != 0 { + t.Fatalf("version = %d, want 0", got) + } +} + +func TestHooksRebuildPopulatesSnapshot(t *testing.T) { + t.Parallel() + + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "native-post-create", + Event: HookSessionPostCreate, + Mode: HookModeAsync, + ExecutorKind: HookExecutorNative, + }, + }), + WithConfigDeclarations([]HookDecl{ + testSubprocessDecl("config-input", HookInputPreSubmit), + }), + WithAgentDeclarations([]HookDecl{ + testSubprocessDecl("agent-message", HookMessageEnd), + }), + WithSkillDeclarations([]HookDecl{ + func() HookDecl { + decl := testSubprocessDecl("skill-tool", HookToolPostCall) + decl.SkillSource = HookSkillSourceUser + return decl + }(), + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "native-post-create": NewNativeExecutor(func(context.Context, RegisteredHook, []byte) ([]byte, error) { + return nil, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + 
t.Fatalf("Rebuild() error = %v, want nil", err) + } + + if got := len(hooks.snapshot[HookSessionPostCreate]); got != 1 { + t.Fatalf("len(snapshot[session.post_create]) = %d, want 1", got) + } + if got := len(hooks.snapshot[HookInputPreSubmit]); got != 1 { + t.Fatalf("len(snapshot[input.pre_submit]) = %d, want 1", got) + } + if got := len(hooks.snapshot[HookMessageEnd]); got != 1 { + t.Fatalf("len(snapshot[message.end]) = %d, want 1", got) + } + if got := len(hooks.snapshot[HookToolPostCall]); got != 1 { + t.Fatalf("len(snapshot[tool.post_call]) = %d, want 1", got) + } + if got := hooks.version.Load(); got != 1 { + t.Fatalf("version = %d, want 1", got) + } + + if hooks.snapshot[HookSessionPostCreate][0].Source != HookSourceNative { + t.Fatalf("native source = %q, want %q", hooks.snapshot[HookSessionPostCreate][0].Source, HookSourceNative) + } + if hooks.snapshot[HookToolPostCall][0].Decl.SkillSource != HookSkillSourceUser { + t.Fatalf("skill source = %q, want %q", hooks.snapshot[HookToolPostCall][0].Decl.SkillSource, HookSkillSourceUser) + } +} + +func TestHooksRebuildInvalidDeclarationKeepsOldSnapshot(t *testing.T) { + t.Parallel() + + configDecls := []HookDecl{testSubprocessDecl("valid-input", HookInputPreSubmit)} + hooks := newTestHooks( + t, + WithConfigDeclarationProvider(func(context.Context) ([]HookDecl, error) { + return cloneHookDecls(configDecls), nil + }), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("initial Rebuild() error = %v, want nil", err) + } + + beforeVersion := hooks.version.Load() + beforeFingerprint := hooks.fingerprint + beforeHook := hooks.snapshot[HookInputPreSubmit][0] + + configDecls = []HookDecl{ + { + Name: "invalid-sync-delta", + Event: HookMessageDelta, + Mode: HookModeSync, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + } + + err := hooks.Rebuild(t.Context()) + if err == nil { + t.Fatal("Rebuild() error = nil, want non-nil") + } + if hooks.version.Load() != beforeVersion { + 
t.Fatalf("version = %d, want %d", hooks.version.Load(), beforeVersion) + } + if hooks.fingerprint != beforeFingerprint { + t.Fatal("fingerprint changed after failed rebuild") + } + if hooks.snapshot[HookInputPreSubmit][0] != beforeHook { + t.Fatal("snapshot changed after failed rebuild") + } +} + +func TestHooksRebuildUnchangedSkipsSwap(t *testing.T) { + t.Parallel() + + decls := []HookDecl{testSubprocessDecl("stable-input", HookInputPreSubmit)} + hooks := newTestHooks(t, WithConfigDeclarations(decls)) + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("first Rebuild() error = %v, want nil", err) + } + + beforeVersion := hooks.version.Load() + beforeHook := hooks.snapshot[HookInputPreSubmit][0] + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("second Rebuild() error = %v, want nil", err) + } + + if hooks.version.Load() != beforeVersion { + t.Fatalf("version = %d, want %d", hooks.version.Load(), beforeVersion) + } + if hooks.snapshot[HookInputPreSubmit][0] != beforeHook { + t.Fatal("snapshot swapped for unchanged declarations") + } +} + +func TestHooksRebuildBumpsVersionOnSwap(t *testing.T) { + t.Parallel() + + configDecls := []HookDecl{testSubprocessDecl("v1-input", HookInputPreSubmit)} + hooks := newTestHooks( + t, + WithConfigDeclarationProvider(func(context.Context) ([]HookDecl, error) { + return cloneHookDecls(configDecls), nil + }), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("first Rebuild() error = %v, want nil", err) + } + + configDecls = []HookDecl{testSubprocessDecl("v2-input", HookInputPreSubmit)} + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("second Rebuild() error = %v, want nil", err) + } + + if got := hooks.version.Load(); got != 2 { + t.Fatalf("version = %d, want 2", got) + } + if hooks.snapshot[HookInputPreSubmit][0].Name != "v2-input" { + t.Fatalf("snapshot hook = %q, want %q", hooks.snapshot[HookInputPreSubmit][0].Name, "v2-input") + } +} + +func 
TestHooksConcurrentRebuildAndDispatch(t *testing.T) { + declsA := []HookDecl{ + { + Name: "native-a", + Event: HookInputPreSubmit, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }, + } + declsB := []HookDecl{ + { + Name: "native-a", + Event: HookInputPreSubmit, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }, + { + Name: "native-b", + Event: HookInputPreSubmit, + Mode: HookModeSync, + Priority: 200, + PrioritySet: true, + ExecutorKind: HookExecutorNative, + }, + } + + var seq atomic.Int64 + hooks := newTestHooks( + t, + WithNativeDeclarationProvider(func(context.Context) ([]HookDecl, error) { + if seq.Add(1)%2 == 0 { + return cloneHookDecls(declsA), nil + } + return cloneHookDecls(declsB), nil + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "native-a": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, payload InputPreSubmitPayload) (InputPreSubmitPatch, error) { + msg := payload.Message + "a" + return InputPreSubmitPatch{Message: &msg}, nil + }), + "native-b": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, payload InputPreSubmitPayload) (InputPreSubmitPatch, error) { + msg := payload.Message + "b" + return InputPreSubmitPatch{Message: &msg}, nil + }), + })), + ) + + var wg sync.WaitGroup + errCh := make(chan error, 16) + + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < 100; j++ { + if err := hooks.Rebuild(context.Background()); err != nil { + errCh <- err + return + } + } + }() + } + + for i := 0; i < 4; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < 100; j++ { + if _, err := hooks.DispatchInputPreSubmit(context.Background(), InputPreSubmitPayload{ + PayloadBase: PayloadBase{Event: HookInputPreSubmit}, + Message: "seed-", + }); err != nil { + errCh <- err + return + } + } + }() + } + + wg.Wait() + close(errCh) + + for err := range errCh { + if err != nil { + t.Fatalf("concurrent hooks operation error = %v, want nil", err) 
+ } + } +} + +func TestDispatchInputPreSubmitRejectsNilHooksAndContext(t *testing.T) { + t.Parallel() + + payload := InputPreSubmitPayload{ + PayloadBase: PayloadBase{Event: HookInputPreSubmit}, + Message: "seed", + } + + var nilHooks *Hooks + if _, err := nilHooks.DispatchInputPreSubmit(context.Background(), payload); err == nil || !strings.Contains(err.Error(), "dispatcher is nil") { + t.Fatalf("DispatchInputPreSubmit(nil hooks) error = %v, want nil dispatcher detail", err) + } + + hooks := newTestHooks(t, WithConfigDeclarations([]HookDecl{ + testSubprocessDecl("input-hook", HookInputPreSubmit), + })) + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v", err) + } + if _, err := hooks.DispatchInputPreSubmit(nilTestContext(), payload); err == nil || !strings.Contains(err.Error(), "dispatch context is nil") { + t.Fatalf("DispatchInputPreSubmit(nil context) error = %v, want nil context detail", err) + } +} + +func nilTestContext() context.Context { + var ctx context.Context + return ctx +} + +func TestDispatchInputPreSubmitReturnsOriginalPayloadWhenNoHooksMatch(t *testing.T) { + t.Parallel() + + hooks := newTestHooks(t) + payload := InputPreSubmitPayload{ + PayloadBase: PayloadBase{Event: HookInputPreSubmit}, + Message: "unchanged", + } + + got, err := hooks.DispatchInputPreSubmit(t.Context(), payload) + if err != nil { + t.Fatalf("DispatchInputPreSubmit() error = %v, want nil", err) + } + if !reflect.DeepEqual(got, payload) { + t.Fatalf("DispatchInputPreSubmit() = %#v, want %#v", got, payload) + } +} + +func TestDispatchMethodsSmokeNoHooks(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + run func(context.Context, *Hooks) error + }{ + { + name: "Should dispatch session.pre_create without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchSessionPreCreate(ctx, SessionPreCreatePayload{PayloadBase: PayloadBase{Event: HookSessionPreCreate}}) + return err + }, + }, + { + 
name: "Should dispatch session.post_create without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchSessionPostCreate(ctx, SessionPostCreatePayload{PayloadBase: PayloadBase{Event: HookSessionPostCreate}}) + return err + }, + }, + { + name: "Should dispatch session.pre_resume without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchSessionPreResume(ctx, SessionPreResumePayload{PayloadBase: PayloadBase{Event: HookSessionPreResume}}) + return err + }, + }, + { + name: "Should dispatch session.post_resume without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchSessionPostResume(ctx, SessionPostResumePayload{PayloadBase: PayloadBase{Event: HookSessionPostResume}}) + return err + }, + }, + { + name: "Should dispatch session.pre_stop without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchSessionPreStop(ctx, SessionPreStopPayload{PayloadBase: PayloadBase{Event: HookSessionPreStop}}) + return err + }, + }, + { + name: "Should dispatch session.post_stop without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchSessionPostStop(ctx, SessionPostStopPayload{PayloadBase: PayloadBase{Event: HookSessionPostStop}}) + return err + }, + }, + { + name: "Should dispatch input.pre_submit without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchInputPreSubmit(ctx, InputPreSubmitPayload{PayloadBase: PayloadBase{Event: HookInputPreSubmit}}) + return err + }, + }, + { + name: "Should dispatch prompt.post_assemble without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchPromptPostAssemble(ctx, PromptPayload{PayloadBase: PayloadBase{Event: HookPromptPostAssemble}}) + return err + }, + }, + { + name: "Should dispatch event.pre_record without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := 
hooks.DispatchEventPreRecord(ctx, EventPreRecordPayload{PayloadBase: PayloadBase{Event: HookEventPreRecord}}) + return err + }, + }, + { + name: "Should dispatch event.post_record without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchEventPostRecord(ctx, EventPostRecordPayload{PayloadBase: PayloadBase{Event: HookEventPostRecord}}) + return err + }, + }, + { + name: "Should dispatch agent.pre_start without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchAgentPreStart(ctx, AgentPreStartPayload{PayloadBase: PayloadBase{Event: HookAgentPreStart}}) + return err + }, + }, + { + name: "Should dispatch agent.spawned without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchAgentSpawned(ctx, AgentSpawnedPayload{PayloadBase: PayloadBase{Event: HookAgentSpawned}}) + return err + }, + }, + { + name: "Should dispatch agent.crashed without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchAgentCrashed(ctx, AgentCrashedPayload{PayloadBase: PayloadBase{Event: HookAgentCrashed}}) + return err + }, + }, + { + name: "Should dispatch agent.stopped without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchAgentStopped(ctx, AgentStoppedPayload{PayloadBase: PayloadBase{Event: HookAgentStopped}}) + return err + }, + }, + { + name: "Should dispatch turn.start without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchTurnStart(ctx, TurnStartPayload{PayloadBase: PayloadBase{Event: HookTurnStart}}) + return err + }, + }, + { + name: "Should dispatch turn.end without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchTurnEnd(ctx, TurnEndPayload{PayloadBase: PayloadBase{Event: HookTurnEnd}}) + return err + }, + }, + { + name: "Should dispatch message.start without hooks", + run: func(ctx context.Context, hooks *Hooks) 
error { + _, err := hooks.DispatchMessageStart(ctx, MessageStartPayload{PayloadBase: PayloadBase{Event: HookMessageStart}}) + return err + }, + }, + { + name: "Should dispatch message.delta without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchMessageDelta(ctx, MessageDeltaPayload{PayloadBase: PayloadBase{Event: HookMessageDelta}}) + return err + }, + }, + { + name: "Should dispatch message.end without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchMessageEnd(ctx, MessageEndPayload{PayloadBase: PayloadBase{Event: HookMessageEnd}}) + return err + }, + }, + { + name: "Should dispatch tool.pre_call without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchToolPreCall(ctx, ToolPreCallPayload{PayloadBase: PayloadBase{Event: HookToolPreCall}}) + return err + }, + }, + { + name: "Should dispatch tool.post_call without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchToolPostCall(ctx, ToolPostCallPayload{PayloadBase: PayloadBase{Event: HookToolPostCall}}) + return err + }, + }, + { + name: "Should dispatch tool.post_error without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchToolPostError(ctx, ToolPostErrorPayload{PayloadBase: PayloadBase{Event: HookToolPostError}}) + return err + }, + }, + { + name: "Should dispatch permission.request without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchPermissionRequest(ctx, PermissionRequestPayload{PayloadBase: PayloadBase{Event: HookPermissionRequest}}) + return err + }, + }, + { + name: "Should dispatch permission.resolved without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchPermissionResolved(ctx, PermissionResolvedPayload{PayloadBase: PayloadBase{Event: HookPermissionResolved}}) + return err + }, + }, + { + name: "Should dispatch 
permission.denied without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchPermissionDenied(ctx, PermissionDeniedPayload{PayloadBase: PayloadBase{Event: HookPermissionDenied}}) + return err + }, + }, + { + name: "Should dispatch context.pre_compact without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchContextPreCompact(ctx, ContextPreCompactPayload{PayloadBase: PayloadBase{Event: HookContextPreCompact}}) + return err + }, + }, + { + name: "Should dispatch context.post_compact without hooks", + run: func(ctx context.Context, hooks *Hooks) error { + _, err := hooks.DispatchContextPostCompact(ctx, ContextPostCompactPayload{PayloadBase: PayloadBase{Event: HookContextPostCompact}}) + return err + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + hooks := newTestHooks(t) + if err := tc.run(t.Context(), hooks); err != nil { + t.Fatalf("%s: %v", tc.name, err) + } + }) + } +} + +func TestDispatchInputPreSubmitAppliesMatchingHooksInOrder(t *testing.T) { + t.Parallel() + + var seen []string + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "append-a", + Event: HookInputPreSubmit, + Mode: HookModeSync, + Priority: 200, + PrioritySet: true, + ExecutorKind: HookExecutorNative, + }, + { + Name: "append-b", + Event: HookInputPreSubmit, + Mode: HookModeSync, + Priority: 100, + PrioritySet: true, + ExecutorKind: HookExecutorNative, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "append-a": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, payload InputPreSubmitPayload) (InputPreSubmitPatch, error) { + seen = append(seen, payload.Message) + msg := payload.Message + "a" + return InputPreSubmitPatch{Message: &msg}, nil + }), + "append-b": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, payload InputPreSubmitPayload) (InputPreSubmitPatch, error) { + 
seen = append(seen, payload.Message) + msg := payload.Message + "b" + return InputPreSubmitPatch{Message: &msg}, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + result, err := hooks.DispatchInputPreSubmit(t.Context(), InputPreSubmitPayload{ + PayloadBase: PayloadBase{Event: HookInputPreSubmit}, + Message: "seed-", + }) + if err != nil { + t.Fatalf("DispatchInputPreSubmit() error = %v, want nil", err) + } + if result.Message != "seed-ab" { + t.Fatalf("result.Message = %q, want %q", result.Message, "seed-ab") + } + if got, want := len(seen), 2; got != want { + t.Fatalf("len(seen) = %d, want %d", got, want) + } + if seen[0] != "seed-" || seen[1] != "seed-a" { + t.Fatalf("seen = %#v, want [seed- seed-a]", seen) + } +} + +func TestDispatchSessionPreCreateAppliesPatch(t *testing.T) { + t.Parallel() + + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "session-precreate", + Event: HookSessionPreCreate, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{AgentName: "codex"}, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "session-precreate": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ SessionPreCreatePayload) (SessionCreatePatch, error) { + name := "renamed" + workspace := "/tmp/next" + return SessionCreatePatch{ + SessionName: &name, + Workspace: &workspace, + }, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + result, err := hooks.DispatchSessionPreCreate(t.Context(), SessionPreCreatePayload{ + PayloadBase: PayloadBase{Event: HookSessionPreCreate}, + SessionContext: SessionContext{ + AgentName: "codex", + SessionName: "old", + Workspace: "/tmp/old", + }, + }) + if err != nil { + t.Fatalf("DispatchSessionPreCreate() error = %v, want nil", err) + } + if result.SessionName != "renamed" { + 
t.Fatalf("result.SessionName = %q, want %q", result.SessionName, "renamed") + } + if result.Workspace != "/tmp/next" { + t.Fatalf("result.Workspace = %q, want %q", result.Workspace, "/tmp/next") + } +} + +func TestDispatchPromptPostAssembleAppliesPatch(t *testing.T) { + t.Parallel() + + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "prompt-post-assemble", + Event: HookPromptPostAssemble, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{InputClass: "chat"}, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "prompt-post-assemble": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ PromptPayload) (PromptPatch, error) { + prompt := "patched" + return PromptPatch{ + Prompt: &prompt, + ContextBlocks: []ContextBlock{{Kind: "note", Text: "ctx", Metadata: map[string]string{"k": "v"}}}, + }, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + result, err := hooks.DispatchPromptPostAssemble(t.Context(), PromptPayload{ + PayloadBase: PayloadBase{Event: HookPromptPostAssemble}, + InputClass: "chat", + Prompt: "original", + }) + if err != nil { + t.Fatalf("DispatchPromptPostAssemble() error = %v, want nil", err) + } + if result.Prompt != "patched" { + t.Fatalf("result.Prompt = %q, want %q", result.Prompt, "patched") + } + if got := len(result.ContextBlocks); got != 1 { + t.Fatalf("len(result.ContextBlocks) = %d, want 1", got) + } + if result.ContextBlocks[0].Metadata["k"] != "v" { + t.Fatalf("metadata = %#v, want key k", result.ContextBlocks[0].Metadata) + } +} + +func TestDispatchEventPreRecordRunsAsyncHook(t *testing.T) { + t.Parallel() + + called := make(chan string, 1) + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "event-observer", + Event: HookEventPreRecord, + Mode: HookModeAsync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{ACPEventType: 
"agent_message"}, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "event-observer": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, payload EventPreRecordPayload) (EventPreRecordPatch, error) { + called <- payload.RecordType + return EventPreRecordPatch{}, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + if _, err := hooks.DispatchEventPreRecord(t.Context(), EventPreRecordPayload{ + PayloadBase: PayloadBase{Event: HookEventPreRecord}, + RecordType: "agent_message", + }); err != nil { + t.Fatalf("DispatchEventPreRecord() error = %v, want nil", err) + } + + select { + case got := <-called: + if got != "agent_message" { + t.Fatalf("async event payload = %q, want %q", got, "agent_message") + } + case <-time.After(time.Second): + t.Fatal("async event hook was not called") + } +} + +func TestDispatchInputPreSubmitSkipsAsyncHooksWhenSyncPhaseDoesNotSucceed(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + required bool + syncExecutor func(context.Context, RegisteredHook, InputPreSubmitPayload) (InputPreSubmitPatch, error) + wantErr string + }{ + { + name: "Should skip async hooks after a sync deny", + syncExecutor: func(_ context.Context, _ RegisteredHook, _ InputPreSubmitPayload) (InputPreSubmitPatch, error) { + return InputPreSubmitPatch{ + ControlPatch: ControlPatch{Deny: true, DenyReason: "blocked"}, + }, nil + }, + wantErr: "denied", + }, + { + name: "Should skip async hooks after a sync failure", + required: true, + syncExecutor: func(_ context.Context, _ RegisteredHook, _ InputPreSubmitPayload) (InputPreSubmitPatch, error) { + return InputPreSubmitPatch{}, errors.New("sync hook failed") + }, + wantErr: "sync hook failed", + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + asyncCalled := make(chan struct{}, 1) + hooks := newTestHooks( + t, + 
WithNativeDeclarations([]HookDecl{ + { + Name: "sync-input", + Event: HookInputPreSubmit, + Mode: HookModeSync, + Required: tc.required, + ExecutorKind: HookExecutorNative, + }, + { + Name: "async-input", + Event: HookInputPreSubmit, + Mode: HookModeAsync, + ExecutorKind: HookExecutorNative, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "sync-input": NewTypedNativeExecutor(tc.syncExecutor), + "async-input": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ InputPreSubmitPayload) (InputPreSubmitPatch, error) { + asyncCalled <- struct{}{} + return InputPreSubmitPatch{}, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + _, err := hooks.DispatchInputPreSubmit(t.Context(), InputPreSubmitPayload{ + PayloadBase: PayloadBase{Event: HookInputPreSubmit}, + Message: "seed", + }) + if err == nil || !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("DispatchInputPreSubmit() error = %v, want detail %q", err, tc.wantErr) + } + + select { + case <-asyncCalled: + t.Fatal("async hook ran after sync phase did not succeed") + case <-time.After(100 * time.Millisecond): + } + }) + } +} + +func TestDispatchAgentHooksApplyPatches(t *testing.T) { + t.Parallel() + + spawned := make(chan struct{}, 1) + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "agent-prestart", + Event: HookAgentPreStart, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{AgentName: "codex"}, + }, + { + Name: "agent-spawned", + Event: HookAgentSpawned, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{AgentName: "codex"}, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "agent-prestart": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ AgentPreStartPayload) (AgentStartPatch, error) { + command := "/bin/next" + cwd := "/tmp/next" + return 
AgentStartPatch{ + Command: &command, + Args: []string{"--next"}, + Cwd: &cwd, + }, nil + }), + "agent-spawned": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ AgentSpawnedPayload) (AgentSpawnedPatch, error) { + spawned <- struct{}{} + return AgentSpawnedPatch{}, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + preStart, err := hooks.DispatchAgentPreStart(t.Context(), AgentPreStartPayload{ + PayloadBase: PayloadBase{Event: HookAgentPreStart}, + SessionContext: SessionContext{AgentName: "codex"}, + Command: "/bin/original", + Cwd: "/tmp/original", + }) + if err != nil { + t.Fatalf("DispatchAgentPreStart() error = %v, want nil", err) + } + if preStart.Command != "/bin/next" { + t.Fatalf("preStart.Command = %q, want %q", preStart.Command, "/bin/next") + } + if preStart.Cwd != "/tmp/next" { + t.Fatalf("preStart.Cwd = %q, want %q", preStart.Cwd, "/tmp/next") + } + if got := len(preStart.Args); got != 1 { + t.Fatalf("len(preStart.Args) = %d, want 1", got) + } + + if _, err := hooks.DispatchAgentSpawned(t.Context(), AgentSpawnedPayload{ + PayloadBase: PayloadBase{Event: HookAgentSpawned}, + SessionContext: SessionContext{AgentName: "codex"}, + }); err != nil { + t.Fatalf("DispatchAgentSpawned() error = %v, want nil", err) + } + + select { + case <-spawned: + case <-time.After(time.Second): + t.Fatal("spawned hook was not called") + } +} + +func TestDispatchTurnAndMessageHooksApplyPatches(t *testing.T) { + t.Parallel() + + turnCalled := make(chan struct{}, 1) + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "turn-start", + Event: HookTurnStart, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{InputClass: "chat"}, + }, + { + Name: "message-start", + Event: HookMessageStart, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{MessageRole: "assistant"}, + }, + }), + 
WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "turn-start": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ TurnStartPayload) (TurnStartPatch, error) { + turnCalled <- struct{}{} + return TurnStartPatch{}, nil + }), + "message-start": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ MessageStartPayload) (MessageStartPatch, error) { + role := "tool" + delta := "replacement" + text := "patched" + return MessageStartPatch{ + Role: &role, + DeltaType: &delta, + Text: &text, + }, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + if _, err := hooks.DispatchTurnStart(t.Context(), TurnStartPayload{ + PayloadBase: PayloadBase{Event: HookTurnStart}, + InputClass: "chat", + }); err != nil { + t.Fatalf("DispatchTurnStart() error = %v, want nil", err) + } + select { + case <-turnCalled: + case <-time.After(time.Second): + t.Fatal("turn-start hook was not called") + } + + result, err := hooks.DispatchMessageStart(t.Context(), MessageStartPayload{ + PayloadBase: PayloadBase{Event: HookMessageStart}, + Role: "assistant", + }) + if err != nil { + t.Fatalf("DispatchMessageStart() error = %v, want nil", err) + } + if result.Role != "tool" || result.DeltaType != "replacement" || result.Text != "patched" { + t.Fatalf("message result = %#v, want patched role/delta/text", result) + } +} + +func TestDispatchToolHooksApplyPatches(t *testing.T) { + t.Parallel() + + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "tool-pre", + Event: HookToolPreCall, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{ToolName: "read"}, + }, + { + Name: "tool-post", + Event: HookToolPostCall, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{ToolName: "read"}, + }, + { + Name: "tool-error", + Event: HookToolPostError, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + 
Matcher: HookMatcher{ToolName: "read"}, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "tool-pre": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ ToolPreCallPayload) (ToolCallPatch, error) { + name := "write" + namespace := "fs" + readOnly := false + return ToolCallPatch{ + ToolName: &name, + ToolNamespace: &namespace, + ReadOnly: &readOnly, + ToolInput: []byte(`{"patched":true}`), + }, nil + }), + "tool-post": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ ToolPostCallPayload) (ToolResultPatch, error) { + title := "patched-result" + return ToolResultPatch{ + Title: &title, + ToolResult: []byte(`{"ok":true}`), + }, nil + }), + "tool-error": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ ToolPostErrorPayload) (ToolPostErrorPatch, error) { + title := "patched-error" + errText := "patched failure" + return ToolPostErrorPatch{ + Title: &title, + Error: &errText, + }, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + pre, err := hooks.DispatchToolPreCall(t.Context(), ToolPreCallPayload{ + PayloadBase: PayloadBase{Event: HookToolPreCall}, + ToolCallRef: ToolCallRef{ToolName: "read"}, + }) + if err != nil { + t.Fatalf("DispatchToolPreCall() error = %v, want nil", err) + } + if pre.ToolName != "write" || pre.ToolNamespace != "fs" || pre.ReadOnly { + t.Fatalf("pre = %#v, want patched tool identity", pre) + } + if string(pre.ToolInput) != `{"patched":true}` { + t.Fatalf("pre.ToolInput = %s, want patched json", pre.ToolInput) + } + + post, err := hooks.DispatchToolPostCall(t.Context(), ToolPostCallPayload{ + PayloadBase: PayloadBase{Event: HookToolPostCall}, + ToolCallRef: ToolCallRef{ToolName: "read"}, + }) + if err != nil { + t.Fatalf("DispatchToolPostCall() error = %v, want nil", err) + } + if post.Title != "patched-result" || string(post.ToolResult) != `{"ok":true}` { + t.Fatalf("post = %#v, want 
patched title/result", post) + } + + postErr, err := hooks.DispatchToolPostError(t.Context(), ToolPostErrorPayload{ + PayloadBase: PayloadBase{Event: HookToolPostError}, + ToolCallRef: ToolCallRef{ToolName: "read"}, + }) + if err != nil { + t.Fatalf("DispatchToolPostError() error = %v, want nil", err) + } + if postErr.Title != "patched-error" || postErr.Error != "patched failure" { + t.Fatalf("postErr = %#v, want patched title/error", postErr) + } +} + +func TestDispatchPermissionAndContextHooksApplyPatches(t *testing.T) { + t.Parallel() + + resolvedCalled := make(chan struct{}, 1) + deniedCalled := make(chan struct{}, 1) + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "permission-request", + Event: HookPermissionRequest, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{DecisionClass: "tool"}, + }, + { + Name: "permission-resolved", + Event: HookPermissionResolved, + Mode: HookModeAsync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{DecisionClass: "tool"}, + }, + { + Name: "permission-denied", + Event: HookPermissionDenied, + Mode: HookModeAsync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{DecisionClass: "tool"}, + }, + { + Name: "context-pre", + Event: HookContextPreCompact, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + Matcher: HookMatcher{CompactionReason: "token_limit"}, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "permission-request": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ PermissionRequestPayload) (PermissionRequestPatch, error) { + decision := "allow-once" + decisionClass := "tool-patched" + return PermissionRequestPatch{ + Decision: &decision, + DecisionClass: &decisionClass, + }, nil + }), + "permission-resolved": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ PermissionResolvedPayload) (PermissionResolvedPatch, error) { + resolvedCalled <- struct{}{} + return 
PermissionResolvedPatch{}, nil + }), + "permission-denied": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ PermissionDeniedPayload) (PermissionDeniedPatch, error) { + deniedCalled <- struct{}{} + return PermissionDeniedPatch{}, nil + }), + "context-pre": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ ContextPreCompactPayload) (ContextPreCompactPatch, error) { + reason := "manual" + strategy := "summarize" + return ContextPreCompactPatch{ + Reason: &reason, + Strategy: &strategy, + ContextBlocks: []ContextBlock{{Kind: "summary", Text: "patched"}}, + }, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + permission, err := hooks.DispatchPermissionRequest(t.Context(), PermissionRequestPayload{ + PayloadBase: PayloadBase{Event: HookPermissionRequest}, + DecisionClass: "tool", + Decision: "allow", + SessionContext: SessionContext{}, + }) + if err != nil { + t.Fatalf("DispatchPermissionRequest() error = %v, want nil", err) + } + if permission.Decision != "allow-once" || permission.DecisionClass != "tool-patched" { + t.Fatalf("permission = %#v, want patched decision fields", permission) + } + + if _, err := hooks.DispatchPermissionResolved(t.Context(), PermissionResolvedPayload{ + PayloadBase: PayloadBase{Event: HookPermissionResolved}, + DecisionClass: "tool", + }); err != nil { + t.Fatalf("DispatchPermissionResolved() error = %v, want nil", err) + } + if _, err := hooks.DispatchPermissionDenied(t.Context(), PermissionDeniedPayload{ + PayloadBase: PayloadBase{Event: HookPermissionDenied}, + DecisionClass: "tool", + }); err != nil { + t.Fatalf("DispatchPermissionDenied() error = %v, want nil", err) + } + + select { + case <-resolvedCalled: + case <-time.After(time.Second): + t.Fatal("permission-resolved async hook was not called") + } + select { + case <-deniedCalled: + case <-time.After(time.Second): + t.Fatal("permission-denied async hook was not 
called") + } + + contextPayload, err := hooks.DispatchContextPreCompact(t.Context(), ContextPreCompactPayload{ + PayloadBase: PayloadBase{Event: HookContextPreCompact}, + Reason: "token_limit", + }) + if err != nil { + t.Fatalf("DispatchContextPreCompact() error = %v, want nil", err) + } + if contextPayload.Reason != "manual" || contextPayload.Strategy != "summarize" { + t.Fatalf("contextPayload = %#v, want patched reason/strategy", contextPayload) + } + if got := len(contextPayload.ContextBlocks); got != 1 { + t.Fatalf("len(contextPayload.ContextBlocks) = %d, want 1", got) + } +} + +func TestHooksDispatchSessionPostCreate(t *testing.T) { + t.Parallel() + + called := make(chan SessionLifecyclePayload, 1) + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "observe-create", + Event: HookSessionPostCreate, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "observe-create": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, payload SessionLifecyclePayload) (SessionPostCreatePatch, error) { + called <- payload + return SessionPostCreatePatch{}, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + _, err := hooks.DispatchSessionPostCreate(t.Context(), SessionPostCreatePayload{ + PayloadBase: PayloadBase{ + Event: HookSessionPostCreate, + Timestamp: time.Unix(123, 0).UTC(), + }, + SessionContext: SessionContext{ + SessionID: "sess-created", + SessionName: "demo", + AgentName: "codex", + WorkspaceID: "ws-1", + Workspace: "/tmp/ws", + SessionType: "user", + State: "active", + }, + }) + if err != nil { + t.Fatalf("DispatchSessionPostCreate() error = %v, want nil", err) + } + + select { + case payload := <-called: + if payload.Event != HookSessionPostCreate { + t.Fatalf("payload.Event = %q, want %q", payload.Event, HookSessionPostCreate) + } + if payload.SessionID != 
"sess-created" { + t.Fatalf("payload.SessionID = %q, want %q", payload.SessionID, "sess-created") + } + if payload.Timestamp != time.Unix(123, 0).UTC() { + t.Fatalf("payload.Timestamp = %s, want fixed clock", payload.Timestamp) + } + case <-time.After(time.Second): + t.Fatal("post-create hook was not called") + } +} + +func TestHooksDispatchSessionPostStop(t *testing.T) { + t.Parallel() + + called := make(chan SessionLifecyclePayload, 1) + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "observe-stop", + Event: HookSessionPostStop, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "observe-stop": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, payload SessionLifecyclePayload) (SessionPostStopPatch, error) { + called <- payload + return SessionPostStopPatch{}, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + _, err := hooks.DispatchSessionPostStop(t.Context(), SessionPostStopPayload{ + PayloadBase: PayloadBase{Event: HookSessionPostStop}, + SessionContext: SessionContext{ + SessionID: "sess-stopped", + SessionName: "demo", + AgentName: "codex", + SessionType: "system", + State: "stopped", + }, + }) + if err != nil { + t.Fatalf("DispatchSessionPostStop() error = %v, want nil", err) + } + + select { + case payload := <-called: + if payload.Event != HookSessionPostStop { + t.Fatalf("payload.Event = %q, want %q", payload.Event, HookSessionPostStop) + } + if payload.State != "stopped" { + t.Fatalf("payload.State = %q, want %q", payload.State, "stopped") + } + case <-time.After(time.Second): + t.Fatal("post-stop hook was not called") + } +} + +func TestHooksCloseDrainsAsyncPool(t *testing.T) { + t.Parallel() + + started := make(chan struct{}) + release := make(chan struct{}) + done := make(chan struct{}) + + hooks := newTestHooks( + t, + 
WithNativeDeclarations([]HookDecl{ + { + Name: "async-input", + Event: HookInputPreSubmit, + Mode: HookModeAsync, + ExecutorKind: HookExecutorNative, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "async-input": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ InputPreSubmitPayload) (InputPreSubmitPatch, error) { + close(started) + <-release + close(done) + return InputPreSubmitPatch{}, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + if _, err := hooks.DispatchInputPreSubmit(t.Context(), InputPreSubmitPayload{ + PayloadBase: PayloadBase{Event: HookInputPreSubmit}, + Message: "seed", + }); err != nil { + t.Fatalf("DispatchInputPreSubmit() error = %v, want nil", err) + } + + select { + case <-started: + case <-time.After(time.Second): + t.Fatal("async hook did not start") + } + + closed := make(chan struct{}) + go func() { + hooks.Close() + close(closed) + }() + + select { + case <-closed: + t.Fatal("Close() returned before async hook completed") + case <-time.After(50 * time.Millisecond): + } + + close(release) + + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("async hook did not finish") + } + + select { + case <-closed: + case <-time.After(time.Second): + t.Fatal("Close() did not return after async hook completion") + } +} + +func TestNewHooksAppliesOptionsAndDefaultResolver(t *testing.T) { + t.Parallel() + + logger := discardPoolLogger() + hooks := newTestHooks( + t, + WithLogger(logger), + WithAsyncWorkerCount(2), + WithAsyncQueueCapacity(3), + WithAsyncDrainTimeout(4*time.Second), + ) + + if hooks.logger != logger { + t.Fatal("logger option was not applied") + } + if hooks.pool.workerCount != 2 { + t.Fatalf("workerCount = %d, want 2", hooks.pool.workerCount) + } + if hooks.pool.queueCapacity != 3 { + t.Fatalf("queueCapacity = %d, want 3", hooks.pool.queueCapacity) + } + if hooks.pool.drainTimeout != 
4*time.Second { + t.Fatalf("drainTimeout = %s, want 4s", hooks.pool.drainTimeout) + } + if hooks.Version() != 0 { + t.Fatalf("Version() = %d, want 0", hooks.Version()) + } + + executor, err := defaultExecutorResolver(HookDecl{ + Name: "wasm-stub", + ExecutorKind: HookExecutorWASM, + }) + if err != nil { + t.Fatalf("defaultExecutorResolver(wasm) error = %v, want nil", err) + } + if executor.Kind() != HookExecutorWASM { + t.Fatalf("executor.Kind() = %q, want %q", executor.Kind(), HookExecutorWASM) + } + if _, err := defaultExecutorResolver(HookDecl{ + Name: "native-missing", + ExecutorKind: HookExecutorNative, + }); err == nil { + t.Fatal("defaultExecutorResolver(native) error = nil, want non-nil") + } + + hooks.OnAgentEvent(t.Context(), "session-id", struct{ Type string }{Type: "done"}) +} + +func newTestHooks(t *testing.T, opts ...Option) *Hooks { + t.Helper() + + hooks := NewHooks(opts...) + t.Cleanup(hooks.Close) + return hooks +} + +func testExecutorResolver(native map[string]Executor) ExecutorResolver { + return func(decl HookDecl) (Executor, error) { + if decl.ExecutorKind == HookExecutorNative { + executor := native[decl.Name] + if executor == nil { + return nil, errors.New("missing native executor") + } + return executor, nil + } + return defaultExecutorResolver(decl) + } +} + +func testSubprocessDecl(name string, event HookEvent) HookDecl { + return HookDecl{ + Name: name, + Event: event, + Mode: HookModeAsync, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + } +} diff --git a/internal/hooks/introspection.go b/internal/hooks/introspection.go new file mode 100644 index 000000000..5f5d70156 --- /dev/null +++ b/internal/hooks/introspection.go @@ -0,0 +1,175 @@ +package hooks + +import "time" + +// CatalogFilter narrows the resolved hook catalog for one workspace/agent view. 
//
// A zero-valued field places no constraint on the result.
type CatalogFilter struct {
	// WorkspaceID, WorkspaceRoot and AgentName are compared for exact string
	// equality against the hook matcher's field of the same name. An empty
	// filter value, or an empty value on the hook's matcher, counts as a match.
	WorkspaceID   string
	WorkspaceRoot string
	AgentName     string
	// Event keeps only hooks registered for this event; "" keeps every event.
	Event HookEvent
	// Source keeps only hooks from this source; nil keeps every source.
	Source *HookSource
	// Mode keeps only hooks running in this mode; "" keeps every mode.
	Mode HookMode
}

// CatalogEntry describes one resolved hook in pipeline order.
type CatalogEntry struct {
	// Order is the 1-based position of the entry among the entries returned
	// for the same event; it is assigned after the filter has been applied.
	Order  int
	Name   string
	Event  HookEvent
	Source HookSource
	// SkillSource is taken from the hook's declaration.
	SkillSource HookSkillSource
	Mode        HookMode
	Required    bool
	Priority    int
	Timeout     time.Duration
	// ExecutorKind is empty when the resolved hook has no executor attached.
	ExecutorKind HookExecutorKind
	// Matcher and Metadata are copies of the resolved hook's values.
	Matcher  HookMatcher
	Metadata map[string]string
}

// EventFilter narrows the supported hook taxonomy for introspection APIs.
type EventFilter struct {
	// Family keeps only events of this family; "" keeps every family.
	Family HookEventFamily
	// SyncOnly drops events whose descriptor is not SyncEligible.
	SyncOnly bool
}

// EventDescriptor describes one supported hook event for introspection APIs.
type EventDescriptor struct {
	Event        HookEvent
	Family       HookEventFamily
	SyncEligible bool
	// PayloadSchema and PatchSchema are schema names carried as plain strings.
	// NOTE(review): they look like the Go payload/patch type names — confirm.
	PayloadSchema string
	PatchSchema   string
}

// hookEventDescriptors holds the descriptor for each hook event, keyed by the
// event itself. FilterEventDescriptors looks events up here and silently omits
// any event that has no entry, so new events need a row in this table.
var hookEventDescriptors = map[HookEvent]EventDescriptor{
	HookSessionPreCreate:   {Event: HookSessionPreCreate, Family: HookEventFamilySession, SyncEligible: true, PayloadSchema: "SessionPreCreatePayload", PatchSchema: "SessionCreatePatch"},
	HookSessionPostCreate:  {Event: HookSessionPostCreate, Family: HookEventFamilySession, SyncEligible: true, PayloadSchema: "SessionPostCreatePayload", PatchSchema: "SessionPostCreatePatch"},
	HookSessionPreResume:   {Event: HookSessionPreResume, Family: HookEventFamilySession, SyncEligible: true, PayloadSchema: "SessionPreResumePayload", PatchSchema: "SessionPreResumePatch"},
	HookSessionPostResume:  {Event: HookSessionPostResume, Family: HookEventFamilySession, SyncEligible: true, PayloadSchema: "SessionPostResumePayload", PatchSchema: "SessionPostResumePatch"},
	HookSessionPreStop:     {Event: HookSessionPreStop, Family: HookEventFamilySession, SyncEligible: true, PayloadSchema: "SessionPreStopPayload", PatchSchema: "SessionPreStopPatch"},
	HookSessionPostStop:    {Event: HookSessionPostStop, Family: HookEventFamilySession, SyncEligible: true, PayloadSchema: "SessionPostStopPayload", PatchSchema: "SessionPostStopPatch"},
	HookInputPreSubmit:     {Event: HookInputPreSubmit, Family: HookEventFamilyInput, SyncEligible: true, PayloadSchema: "InputPreSubmitPayload", PatchSchema: "InputPreSubmitPatch"},
	HookPromptPostAssemble: {Event: HookPromptPostAssemble, Family: HookEventFamilyPrompt, SyncEligible: true, PayloadSchema: "PromptPayload", PatchSchema: "PromptPatch"},
	HookEventPreRecord:     {Event: HookEventPreRecord, Family: HookEventFamilyEvent, SyncEligible: false, PayloadSchema: "EventPreRecordPayload", PatchSchema: "EventPreRecordPatch"},
	HookEventPostRecord:    {Event: HookEventPostRecord, Family: HookEventFamilyEvent, SyncEligible: false, PayloadSchema: "EventPostRecordPayload", PatchSchema: "EventPostRecordPatch"},
	HookAgentPreStart:      {Event: HookAgentPreStart, Family: HookEventFamilyAgent, SyncEligible: true, PayloadSchema: "AgentPreStartPayload", PatchSchema: "AgentStartPatch"},
	HookAgentSpawned:       {Event: HookAgentSpawned, Family: HookEventFamilyAgent, SyncEligible: true, PayloadSchema: "AgentSpawnedPayload", PatchSchema: "AgentSpawnedPatch"},
	HookAgentCrashed:       {Event: HookAgentCrashed, Family: HookEventFamilyAgent, SyncEligible: true, PayloadSchema: "AgentCrashedPayload", PatchSchema: "AgentCrashedPatch"},
	HookAgentStopped:       {Event: HookAgentStopped, Family: HookEventFamilyAgent, SyncEligible: true, PayloadSchema: "AgentStoppedPayload", PatchSchema: "AgentStoppedPatch"},
	HookTurnStart:          {Event: HookTurnStart, Family: HookEventFamilyTurn, SyncEligible: true, PayloadSchema: "TurnStartPayload", PatchSchema: "TurnStartPatch"},
	HookTurnEnd:            {Event: HookTurnEnd, Family: HookEventFamilyTurn, SyncEligible: true, PayloadSchema: "TurnEndPayload", PatchSchema: "TurnEndPatch"},
	HookMessageStart:       {Event: HookMessageStart, Family: HookEventFamilyMessage, SyncEligible: true, PayloadSchema: "MessageStartPayload", PatchSchema: "MessageStartPatch"},
	HookMessageDelta:       {Event: HookMessageDelta, Family: HookEventFamilyMessage, SyncEligible: false, PayloadSchema: "MessageDeltaPayload", PatchSchema: "MessageDeltaPatch"},
	HookMessageEnd:         {Event: HookMessageEnd, Family: HookEventFamilyMessage, SyncEligible: true, PayloadSchema: "MessageEndPayload", PatchSchema: "MessageEndPatch"},
	HookToolPreCall:        {Event: HookToolPreCall, Family: HookEventFamilyTool, SyncEligible: true, PayloadSchema: "ToolPreCallPayload", PatchSchema: "ToolCallPatch"},
	HookToolPostCall:       {Event: HookToolPostCall, Family: HookEventFamilyTool, SyncEligible: true, PayloadSchema: "ToolPostCallPayload", PatchSchema: "ToolResultPatch"},
	HookToolPostError:      {Event: HookToolPostError, Family: HookEventFamilyTool, SyncEligible: true, PayloadSchema: "ToolPostErrorPayload", PatchSchema: "ToolPostErrorPatch"},
	HookPermissionRequest:  {Event: HookPermissionRequest, Family: HookEventFamilyPermission, SyncEligible: true, PayloadSchema: "PermissionRequestPayload", PatchSchema: "PermissionRequestPatch"},
	HookPermissionResolved: {Event: HookPermissionResolved, Family: HookEventFamilyPermission, SyncEligible: false, PayloadSchema: "PermissionResolvedPayload", PatchSchema: "PermissionResolvedPatch"},
	HookPermissionDenied:   {Event: HookPermissionDenied, Family: HookEventFamilyPermission, SyncEligible: false, PayloadSchema: "PermissionDeniedPayload", PatchSchema: "PermissionDeniedPatch"},
	HookContextPreCompact:  {Event: HookContextPreCompact, Family: HookEventFamilyContext, SyncEligible: true, PayloadSchema: "ContextPreCompactPayload", PatchSchema: "ContextPreCompactPatch"},
	HookContextPostCompact: {Event: HookContextPostCompact, Family: HookEventFamilyContext, SyncEligible: true, PayloadSchema: "ContextPostCompactPayload", PatchSchema: "ContextPostCompactPatch"},
}

// Catalog returns the currently resolved hook catalog in deterministic pipeline order.
+func (h *Hooks) Catalog(filter CatalogFilter) ([]CatalogEntry, error) { + if h == nil { + return nil, nil + } + + h.mu.RLock() + defer h.mu.RUnlock() + + entries := make([]CatalogEntry, 0) + for _, event := range AllHookEvents() { + order := 0 + for _, hook := range h.snapshot[event] { + if hook == nil || !catalogHookMatchesFilter(*hook, filter) { + continue + } + executorKind := HookExecutorKind("") + if hook.Executor != nil { + executorKind = hook.Executor.Kind() + } + order++ + entries = append(entries, CatalogEntry{ + Order: order, + Name: hook.Name, + Event: hook.Event, + Source: hook.Source, + SkillSource: hook.Decl.SkillSource, + Mode: hook.Mode, + Required: hook.Required, + Priority: hook.Priority, + Timeout: hook.Timeout, + ExecutorKind: executorKind, + Matcher: cloneHookMatcher(hook.Matcher), + Metadata: cloneStringMap(hook.Metadata), + }) + } + } + + return entries, nil +} + +// AllEventDescriptors returns the hook taxonomy metadata in deterministic order. +func AllEventDescriptors() []EventDescriptor { + return FilterEventDescriptors(EventFilter{}) +} + +// FilterEventDescriptors returns the hook taxonomy metadata in deterministic order. 
+func FilterEventDescriptors(filter EventFilter) []EventDescriptor { + descriptors := make([]EventDescriptor, 0, len(allHookEvents)) + for _, event := range AllHookEvents() { + if descriptor, ok := hookEventDescriptors[event]; ok { + if filter.Family != "" && descriptor.Family != filter.Family { + continue + } + if filter.SyncOnly && !descriptor.SyncEligible { + continue + } + descriptors = append(descriptors, descriptor) + } + } + return descriptors +} + +func catalogHookMatchesFilter(hook ResolvedHook, filter CatalogFilter) bool { + if filter.Event != "" && hook.Event != filter.Event { + return false + } + if filter.Source != nil && hook.Source != *filter.Source { + return false + } + if filter.Mode != "" && hook.Mode != filter.Mode { + return false + } + if !catalogStringMatches(filter.AgentName, hook.Matcher.AgentName) { + return false + } + if !catalogStringMatches(filter.WorkspaceID, hook.Matcher.WorkspaceID) { + return false + } + if !catalogStringMatches(filter.WorkspaceRoot, hook.Matcher.WorkspaceRoot) { + return false + } + return true +} + +func catalogStringMatches(filter string, value string) bool { + if filter == "" || value == "" { + return true + } + return filter == value +} + +func cloneHookMatcher(src HookMatcher) HookMatcher { + cloned := src + if src.ToolReadOnly != nil { + value := *src.ToolReadOnly + cloned.ToolReadOnly = &value + } + return cloned +} diff --git a/internal/hooks/introspection_test.go b/internal/hooks/introspection_test.go new file mode 100644 index 000000000..ece5b9b05 --- /dev/null +++ b/internal/hooks/introspection_test.go @@ -0,0 +1,257 @@ +package hooks + +import ( + "context" + "testing" +) + +func TestHooksCatalogFiltersByWorkspaceAndAgent(t *testing.T) { + t.Parallel() + + readOnly := true + hooks := newTestHooks( + t, + WithConfigDeclarations([]HookDecl{ + { + Name: "matching-session", + Event: HookSessionPostCreate, + Mode: HookModeSync, + Matcher: HookMatcher{ + AgentName: "coder", + WorkspaceID: "ws-alpha", + 
WorkspaceRoot: "/workspace/alpha", + }, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + Metadata: map[string]string{"origin": "test"}, + }, + { + Name: "tool-hook", + Event: HookToolPreCall, + Mode: HookModeSync, + Matcher: HookMatcher{ + ToolReadOnly: &readOnly, + }, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + { + Name: "other-workspace", + Event: HookSessionPostCreate, + Mode: HookModeSync, + Matcher: HookMatcher{ + WorkspaceID: "ws-beta", + }, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + }), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v", err) + } + + entries, err := hooks.Catalog(CatalogFilter{ + AgentName: "coder", + WorkspaceID: "ws-alpha", + WorkspaceRoot: "/workspace/alpha", + }) + if err != nil { + t.Fatalf("Catalog() error = %v", err) + } + if got, want := len(entries), 2; got != want { + t.Fatalf("len(entries) = %d, want %d", got, want) + } + if entries[0].Name != "matching-session" { + t.Fatalf("entries[0].Name = %q, want matching-session", entries[0].Name) + } + if entries[0].Metadata["origin"] != "test" { + t.Fatalf("entries[0].Metadata = %#v", entries[0].Metadata) + } + if entries[1].Name != "tool-hook" { + t.Fatalf("entries[1].Name = %q, want tool-hook", entries[1].Name) + } + if entries[1].Matcher.ToolReadOnly == nil || !*entries[1].Matcher.ToolReadOnly { + t.Fatalf("entries[1].Matcher.ToolReadOnly = %#v, want true", entries[1].Matcher.ToolReadOnly) + } +} + +func TestAllEventDescriptorsReturnsFullTaxonomy(t *testing.T) { + t.Parallel() + + descriptors := AllEventDescriptors() + if got, want := len(descriptors), len(AllHookEvents()); got != want { + t.Fatalf("len(descriptors) = %d, want %d", got, want) + } + + byEvent := make(map[HookEvent]EventDescriptor, len(descriptors)) + for _, descriptor := range descriptors { + byEvent[descriptor.Event] = descriptor + } + if descriptor := byEvent[HookMessageDelta]; descriptor.SyncEligible { + 
t.Fatalf("message.delta SyncEligible = true, want false") + } + if descriptor := byEvent[HookPermissionRequest]; !descriptor.SyncEligible { + t.Fatalf("permission.request SyncEligible = false, want true") + } +} + +func TestHooksCatalogFiltersByEventSourceModeAndExposesExecutorKind(t *testing.T) { + t.Parallel() + + source := HookSourceConfig + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{{ + Name: "native-tool", + Event: HookToolPreCall, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }}), + WithConfigDeclarations([]HookDecl{ + { + Name: "config-tool-sync", + Event: HookToolPreCall, + Mode: HookModeSync, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + { + Name: "config-tool-async", + Event: HookToolPreCall, + Mode: HookModeAsync, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + { + Name: "config-session-sync", + Event: HookSessionPostCreate, + Mode: HookModeSync, + Command: "/bin/sh", + Args: []string{"-c", "printf '{}'"}, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "native-tool": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ ToolPreCallPayload) (ToolCallPatch, error) { + return ToolCallPatch{}, nil + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v", err) + } + + entries, err := hooks.Catalog(CatalogFilter{ + Event: HookToolPreCall, + Source: &source, + Mode: HookModeSync, + }) + if err != nil { + t.Fatalf("Catalog() error = %v", err) + } + if got, want := len(entries), 1; got != want { + t.Fatalf("len(entries) = %d, want %d", got, want) + } + if entries[0].Name != "config-tool-sync" { + t.Fatalf("entries[0].Name = %q, want config-tool-sync", entries[0].Name) + } + if entries[0].ExecutorKind != HookExecutorSubprocess { + t.Fatalf("entries[0].ExecutorKind = %q, want %q", entries[0].ExecutorKind, HookExecutorSubprocess) + } + + nativeEntries, err := hooks.Catalog(CatalogFilter{Event: 
HookToolPreCall}) + if err != nil { + t.Fatalf("Catalog(native) error = %v", err) + } + if got, want := len(nativeEntries), 3; got != want { + t.Fatalf("len(nativeEntries) = %d, want %d", got, want) + } + if nativeEntries[0].Name != "native-tool" || nativeEntries[0].ExecutorKind != HookExecutorNative { + t.Fatalf("nativeEntries[0] = %#v, want native executor metadata", nativeEntries[0]) + } +} + +func TestFilterEventDescriptorsSupportsFamilyAndSyncOnly(t *testing.T) { + t.Parallel() + + descriptors := FilterEventDescriptors(EventFilter{ + Family: HookEventFamilyTool, + SyncOnly: true, + }) + if len(descriptors) == 0 { + t.Fatal("FilterEventDescriptors() returned no tool descriptors") + } + + for _, descriptor := range descriptors { + if descriptor.Family != HookEventFamilyTool { + t.Fatalf("descriptor.Family = %q, want %q", descriptor.Family, HookEventFamilyTool) + } + if !descriptor.SyncEligible { + t.Fatalf("descriptor.SyncEligible = false for %q, want true", descriptor.Event) + } + } +} + +func TestHookTelemetryHelpersExposeSessionIDAndSink(t *testing.T) { + t.Parallel() + + sink := &captureTelemetrySink{} + hooks := NewHooks(WithTelemetrySink(sink)) + if hooks.telemetrySink != sink { + t.Fatalf("telemetrySink = %#v, want %#v", hooks.telemetrySink, sink) + } + + writer := &captureHookRunWriter{} + ctx := WithHookRunWriter(context.Background(), writer) + if HookRunWriterFromContext(ctx) != writer { + t.Fatal("HookRunWriterFromContext() did not return attached writer") + } + + payload := SessionPostCreatePayload{ + SessionContext: SessionContext{SessionID: "sess-1"}, + } + if got := sessionIDFromPayload(payload); got != "sess-1" { + t.Fatalf("sessionIDFromPayload() = %q, want sess-1", got) + } +} + +func TestHooksCatalogAllowsNilExecutorInSnapshot(t *testing.T) { + t.Parallel() + + hooks := &Hooks{ + snapshot: map[HookEvent][]*ResolvedHook{ + HookToolPreCall: {&ResolvedHook{ + RegisteredHook: RegisteredHook{ + Name: "nil-executor", + Event: HookToolPreCall, + 
Source: HookSourceConfig, + Mode: HookModeSync, + Priority: 500, + }, + Decl: HookDecl{ + Name: "nil-executor", + Event: HookToolPreCall, + }, + }}, + }, + } + + entries, err := hooks.Catalog(CatalogFilter{Event: HookToolPreCall}) + if err != nil { + t.Fatalf("Catalog() error = %v", err) + } + if got, want := len(entries), 1; got != want { + t.Fatalf("len(entries) = %d, want %d", got, want) + } + if entries[0].ExecutorKind != "" { + t.Fatalf("entries[0].ExecutorKind = %q, want empty string for nil executor", entries[0].ExecutorKind) + } +} + +type captureTelemetrySink struct{} + +func (*captureTelemetrySink) WriteHookRecord(context.Context, string, HookRunRecord) error { + return nil +} diff --git a/internal/hooks/matcher.go b/internal/hooks/matcher.go new file mode 100644 index 000000000..873dc27d6 --- /dev/null +++ b/internal/hooks/matcher.go @@ -0,0 +1,312 @@ +package hooks + +import ( + "fmt" + "path" + "sort" + "strings" +) + +var allowedMatcherFieldsByFamily = map[HookEventFamily]map[string]struct{}{ + HookEventFamilySession: { + "agent_name": {}, + "workspace_id": {}, + "workspace_root": {}, + "session_type": {}, + }, + HookEventFamilyInput: { + "agent_name": {}, + "workspace_id": {}, + "workspace_root": {}, + "input_class": {}, + }, + HookEventFamilyPrompt: { + "agent_name": {}, + "workspace_id": {}, + "workspace_root": {}, + "input_class": {}, + }, + HookEventFamilyEvent: { + "agent_name": {}, + "acp_event_type": {}, + "turn_id": {}, + }, + HookEventFamilyAgent: { + "agent_name": {}, + "workspace_id": {}, + "workspace_root": {}, + }, + HookEventFamilyTurn: { + "agent_name": {}, + "workspace_id": {}, + "workspace_root": {}, + "input_class": {}, + }, + HookEventFamilyTool: { + "tool_name": {}, + "tool_namespace": {}, + "tool_read_only": {}, + }, + HookEventFamilyPermission: { + "tool_name": {}, + "decision_class": {}, + }, + HookEventFamilyMessage: { + "message_role": {}, + "message_delta_type": {}, + }, + HookEventFamilyContext: { + "compaction_reason": {}, 
+ "compaction_strategy": {}, + }, +} + +// ValidateMatcherForEvent ensures only the matcher fields defined for the event +// family are present. +func ValidateMatcherForEvent(event HookEvent, matcher HookMatcher) error { + if err := event.Validate(); err != nil { + return err + } + + fields := matcherFieldNames(matcher) + if len(fields) == 0 { + return nil + } + + allowed := allowedMatcherFieldsByFamily[event.Family()] + invalid := make([]string, 0, len(fields)) + for _, field := range fields { + if _, ok := allowed[field]; ok { + continue + } + invalid = append(invalid, field) + } + if len(invalid) == 0 { + return validateMatcherPatterns(matcher) + } + + sort.Strings(invalid) + return fmt.Errorf("hooks: matcher fields [%s] are not valid for event %q", strings.Join(invalid, ", "), event) +} + +// MatchesSession matches session-family hooks. +func (m HookMatcher) MatchesSession(payload SessionContext) bool { + return m.matchSessionContext(payload, true) +} + +// MatchesInput matches input-family hooks. +func (m HookMatcher) MatchesInput(payload InputPreSubmitPayload) bool { + return m.matchSessionContext(payload.SessionContext, false) && + matchStringField(m.InputClass, payload.InputClass) +} + +// MatchesPrompt matches prompt-family hooks. +func (m HookMatcher) MatchesPrompt(payload PromptPayload) bool { + return m.matchSessionContext(payload.SessionContext, false) && + matchStringField(m.InputClass, payload.InputClass) +} + +// MatchesEvent matches event-record-family hooks. +func (m HookMatcher) MatchesEvent(payload EventRecordPayload) bool { + return matchStringField(m.AgentName, payload.AgentName) && + matchStringField(m.ACPEventType, payload.RecordType) && + matchStringField(m.TurnID, payload.TurnID) +} + +// MatchesAgentPreStart matches pre-start agent hooks. 
+func (m HookMatcher) MatchesAgentPreStart(payload AgentPreStartPayload) bool { + return m.matchSessionContext(payload.SessionContext, false) +} + +// MatchesAgentLifecycle matches spawned, crashed, and stopped agent hooks. +func (m HookMatcher) MatchesAgentLifecycle(payload AgentLifecyclePayload) bool { + return m.matchSessionContext(payload.SessionContext, false) +} + +// MatchesTurn matches turn-family hooks. +func (m HookMatcher) MatchesTurn(payload TurnPayload) bool { + return m.matchSessionContext(payload.SessionContext, false) && + matchStringField(m.InputClass, payload.InputClass) +} + +// MatchesMessage matches message-family hooks. +func (m HookMatcher) MatchesMessage(payload MessagePayload) bool { + return matchStringField(m.MessageRole, payload.Role) && + matchStringField(m.MessageDeltaType, payload.DeltaType) +} + +// MatchesToolPreCall matches tool pre-call hooks. +func (m HookMatcher) MatchesToolPreCall(payload ToolPreCallPayload) bool { + return m.matchToolCall(payload.ToolCallRef) +} + +// MatchesToolPostCall matches tool post-call hooks. +func (m HookMatcher) MatchesToolPostCall(payload ToolPostCallPayload) bool { + return m.matchToolCall(payload.ToolCallRef) +} + +// MatchesToolPostError matches tool post-error hooks. +func (m HookMatcher) MatchesToolPostError(payload ToolPostErrorPayload) bool { + return m.matchToolCall(payload.ToolCallRef) +} + +// MatchesPermissionRequest matches permission-request hooks. +func (m HookMatcher) MatchesPermissionRequest(payload PermissionRequestPayload) bool { + return m.matchPermission(payload.ToolCall.Kind, payload.DecisionClass) +} + +// MatchesPermissionResolution matches resolved and denied permission hooks. +func (m HookMatcher) MatchesPermissionResolution(payload PermissionResolutionPayload) bool { + return m.matchPermission(payload.ToolCall.Kind, payload.DecisionClass) +} + +// MatchesContextCompact matches context-compaction hooks. 
+func (m HookMatcher) MatchesContextCompact(payload ContextCompactPayload) bool { + return matchStringField(m.CompactionReason, payload.Reason) && + matchStringField(m.CompactionStrategy, payload.Strategy) +} + +func (m HookMatcher) matchSessionContext(payload SessionContext, includeSessionType bool) bool { + if !matchStringField(m.AgentName, payload.AgentName) { + return false + } + if !matchStringField(m.WorkspaceID, payload.WorkspaceID) { + return false + } + if !matchStringField(m.WorkspaceRoot, payload.Workspace) { + return false + } + if includeSessionType && !matchStringField(m.SessionType, payload.SessionType) { + return false + } + return true +} + +func (m HookMatcher) matchToolCall(payload ToolCallRef) bool { + if !matchStringField(m.ToolName, payload.ToolName) { + return false + } + if !matchStringField(m.ToolNamespace, payload.ToolNamespace) { + return false + } + if m.ToolReadOnly != nil && payload.ReadOnly != *m.ToolReadOnly { + return false + } + return true +} + +func (m HookMatcher) matchPermission(toolName string, decisionClass string) bool { + return matchStringField(m.ToolName, toolName) && + matchStringField(m.DecisionClass, decisionClass) +} + +func normalizeHookMatcher(matcher HookMatcher) HookMatcher { + normalized := HookMatcher{ + AgentName: strings.TrimSpace(matcher.AgentName), + AgentType: strings.TrimSpace(matcher.AgentType), + WorkspaceID: strings.TrimSpace(matcher.WorkspaceID), + WorkspaceRoot: strings.TrimSpace(matcher.WorkspaceRoot), + SessionType: strings.TrimSpace(matcher.SessionType), + InputClass: strings.TrimSpace(matcher.InputClass), + ACPEventType: strings.TrimSpace(matcher.ACPEventType), + TurnID: strings.TrimSpace(matcher.TurnID), + ToolName: strings.TrimSpace(matcher.ToolName), + ToolNamespace: strings.TrimSpace(matcher.ToolNamespace), + DecisionClass: strings.TrimSpace(matcher.DecisionClass), + MessageRole: strings.TrimSpace(matcher.MessageRole), + MessageDeltaType: strings.TrimSpace(matcher.MessageDeltaType), + 
CompactionReason: strings.TrimSpace(matcher.CompactionReason), + CompactionStrategy: strings.TrimSpace(matcher.CompactionStrategy), + } + if matcher.ToolReadOnly != nil { + value := *matcher.ToolReadOnly + normalized.ToolReadOnly = &value + } + return normalized +} + +func matcherFieldNames(matcher HookMatcher) []string { + fields := make([]string, 0, 16) + + appendIf := func(name string, present bool) { + if present { + fields = append(fields, name) + } + } + + appendIf("agent_name", matcher.AgentName != "") + appendIf("agent_type", matcher.AgentType != "") + appendIf("workspace_id", matcher.WorkspaceID != "") + appendIf("workspace_root", matcher.WorkspaceRoot != "") + appendIf("session_type", matcher.SessionType != "") + appendIf("input_class", matcher.InputClass != "") + appendIf("acp_event_type", matcher.ACPEventType != "") + appendIf("turn_id", matcher.TurnID != "") + appendIf("tool_name", matcher.ToolName != "") + appendIf("tool_namespace", matcher.ToolNamespace != "") + appendIf("tool_read_only", matcher.ToolReadOnly != nil) + appendIf("decision_class", matcher.DecisionClass != "") + appendIf("message_role", matcher.MessageRole != "") + appendIf("message_delta_type", matcher.MessageDeltaType != "") + appendIf("compaction_reason", matcher.CompactionReason != "") + appendIf("compaction_strategy", matcher.CompactionStrategy != "") + + return fields +} + +func validateMatcherPatterns(matcher HookMatcher) error { + patterns := []struct { + field string + pattern string + }{ + {field: "agent_name", pattern: matcher.AgentName}, + {field: "agent_type", pattern: matcher.AgentType}, + {field: "workspace_id", pattern: matcher.WorkspaceID}, + {field: "workspace_root", pattern: matcher.WorkspaceRoot}, + {field: "session_type", pattern: matcher.SessionType}, + {field: "input_class", pattern: matcher.InputClass}, + {field: "acp_event_type", pattern: matcher.ACPEventType}, + {field: "turn_id", pattern: matcher.TurnID}, + {field: "tool_name", pattern: matcher.ToolName}, + 
{field: "tool_namespace", pattern: matcher.ToolNamespace}, + {field: "decision_class", pattern: matcher.DecisionClass}, + {field: "message_role", pattern: matcher.MessageRole}, + {field: "message_delta_type", pattern: matcher.MessageDeltaType}, + {field: "compaction_reason", pattern: matcher.CompactionReason}, + {field: "compaction_strategy", pattern: matcher.CompactionStrategy}, + } + for _, item := range patterns { + if err := validateMatcherPattern(item.field, item.pattern); err != nil { + return err + } + } + return nil +} + +func validateMatcherPattern(field string, pattern string) error { + pattern = strings.TrimSpace(pattern) + if pattern == "" || !strings.ContainsAny(pattern, "*?[]") { + return nil + } + if _, err := path.Match(pattern, ""); err != nil { + return fmt.Errorf("hooks: matcher.%s pattern %q is invalid: %w", field, pattern, err) + } + return nil +} + +func matchStringField(pattern string, value string) bool { + pattern = strings.TrimSpace(pattern) + if pattern == "" || pattern == "*" { + return true + } + + value = strings.TrimSpace(value) + if !strings.ContainsAny(pattern, "*?[]") { + return pattern == value + } + + matched, err := path.Match(pattern, value) + // Invalid patterns are treated as non-matching at runtime; validation should + // reject them earlier during normalization. 
+ return err == nil && matched +} diff --git a/internal/hooks/matcher_test.go b/internal/hooks/matcher_test.go new file mode 100644 index 000000000..c2aba519c --- /dev/null +++ b/internal/hooks/matcher_test.go @@ -0,0 +1,260 @@ +package hooks + +import "testing" + +func TestHookMatcherMatchesSession(t *testing.T) { + t.Parallel() + + matcher := HookMatcher{ + WorkspaceID: "ws-1", + AgentName: "claude", + } + payload := SessionContext{ + WorkspaceID: "ws-1", + AgentName: "claude", + } + if !matcher.MatchesSession(payload) { + t.Fatal("MatchesSession() = false, want true") + } + + payload.AgentName = "codex" + if matcher.MatchesSession(payload) { + t.Fatal("MatchesSession() = true, want false for non-matching agent") + } +} + +func TestHookMatcherMatchesToolWithWildcard(t *testing.T) { + t.Parallel() + + readOnly := true + matcher := HookMatcher{ + ToolName: "read_*", + ToolNamespace: "fs", + ToolReadOnly: &readOnly, + } + + payload := ToolPreCallPayload{ + ToolCallRef: ToolCallRef{ + ToolName: "read_text_file", + ToolNamespace: "fs", + ReadOnly: true, + }, + } + if !matcher.MatchesToolPreCall(payload) { + t.Fatal("MatchesToolPreCall() = false, want true") + } + + payload.ToolNamespace = "terminal" + if matcher.MatchesToolPreCall(payload) { + t.Fatal("MatchesToolPreCall() = true, want false for namespace mismatch") + } +} + +func TestHookMatcherMatchesPermission(t *testing.T) { + t.Parallel() + + matcher := HookMatcher{ + ToolName: "fs/*", + DecisionClass: "filesystem", + } + payload := PermissionRequestPayload{ + DecisionClass: "filesystem", + ToolCall: PermissionToolCall{ + Kind: "fs/read_text_file", + }, + } + if !matcher.MatchesPermissionRequest(payload) { + t.Fatal("MatchesPermissionRequest() = false, want true") + } + + payload.DecisionClass = "terminal" + if matcher.MatchesPermissionRequest(payload) { + t.Fatal("MatchesPermissionRequest() = true, want false for decision class mismatch") + } +} + +func TestHookMatcherMatchesMessageAndContext(t *testing.T) { + 
t.Parallel() + + messageMatcher := HookMatcher{ + MessageRole: "assistant", + MessageDeltaType: "text", + } + if !messageMatcher.MatchesMessage(MessagePayload{Role: "assistant", DeltaType: "text"}) { + t.Fatal("MatchesMessage() = false, want true") + } + if messageMatcher.MatchesMessage(MessagePayload{Role: "user", DeltaType: "text"}) { + t.Fatal("MatchesMessage() = true, want false for role mismatch") + } + + contextMatcher := HookMatcher{ + CompactionReason: "token_limit", + CompactionStrategy: "summary", + } + if !contextMatcher.MatchesContextCompact(ContextCompactPayload{ + Reason: "token_limit", + Strategy: "summary", + }) { + t.Fatal("MatchesContextCompact() = false, want true") + } + if contextMatcher.MatchesContextCompact(ContextCompactPayload{ + Reason: "manual", + Strategy: "summary", + }) { + t.Fatal("MatchesContextCompact() = true, want false for reason mismatch") + } +} + +func TestHookMatcherMatchesInput(t *testing.T) { + t.Parallel() + + scopeMatcher := HookMatcher{ + AgentName: "claude", + WorkspaceID: "ws-1", + WorkspaceRoot: "/workspace/demo", + InputClass: "chat", + } + if !scopeMatcher.MatchesInput(InputPreSubmitPayload{ + SessionContext: SessionContext{ + AgentName: "claude", + WorkspaceID: "ws-1", + Workspace: "/workspace/demo", + }, + InputClass: "chat", + }) { + t.Fatal("MatchesInput() = false, want true") + } +} + +func TestHookMatcherMatchesPrompt(t *testing.T) { + t.Parallel() + + scopeMatcher := HookMatcher{ + AgentName: "claude", + WorkspaceID: "ws-1", + WorkspaceRoot: "/workspace/demo", + InputClass: "chat", + } + if !scopeMatcher.MatchesPrompt(PromptPayload{ + SessionContext: SessionContext{ + AgentName: "claude", + WorkspaceID: "ws-1", + Workspace: "/workspace/demo", + }, + InputClass: "chat", + }) { + t.Fatal("MatchesPrompt() = false, want true") + } +} + +func TestHookMatcherMatchesAgentPreStart(t *testing.T) { + t.Parallel() + + scopeMatcher := HookMatcher{ + AgentName: "claude", + WorkspaceID: "ws-1", + WorkspaceRoot: 
"/workspace/demo", + } + if !scopeMatcher.MatchesAgentPreStart(AgentPreStartPayload{ + SessionContext: SessionContext{ + AgentName: "claude", + WorkspaceID: "ws-1", + Workspace: "/workspace/demo", + }, + }) { + t.Fatal("MatchesAgentPreStart() = false, want true") + } +} + +func TestHookMatcherMatchesAgentLifecycle(t *testing.T) { + t.Parallel() + + scopeMatcher := HookMatcher{ + AgentName: "claude", + WorkspaceID: "ws-1", + WorkspaceRoot: "/workspace/demo", + } + if !scopeMatcher.MatchesAgentLifecycle(AgentLifecyclePayload{ + SessionContext: SessionContext{ + AgentName: "claude", + WorkspaceID: "ws-1", + Workspace: "/workspace/demo", + }, + }) { + t.Fatal("MatchesAgentLifecycle() = false, want true") + } +} + +func TestHookMatcherMatchesTurn(t *testing.T) { + t.Parallel() + + scopeMatcher := HookMatcher{ + AgentName: "claude", + WorkspaceID: "ws-1", + WorkspaceRoot: "/workspace/demo", + InputClass: "chat", + } + if !scopeMatcher.MatchesTurn(TurnPayload{ + SessionContext: SessionContext{ + AgentName: "claude", + WorkspaceID: "ws-1", + Workspace: "/workspace/demo", + }, + InputClass: "chat", + }) { + t.Fatal("MatchesTurn() = false, want true") + } +} + +func TestHookMatcherMatchesEvent(t *testing.T) { + t.Parallel() + + eventMatcher := HookMatcher{ + AgentName: "claude", + ACPEventType: "permission", + TurnID: "turn-1", + } + if !eventMatcher.MatchesEvent(EventRecordPayload{ + SessionContext: SessionContext{AgentName: "claude"}, + TurnContext: TurnContext{TurnID: "turn-1"}, + RecordType: "permission", + }) { + t.Fatal("MatchesEvent() = false, want true") + } +} + +func TestHookMatcherMatchesToolResponses(t *testing.T) { + t.Parallel() + + toolMatcher := HookMatcher{ + ToolName: "run", + ToolNamespace: "terminal", + } + if !toolMatcher.MatchesToolPostCall(ToolPostCallPayload{ + ToolCallRef: ToolCallRef{ToolName: "run", ToolNamespace: "terminal"}, + }) { + t.Fatal("MatchesToolPostCall() = false, want true") + } + if 
!toolMatcher.MatchesToolPostError(ToolPostErrorPayload{ + ToolCallRef: ToolCallRef{ToolName: "run", ToolNamespace: "terminal"}, + }) { + t.Fatal("MatchesToolPostError() = false, want true") + } +} + +func TestHookMatcherMatchesPermissionResolution(t *testing.T) { + t.Parallel() + + permissionMatcher := HookMatcher{ + ToolName: "terminal/run", + DecisionClass: "command", + } + if !permissionMatcher.MatchesPermissionResolution(PermissionResolutionPayload{ + DecisionClass: "command", + ToolCall: PermissionToolCall{ + Kind: "terminal/run", + }, + }) { + t.Fatal("MatchesPermissionResolution() = false, want true") + } +} diff --git a/internal/hooks/normalize.go b/internal/hooks/normalize.go new file mode 100644 index 000000000..f37176669 --- /dev/null +++ b/internal/hooks/normalize.go @@ -0,0 +1,228 @@ +package hooks + +import ( + "errors" + "fmt" + "strings" +) + +// ErrExecutorResolverRequired reports that full normalization needs an executor +// resolver to attach the execution implementation. +var ErrExecutorResolverRequired = errors.New("hooks: executor resolver is required") + +// ExecutorResolver binds a normalized declaration to its executor +// implementation. +type ExecutorResolver func(HookDecl) (Executor, error) + +// ValidateHookDecl validates one declaration without binding an executor. +func ValidateHookDecl(decl HookDecl) error { + _, err := normalizeHookDecl(decl, nil, false) + return err +} + +// ValidateHookDecls validates a declaration slice and stops at the first error. +func ValidateHookDecls(decls []HookDecl) error { + for idx, decl := range decls { + if err := ValidateHookDecl(decl); err != nil { + return fmt.Errorf("hooks: validate declaration %d (%q): %w", idx, strings.TrimSpace(decl.Name), err) + } + } + return nil +} + +// NormalizeHookDecl validates one declaration, applies defaults, and binds the +// executor. 
+func NormalizeHookDecl(decl HookDecl, resolve ExecutorResolver) (ResolvedHook, error) { + return normalizeHookDecl(decl, resolve, true) +} + +// NormalizeHookDecls normalizes every declaration in order and stops at the +// first error. +func NormalizeHookDecls(decls []HookDecl, resolve ExecutorResolver) ([]ResolvedHook, error) { + resolved := make([]ResolvedHook, 0, len(decls)) + for idx, decl := range decls { + hook, err := NormalizeHookDecl(decl, resolve) + if err != nil { + return nil, fmt.Errorf("hooks: normalize declaration %d (%q): %w", idx, strings.TrimSpace(decl.Name), err) + } + resolved = append(resolved, hook) + } + return resolved, nil +} + +func normalizeHookDecl(decl HookDecl, resolve ExecutorResolver, bindExecutor bool) (ResolvedHook, error) { + normalized, err := sanitizedHookDecl(decl) + if err != nil { + return ResolvedHook{}, err + } + + registered := RegisteredHook{ + Name: normalized.Name, + Event: normalized.Event, + Source: normalized.Source, + Mode: normalized.Mode, + Required: normalized.Required, + Priority: normalized.Priority, + Timeout: normalized.Timeout, + Matcher: normalized.Matcher, + Metadata: cloneStringMap(normalized.Metadata), + } + + if bindExecutor { + if resolve == nil { + return ResolvedHook{}, fmt.Errorf("hooks: normalize hook %q: %w", normalized.Name, ErrExecutorResolverRequired) + } + + executor, err := resolve(normalized) + if err != nil { + return ResolvedHook{}, fmt.Errorf("hooks: resolve executor for hook %q: %w", normalized.Name, err) + } + if executor == nil { + return ResolvedHook{}, fmt.Errorf("hooks: resolve executor for hook %q: nil executor", normalized.Name) + } + if executor.Kind() != normalized.ExecutorKind { + return ResolvedHook{}, fmt.Errorf("hooks: resolve executor for hook %q returned kind %q, want %q", normalized.Name, executor.Kind(), normalized.ExecutorKind) + } + registered.Executor = executor + } + + if err := registered.Validate(); err != nil { + return ResolvedHook{}, err + } + + resolved := 
ResolvedHook{ + RegisteredHook: registered, + Decl: normalized, + } + if bindExecutor { + if err := resolved.Validate(); err != nil { + return ResolvedHook{}, err + } + } + + return resolved, nil +} + +func sanitizedHookDecl(decl HookDecl) (HookDecl, error) { + normalized := HookDecl{ + Name: strings.TrimSpace(decl.Name), + Event: decl.Event, + Source: decl.Source, + Mode: decl.Mode, + Required: decl.Required, + Priority: decl.Priority, + PrioritySet: decl.PrioritySet, + Timeout: decl.Timeout, + Matcher: normalizeHookMatcher(decl.Matcher), + ExecutorKind: decl.ExecutorKind, + Command: strings.TrimSpace(decl.Command), + Args: append([]string(nil), decl.Args...), + Env: cloneStringMap(decl.Env), + Metadata: cloneStringMap(decl.Metadata), + SkillSource: decl.SkillSource, + } + + if normalized.Name == "" { + return HookDecl{}, fmt.Errorf("hooks: hook name is required") + } + if err := normalized.Event.Validate(); err != nil { + return HookDecl{}, err + } + if err := normalized.Source.Validate(); err != nil { + return HookDecl{}, err + } + if err := normalized.SkillSource.Validate(); err != nil { + return HookDecl{}, err + } + if normalized.Source != HookSourceSkill && normalized.SkillSource != "" { + return HookDecl{}, fmt.Errorf("hooks: hook %q skill source is only valid for skill declarations", normalized.Name) + } + + if normalized.Mode == "" { + normalized.Mode = defaultHookMode(normalized.Source) + } + if err := normalized.Mode.Validate(); err != nil { + return HookDecl{}, err + } + if normalized.Required && normalized.Mode != HookModeSync { + return HookDecl{}, fmt.Errorf("hooks: hook %q cannot be required in async mode", normalized.Name) + } + if normalized.Mode == HookModeSync && !normalized.Event.SyncEligible() { + return HookDecl{}, fmt.Errorf("hooks: hook %q cannot use sync mode for async-only event %q", normalized.Name, normalized.Event) + } + if normalized.Timeout < 0 { + return HookDecl{}, fmt.Errorf("hooks: hook %q timeout must be non-negative", 
normalized.Name) + } + + priority, err := resolveHookPriority(normalized) + if err != nil { + return HookDecl{}, err + } + normalized.Priority = priority + + kind, err := resolveHookExecutorKind(normalized) + if err != nil { + return HookDecl{}, err + } + normalized.ExecutorKind = kind + + if err := ValidateMatcherForEvent(normalized.Event, normalized.Matcher); err != nil { + return HookDecl{}, err + } + + return normalized, nil +} + +func defaultHookMode(_ HookSource) HookMode { + return HookModeAsync +} + +func resolveHookPriority(decl HookDecl) (int, error) { + if decl.Priority != 0 || decl.PrioritySet { + return decl.Priority, nil + } + + return DefaultHookPriority(decl.Source) +} + +func resolveHookExecutorKind(decl HookDecl) (HookExecutorKind, error) { + kind := decl.ExecutorKind + if kind == "" { + switch { + case decl.Command != "": + kind = HookExecutorSubprocess + case decl.Source == HookSourceNative: + kind = HookExecutorNative + default: + return "", fmt.Errorf("hooks: hook %q executor kind is required", decl.Name) + } + } + + if err := kind.Validate(); err != nil { + return "", err + } + + if kind == HookExecutorNative && decl.Source != HookSourceNative { + return "", fmt.Errorf("hooks: hook %q only native sources may use native executors", decl.Name) + } + if kind == HookExecutorSubprocess && decl.Command == "" { + return "", fmt.Errorf("hooks: hook %q subprocess executor requires a command", decl.Name) + } + if kind != HookExecutorSubprocess && (decl.Command != "" || len(decl.Args) > 0 || len(decl.Env) > 0) { + return "", fmt.Errorf("hooks: hook %q shell command fields require a subprocess executor", decl.Name) + } + + return kind, nil +} + +func cloneStringMap(src map[string]string) map[string]string { + if len(src) == 0 { + return nil + } + + dst := make(map[string]string, len(src)) + for key, value := range src { + dst[key] = value + } + return dst +} diff --git a/internal/hooks/normalize_test.go b/internal/hooks/normalize_test.go new file mode 
100644 index 000000000..4cf479aa4 --- /dev/null +++ b/internal/hooks/normalize_test.go @@ -0,0 +1,296 @@ +package hooks + +import ( + "errors" + "strings" + "testing" + "time" +) + +func TestValidateHookDeclRejectsSyncForAsyncOnlyEvent(t *testing.T) { + t.Parallel() + + err := ValidateHookDecl(HookDecl{ + Name: "delta-blocker", + Event: HookMessageDelta, + Source: HookSourceConfig, + Mode: HookModeSync, + Command: "./hook.sh", + }) + if err == nil { + t.Fatal("ValidateHookDecl() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "async-only event") || !strings.Contains(err.Error(), string(HookMessageDelta)) { + t.Fatalf("ValidateHookDecl() error = %q, want async-only message.delta detail", err) + } +} + +func TestValidateHookDeclRejectsRequiredAsyncHook(t *testing.T) { + t.Parallel() + + err := ValidateHookDecl(HookDecl{ + Name: "required-async", + Event: HookSessionPostCreate, + Source: HookSourceConfig, + Mode: HookModeAsync, + Required: true, + Command: "./hook.sh", + }) + if err == nil { + t.Fatal("ValidateHookDecl() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "required-async") || !strings.Contains(err.Error(), "async mode") { + t.Fatalf("ValidateHookDecl() error = %q, want required async detail", err) + } +} + +func TestNormalizeHookDeclAppliesNativeDefaults(t *testing.T) { + t.Parallel() + + hook, err := NormalizeHookDecl(HookDecl{ + Name: "native-defaults", + Event: HookSessionPostCreate, + Source: HookSourceNative, + }, expectExecutorKind(t, HookExecutorNative)) + if err != nil { + t.Fatalf("NormalizeHookDecl() error = %v", err) + } + + if hook.Priority != 1000 { + t.Fatalf("NormalizeHookDecl() priority = %d, want 1000", hook.Priority) + } + if hook.Mode != HookModeAsync { + t.Fatalf("NormalizeHookDecl() mode = %q, want %q", hook.Mode, HookModeAsync) + } + if hook.Executor == nil || hook.Executor.Kind() != HookExecutorNative { + t.Fatalf("NormalizeHookDecl() executor = %#v, want native executor", hook.Executor) + } 
+} + +func TestNormalizeHookDeclAppliesSkillDefaults(t *testing.T) { + t.Parallel() + + hook, err := NormalizeHookDecl(HookDecl{ + Name: "skill-defaults", + Event: HookSessionPostCreate, + Source: HookSourceSkill, + Command: "./hook.sh", + SkillSource: HookSkillSourceUser, + }, expectExecutorKind(t, HookExecutorSubprocess)) + if err != nil { + t.Fatalf("NormalizeHookDecl() error = %v", err) + } + + if hook.Priority != 0 { + t.Fatalf("NormalizeHookDecl() priority = %d, want 0", hook.Priority) + } + if hook.Decl.ExecutorKind != HookExecutorSubprocess { + t.Fatalf("NormalizeHookDecl() executor kind = %q, want %q", hook.Decl.ExecutorKind, HookExecutorSubprocess) + } +} + +func TestNormalizeHookDeclPreservesExplicitZeroPriority(t *testing.T) { + t.Parallel() + + hook, err := NormalizeHookDecl(HookDecl{ + Name: "config-explicit-zero", + Event: HookSessionPostCreate, + Source: HookSourceConfig, + Priority: 0, + PrioritySet: true, + Command: "./hook.sh", + }, expectExecutorKind(t, HookExecutorSubprocess)) + if err != nil { + t.Fatalf("NormalizeHookDecl() error = %v", err) + } + + if hook.Priority != 0 { + t.Fatalf("NormalizeHookDecl() priority = %d, want explicit 0", hook.Priority) + } +} + +func TestNormalizeHookDeclRequiresResolver(t *testing.T) { + t.Parallel() + + _, err := NormalizeHookDecl(HookDecl{ + Name: "no-resolver", + Event: HookSessionPostCreate, + Source: HookSourceNative, + }, nil) + if err == nil { + t.Fatal("NormalizeHookDecl() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), ErrExecutorResolverRequired.Error()) { + t.Fatalf("NormalizeHookDecl() error = %q, want resolver detail", err) + } +} + +func TestValidateHookDeclRejectsIllegalMatcherField(t *testing.T) { + t.Parallel() + + err := ValidateHookDecl(HookDecl{ + Name: "bad-matcher", + Event: HookSessionPostCreate, + Source: HookSourceConfig, + Matcher: HookMatcher{ + ToolName: "read_file", + }, + Command: "./hook.sh", + }) + if err == nil { + t.Fatal("ValidateHookDecl() error = nil, 
want non-nil") + } + if !strings.Contains(err.Error(), "tool_name") || !strings.Contains(err.Error(), string(HookSessionPostCreate)) { + t.Fatalf("ValidateHookDecl() error = %q, want matcher field detail", err) + } +} + +func TestNormalizeHookDeclClonesMutableFields(t *testing.T) { + t.Parallel() + + env := map[string]string{"A": "1"} + metadata := map[string]string{"team": "hooks"} + args := []string{"--demo"} + readOnly := true + + hook, err := NormalizeHookDecl(HookDecl{ + Name: "clone-fields", + Event: HookToolPreCall, + Source: HookSourceConfig, + Command: "./hook.sh", + Args: args, + Env: env, + Metadata: metadata, + Matcher: HookMatcher{ + ToolReadOnly: &readOnly, + }, + Timeout: 5 * time.Second, + }, expectExecutorKind(t, HookExecutorSubprocess)) + if err != nil { + t.Fatalf("NormalizeHookDecl() error = %v", err) + } + + args[0] = "--changed" + env["A"] = "2" + metadata["team"] = "changed" + readOnly = false + + if hook.Decl.Args[0] != "--demo" { + t.Fatalf("NormalizeHookDecl() args = %#v, want cloned args", hook.Decl.Args) + } + if hook.Decl.Env["A"] != "1" { + t.Fatalf("NormalizeHookDecl() env = %#v, want cloned env", hook.Decl.Env) + } + if hook.Decl.Metadata["team"] != "hooks" { + t.Fatalf("NormalizeHookDecl() metadata = %#v, want cloned metadata", hook.Decl.Metadata) + } + if hook.Matcher.ToolReadOnly == nil || !*hook.Matcher.ToolReadOnly { + t.Fatalf("NormalizeHookDecl() matcher read_only = %#v, want true clone", hook.Matcher.ToolReadOnly) + } +} + +func TestValidateAndNormalizeHookDecls(t *testing.T) { + t.Parallel() + + decls := []HookDecl{ + { + Name: "first", + Event: HookSessionPostCreate, + Source: HookSourceNative, + }, + { + Name: "second", + Event: HookToolPreCall, + Source: HookSourceConfig, + Command: "./hook.sh", + }, + } + + if err := ValidateHookDecls(decls); err != nil { + t.Fatalf("ValidateHookDecls() error = %v", err) + } + + hooks, err := NormalizeHookDecls(decls, func(decl HookDecl) (Executor, error) { + return stubExecutor{kind: 
decl.ExecutorKind}, nil + }) + if err != nil { + t.Fatalf("NormalizeHookDecls() error = %v", err) + } + if len(hooks) != len(decls) { + t.Fatalf("NormalizeHookDecls() len = %d, want %d", len(hooks), len(decls)) + } +} + +func TestValidateHookDeclRejectsNativeExecutorForNonNativeSource(t *testing.T) { + t.Parallel() + + err := ValidateHookDecl(HookDecl{ + Name: "config-native", + Event: HookSessionPostCreate, + Source: HookSourceConfig, + ExecutorKind: HookExecutorNative, + }) + if err == nil { + t.Fatal("ValidateHookDecl() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "native sources") { + t.Fatalf("ValidateHookDecl() error = %q, want native-source detail", err) + } +} + +func TestValidateHookDeclRejectsSkillSourceOnNonSkillDeclaration(t *testing.T) { + t.Parallel() + + err := ValidateHookDecl(HookDecl{ + Name: "config-skill-source", + Event: HookSessionPostCreate, + Source: HookSourceConfig, + Command: "./hook.sh", + SkillSource: HookSkillSourceWorkspace, + }) + if err == nil { + t.Fatal("ValidateHookDecl() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "skill source is only valid") { + t.Fatalf("ValidateHookDecl() error = %q, want skill-source detail", err) + } +} + +func TestDefaultHookPriorityRejectsUnknownSource(t *testing.T) { + t.Parallel() + + if _, err := DefaultHookPriority(HookSource(99)); !errors.Is(err, ErrInvalidHookSource) { + t.Fatalf("DefaultHookPriority() error = %v, want ErrInvalidHookSource", err) + } +} + +func TestValidateHookDeclRejectsInvalidMatcherPattern(t *testing.T) { + t.Parallel() + + err := ValidateHookDecl(HookDecl{ + Name: "bad-pattern", + Event: HookToolPreCall, + Source: HookSourceConfig, + Command: "./hook.sh", + Matcher: HookMatcher{ + ToolName: "[", + }, + }) + if err == nil { + t.Fatal("ValidateHookDecl() error = nil, want non-nil") + } + if !strings.Contains(err.Error(), "matcher.tool_name pattern") { + t.Fatalf("ValidateHookDecl() error = %q, want matcher pattern detail", err) + } 
+} + +func expectExecutorKind(t *testing.T, kind HookExecutorKind) ExecutorResolver { + t.Helper() + + return func(decl HookDecl) (Executor, error) { + if decl.ExecutorKind != kind { + t.Fatalf("executor resolver kind = %q, want %q", decl.ExecutorKind, kind) + } + return stubExecutor{kind: kind}, nil + } +} diff --git a/internal/hooks/ordering.go b/internal/hooks/ordering.go new file mode 100644 index 000000000..9cfc2385c --- /dev/null +++ b/internal/hooks/ordering.go @@ -0,0 +1,78 @@ +package hooks + +import ( + "errors" + "fmt" + "sort" +) + +var ErrInvalidHookSource = errors.New("hooks: invalid hook source") + +// DefaultHookPriority returns the documented default priority for the source. +func DefaultHookPriority(source HookSource) (int, error) { + switch source { + case HookSourceNative: + return 1000, nil + case HookSourceConfig: + return 500, nil + case HookSourceAgentDefinition: + return 100, nil + case HookSourceSkill: + return 0, nil + default: + return 0, fmt.Errorf("%w: %d", ErrInvalidHookSource, source) + } +} + +// SortResolvedHooks sorts the slice in place using deterministic dispatch +// precedence. +func SortResolvedHooks(hooks []*ResolvedHook) { + sort.SliceStable(hooks, func(i, j int) bool { + return resolvedHookLess(hooks[i], hooks[j]) + }) +} + +// OrderedResolvedHooks returns a sorted copy of the slice. +func OrderedResolvedHooks(hooks []*ResolvedHook) []*ResolvedHook { + ordered := append([]*ResolvedHook(nil), hooks...) 
+ SortResolvedHooks(ordered) + return ordered +} + +func resolvedHookLess(left *ResolvedHook, right *ResolvedHook) bool { + if left == nil || right == nil { + return left != nil && right == nil + } + + if left.Source != right.Source { + return left.Source < right.Source + } + if left.Priority != right.Priority { + return left.Priority > right.Priority + } + if left.Source == HookSourceSkill && left.Decl.SkillSource != right.Decl.SkillSource { + return hookSkillSourceRank(left.Decl.SkillSource) < hookSkillSourceRank(right.Decl.SkillSource) + } + if left.Name != right.Name { + return left.Name < right.Name + } + + return false +} + +func hookSkillSourceRank(source HookSkillSource) int { + switch source { + case HookSkillSourceBundled: + return 0 + case HookSkillSourceMarketplace: + return 1 + case HookSkillSourceUser: + return 2 + case HookSkillSourceAdditional: + return 3 + case HookSkillSourceWorkspace: + return 4 + default: + return 5 + } +} diff --git a/internal/hooks/ordering_test.go b/internal/hooks/ordering_test.go new file mode 100644 index 000000000..05dc38cb1 --- /dev/null +++ b/internal/hooks/ordering_test.go @@ -0,0 +1,187 @@ +package hooks + +import ( + "errors" + "testing" +) + +func TestSortResolvedHooks(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + assert func(*testing.T) + }{ + { + name: "Should order hooks by source precedence", + assert: func(t *testing.T) { + hooks := []*ResolvedHook{ + testResolvedHook("skill", HookSourceSkill, 0, HookSkillSourceUser), + testResolvedHook("agent", HookSourceAgentDefinition, 100, ""), + testResolvedHook("config", HookSourceConfig, 500, ""), + testResolvedHook("native", HookSourceNative, 1000, ""), + } + + SortResolvedHooks(hooks) + + assertHookNames(t, hooks, []string{"native", "config", "agent", "skill"}) + }, + }, + { + name: "Should order hooks by priority then name", + assert: func(t *testing.T) { + hooks := []*ResolvedHook{ + testResolvedHook("charlie", HookSourceConfig, 500, ""), + 
testResolvedHook("bravo", HookSourceConfig, 900, ""), + testResolvedHook("alpha", HookSourceConfig, 500, ""), + } + + SortResolvedHooks(hooks) + + assertHookNames(t, hooks, []string{"bravo", "alpha", "charlie"}) + }, + }, + { + name: "Should order skill hooks by skill source before name", + assert: func(t *testing.T) { + hooks := []*ResolvedHook{ + testResolvedHook("workspace-skill", HookSourceSkill, 0, HookSkillSourceWorkspace), + testResolvedHook("additional-skill", HookSourceSkill, 0, HookSkillSourceAdditional), + testResolvedHook("user-skill", HookSourceSkill, 0, HookSkillSourceUser), + testResolvedHook("marketplace-skill", HookSourceSkill, 0, HookSkillSourceMarketplace), + testResolvedHook("bundled-skill", HookSourceSkill, 0, HookSkillSourceBundled), + } + + SortResolvedHooks(hooks) + + assertHookNames(t, hooks, []string{ + "bundled-skill", + "marketplace-skill", + "user-skill", + "additional-skill", + "workspace-skill", + }) + }, + }, + { + name: "Should remain stable across repeated sorts", + assert: func(t *testing.T) { + first := testResolvedHook("same", HookSourceConfig, 500, "") + second := testResolvedHook("same", HookSourceConfig, 500, "") + hooks := []*ResolvedHook{first, second} + + SortResolvedHooks(hooks) + SortResolvedHooks(hooks) + + if hooks[0] != first || hooks[1] != second { + t.Fatalf("SortResolvedHooks() order = %#v, want stable original order", hooks) + } + }, + }, + { + name: "Should return a sorted copy without mutating the original slice", + assert: func(t *testing.T) { + original := []*ResolvedHook{ + testResolvedHook("skill", HookSourceSkill, 0, HookSkillSourceUser), + testResolvedHook("native", HookSourceNative, 1000, ""), + } + + ordered := OrderedResolvedHooks(original) + + assertHookNames(t, ordered, []string{"native", "skill"}) + assertHookNames(t, original, []string{"skill", "native"}) + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + tt.assert(t) + }) + } +} + +func 
TestDefaultHookPriority(t *testing.T) { + t.Parallel() + + tests := []struct { + source HookSource + want int + }{ + {source: HookSourceNative, want: 1000}, + {source: HookSourceConfig, want: 500}, + {source: HookSourceAgentDefinition, want: 100}, + {source: HookSourceSkill, want: 0}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.source.String(), func(t *testing.T) { + t.Parallel() + + got, err := DefaultHookPriority(tt.source) + if err != nil { + t.Fatalf("DefaultHookPriority() error = %v", err) + } + if got != tt.want { + t.Fatalf("DefaultHookPriority() = %d, want %d", got, tt.want) + } + }) + } + + t.Run("Should reject unknown sources with a sentinel error", func(t *testing.T) { + t.Parallel() + + _, err := DefaultHookPriority(HookSource(99)) + if !errors.Is(err, ErrInvalidHookSource) { + t.Fatalf("DefaultHookPriority() error = %v, want ErrInvalidHookSource", err) + } + }) +} + +func testResolvedHook(name string, source HookSource, priority int, skillSource HookSkillSource) *ResolvedHook { + kind := HookExecutorSubprocess + command := "./hook.sh" + if source == HookSourceNative { + kind = HookExecutorNative + command = "" + } + + return &ResolvedHook{ + RegisteredHook: RegisteredHook{ + Name: name, + Event: HookSessionPostCreate, + Source: source, + Mode: HookModeAsync, + Priority: priority, + Executor: stubExecutor{kind: kind}, + }, + Decl: HookDecl{ + Name: name, + Event: HookSessionPostCreate, + Source: source, + Mode: HookModeAsync, + Priority: priority, + PrioritySet: true, + ExecutorKind: kind, + Command: command, + SkillSource: skillSource, + }, + } +} + +func assertHookNames(t *testing.T, hooks []*ResolvedHook, want []string) { + t.Helper() + + if len(hooks) != len(want) { + t.Fatalf("hook count = %d, want %d", len(hooks), len(want)) + } + + for idx, hook := range hooks { + if hook.Name != want[idx] { + t.Fatalf("hook[%d] name = %q, want %q", idx, hook.Name, want[idx]) + } + } +} diff --git a/internal/hooks/payloads.go 
b/internal/hooks/payloads.go new file mode 100644 index 000000000..9b03f03a1 --- /dev/null +++ b/internal/hooks/payloads.go @@ -0,0 +1,500 @@ +package hooks + +import ( + "encoding/json" + "time" +) + +// PayloadBase carries the common identifiers attached to every hook payload. +type PayloadBase struct { + Event HookEvent `json:"event"` + Timestamp time.Time `json:"timestamp,omitempty"` +} + +// SessionContext carries the common session-scoped hook attributes. +type SessionContext struct { + SessionID string `json:"session_id,omitempty"` + SessionName string `json:"session_name,omitempty"` + SessionType string `json:"session_type,omitempty"` + AgentName string `json:"agent_name,omitempty"` + WorkspaceID string `json:"workspace_id,omitempty"` + Workspace string `json:"workspace,omitempty"` + ACPSessionID string `json:"acp_session_id,omitempty"` + State string `json:"state,omitempty"` + CreatedAt time.Time `json:"created_at,omitempty"` + UpdatedAt time.Time `json:"updated_at,omitempty"` +} + +// TurnContext carries the current turn identifier. +type TurnContext struct { + TurnID string `json:"turn_id,omitempty"` +} + +// ContextBlock is a typed free-form context fragment attached to inputs or prompts. +type ContextBlock struct { + Kind string `json:"kind,omitempty"` + Text string `json:"text,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +// ToolCallRef identifies a tool invocation in hook payloads. +type ToolCallRef struct { + ToolCallID string `json:"tool_call_id,omitempty"` + ToolName string `json:"tool_name,omitempty"` + ToolNamespace string `json:"tool_namespace,omitempty"` + ReadOnly bool `json:"read_only,omitempty"` +} + +// ToolLocation captures one path-scoped tool location. +type ToolLocation struct { + Path string `json:"path,omitempty"` + StartLine int `json:"start_line,omitempty"` + EndLine int `json:"end_line,omitempty"` +} + +// PermissionOption carries one interactive permission option. 
+type PermissionOption struct { + Decision string `json:"decision,omitempty"` + OptionID string `json:"option_id,omitempty"` + Kind string `json:"kind,omitempty"` + Label string `json:"label,omitempty"` +} + +// PermissionToolCall carries the tool details attached to a permission request. +type PermissionToolCall struct { + ID string `json:"id,omitempty"` + Kind string `json:"kind,omitempty"` + Title string `json:"title,omitempty"` + Status string `json:"status,omitempty"` + Locations []ToolLocation `json:"locations,omitempty"` +} + +// ControlPatch carries the common deny surface shared by mutable hook families. +type ControlPatch struct { + Deny bool `json:"deny,omitempty"` + DenyReason string `json:"deny_reason,omitempty"` +} + +// SessionPreCreatePayload is delivered before a session is created. +type SessionPreCreatePayload struct { + PayloadBase + SessionContext +} + +// SessionLifecyclePayload is shared by post-create, resume, and stop events. +type SessionLifecyclePayload struct { + PayloadBase + SessionContext +} + +// SessionPostCreatePayload is delivered after a session is created. +type SessionPostCreatePayload = SessionLifecyclePayload + +// SessionPreResumePayload is delivered before a session resumes. +type SessionPreResumePayload = SessionLifecyclePayload + +// SessionPostResumePayload is delivered after a session resumes. +type SessionPostResumePayload = SessionLifecyclePayload + +// SessionPreStopPayload is delivered before a session stops. +type SessionPreStopPayload = SessionLifecyclePayload + +// SessionPostStopPayload is delivered after a session stops. +type SessionPostStopPayload = SessionLifecyclePayload + +// SessionCreatePatch mutates or denies session lifecycle operations. 
+type SessionCreatePatch struct { + ControlPatch + SessionName *string `json:"session_name,omitempty"` + SessionType *string `json:"session_type,omitempty"` + AgentName *string `json:"agent_name,omitempty"` + WorkspaceID *string `json:"workspace_id,omitempty"` + Workspace *string `json:"workspace,omitempty"` +} + +// SessionPostCreatePatch is the post-create patch surface. +type SessionPostCreatePatch = SessionCreatePatch + +// SessionPreResumePatch is the pre-resume patch surface. +type SessionPreResumePatch = SessionCreatePatch + +// SessionPostResumePatch is the post-resume patch surface. +type SessionPostResumePatch = SessionCreatePatch + +// SessionPreStopPatch is the pre-stop patch surface. +type SessionPreStopPatch = SessionCreatePatch + +// SessionPostStopPatch is the post-stop patch surface. +type SessionPostStopPatch = SessionCreatePatch + +// InputPreSubmitPayload is delivered before prompt submission. +type InputPreSubmitPayload struct { + PayloadBase + SessionContext + TurnContext + InputClass string `json:"input_class,omitempty"` + Message string `json:"message,omitempty"` + ContextBlocks []ContextBlock `json:"context_blocks,omitempty"` +} + +// InputPreSubmitPatch mutates or denies the submitted input. +type InputPreSubmitPatch struct { + ControlPatch + Message *string `json:"message,omitempty"` + ContextBlocks []ContextBlock `json:"context_blocks,omitempty"` +} + +// PromptPayload is delivered after prompt assembly. +type PromptPayload struct { + PayloadBase + SessionContext + TurnContext + InputClass string `json:"input_class,omitempty"` + Prompt string `json:"prompt,omitempty"` + ContextBlocks []ContextBlock `json:"context_blocks,omitempty"` +} + +// PromptPatch mutates or denies the assembled prompt. +type PromptPatch struct { + ControlPatch + Prompt *string `json:"prompt,omitempty"` + ContextBlocks []ContextBlock `json:"context_blocks,omitempty"` +} + +// EventRecordPayload is shared by event pre/post-record hooks. 
+type EventRecordPayload struct { + PayloadBase + SessionContext + TurnContext + RecordType string `json:"record_type,omitempty"` + Sequence int64 `json:"sequence,omitempty"` + Content json.RawMessage `json:"content,omitempty"` +} + +// EventPreRecordPayload is delivered before an event record is written. +type EventPreRecordPayload = EventRecordPayload + +// EventPostRecordPayload is delivered after an event record is written. +type EventPostRecordPayload = EventRecordPayload + +// EventRecordPatch captures the optional observation patch surface for event hooks. +type EventRecordPatch struct { + Labels map[string]string `json:"labels,omitempty"` +} + +// EventPreRecordPatch is the pre-record patch surface. +type EventPreRecordPatch = EventRecordPatch + +// EventPostRecordPatch is the post-record patch surface. +type EventPostRecordPatch = EventRecordPatch + +// AgentPreStartPayload is delivered before an agent process starts. +type AgentPreStartPayload struct { + PayloadBase + SessionContext + Command string `json:"command,omitempty"` + Args []string `json:"args,omitempty"` + Cwd string `json:"cwd,omitempty"` + Provider string `json:"provider,omitempty"` + Model string `json:"model,omitempty"` +} + +// AgentLifecyclePayload is shared by spawned, crashed, and stopped hooks. +type AgentLifecyclePayload struct { + PayloadBase + SessionContext + Command string `json:"command,omitempty"` + Args []string `json:"args,omitempty"` + Cwd string `json:"cwd,omitempty"` + PID int `json:"pid,omitempty"` + Provider string `json:"provider,omitempty"` + Model string `json:"model,omitempty"` + Error string `json:"error,omitempty"` +} + +// AgentSpawnedPayload is delivered after an agent process starts. +type AgentSpawnedPayload = AgentLifecyclePayload + +// AgentCrashedPayload is delivered when an agent crashes. +type AgentCrashedPayload = AgentLifecyclePayload + +// AgentStoppedPayload is delivered after an agent stops. 
+type AgentStoppedPayload = AgentLifecyclePayload + +// AgentStartPatch mutates or denies a pre-start operation. +type AgentStartPatch struct { + ControlPatch + Command *string `json:"command,omitempty"` + Args []string `json:"args,omitempty"` + Cwd *string `json:"cwd,omitempty"` +} + +// AgentLifecyclePatch captures optional labels for observation events. +type AgentLifecyclePatch struct { + Labels map[string]string `json:"labels,omitempty"` +} + +// AgentSpawnedPatch is the spawned patch surface. +type AgentSpawnedPatch = AgentLifecyclePatch + +// AgentCrashedPatch is the crashed patch surface. +type AgentCrashedPatch = AgentLifecyclePatch + +// AgentStoppedPatch is the stopped patch surface. +type AgentStoppedPatch = AgentLifecyclePatch + +// TurnPayload is shared by turn start and end events. +type TurnPayload struct { + PayloadBase + SessionContext + TurnContext + InputClass string `json:"input_class,omitempty"` + UserMessage string `json:"user_message,omitempty"` +} + +// TurnStartPayload is delivered at turn start. +type TurnStartPayload = TurnPayload + +// TurnEndPayload is delivered at turn end. +type TurnEndPayload = TurnPayload + +// TurnPatch mutates or denies turn-scoped operations. +type TurnPatch struct { + ControlPatch + Labels map[string]string `json:"labels,omitempty"` +} + +// TurnStartPatch is the turn-start patch surface. +type TurnStartPatch = TurnPatch + +// TurnEndPatch is the turn-end patch surface. +type TurnEndPatch = TurnPatch + +// MessagePayload is shared by message start, delta, and end events. +type MessagePayload struct { + PayloadBase + SessionContext + TurnContext + MessageID string `json:"message_id,omitempty"` + Role string `json:"role,omitempty"` + DeltaType string `json:"delta_type,omitempty"` + Text string `json:"text,omitempty"` + Raw json.RawMessage `json:"raw,omitempty"` +} + +// MessageStartPayload is delivered when a message begins. 
+type MessageStartPayload = MessagePayload + +// MessageDeltaPayload is delivered for streaming message deltas. +type MessageDeltaPayload = MessagePayload + +// MessageEndPayload is delivered when a message finishes. +type MessageEndPayload = MessagePayload + +// MessagePatch mutates or denies message-scoped operations. +type MessagePatch struct { + ControlPatch + Role *string `json:"role,omitempty"` + DeltaType *string `json:"delta_type,omitempty"` + Text *string `json:"text,omitempty"` +} + +// MessageStartPatch is the message-start patch surface. +type MessageStartPatch = MessagePatch + +// MessageDeltaPatch is the message-delta patch surface. +type MessageDeltaPatch = MessagePatch + +// MessageEndPatch is the message-end patch surface. +type MessageEndPatch = MessagePatch + +// ToolPreCallPayload is delivered before a tool runs. +type ToolPreCallPayload struct { + PayloadBase + SessionContext + TurnContext + ToolCallRef + ToolInput json.RawMessage `json:"tool_input,omitempty"` +} + +// ToolPostCallPayload is delivered after a tool completes successfully. +type ToolPostCallPayload struct { + PayloadBase + SessionContext + TurnContext + ToolCallRef + Title string `json:"title,omitempty"` + ToolInput json.RawMessage `json:"tool_input,omitempty"` + ToolResult json.RawMessage `json:"tool_result,omitempty"` +} + +// ToolPostErrorPayload is delivered after a tool fails. +type ToolPostErrorPayload struct { + PayloadBase + SessionContext + TurnContext + ToolCallRef + Title string `json:"title,omitempty"` + ToolInput json.RawMessage `json:"tool_input,omitempty"` + Error string `json:"error,omitempty"` +} + +// ToolCallPatch mutates or denies tool invocation inputs. +type ToolCallPatch struct { + ControlPatch + ToolName *string `json:"tool_name,omitempty"` + ToolNamespace *string `json:"tool_namespace,omitempty"` + ReadOnly *bool `json:"read_only,omitempty"` + ToolInput json.RawMessage `json:"tool_input,omitempty"` +} + +// ToolResultPatch mutates or denies tool outputs. 
+type ToolResultPatch struct { + ControlPatch + Title *string `json:"title,omitempty"` + ToolResult json.RawMessage `json:"tool_result,omitempty"` + Error *string `json:"error,omitempty"` +} + +// ToolPostErrorPatch is the post-error patch surface. +type ToolPostErrorPatch = ToolResultPatch + +// PermissionRequestPayload is delivered before a permission decision resolves. +type PermissionRequestPayload struct { + PayloadBase + SessionContext + TurnContext + RequestID string `json:"request_id,omitempty"` + Action string `json:"action,omitempty"` + Resource string `json:"resource,omitempty"` + Decision string `json:"decision,omitempty"` + DecisionClass string `json:"decision_class,omitempty"` + ToolInput json.RawMessage `json:"tool_input,omitempty"` + ToolCall PermissionToolCall `json:"tool_call,omitempty"` + Options []PermissionOption `json:"options,omitempty"` +} + +// PermissionResolutionPayload is shared by resolved and denied events. +type PermissionResolutionPayload struct { + PayloadBase + SessionContext + TurnContext + RequestID string `json:"request_id,omitempty"` + Action string `json:"action,omitempty"` + Resource string `json:"resource,omitempty"` + Decision string `json:"decision,omitempty"` + DecisionClass string `json:"decision_class,omitempty"` + ToolInput json.RawMessage `json:"tool_input,omitempty"` + ToolCall PermissionToolCall `json:"tool_call,omitempty"` +} + +// PermissionResolvedPayload is delivered after a permission decision resolves. +type PermissionResolvedPayload = PermissionResolutionPayload + +// PermissionDeniedPayload is delivered after a permission denial resolves. +type PermissionDeniedPayload = PermissionResolutionPayload + +// PermissionRequestPatch mutates or denies the permission-request surface. 
type PermissionRequestPatch struct {
	// ControlPatch supplies Deny / DenyReason. permissionDecisionAfterPatch
	// treats Deny as taking precedence: when it is set the effective decision
	// is "deny" regardless of Decision.
	ControlPatch
	// Decision, when non-nil (and Deny is not set), replaces the payload's
	// decision. The permission-request guard rejects a patch that would turn
	// an already-denied decision into a non-denied one.
	Decision      *string `json:"decision,omitempty"`
	DecisionClass *string `json:"decision_class,omitempty"`
	Reason        *string `json:"reason,omitempty"`
}

// PermissionResolvedPatch is the resolved patch surface.
//
// It is an empty struct: it has no fields and does not embed ControlPatch, so
// it always encodes as an empty JSON object.
type PermissionResolvedPatch struct{}

// PermissionDeniedPatch is the denied patch surface.
//
// Like PermissionResolvedPatch it is an empty struct with no fields and no
// embedded ControlPatch. It is a separate type, not an alias.
type PermissionDeniedPatch struct{}

// ContextCompactPayload is shared by context compaction hooks.
//
// In addition to the embedded event, session and turn structs it carries the
// compaction Reason, Strategy and Summary strings and the ContextBlocks slice;
// every one of these is omitted from the JSON encoding when empty.
type ContextCompactPayload struct {
	PayloadBase
	SessionContext
	TurnContext
	Reason        string         `json:"reason,omitempty"`
	Strategy      string         `json:"strategy,omitempty"`
	Summary       string         `json:"summary,omitempty"`
	ContextBlocks []ContextBlock `json:"context_blocks,omitempty"`
}

// ContextPreCompactPayload is delivered before compaction.
// It is an alias of ContextCompactPayload.
type ContextPreCompactPayload = ContextCompactPayload

// ContextPostCompactPayload is delivered after compaction.
// It is an alias of ContextCompactPayload.
type ContextPostCompactPayload = ContextCompactPayload

// ContextCompactionPatch mutates or denies compaction behavior.
//
// Reason and Strategy are optional pointer overrides (nil is omitted from the
// encoding). ContextBlocks is a plain slice, so an empty slice and a nil slice
// are both omitted. Unlike the payload, the patch has no Summary field.
type ContextCompactionPatch struct {
	ControlPatch
	Reason        *string        `json:"reason,omitempty"`
	Strategy      *string        `json:"strategy,omitempty"`
	ContextBlocks []ContextBlock `json:"context_blocks,omitempty"`
}

// ContextPreCompactPatch is the pre-compact patch surface.
// It is an alias of ContextCompactionPatch.
type ContextPreCompactPatch = ContextCompactionPatch

// ContextPostCompactPatch is the post-compact patch surface.
+type ContextPostCompactPatch = ContextCompactionPatch + +func (p SessionPreCreatePayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p SessionLifecyclePayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p InputPreSubmitPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p PromptPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p EventRecordPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p AgentPreStartPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p AgentLifecyclePayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p TurnPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p MessagePayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p ToolPreCallPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p ToolPostCallPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p ToolPostErrorPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p PermissionRequestPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p PermissionResolutionPayload) hookSessionContext() SessionContext { + return p.SessionContext +} + +func (p ContextCompactPayload) hookSessionContext() SessionContext { + return p.SessionContext +} diff --git a/internal/hooks/payloads_test.go b/internal/hooks/payloads_test.go new file mode 100644 index 000000000..38856e5e3 --- /dev/null +++ b/internal/hooks/payloads_test.go @@ -0,0 +1,437 @@ +package hooks + +import ( + "encoding/json" + "reflect" + "testing" + "time" +) + +func TestPayloadsAndPatchesJSONRoundTrip(t *testing.T) { + t.Parallel() + + sampleSession := SessionContext{ + SessionID: "sess-1", + SessionName: "demo", + SessionType: 
"user", + AgentName: "codex", + WorkspaceID: "ws-1", + Workspace: "/tmp/demo", + ACPSessionID: "acp-1", + State: "active", + } + sampleTurn := TurnContext{TurnID: "turn-1"} + samplePayloadBase := func(event HookEvent) PayloadBase { + return PayloadBase{ + Event: event, + Timestamp: time.Date(2026, time.April, 9, 12, 0, 0, 0, time.UTC), + } + } + sampleContextBlocks := []ContextBlock{ + { + Kind: "policy", + Text: "ctx", + Metadata: map[string]string{ + "source": "test", + }, + }, + } + sampleRaw := json.RawMessage(`{"key":"value"}`) + allowOnce := "allow-once" + reason := "blocked" + toolName := "grep" + toolNamespace := "fs" + strategy := "summarize" + text := "patched" + role := "assistant" + deltaType := "text" + sessionName := "patched-session" + sessionType := "system" + agentName := "native" + workspaceID := "ws-2" + workspace := "/tmp/other" + title := "result" + readOnly := true + + assertJSONRoundTrip(t, "SessionPreCreatePayload", SessionPreCreatePayload{ + PayloadBase: samplePayloadBase(HookSessionPreCreate), + SessionContext: sampleSession, + }) + assertJSONRoundTrip(t, "SessionPostCreatePayload", SessionPostCreatePayload{ + PayloadBase: samplePayloadBase(HookSessionPostCreate), + SessionContext: sampleSession, + }) + assertJSONRoundTrip(t, "SessionPreResumePayload", SessionPreResumePayload{ + PayloadBase: samplePayloadBase(HookSessionPreResume), + SessionContext: sampleSession, + }) + assertJSONRoundTrip(t, "SessionPostResumePayload", SessionPostResumePayload{ + PayloadBase: samplePayloadBase(HookSessionPostResume), + SessionContext: sampleSession, + }) + assertJSONRoundTrip(t, "SessionPreStopPayload", SessionPreStopPayload{ + PayloadBase: samplePayloadBase(HookSessionPreStop), + SessionContext: sampleSession, + }) + assertJSONRoundTrip(t, "SessionPostStopPayload", SessionPostStopPayload{ + PayloadBase: samplePayloadBase(HookSessionPostStop), + SessionContext: sampleSession, + }) + assertJSONRoundTrip(t, "SessionCreatePatch", SessionCreatePatch{ + 
ControlPatch: ControlPatch{Deny: true, DenyReason: "policy"}, + SessionName: &sessionName, + SessionType: &sessionType, + AgentName: &agentName, + WorkspaceID: &workspaceID, + Workspace: &workspace, + }) + assertJSONRoundTrip(t, "SessionPostCreatePatch", SessionPostCreatePatch{ + ControlPatch: ControlPatch{DenyReason: "observe"}, + SessionName: &sessionName, + }) + assertJSONRoundTrip(t, "SessionPreResumePatch", SessionPreResumePatch{ + SessionType: &sessionType, + }) + assertJSONRoundTrip(t, "SessionPostResumePatch", SessionPostResumePatch{ + AgentName: &agentName, + }) + assertJSONRoundTrip(t, "SessionPreStopPatch", SessionPreStopPatch{ + ControlPatch: ControlPatch{Deny: true, DenyReason: "stop"}, + }) + assertJSONRoundTrip(t, "SessionPostStopPatch", SessionPostStopPatch{ + Workspace: &workspace, + }) + + assertJSONRoundTrip(t, "InputPreSubmitPayload", InputPreSubmitPayload{ + PayloadBase: samplePayloadBase(HookInputPreSubmit), + SessionContext: sampleSession, + TurnContext: sampleTurn, + InputClass: "user_message", + Message: "hello", + ContextBlocks: sampleContextBlocks, + }) + assertJSONRoundTrip(t, "InputPreSubmitPatch", InputPreSubmitPatch{ + ControlPatch: ControlPatch{Deny: true, DenyReason: "input"}, + Message: &text, + ContextBlocks: sampleContextBlocks, + }) + + assertJSONRoundTrip(t, "PromptPayload", PromptPayload{ + PayloadBase: samplePayloadBase(HookPromptPostAssemble), + SessionContext: sampleSession, + TurnContext: sampleTurn, + InputClass: "user_message", + Prompt: "assembled", + ContextBlocks: sampleContextBlocks, + }) + assertJSONRoundTrip(t, "PromptPatch", PromptPatch{ + ControlPatch: ControlPatch{DenyReason: "prompt"}, + Prompt: &text, + ContextBlocks: sampleContextBlocks, + }) + + assertJSONRoundTrip(t, "EventPreRecordPayload", EventPreRecordPayload{ + PayloadBase: samplePayloadBase(HookEventPreRecord), + SessionContext: sampleSession, + TurnContext: sampleTurn, + RecordType: "tool_call", + Sequence: 1, + Content: sampleRaw, + }) + 
assertJSONRoundTrip(t, "EventPostRecordPayload", EventPostRecordPayload{ + PayloadBase: samplePayloadBase(HookEventPostRecord), + SessionContext: sampleSession, + TurnContext: sampleTurn, + RecordType: "tool_result", + Sequence: 2, + Content: sampleRaw, + }) + assertJSONRoundTrip(t, "EventPreRecordPatch", EventPreRecordPatch{ + Labels: map[string]string{"stage": "pre"}, + }) + assertJSONRoundTrip(t, "EventPostRecordPatch", EventPostRecordPatch{ + Labels: map[string]string{"stage": "post"}, + }) + + assertJSONRoundTrip(t, "AgentPreStartPayload", AgentPreStartPayload{ + PayloadBase: samplePayloadBase(HookAgentPreStart), + SessionContext: sampleSession, + Command: "codex", + Args: []string{"serve"}, + Cwd: "/tmp/demo", + Provider: "openai", + Model: "gpt-5.4", + }) + assertJSONRoundTrip(t, "AgentSpawnedPayload", AgentSpawnedPayload{ + PayloadBase: samplePayloadBase(HookAgentSpawned), + SessionContext: sampleSession, + Command: "codex", + Args: []string{"serve"}, + Cwd: "/tmp/demo", + PID: 123, + Provider: "openai", + Model: "gpt-5.4", + }) + assertJSONRoundTrip(t, "AgentCrashedPayload", AgentCrashedPayload{ + PayloadBase: samplePayloadBase(HookAgentCrashed), + SessionContext: sampleSession, + Command: "codex", + Args: []string{"serve"}, + Cwd: "/tmp/demo", + PID: 123, + Provider: "openai", + Model: "gpt-5.4", + Error: "boom", + }) + assertJSONRoundTrip(t, "AgentStoppedPayload", AgentStoppedPayload{ + PayloadBase: samplePayloadBase(HookAgentStopped), + SessionContext: sampleSession, + Command: "codex", + Args: []string{"serve"}, + Cwd: "/tmp/demo", + PID: 123, + Provider: "openai", + Model: "gpt-5.4", + }) + assertJSONRoundTrip(t, "AgentStartPatch", AgentStartPatch{ + ControlPatch: ControlPatch{DenyReason: "agent"}, + Command: &toolName, + Args: []string{"--safe"}, + }) + assertJSONRoundTrip(t, "AgentSpawnedPatch", AgentSpawnedPatch{ + Labels: map[string]string{"state": "spawned"}, + }) + assertJSONRoundTrip(t, "AgentCrashedPatch", AgentCrashedPatch{ + Labels: 
map[string]string{"state": "crashed"}, + }) + assertJSONRoundTrip(t, "AgentStoppedPatch", AgentStoppedPatch{ + Labels: map[string]string{"state": "stopped"}, + }) + + assertJSONRoundTrip(t, "TurnStartPayload", TurnStartPayload{ + PayloadBase: samplePayloadBase(HookTurnStart), + SessionContext: sampleSession, + TurnContext: sampleTurn, + InputClass: "user_message", + UserMessage: "hello", + }) + assertJSONRoundTrip(t, "TurnEndPayload", TurnEndPayload{ + PayloadBase: samplePayloadBase(HookTurnEnd), + SessionContext: sampleSession, + TurnContext: sampleTurn, + InputClass: "user_message", + UserMessage: "bye", + }) + assertJSONRoundTrip(t, "TurnStartPatch", TurnStartPatch{ + ControlPatch: ControlPatch{DenyReason: "turn"}, + Labels: map[string]string{"phase": "start"}, + }) + assertJSONRoundTrip(t, "TurnEndPatch", TurnEndPatch{ + Labels: map[string]string{"phase": "end"}, + }) + + assertJSONRoundTrip(t, "MessageStartPayload", MessageStartPayload{ + PayloadBase: samplePayloadBase(HookMessageStart), + SessionContext: sampleSession, + TurnContext: sampleTurn, + MessageID: "msg-1", + Role: "assistant", + DeltaType: "full", + Text: "hello", + Raw: sampleRaw, + }) + assertJSONRoundTrip(t, "MessageDeltaPayload", MessageDeltaPayload{ + PayloadBase: samplePayloadBase(HookMessageDelta), + SessionContext: sampleSession, + TurnContext: sampleTurn, + MessageID: "msg-1", + Role: "assistant", + DeltaType: "text", + Text: "hel", + Raw: sampleRaw, + }) + assertJSONRoundTrip(t, "MessageEndPayload", MessageEndPayload{ + PayloadBase: samplePayloadBase(HookMessageEnd), + SessionContext: sampleSession, + TurnContext: sampleTurn, + MessageID: "msg-1", + Role: "assistant", + DeltaType: "full", + Text: "hello", + Raw: sampleRaw, + }) + assertJSONRoundTrip(t, "MessageStartPatch", MessageStartPatch{ + ControlPatch: ControlPatch{DenyReason: "message"}, + Role: &role, + DeltaType: &deltaType, + Text: &text, + }) + assertJSONRoundTrip(t, "MessageDeltaPatch", MessageDeltaPatch{ + DeltaType: 
&deltaType, + }) + assertJSONRoundTrip(t, "MessageEndPatch", MessageEndPatch{ + Text: &text, + }) + + assertJSONRoundTrip(t, "ToolPreCallPayload", ToolPreCallPayload{ + PayloadBase: samplePayloadBase(HookToolPreCall), + SessionContext: sampleSession, + TurnContext: sampleTurn, + ToolCallRef: ToolCallRef{ + ToolCallID: "tool-1", + ToolName: "grep", + ToolNamespace: "fs", + ReadOnly: true, + }, + ToolInput: sampleRaw, + }) + assertJSONRoundTrip(t, "ToolPostCallPayload", ToolPostCallPayload{ + PayloadBase: samplePayloadBase(HookToolPostCall), + SessionContext: sampleSession, + TurnContext: sampleTurn, + ToolCallRef: ToolCallRef{ + ToolCallID: "tool-1", + ToolName: "grep", + ToolNamespace: "fs", + ReadOnly: true, + }, + Title: "grep result", + ToolInput: sampleRaw, + ToolResult: sampleRaw, + }) + assertJSONRoundTrip(t, "ToolPostErrorPayload", ToolPostErrorPayload{ + PayloadBase: samplePayloadBase(HookToolPostError), + SessionContext: sampleSession, + TurnContext: sampleTurn, + ToolCallRef: ToolCallRef{ + ToolCallID: "tool-1", + ToolName: "grep", + ToolNamespace: "fs", + ReadOnly: true, + }, + Title: "grep error", + ToolInput: sampleRaw, + Error: "failed", + }) + assertJSONRoundTrip(t, "ToolCallPatch", ToolCallPatch{ + ControlPatch: ControlPatch{DenyReason: "tool"}, + ToolName: &toolName, + ToolNamespace: &toolNamespace, + ReadOnly: &readOnly, + ToolInput: sampleRaw, + }) + assertJSONRoundTrip(t, "ToolResultPatch", ToolResultPatch{ + ControlPatch: ControlPatch{DenyReason: "result"}, + Title: &title, + ToolResult: sampleRaw, + }) + assertJSONRoundTrip(t, "ToolPostErrorPatch", ToolPostErrorPatch{ + Error: &reason, + }) + + assertJSONRoundTrip(t, "PermissionRequestPayload", PermissionRequestPayload{ + PayloadBase: samplePayloadBase(HookPermissionRequest), + SessionContext: sampleSession, + TurnContext: sampleTurn, + RequestID: "req-1", + Action: "session/request_permission", + Resource: "/tmp/demo.txt", + Decision: "pending", + DecisionClass: "interactive", + ToolInput: 
sampleRaw, + ToolCall: PermissionToolCall{ + ID: "tool-1", + Kind: "read", + Title: "Read file", + Status: "pending", + Locations: []ToolLocation{ + {Path: "/tmp/demo.txt", StartLine: 1, EndLine: 1}, + }, + }, + Options: []PermissionOption{ + {Decision: "allow-once", OptionID: "allow-once", Kind: "allow"}, + }, + }) + assertJSONRoundTrip(t, "PermissionResolvedPayload", PermissionResolvedPayload{ + PayloadBase: samplePayloadBase(HookPermissionResolved), + SessionContext: sampleSession, + TurnContext: sampleTurn, + RequestID: "req-1", + Action: "session/request_permission", + Resource: "/tmp/demo.txt", + Decision: "allow-once", + DecisionClass: "interactive", + ToolInput: sampleRaw, + ToolCall: PermissionToolCall{ID: "tool-1", Kind: "read", Title: "Read file", Status: "done"}, + }) + assertJSONRoundTrip(t, "PermissionDeniedPayload", PermissionDeniedPayload{ + PayloadBase: samplePayloadBase(HookPermissionDenied), + SessionContext: sampleSession, + TurnContext: sampleTurn, + RequestID: "req-2", + Action: "session/request_permission", + Resource: "/tmp/secret.txt", + Decision: "reject-once", + DecisionClass: "interactive", + ToolInput: sampleRaw, + ToolCall: PermissionToolCall{ID: "tool-2", Kind: "read", Title: "Read secret", Status: "done"}, + }) + assertJSONRoundTrip(t, "PermissionRequestPatch", PermissionRequestPatch{ + ControlPatch: ControlPatch{Deny: true, DenyReason: "permission"}, + Decision: &allowOnce, + DecisionClass: &role, + Reason: &reason, + }) + assertJSONRoundTrip(t, "PermissionResolvedPatch", PermissionResolvedPatch{}) + assertJSONRoundTrip(t, "PermissionDeniedPatch", PermissionDeniedPatch{}) + + assertJSONRoundTrip(t, "ContextPreCompactPayload", ContextPreCompactPayload{ + PayloadBase: samplePayloadBase(HookContextPreCompact), + SessionContext: sampleSession, + TurnContext: sampleTurn, + Reason: "token_limit", + Strategy: "summarize", + Summary: "before", + ContextBlocks: sampleContextBlocks, + }) + assertJSONRoundTrip(t, "ContextPostCompactPayload", 
ContextPostCompactPayload{ + PayloadBase: samplePayloadBase(HookContextPostCompact), + SessionContext: sampleSession, + TurnContext: sampleTurn, + Reason: "token_limit", + Strategy: "summarize", + Summary: "after", + ContextBlocks: sampleContextBlocks, + }) + assertJSONRoundTrip(t, "ContextPreCompactPatch", ContextPreCompactPatch{ + ControlPatch: ControlPatch{DenyReason: "compact"}, + Reason: &reason, + Strategy: &strategy, + ContextBlocks: sampleContextBlocks, + }) + assertJSONRoundTrip(t, "ContextPostCompactPatch", ContextPostCompactPatch{ + Strategy: &strategy, + }) +} + +func assertJSONRoundTrip[T any](t *testing.T, name string, sample T) { + t.Helper() + + t.Run(name, func(t *testing.T) { + t.Parallel() + + data, err := json.Marshal(sample) + if err != nil { + t.Fatalf("json.Marshal(%s) error = %v", name, err) + } + + var decoded T + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("json.Unmarshal(%s) error = %v", name, err) + } + + if !reflect.DeepEqual(sample, decoded) { + t.Fatalf("%s round-trip mismatch\ngot: %#v\nwant: %#v", name, decoded, sample) + } + }) +} diff --git a/internal/hooks/permission.go b/internal/hooks/permission.go new file mode 100644 index 000000000..eccff3170 --- /dev/null +++ b/internal/hooks/permission.go @@ -0,0 +1,89 @@ +package hooks + +import ( + "context" + "errors" + "fmt" + "log/slog" + "strings" +) + +var ( + ErrHookPatchRejected = errors.New("hooks: hook patch rejected") + ErrPermissionEscalationBlocked = errors.New("hooks: permission escalation blocked") +) + +func newPermissionRequestGuard(logger *slog.Logger, metrics *hookMetrics) patchGuard[PermissionRequestPayload, PermissionRequestPatch] { + if logger == nil { + logger = slog.Default() + } + + return func(ctx context.Context, hook RegisteredHook, payload PermissionRequestPayload, patch PermissionRequestPatch) error { + beforeDecision := normalizedPermissionDecision(payload.Decision) + afterDecision := 
normalizedPermissionDecision(permissionDecisionAfterPatch(payload.Decision, patch)) + if permissionDecisionDenied(beforeDecision) && !permissionDecisionDenied(afterDecision) { + metrics.observePermissionEscalationBlock() + logger.WarnContext( + ctx, + "hook.dispatch.permission_escalation_blocked", + "hook", hook.Name, + "event", hook.Event.String(), + "source", hook.Source.String(), + "decision_before", beforeDecision, + "decision_after", afterDecision, + ) + + return fmt.Errorf("%w: %w", ErrHookPatchRejected, ErrPermissionEscalationBlocked) + } + + return nil + } +} + +func permissionDecisionAfterPatch(decision string, patch PermissionRequestPatch) string { + switch { + case patch.Deny: + return "deny" + case patch.Decision != nil: + return *patch.Decision + default: + return decision + } +} + +func permissionPatchDenies(patch PermissionRequestPatch) bool { + switch { + case patch.Deny: + return true + case patch.Decision == nil: + return false + default: + return permissionDecisionDenied(*patch.Decision) + } +} + +func permissionDecisionDenied(decision string) bool { + clean := normalizedPermissionDecision(decision) + switch { + case clean == "": + return false + case clean == "block", clean == "blocked": + return true + case clean == "deny", clean == "denied", clean == "rejected": + return true + case strings.HasPrefix(clean, "block-"): + return true + case strings.HasPrefix(clean, "deny-"): + return true + case clean == "reject": + return true + case strings.HasPrefix(clean, "reject-"): + return true + default: + return false + } +} + +func normalizedPermissionDecision(decision string) string { + return strings.ToLower(strings.TrimSpace(decision)) +} diff --git a/internal/hooks/permission_test.go b/internal/hooks/permission_test.go new file mode 100644 index 000000000..db0e02d24 --- /dev/null +++ b/internal/hooks/permission_test.go @@ -0,0 +1,36 @@ +package hooks + +import "testing" + +func TestPermissionDecisionDenied(t *testing.T) { + t.Parallel() + + tests 
:= []struct { + name string + decision string + want bool + }{ + {name: "empty", decision: "", want: false}, + {name: "allow once", decision: "allow-once", want: false}, + {name: "allow always", decision: "allow-always", want: false}, + {name: "pending", decision: "pending", want: false}, + {name: "deny", decision: "deny", want: true}, + {name: "deny once", decision: "deny-once", want: true}, + {name: "reject", decision: "reject", want: true}, + {name: "reject once", decision: "reject-once", want: true}, + {name: "reject always", decision: "reject-always", want: true}, + {name: "blocked", decision: "blocked", want: true}, + {name: "trim and case fold", decision: " Reject-Once ", want: true}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + if got := permissionDecisionDenied(tt.decision); got != tt.want { + t.Fatalf("permissionDecisionDenied(%q) = %v, want %v", tt.decision, got, tt.want) + } + }) + } +} diff --git a/internal/hooks/pipeline.go b/internal/hooks/pipeline.go new file mode 100644 index 000000000..371acaed1 --- /dev/null +++ b/internal/hooks/pipeline.go @@ -0,0 +1,217 @@ +package hooks + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "time" +) + +type hookSelector[P any] func(P) []*ResolvedHook + +type patchGuard[P any, R any] func(context.Context, RegisteredHook, P, R) error + +type denyDetector[R any] func(R) bool + +type typedNativeExecutor[P any, R any] interface { + Executor + ExecuteTyped(context.Context, RegisteredHook, P) (R, error) +} + +// pipeline executes one sync hook chain for a concrete payload/patch pair. 
+type pipeline[P any, R any] struct { + event HookEvent + hooksRuntime *Hooks + hooks hookSelector[P] + apply func(P, R) P + encode func(P) ([]byte, error) + decode func([]byte) (R, error) + denied denyDetector[R] + guard patchGuard[P, R] + enter func(context.Context, HookEvent) (context.Context, int, error) +} + +func (p pipeline[P, R]) execute(ctx context.Context, payload P) (P, error) { + result, _, err := p.executeWithDisposition(ctx, payload) + return result, err +} + +func (p pipeline[P, R]) executeWithDisposition(ctx context.Context, payload P) (P, dispatchReport, error) { + if err := p.validate(); err != nil { + return payload, dispatchReport{}, err + } + + enterDispatchFn := p.enter + if enterDispatchFn == nil { + enterDispatchFn = enterDispatch + } + + dispatchCtx, depth, err := enterDispatchFn(ctx, p.event) + if err != nil { + return payload, dispatchReport{}, err + } + + current := payload + report := dispatchReport{Trace: make([]hookTraceEntry, 0)} + for _, hook := range OrderedResolvedHooks(p.hooks(payload)) { + if hook == nil { + continue + } + + next, denied, trace, err := p.executeHook(dispatchCtx, *hook, current, depth) + if trace.Hook != "" { + report.Trace = append(report.Trace, trace) + } + if err != nil { + report.FailedHook = hook.Name + report.FailedRequired = hook.Required + if hook.Required { + return current, report, fmt.Errorf("hooks: required hook %q failed for event %q: %w", hook.Name, p.event, err) + } + continue + } + + current = next + if denied { + report.Denied = true + report.DenySource = hook.Name + return current, report, nil + } + } + + return current, report, nil +} + +func (p pipeline[P, R]) validate() error { + if err := p.event.Validate(); err != nil { + return err + } + if p.hooks == nil { + return fmt.Errorf("hooks: pipeline for event %q requires a hook selector", p.event) + } + if p.apply == nil { + return fmt.Errorf("hooks: pipeline for event %q requires an apply function", p.event) + } + if p.encode == nil { + return 
fmt.Errorf("hooks: pipeline for event %q requires an encode function", p.event) + } + if p.decode == nil { + return fmt.Errorf("hooks: pipeline for event %q requires a decode function", p.event) + } + return nil +} + +func (p pipeline[P, R]) executeHook(ctx context.Context, hook ResolvedHook, payload P, depth int) (P, bool, hookTraceEntry, error) { + if hook.Executor == nil { + return payload, false, hookTraceEntry{}, fmt.Errorf("hooks: hook %q executor is required", hook.Name) + } + + hookCtx := ctx + cancel := func() {} + if hook.Timeout > 0 { + hookCtx, cancel = context.WithTimeout(ctx, hook.Timeout) + } + defer cancel() + + started := time.Now() + patch, rawPatch, err := p.runHook(hookCtx, hook.RegisteredHook, payload) + duration := time.Since(started) + trace := hookTraceEntry{ + Hook: hook.Name, + Duration: duration, + Required: hook.Required, + Patch: cloneRawJSON(rawPatch), + } + if err != nil { + trace.Outcome = HookRunOutcomeFailed + trace.Error = err.Error() + p.recordHookRun(hookCtx, payload, hook.RegisteredHook, trace.Outcome, duration, rawPatch, err, depth) + return payload, false, trace, err + } + if p.guard != nil { + if err := p.guard(hookCtx, hook.RegisteredHook, payload, patch); err != nil { + if errors.Is(err, ErrHookPatchRejected) { + trace.Outcome = HookRunOutcomeRejected + trace.Error = err.Error() + p.recordHookRun(hookCtx, payload, hook.RegisteredHook, trace.Outcome, duration, rawPatch, err, depth) + return payload, false, trace, nil + } + trace.Outcome = HookRunOutcomeFailed + trace.Error = err.Error() + p.recordHookRun(hookCtx, payload, hook.RegisteredHook, trace.Outcome, duration, rawPatch, err, depth) + return payload, false, trace, err + } + } + + next := p.apply(payload, patch) + denied := p.denied != nil && p.denied(patch) + if denied { + trace.Outcome = HookRunOutcomeDenied + } else { + trace.Outcome = HookRunOutcomeApplied + } + p.recordHookRun(hookCtx, payload, hook.RegisteredHook, trace.Outcome, duration, rawPatch, nil, depth) + 
return next, denied, trace, nil +} + +func (p pipeline[P, R]) runHook(ctx context.Context, hook RegisteredHook, payload P) (R, json.RawMessage, error) { + if hook.Executor.Kind() == HookExecutorNative { + if executor, ok := hook.Executor.(typedNativeExecutor[P, R]); ok { + patch, err := executor.ExecuteTyped(ctx, hook, payload) + if err != nil { + var zero R + return zero, nil, err + } + rawPatch, marshalErr := json.Marshal(patch) + if marshalErr != nil { + var zero R + return zero, nil, fmt.Errorf("hooks: encode native patch for hook %q: %w", hook.Name, marshalErr) + } + return patch, rawPatch, nil + } + } + + encoded, err := p.encode(payload) + if err != nil { + var zero R + return zero, nil, fmt.Errorf("hooks: encode payload for hook %q: %w", hook.Name, err) + } + + rawPatch, err := hook.Executor.Execute(ctx, hook, encoded) + if err != nil { + var zero R + return zero, nil, err + } + + patch, err := p.decode(rawPatch) + if err != nil { + var zero R + return zero, rawPatch, fmt.Errorf("hooks: decode patch for hook %q: %w", hook.Name, err) + } + + return patch, rawPatch, nil +} + +func encodeJSON[T any](payload T) ([]byte, error) { + return json.Marshal(payload) +} + +func decodeJSON[T any](payload []byte) (T, error) { + var decoded T + if len(bytes.TrimSpace(payload)) == 0 { + return decoded, nil + } + if err := json.Unmarshal(payload, &decoded); err != nil { + return decoded, err + } + return decoded, nil +} + +func (p pipeline[P, R]) recordHookRun(ctx context.Context, payload P, hook RegisteredHook, outcome HookRunOutcome, duration time.Duration, rawPatch json.RawMessage, err error, depth int) { + if p.hooksRuntime == nil { + return + } + p.hooksRuntime.emitHookRun(ctx, payload, hook, outcome, duration, rawPatch, err, depth) +} diff --git a/internal/hooks/pipeline_test.go b/internal/hooks/pipeline_test.go new file mode 100644 index 000000000..c53362791 --- /dev/null +++ b/internal/hooks/pipeline_test.go @@ -0,0 +1,634 @@ +package hooks + +import ( + "bytes" + 
"context" + "errors" + "log/slog" + "strings" + "testing" + "time" +) + +type pipelineTestPayload struct { + Value string `json:"value,omitempty"` + Denied bool `json:"denied,omitempty"` +} + +type pipelineTestPatch struct { + Append string `json:"append,omitempty"` + Deny bool `json:"deny,omitempty"` +} + +type pipelineTestExecutor struct { + kind HookExecutorKind + execute func(context.Context, RegisteredHook, []byte) ([]byte, error) +} + +func (e pipelineTestExecutor) Kind() HookExecutorKind { + return e.kind +} + +func (e pipelineTestExecutor) Execute(ctx context.Context, hook RegisteredHook, payload []byte) ([]byte, error) { + return e.execute(ctx, hook, payload) +} + +func TestPipelineExecuteSequentialComposition(t *testing.T) { + t.Parallel() + + seen := make([]string, 0, 3) + pipe := pipeline[pipelineTestPayload, pipelineTestPatch]{ + event: HookSessionPreCreate, + hooks: func(pipelineTestPayload) []*ResolvedHook { + return []*ResolvedHook{ + testPipelineHook(HookSessionPreCreate, "hook-1", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, payload pipelineTestPayload) (pipelineTestPatch, error) { + seen = append(seen, payload.Value) + return pipelineTestPatch{Append: "A"}, nil + }, + )), + testPipelineHook(HookSessionPreCreate, "hook-2", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, payload pipelineTestPayload) (pipelineTestPatch, error) { + seen = append(seen, payload.Value) + return pipelineTestPatch{Append: "B"}, nil + }, + )), + testPipelineHook(HookSessionPreCreate, "hook-3", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, payload pipelineTestPayload) (pipelineTestPatch, error) { + seen = append(seen, payload.Value) + return pipelineTestPatch{Append: "C"}, nil + }, + )), + } + }, + apply: applyPipelineTestPatch, + encode: failPipelineEncode(t), + decode: failPipelineDecode(t), + denied: func(patch pipelineTestPatch) bool { return patch.Deny }, + } + + result, err 
:= pipe.execute(t.Context(), pipelineTestPayload{}) + if err != nil { + t.Fatalf("execute() error = %v, want nil", err) + } + if result.Value != "ABC" { + t.Fatalf("result.Value = %q, want %q", result.Value, "ABC") + } + if got := strings.Join(seen, ","); got != ",A,AB" { + t.Fatalf("payload sequence = %q, want %q", got, ",A,AB") + } +} + +func TestPipelineExecuteShortCircuitsOnExplicitDeny(t *testing.T) { + t.Parallel() + + var thirdExecuted bool + pipe := pipeline[pipelineTestPayload, pipelineTestPatch]{ + event: HookSessionPreCreate, + hooks: func(pipelineTestPayload) []*ResolvedHook { + return []*ResolvedHook{ + testPipelineHook(HookSessionPreCreate, "hook-1", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, _ pipelineTestPayload) (pipelineTestPatch, error) { + return pipelineTestPatch{Append: "A"}, nil + }, + )), + testPipelineHook(HookSessionPreCreate, "hook-2", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, payload pipelineTestPayload) (pipelineTestPatch, error) { + if payload.Value != "A" { + t.Fatalf("hook-2 payload.Value = %q, want %q", payload.Value, "A") + } + return pipelineTestPatch{Append: "B", Deny: true}, nil + }, + )), + testPipelineHook(HookSessionPreCreate, "hook-3", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, _ pipelineTestPayload) (pipelineTestPatch, error) { + thirdExecuted = true + return pipelineTestPatch{Append: "C"}, nil + }, + )), + } + }, + apply: applyPipelineTestPatch, + encode: failPipelineEncode(t), + decode: failPipelineDecode(t), + denied: func(patch pipelineTestPatch) bool { return patch.Deny }, + } + + result, err := pipe.execute(t.Context(), pipelineTestPayload{}) + if err != nil { + t.Fatalf("execute() error = %v, want nil", err) + } + if !result.Denied { + t.Fatal("result.Denied = false, want true") + } + if result.Value != "AB" { + t.Fatalf("result.Value = %q, want %q", result.Value, "AB") + } + if thirdExecuted { + t.Fatal("third 
hook executed after explicit deny") + } +} + +func TestPipelineExecuteRequiredHookTimeoutStopsPipeline(t *testing.T) { + t.Parallel() + + var thirdExecuted bool + pipe := pipeline[pipelineTestPayload, pipelineTestPatch]{ + event: HookSessionPreCreate, + hooks: func(pipelineTestPayload) []*ResolvedHook { + return []*ResolvedHook{ + testPipelineHook(HookSessionPreCreate, "hook-1", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, _ pipelineTestPayload) (pipelineTestPatch, error) { + return pipelineTestPatch{Append: "A"}, nil + }, + )), + testPipelineHook(HookSessionPreCreate, "hook-2", true, 20*time.Millisecond, NewTypedNativeExecutor( + func(ctx context.Context, _ RegisteredHook, payload pipelineTestPayload) (pipelineTestPatch, error) { + if payload.Value != "A" { + t.Fatalf("hook-2 payload.Value = %q, want %q", payload.Value, "A") + } + <-ctx.Done() + return pipelineTestPatch{}, ctx.Err() + }, + )), + testPipelineHook(HookSessionPreCreate, "hook-3", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, _ pipelineTestPayload) (pipelineTestPatch, error) { + thirdExecuted = true + return pipelineTestPatch{Append: "C"}, nil + }, + )), + } + }, + apply: applyPipelineTestPatch, + encode: failPipelineEncode(t), + decode: failPipelineDecode(t), + denied: func(patch pipelineTestPatch) bool { return patch.Deny }, + } + + result, err := pipe.execute(t.Context(), pipelineTestPayload{}) + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("execute() error = %v, want context deadline exceeded", err) + } + if result.Value != "A" { + t.Fatalf("result.Value = %q, want %q", result.Value, "A") + } + if thirdExecuted { + t.Fatal("third hook executed after required hook timeout") + } +} + +func TestPipelineExecuteSkipsNonRequiredHookFailure(t *testing.T) { + t.Parallel() + + pipe := pipeline[pipelineTestPayload, pipelineTestPatch]{ + event: HookSessionPreCreate, + hooks: func(pipelineTestPayload) []*ResolvedHook { + return 
[]*ResolvedHook{ + testPipelineHook(HookSessionPreCreate, "hook-1", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, _ pipelineTestPayload) (pipelineTestPatch, error) { + return pipelineTestPatch{Append: "A"}, nil + }, + )), + testPipelineHook(HookSessionPreCreate, "hook-2", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, payload pipelineTestPayload) (pipelineTestPatch, error) { + if payload.Value != "A" { + t.Fatalf("hook-2 payload.Value = %q, want %q", payload.Value, "A") + } + return pipelineTestPatch{}, errors.New("boom") + }, + )), + testPipelineHook(HookSessionPreCreate, "hook-3", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, payload pipelineTestPayload) (pipelineTestPatch, error) { + if payload.Value != "A" { + t.Fatalf("hook-3 payload.Value = %q, want %q", payload.Value, "A") + } + return pipelineTestPatch{Append: "B"}, nil + }, + )), + } + }, + apply: applyPipelineTestPatch, + encode: failPipelineEncode(t), + decode: failPipelineDecode(t), + denied: func(patch pipelineTestPatch) bool { return patch.Deny }, + } + + result, err := pipe.execute(t.Context(), pipelineTestPayload{}) + if err != nil { + t.Fatalf("execute() error = %v, want nil", err) + } + if result.Value != "AB" { + t.Fatalf("result.Value = %q, want %q", result.Value, "AB") + } +} + +func TestPipelineExecuteWithNoMatchingHooksReturnsOriginalPayload(t *testing.T) { + t.Parallel() + + var encodeCalled bool + var decodeCalled bool + original := pipelineTestPayload{Value: "original"} + pipe := pipeline[pipelineTestPayload, pipelineTestPatch]{ + event: HookSessionPreCreate, + hooks: func(pipelineTestPayload) []*ResolvedHook { + return nil + }, + apply: applyPipelineTestPatch, + encode: func(payload pipelineTestPayload) ([]byte, error) { + encodeCalled = true + return encodeJSON(payload) + }, + decode: func(payload []byte) (pipelineTestPatch, error) { + decodeCalled = true + return 
decodeJSON[pipelineTestPatch](payload) + }, + denied: func(patch pipelineTestPatch) bool { return patch.Deny }, + } + + result, err := pipe.execute(t.Context(), original) + if err != nil { + t.Fatalf("execute() error = %v, want nil", err) + } + if result != original { + t.Fatalf("result = %#v, want %#v", result, original) + } + if encodeCalled { + t.Fatal("encode called with no matching hooks") + } + if decodeCalled { + t.Fatal("decode called with no matching hooks") + } +} + +func TestEnterDispatchDepthOneSucceeds(t *testing.T) { + t.Parallel() + + ctx, depth, err := enterDispatch(t.Context(), HookSessionPreCreate) + if err != nil { + t.Fatalf("enterDispatch() error = %v, want nil", err) + } + if depth != 1 { + t.Fatalf("depth = %d, want 1", depth) + } + if got := currentDispatchDepth(ctx); got != 1 { + t.Fatalf("currentDispatchDepth() = %d, want 1", got) + } +} + +func TestEnterDispatchDepthThreeSucceeds(t *testing.T) { + t.Parallel() + + ctx := t.Context() + for i := 1; i <= 3; i++ { + var depth int + var err error + ctx, depth, err = enterDispatch(ctx, HookSessionPreCreate) + if err != nil { + t.Fatalf("enterDispatch() iteration %d error = %v, want nil", i, err) + } + if depth != i { + t.Fatalf("depth = %d, want %d", depth, i) + } + } +} + +func TestEnterDispatchDepthFourFails(t *testing.T) { + t.Parallel() + + ctx := t.Context() + for i := 0; i < 3; i++ { + var err error + ctx, _, err = enterDispatch(ctx, HookSessionPreCreate) + if err != nil { + t.Fatalf("enterDispatch() setup error = %v, want nil", err) + } + } + + _, depth, err := enterDispatch(ctx, HookSessionPreCreate) + if !errors.Is(err, ErrDispatchDepthExceeded) { + t.Fatalf("enterDispatch() error = %v, want ErrDispatchDepthExceeded", err) + } + if depth != 3 { + t.Fatalf("depth = %d, want 3", depth) + } +} + +func TestEnterDispatchNestedIncrementsFromParentContext(t *testing.T) { + t.Parallel() + + parent, _, err := enterDispatch(t.Context(), HookSessionPreCreate) + if err != nil { + 
t.Fatalf("enterDispatch() parent error = %v, want nil", err) + } + + child, depth, err := enterDispatch(parent, HookToolPreCall) + if err != nil { + t.Fatalf("enterDispatch() child error = %v, want nil", err) + } + if depth != 2 { + t.Fatalf("depth = %d, want 2", depth) + } + if got := currentDispatchDepth(child); got != 2 { + t.Fatalf("currentDispatchDepth() = %d, want 2", got) + } +} + +func TestPermissionPipelineKeepDenyAllowed(t *testing.T) { + t.Parallel() + + var secondExecuted bool + pipe := permissionPipeline( + slog.New(slog.NewTextHandler(&bytes.Buffer{}, nil)), + func(PermissionRequestPayload) []*ResolvedHook { + return []*ResolvedHook{ + testPipelineHook(HookPermissionRequest, "hook-1", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, _ PermissionRequestPayload) (PermissionRequestPatch, error) { + return PermissionRequestPatch{ControlPatch: ControlPatch{Deny: true}}, nil + }, + )), + testPipelineHook(HookPermissionRequest, "hook-2", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, _ PermissionRequestPayload) (PermissionRequestPatch, error) { + secondExecuted = true + return PermissionRequestPatch{}, nil + }, + )), + } + }, + ) + + result, err := pipe.execute(t.Context(), PermissionRequestPayload{Decision: "deny"}) + if err != nil { + t.Fatalf("execute() error = %v, want nil", err) + } + if result.Decision != "deny" { + t.Fatalf("result.Decision = %q, want %q", result.Decision, "deny") + } + if secondExecuted { + t.Fatal("second hook executed after deny") + } +} + +func TestPermissionPipelineRejectsDenyToAllowAndLogs(t *testing.T) { + t.Parallel() + + var logs bytes.Buffer + logger := slog.New(slog.NewTextHandler(&logs, nil)) + var secondHookSawDecision string + pipe := permissionPipeline( + logger, + func(PermissionRequestPayload) []*ResolvedHook { + return []*ResolvedHook{ + testPipelineHook(HookPermissionRequest, "hook-1", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ 
RegisteredHook, _ PermissionRequestPayload) (PermissionRequestPatch, error) { + allow := "allow" + return PermissionRequestPatch{Decision: &allow}, nil + }, + )), + testPipelineHook(HookPermissionRequest, "hook-2", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, payload PermissionRequestPayload) (PermissionRequestPatch, error) { + secondHookSawDecision = payload.Decision + return PermissionRequestPatch{}, nil + }, + )), + } + }, + ) + + result, err := pipe.execute(t.Context(), PermissionRequestPayload{Decision: "deny"}) + if err != nil { + t.Fatalf("execute() error = %v, want nil", err) + } + if result.Decision != "deny" { + t.Fatalf("result.Decision = %q, want %q", result.Decision, "deny") + } + if secondHookSawDecision != "deny" { + t.Fatalf("second hook saw decision %q, want %q", secondHookSawDecision, "deny") + } + if !strings.Contains(logs.String(), "hook.dispatch.permission_escalation_blocked") { + t.Fatalf("logs = %q, want escalation-blocked entry", logs.String()) + } +} + +func TestPermissionPipelineAllowToDenyIsAllowed(t *testing.T) { + t.Parallel() + + var secondExecuted bool + pipe := permissionPipeline( + slog.New(slog.NewTextHandler(&bytes.Buffer{}, nil)), + func(PermissionRequestPayload) []*ResolvedHook { + return []*ResolvedHook{ + testPipelineHook(HookPermissionRequest, "hook-1", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, _ PermissionRequestPayload) (PermissionRequestPatch, error) { + deny := "deny" + return PermissionRequestPatch{Decision: &deny}, nil + }, + )), + testPipelineHook(HookPermissionRequest, "hook-2", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, _ PermissionRequestPayload) (PermissionRequestPatch, error) { + secondExecuted = true + return PermissionRequestPatch{}, nil + }, + )), + } + }, + ) + + result, err := pipe.execute(t.Context(), PermissionRequestPayload{Decision: "allow"}) + if err != nil { + t.Fatalf("execute() error = %v, want 
nil", err) + } + if result.Decision != "deny" { + t.Fatalf("result.Decision = %q, want %q", result.Decision, "deny") + } + if secondExecuted { + t.Fatal("second hook executed after allow-to-deny patch") + } +} + +func TestPipelineNativeExecutorSkipsSerialization(t *testing.T) { + t.Parallel() + + var encodeCalls int + var decodeCalls int + pipe := pipeline[pipelineTestPayload, pipelineTestPatch]{ + event: HookSessionPreCreate, + hooks: func(pipelineTestPayload) []*ResolvedHook { + return []*ResolvedHook{ + testPipelineHook(HookSessionPreCreate, "hook-1", false, 0, NewTypedNativeExecutor( + func(_ context.Context, _ RegisteredHook, payload pipelineTestPayload) (pipelineTestPatch, error) { + if payload.Value != "seed" { + t.Fatalf("payload.Value = %q, want %q", payload.Value, "seed") + } + return pipelineTestPatch{Append: "-native"}, nil + }, + )), + } + }, + apply: applyPipelineTestPatch, + encode: func(payload pipelineTestPayload) ([]byte, error) { + encodeCalls++ + return encodeJSON(payload) + }, + decode: func(payload []byte) (pipelineTestPatch, error) { + decodeCalls++ + return decodeJSON[pipelineTestPatch](payload) + }, + denied: func(patch pipelineTestPatch) bool { return patch.Deny }, + } + + result, err := pipe.execute(t.Context(), pipelineTestPayload{Value: "seed"}) + if err != nil { + t.Fatalf("execute() error = %v, want nil", err) + } + if result.Value != "seed-native" { + t.Fatalf("result.Value = %q, want %q", result.Value, "seed-native") + } + if encodeCalls != 0 { + t.Fatalf("encodeCalls = %d, want 0", encodeCalls) + } + if decodeCalls != 0 { + t.Fatalf("decodeCalls = %d, want 0", decodeCalls) + } +} + +func TestPipelineSubprocessExecutorUsesEncodeDecode(t *testing.T) { + t.Parallel() + + var encodeCalls int + var decodeCalls int + var rawPayload string + pipe := pipeline[pipelineTestPayload, pipelineTestPatch]{ + event: HookSessionPreCreate, + hooks: func(pipelineTestPayload) []*ResolvedHook { + return []*ResolvedHook{ + 
testPipelineHook(HookSessionPreCreate, "hook-1", false, 0, pipelineTestExecutor{ + kind: HookExecutorSubprocess, + execute: func(_ context.Context, _ RegisteredHook, payload []byte) ([]byte, error) { + rawPayload = string(payload) + return []byte(`{"append":"-subprocess"}`), nil + }, + }), + } + }, + apply: applyPipelineTestPatch, + encode: func(payload pipelineTestPayload) ([]byte, error) { + encodeCalls++ + return encodeJSON(payload) + }, + decode: func(payload []byte) (pipelineTestPatch, error) { + decodeCalls++ + return decodeJSON[pipelineTestPatch](payload) + }, + denied: func(patch pipelineTestPatch) bool { return patch.Deny }, + } + + result, err := pipe.execute(t.Context(), pipelineTestPayload{Value: "seed"}) + if err != nil { + t.Fatalf("execute() error = %v, want nil", err) + } + if result.Value != "seed-subprocess" { + t.Fatalf("result.Value = %q, want %q", result.Value, "seed-subprocess") + } + if encodeCalls != 1 { + t.Fatalf("encodeCalls = %d, want 1", encodeCalls) + } + if decodeCalls != 1 { + t.Fatalf("decodeCalls = %d, want 1", decodeCalls) + } + if !strings.Contains(rawPayload, `"value":"seed"`) { + t.Fatalf("rawPayload = %q, want encoded JSON payload", rawPayload) + } +} + +func applyPipelineTestPatch(payload pipelineTestPayload, patch pipelineTestPatch) pipelineTestPayload { + payload.Value += patch.Append + if patch.Deny { + payload.Denied = true + } + return payload +} + +func applyPermissionRequestPatch(payload PermissionRequestPayload, patch PermissionRequestPatch) PermissionRequestPayload { + if patch.Decision != nil { + payload.Decision = *patch.Decision + } + if patch.Deny { + payload.Decision = "deny" + } + if patch.DecisionClass != nil { + payload.DecisionClass = *patch.DecisionClass + } + return payload +} + +func permissionPipeline( + logger *slog.Logger, + hooks func(PermissionRequestPayload) []*ResolvedHook, +) pipeline[PermissionRequestPayload, PermissionRequestPatch] { + return pipeline[PermissionRequestPayload, 
PermissionRequestPatch]{ + event: HookPermissionRequest, + hooks: hooks, + apply: applyPermissionRequestPatch, + encode: failPermissionEncode, + decode: failPermissionDecode, + denied: permissionPatchDenies, + guard: newPermissionRequestGuard(logger, nil), + } +} + +func testPipelineHook(event HookEvent, name string, required bool, timeout time.Duration, executor Executor) *ResolvedHook { + source := HookSourceConfig + command := "hook" + if executor.Kind() == HookExecutorNative { + source = HookSourceNative + command = "" + } + + return &ResolvedHook{ + RegisteredHook: RegisteredHook{ + Name: name, + Event: event, + Source: source, + Mode: HookModeSync, + Required: required, + Timeout: timeout, + Executor: executor, + }, + Decl: HookDecl{ + Name: name, + Event: event, + Source: source, + Mode: HookModeSync, + Required: required, + Timeout: timeout, + ExecutorKind: executor.Kind(), + Command: command, + }, + } +} + +func failPipelineEncode(t *testing.T) func(pipelineTestPayload) ([]byte, error) { + t.Helper() + + return func(pipelineTestPayload) ([]byte, error) { + t.Fatal("encode should not be called") + return nil, nil + } +} + +func failPipelineDecode(t *testing.T) func([]byte) (pipelineTestPatch, error) { + t.Helper() + + return func([]byte) (pipelineTestPatch, error) { + t.Fatal("decode should not be called") + return pipelineTestPatch{}, nil + } +} + +func failPermissionEncode(PermissionRequestPayload) ([]byte, error) { + return nil, errors.New("permission pipeline should not serialize native hooks in tests") +} + +func failPermissionDecode([]byte) (PermissionRequestPatch, error) { + return PermissionRequestPatch{}, errors.New("permission pipeline should not deserialize native hooks in tests") +} diff --git a/internal/hooks/pool.go b/internal/hooks/pool.go new file mode 100644 index 000000000..d07b1c7dd --- /dev/null +++ b/internal/hooks/pool.go @@ -0,0 +1,235 @@ +package hooks + +import ( + "context" + "log/slog" + "sync" + "time" +) + +const ( + 
defaultAsyncWorkerCount = 4 + defaultAsyncQueueCapacity = 64 + defaultAsyncDrainTimeout = 10 * time.Second +) + +type asyncTask struct { + hook RegisteredHook + run func(context.Context) +} + +type asyncPoolConfig struct { + WorkerCount int + QueueCapacity int + DrainTimeout time.Duration + Logger *slog.Logger + Metrics *hookMetrics +} + +type asyncPool struct { + logger *slog.Logger + workerCount int + queueCapacity int + drainTimeout time.Duration + metrics *hookMetrics + + mu sync.RWMutex + ctx context.Context + cancel context.CancelFunc + tasks chan asyncTask + wg sync.WaitGroup + started bool + closed bool +} + +func newAsyncPool(cfg asyncPoolConfig) *asyncPool { + workerCount := cfg.WorkerCount + if workerCount <= 0 { + workerCount = defaultAsyncWorkerCount + } + + queueCapacity := cfg.QueueCapacity + if queueCapacity <= 0 { + queueCapacity = defaultAsyncQueueCapacity + } + + drainTimeout := cfg.DrainTimeout + if drainTimeout <= 0 { + drainTimeout = defaultAsyncDrainTimeout + } + + logger := cfg.Logger + if logger == nil { + logger = slog.Default() + } + + return &asyncPool{ + logger: logger, + workerCount: workerCount, + queueCapacity: queueCapacity, + drainTimeout: drainTimeout, + metrics: cfg.Metrics, + } +} + +func (p *asyncPool) Start(parent context.Context) { + if p == nil { + return + } + if parent == nil { + parent = context.Background() + } + + p.mu.Lock() + if p.started || p.closed { + p.mu.Unlock() + return + } + + p.ctx, p.cancel = context.WithCancel(parent) + p.tasks = make(chan asyncTask, p.queueCapacity) + p.started = true + + workerCtx := p.ctx + tasks := p.tasks + workerCount := p.workerCount + p.wg.Add(workerCount) + p.mu.Unlock() + + for range workerCount { + go p.worker(workerCtx, tasks) + } +} + +func (p *asyncPool) Submit(task asyncTask) bool { + if p == nil { + return false + } + + p.mu.RLock() + if !p.started || p.closed || p.tasks == nil { + p.mu.RUnlock() + return false + } + + select { + case p.tasks <- task: + 
// Close shuts the pool down and waits for its workers to exit.
//
// It is a no-op on a nil pool and on a pool that is already closed; only the
// first caller performs the shutdown, later callers return immediately
// without waiting. A pool that was never started is simply marked closed.
//
// For a started pool, Close stops accepting work, lets the workers drain the
// queue for up to drainTimeout, and, if they have not finished by then,
// discards whatever is still queued, cancels the worker context and waits for
// the workers to return. That final wait has no deadline, so a task that
// ignores context cancellation keeps Close blocked.
func (p *asyncPool) Close() {
	if p == nil {
		return
	}

	p.mu.Lock()
	if p.closed {
		p.mu.Unlock()
		return
	}
	// closed is set under the write lock; Submit checks it and sends while
	// holding the read lock, so no send can be in flight once the lock is
	// released below and closing the channel is safe.
	p.closed = true

	if !p.started || p.tasks == nil {
		p.mu.Unlock()
		return
	}

	// Snapshot what the shutdown needs so the lock is not held while waiting.
	tasks := p.tasks
	cancel := p.cancel
	drainTimeout := p.drainTimeout
	p.mu.Unlock()

	if cancel != nil {
		// Always release the worker context, even on the clean-drain path.
		defer cancel()
	}

	// Closing the channel lets workers finish the queued tasks and then exit.
	close(tasks)

	drainCtx, stopDrain := context.WithTimeout(context.Background(), drainTimeout)
	defer stopDrain()

	// Bridge the WaitGroup to a channel so it can be used in the select below.
	done := make(chan struct{})
	go func() {
		p.wg.Wait()
		close(done)
	}()

	select {
	case <-done:
		return
	case <-drainCtx.Done():
		// Drain deadline hit: empty the queue first so no worker picks up
		// another task, then cancel the context handed to running tasks.
		discardAsyncTasks(tasks)
		if cancel != nil {
			cancel()
		}
		<-done
	}
}
000000000..3e557a847 --- /dev/null +++ b/internal/hooks/pool_test.go @@ -0,0 +1,404 @@ +package hooks + +import ( + "bytes" + "context" + "io" + "log/slog" + "sync" + "sync/atomic" + "testing" + "time" +) + +func TestNewAsyncPoolAppliesDefaults(t *testing.T) { + t.Parallel() + + pool := newAsyncPool(asyncPoolConfig{}) + if pool.workerCount != defaultAsyncWorkerCount { + t.Fatalf("workerCount = %d, want %d", pool.workerCount, defaultAsyncWorkerCount) + } + if pool.queueCapacity != defaultAsyncQueueCapacity { + t.Fatalf("queueCapacity = %d, want %d", pool.queueCapacity, defaultAsyncQueueCapacity) + } + if pool.drainTimeout != defaultAsyncDrainTimeout { + t.Fatalf("drainTimeout = %s, want %s", pool.drainTimeout, defaultAsyncDrainTimeout) + } + if pool.logger == nil { + t.Fatal("logger = nil, want non-nil") + } +} + +func TestAsyncPoolStartsConfiguredWorkers(t *testing.T) { + t.Parallel() + + const workers = 3 + pool := newAsyncPool(asyncPoolConfig{ + WorkerCount: workers, + QueueCapacity: workers, + Logger: discardPoolLogger(), + }) + pool.Start(t.Context()) + + started := make(chan struct{}, workers) + release := make(chan struct{}) + for i := 0; i < workers; i++ { + if ok := pool.Submit(asyncTask{ + run: func(context.Context) { + started <- struct{}{} + <-release + }, + }); !ok { + t.Fatalf("Submit() #%d = false, want true", i) + } + } + + for i := 0; i < workers; i++ { + waitForPoolSignal(t, started, "worker start") + } + + close(release) + pool.Close() +} + +func TestAsyncPoolSubmitWithAvailableCapacitySucceeds(t *testing.T) { + t.Parallel() + + pool := newAsyncPool(asyncPoolConfig{ + WorkerCount: 1, + QueueCapacity: 1, + Logger: discardPoolLogger(), + }) + pool.Start(t.Context()) + + done := make(chan struct{}) + if ok := pool.Submit(asyncTask{ + run: func(context.Context) { + close(done) + }, + }); !ok { + t.Fatal("Submit() = false, want true") + } + + waitForPoolSignal(t, done, "task completion") + pool.Close() +} + +func 
TestAsyncPoolSubmitDropsWhenQueueIsFullAndLogsDepth(t *testing.T) { + t.Parallel() + + var logs bytes.Buffer + logger := slog.New(slog.NewTextHandler(&logs, nil)) + pool := newAsyncPool(asyncPoolConfig{ + WorkerCount: 1, + QueueCapacity: 1, + Logger: logger, + }) + pool.Start(t.Context()) + + started := make(chan struct{}) + release := make(chan struct{}) + if ok := pool.Submit(asyncTask{ + hook: RegisteredHook{ + Name: "hook-1", + Event: HookEventPostRecord, + Source: HookSourceSkill, + }, + run: func(context.Context) { + close(started) + <-release + }, + }); !ok { + t.Fatal("Submit() first task = false, want true") + } + waitForPoolSignal(t, started, "first task start") + + if ok := pool.Submit(asyncTask{ + hook: RegisteredHook{ + Name: "hook-2", + Event: HookEventPostRecord, + Source: HookSourceSkill, + }, + run: func(context.Context) {}, + }); !ok { + t.Fatal("Submit() queued task = false, want true") + } + + if ok := pool.Submit(asyncTask{ + hook: RegisteredHook{ + Name: "hook-3", + Event: HookEventPostRecord, + Source: HookSourceSkill, + }, + run: func(context.Context) {}, + }); ok { + t.Fatal("Submit() overflow task = true, want false") + } + + logOutput := logs.String() + if !bytes.Contains([]byte(logOutput), []byte("hook.dispatch.async_dropped")) { + t.Fatalf("logs = %q, want async_dropped entry", logOutput) + } + if !bytes.Contains([]byte(logOutput), []byte("queue_depth=1")) { + t.Fatalf("logs = %q, want queue_depth=1", logOutput) + } + if !bytes.Contains([]byte(logOutput), []byte("hook=hook-3")) { + t.Fatalf("logs = %q, want dropped hook name", logOutput) + } + + close(release) + pool.Close() +} + +func TestAsyncPoolCloseDrainsQueuedTasksBeforeReturning(t *testing.T) { + t.Parallel() + + pool := newAsyncPool(asyncPoolConfig{ + WorkerCount: 1, + QueueCapacity: 2, + Logger: discardPoolLogger(), + }) + pool.Start(t.Context()) + + var ran atomic.Int32 + started := make(chan struct{}) + release := make(chan struct{}) + if ok := pool.Submit(asyncTask{ + run: 
func(context.Context) { + ran.Add(1) + close(started) + <-release + }, + }); !ok { + t.Fatal("Submit() first task = false, want true") + } + waitForPoolSignal(t, started, "first task start") + + completed := make(chan struct{}, 2) + for i := 0; i < 2; i++ { + if ok := pool.Submit(asyncTask{ + run: func(context.Context) { + ran.Add(1) + completed <- struct{}{} + }, + }); !ok { + t.Fatalf("Submit() queued task #%d = false, want true", i) + } + } + + closed := make(chan struct{}) + go func() { + pool.Close() + close(closed) + }() + + select { + case <-closed: + t.Fatal("Close() returned before queued tasks were drainable") + case <-time.After(50 * time.Millisecond): + } + + close(release) + waitForPoolSignal(t, completed, "second task completion") + waitForPoolSignal(t, completed, "third task completion") + waitForPoolSignal(t, closed, "pool close") + + if got := ran.Load(); got != 3 { + t.Fatalf("ran = %d, want 3", got) + } +} + +func TestAsyncPoolCloseCancelsAfterDrainDeadline(t *testing.T) { + t.Parallel() + + pool := newAsyncPool(asyncPoolConfig{ + WorkerCount: 1, + QueueCapacity: 1, + DrainTimeout: 40 * time.Millisecond, + Logger: discardPoolLogger(), + }) + pool.Start(t.Context()) + + started := make(chan struct{}) + cancelled := make(chan struct{}) + if ok := pool.Submit(asyncTask{ + run: func(ctx context.Context) { + close(started) + <-ctx.Done() + close(cancelled) + }, + }); !ok { + t.Fatal("Submit() first task = false, want true") + } + waitForPoolSignal(t, started, "first task start") + + var queuedRan atomic.Bool + if ok := pool.Submit(asyncTask{ + run: func(context.Context) { + queuedRan.Store(true) + }, + }); !ok { + t.Fatal("Submit() queued task = false, want true") + } + + start := time.Now() + pool.Close() + elapsed := time.Since(start) + + waitForPoolSignal(t, cancelled, "task cancellation") + if queuedRan.Load() { + t.Fatal("queued task ran after drain deadline, want abandoned") + } + if elapsed < 40*time.Millisecond { + t.Fatalf("Close() elapsed = 
%s, want at least drain timeout", elapsed) + } + if elapsed > time.Second { + t.Fatalf("Close() elapsed = %s, want prompt deadline handling", elapsed) + } +} + +func TestAsyncPoolRecoversPanicsAndContinues(t *testing.T) { + t.Parallel() + + pool := newAsyncPool(asyncPoolConfig{ + WorkerCount: 1, + QueueCapacity: 1, + Logger: discardPoolLogger(), + }) + pool.Start(t.Context()) + + started := make(chan struct{}) + if ok := pool.Submit(asyncTask{ + run: func(context.Context) { + close(started) + panic("boom") + }, + }); !ok { + t.Fatal("Submit() panic task = false, want true") + } + waitForPoolSignal(t, started, "panic task start") + + done := make(chan struct{}) + if ok := pool.Submit(asyncTask{ + run: func(context.Context) { + close(done) + }, + }); !ok { + t.Fatal("Submit() recovery task = false, want true") + } + + waitForPoolSignal(t, done, "post-panic task completion") + pool.Close() +} + +func TestAsyncPoolContextCancellationStopsWorkers(t *testing.T) { + t.Parallel() + + parent, cancel := context.WithCancel(t.Context()) + pool := newAsyncPool(asyncPoolConfig{ + WorkerCount: 2, + QueueCapacity: 2, + Logger: discardPoolLogger(), + }) + pool.Start(parent) + + started := make(chan struct{}, 2) + stopped := make(chan struct{}, 2) + for i := 0; i < 2; i++ { + if ok := pool.Submit(asyncTask{ + run: func(ctx context.Context) { + started <- struct{}{} + <-ctx.Done() + stopped <- struct{}{} + }, + }); !ok { + t.Fatalf("Submit() #%d = false, want true", i) + } + } + + waitForPoolSignal(t, started, "first worker start") + waitForPoolSignal(t, started, "second worker start") + cancel() + waitForPoolSignal(t, stopped, "first worker stop") + waitForPoolSignal(t, stopped, "second worker stop") + pool.Close() +} + +func TestAsyncPoolCloseWithNoTasksReturnsCleanly(t *testing.T) { + t.Parallel() + + pool := newAsyncPool(asyncPoolConfig{ + WorkerCount: 2, + QueueCapacity: 4, + Logger: discardPoolLogger(), + }) + pool.Start(t.Context()) + + closed := make(chan struct{}) + go 
// waitForPoolSignal blocks until ch delivers a value or is closed, and fails
// the test with a message naming label if that does not happen within one
// second.
func waitForPoolSignal(t *testing.T, ch <-chan struct{}, label string) {
	t.Helper()

	deadline := time.NewTimer(time.Second)
	defer deadline.Stop()

	select {
	case <-ch:
		return
	case <-deadline.C:
		t.Fatalf("timed out waiting for %s", label)
	}
}
// hookMetrics accumulates in-process counters for hook dispatch. All fields
// are guarded by mu; the observe* methods are nil-receiver safe so callers
// may hold a nil *hookMetrics when metrics are disabled.
type hookMetrics struct {
	mu sync.Mutex

	// Per (event, source, mode, outcome) run count and summed run duration.
	dispatchCounts  map[dispatchMetricKey]int64
	dispatchLatency map[dispatchMetricKey]time.Duration
	// Per-event pipeline execution count and summed pipeline duration.
	pipelineCounts  map[HookEvent]int64
	pipelineLatency map[HookEvent]time.Duration
	// Number of async drops reported through observeAsyncDrop.
	asyncDropCount int64
	// Highest async queue depth observed so far (a high-water mark, not the
	// current depth): it is only ever raised by the observe methods.
	asyncQueueDepth int
	// Count of observePermissionEscalationBlock calls.
	permissionEscalationBlocks int64
	// Count of observeDepthViolation calls.
	dispatchDepthViolationCount int64
	// Registry reload count, summed reload duration, and the hook delta
	// reported by the most recent reload (overwritten on every reload).
	registryReloadCount         int64
	registryReloadLatency       time.Duration
	registryReloadLastHookDelta int
}
+func HookRunWriterFromContext(ctx context.Context) HookRunWriter { + if ctx == nil { + return nil + } + writer, _ := ctx.Value(hookRunWriterContextKey{}).(HookRunWriter) + return writer +} + +func newHookMetrics() *hookMetrics { + return &hookMetrics{ + dispatchCounts: make(map[dispatchMetricKey]int64), + dispatchLatency: make(map[dispatchMetricKey]time.Duration), + pipelineCounts: make(map[HookEvent]int64), + pipelineLatency: make(map[HookEvent]time.Duration), + } +} + +func (m *hookMetrics) observeHookRun(record HookRunRecord) { + if m == nil { + return + } + key := dispatchMetricKey{ + Event: record.Event, + Source: record.Source, + Mode: record.Mode, + Outcome: record.Outcome, + } + + m.mu.Lock() + defer m.mu.Unlock() + m.dispatchCounts[key]++ + m.dispatchLatency[key] += record.Duration +} + +func (m *hookMetrics) observePipeline(event HookEvent, duration time.Duration) { + if m == nil { + return + } + m.mu.Lock() + defer m.mu.Unlock() + m.pipelineCounts[event]++ + m.pipelineLatency[event] += duration +} + +func (m *hookMetrics) observeAsyncDrop(queueDepth int) { + if m == nil { + return + } + m.mu.Lock() + defer m.mu.Unlock() + m.asyncDropCount++ + if queueDepth > m.asyncQueueDepth { + m.asyncQueueDepth = queueDepth + } +} + +func (m *hookMetrics) observeQueueDepth(queueDepth int) { + if m == nil { + return + } + m.mu.Lock() + defer m.mu.Unlock() + if queueDepth > m.asyncQueueDepth { + m.asyncQueueDepth = queueDepth + } +} + +func (m *hookMetrics) observePermissionEscalationBlock() { + if m == nil { + return + } + m.mu.Lock() + defer m.mu.Unlock() + m.permissionEscalationBlocks++ +} + +func (m *hookMetrics) observeDepthViolation() { + if m == nil { + return + } + m.mu.Lock() + defer m.mu.Unlock() + m.dispatchDepthViolationCount++ +} + +func (m *hookMetrics) observeRegistryReload(duration time.Duration, hookDelta int) { + if m == nil { + return + } + m.mu.Lock() + defer m.mu.Unlock() + m.registryReloadCount++ + m.registryReloadLatency += duration + 
m.registryReloadLastHookDelta = hookDelta +} + +func (h *Hooks) emitHookRun(ctx context.Context, payload any, hook RegisteredHook, outcome HookRunOutcome, duration time.Duration, rawPatch json.RawMessage, err error, depth int) { + if h == nil { + return + } + + record := HookRunRecord{ + HookName: hook.Name, + Event: hook.Event, + Source: hook.Source, + Mode: hook.Mode, + Duration: duration, + Outcome: outcome, + DispatchDepth: depth, + PatchApplied: h.persistedPatchForEvent(hook.Event, rawPatch), + Error: strings.TrimSpace(errorString(err)), + Required: hook.Required, + RecordedAt: h.now().UTC(), + } + + h.metrics.observeHookRun(record) + + if writer := HookRunWriterFromContext(ctx); writer != nil { + if writeErr := writer.RecordHookRun(ctx, record); writeErr != nil { + h.logger.WarnContext(ctx, "hook.dispatch.telemetry_write_failed", "hook", hook.Name, "event", hook.Event.String(), "error", writeErr) + } + return + } + + if h.telemetrySink == nil { + return + } + + sessionID := sessionIDFromPayload(payload) + if strings.TrimSpace(sessionID) == "" { + return + } + + if writeErr := h.telemetrySink.WriteHookRecord(ctx, sessionID, record); writeErr != nil { + h.logger.WarnContext(ctx, "hook.dispatch.telemetry_write_failed", "hook", hook.Name, "event", hook.Event.String(), "session_id", sessionID, "error", writeErr) + } +} + +func (h *Hooks) persistedPatchForEvent(event HookEvent, rawPatch json.RawMessage) json.RawMessage { + if len(rawPatch) == 0 || !shouldPersistPatch(event, h.debugPatchAudit) { + return nil + } + return cloneRawJSON(rawPatch) +} + +func shouldPersistPatch(event HookEvent, debug bool) bool { + switch event.Family() { + case HookEventFamilyPermission, HookEventFamilyPrompt, HookEventFamilyTool, HookEventFamilyInput: + return true + default: + return debug + } +} + +func cloneRawJSON(src json.RawMessage) json.RawMessage { + if len(src) == 0 { + return nil + } + return append(json.RawMessage(nil), src...) 
+} + +func errorString(err error) string { + if err == nil { + return "" + } + return err.Error() +} + +type sessionContextCarrier interface { + hookSessionContext() SessionContext +} + +func sessionIDFromPayload(payload any) string { + carrier, ok := payload.(sessionContextCarrier) + if !ok { + return "" + } + return strings.TrimSpace(carrier.hookSessionContext().SessionID) +} + +func traceStrings(trace []hookTraceEntry) []string { + if len(trace) == 0 { + return nil + } + + out := make([]string, 0, len(trace)) + for _, entry := range trace { + item := entry.Hook + ":" + string(entry.Outcome) + if entry.Error != "" { + item += ":" + entry.Error + } + out = append(out, item) + } + return out +} + +func (h *Hooks) recordDepthViolation(ctx context.Context, event HookEvent, err error) { + if h == nil || err == nil { + return + } + h.metrics.observeDepthViolation() + h.logger.WarnContext( + ctx, + "hook.dispatch.depth_exceeded", + "event", event.String(), + "error", err, + "event_chain", hookEventChainStrings(currentDispatchChain(ctx), event), + ) +} + +func (h *Hooks) enterDispatch(ctx context.Context, event HookEvent) (context.Context, int, error) { + dispatchCtx, depth, err := enterDispatch(ctx, event) + if err != nil { + h.recordDepthViolation(ctx, event, err) + } + return dispatchCtx, depth, err +} + +func hookEventChainStrings(chain []HookEvent, next HookEvent) []string { + items := make([]string, 0, len(chain)+1) + for _, event := range chain { + items = append(items, event.String()) + } + if next != "" { + items = append(items, next.String()) + } + return items +} diff --git a/internal/hooks/telemetry_test.go b/internal/hooks/telemetry_test.go new file mode 100644 index 000000000..1add85e26 --- /dev/null +++ b/internal/hooks/telemetry_test.go @@ -0,0 +1,218 @@ +package hooks + +import ( + "context" + "encoding/json" + "errors" + "testing" +) + +func TestHookTelemetrySecurityPatchPersistsAllFields(t *testing.T) { + t.Parallel() + + writer := 
&captureHookRunWriter{} + hooks := newTelemetryTestHooks(t, false, HookDecl{ + Name: "permission-hook", + Event: HookPermissionRequest, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }, map[string]Executor{ + "permission-hook": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ PermissionRequestPayload) (PermissionRequestPatch, error) { + deny := "deny" + return PermissionRequestPatch{ + Decision: &deny, + Reason: stringPointer("policy"), + }, nil + }), + }) + + ctx := WithHookRunWriter(t.Context(), writer) + _, err := hooks.DispatchPermissionRequest(ctx, PermissionRequestPayload{ + PayloadBase: PayloadBase{Event: HookPermissionRequest}, + SessionContext: SessionContext{ + SessionID: "sess-security", + }, + Decision: "allow", + }) + if err != nil { + t.Fatalf("DispatchPermissionRequest() error = %v", err) + } + + record := writer.singleRecord(t) + if record.HookName != "permission-hook" { + t.Fatalf("record.HookName = %q, want permission-hook", record.HookName) + } + if record.Event != HookPermissionRequest { + t.Fatalf("record.Event = %q, want %q", record.Event, HookPermissionRequest) + } + if record.Source != HookSourceNative { + t.Fatalf("record.Source = %q, want %q", record.Source, HookSourceNative) + } + if record.Mode != HookModeSync { + t.Fatalf("record.Mode = %q, want %q", record.Mode, HookModeSync) + } + if record.Outcome != HookRunOutcomeDenied { + t.Fatalf("record.Outcome = %q, want %q", record.Outcome, HookRunOutcomeDenied) + } + if record.DispatchDepth != 1 { + t.Fatalf("record.DispatchDepth = %d, want 1", record.DispatchDepth) + } + if len(record.PatchApplied) == 0 { + t.Fatal("record.PatchApplied = nil, want captured security patch") + } + if record.Duration <= 0 { + t.Fatalf("record.Duration = %s, want > 0", record.Duration) + } + if record.RecordedAt.IsZero() { + t.Fatal("record.RecordedAt is zero") + } +} + +func TestHookTelemetryOmitsNonSecurityPatchOutsideDebug(t *testing.T) { + t.Parallel() + + writer := 
&captureHookRunWriter{} + hooks := newTelemetryTestHooks(t, false, HookDecl{ + Name: "session-hook", + Event: HookSessionPostCreate, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }, map[string]Executor{ + "session-hook": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ SessionPostCreatePayload) (SessionPostCreatePatch, error) { + return SessionPostCreatePatch{SessionName: stringPointer("patched")}, nil + }), + }) + + ctx := WithHookRunWriter(t.Context(), writer) + _, err := hooks.DispatchSessionPostCreate(ctx, SessionPostCreatePayload{ + PayloadBase: PayloadBase{Event: HookSessionPostCreate}, + SessionContext: SessionContext{ + SessionID: "sess-normal", + }, + }) + if err != nil { + t.Fatalf("DispatchSessionPostCreate() error = %v", err) + } + + if patch := writer.singleRecord(t).PatchApplied; len(patch) != 0 { + t.Fatalf("PatchApplied = %s, want omitted in normal mode", patch) + } +} + +func TestHookTelemetryCapturesNonSecurityPatchInDebugMode(t *testing.T) { + t.Parallel() + + writer := &captureHookRunWriter{} + hooks := newTelemetryTestHooks(t, true, HookDecl{ + Name: "session-hook", + Event: HookSessionPostCreate, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }, map[string]Executor{ + "session-hook": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ SessionPostCreatePayload) (SessionPostCreatePatch, error) { + return SessionPostCreatePatch{SessionName: stringPointer("patched")}, nil + }), + }) + + ctx := WithHookRunWriter(t.Context(), writer) + _, err := hooks.DispatchSessionPostCreate(ctx, SessionPostCreatePayload{ + PayloadBase: PayloadBase{Event: HookSessionPostCreate}, + SessionContext: SessionContext{ + SessionID: "sess-debug", + }, + }) + if err != nil { + t.Fatalf("DispatchSessionPostCreate() error = %v", err) + } + + record := writer.singleRecord(t) + if len(record.PatchApplied) == 0 { + t.Fatal("PatchApplied = nil, want captured patch in debug mode") + } +} + +func 
TestHookTelemetryRecordsFailureOutcomeAndDuration(t *testing.T) { + t.Parallel() + + writer := &captureHookRunWriter{} + hooks := newTelemetryTestHooks(t, false, HookDecl{ + Name: "failing-hook", + Event: HookSessionPostCreate, + Mode: HookModeSync, + ExecutorKind: HookExecutorNative, + }, map[string]Executor{ + "failing-hook": NewTypedNativeExecutor(func(_ context.Context, _ RegisteredHook, _ SessionPostCreatePayload) (SessionPostCreatePatch, error) { + return SessionPostCreatePatch{}, errors.New("boom") + }), + }) + + ctx := WithHookRunWriter(t.Context(), writer) + _, err := hooks.DispatchSessionPostCreate(ctx, SessionPostCreatePayload{ + PayloadBase: PayloadBase{Event: HookSessionPostCreate}, + SessionContext: SessionContext{ + SessionID: "sess-failure", + }, + }) + if err != nil { + t.Fatalf("DispatchSessionPostCreate() error = %v, want nil for non-required failure", err) + } + + record := writer.singleRecord(t) + if record.Outcome != HookRunOutcomeFailed { + t.Fatalf("record.Outcome = %q, want %q", record.Outcome, HookRunOutcomeFailed) + } + if record.Error != "boom" { + t.Fatalf("record.Error = %q, want boom", record.Error) + } + if record.Duration <= 0 { + t.Fatalf("record.Duration = %s, want > 0", record.Duration) + } +} + +type captureHookRunWriter struct { + records []HookRunRecord +} + +func (c *captureHookRunWriter) RecordHookRun(_ context.Context, record HookRunRecord) error { + c.records = append(c.records, cloneTelemetryRecord(record)) + return nil +} + +func (c *captureHookRunWriter) singleRecord(t *testing.T) HookRunRecord { + t.Helper() + if got, want := len(c.records), 1; got != want { + t.Fatalf("len(records) = %d, want %d", got, want) + } + return c.records[0] +} + +func newTelemetryTestHooks(t *testing.T, debug bool, decl HookDecl, executors map[string]Executor) *Hooks { + t.Helper() + + hooks := NewHooks( + WithLogger(discardPoolLogger()), + WithDebugPatchAudit(debug), + WithNativeDeclarations([]HookDecl{decl}), + 
WithExecutorResolver(func(decl HookDecl) (Executor, error) { + executor, ok := executors[decl.Name] + if !ok { + return nil, errors.New("missing executor") + } + return executor, nil + }), + ) + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v", err) + } + return hooks +} + +func cloneTelemetryRecord(src HookRunRecord) HookRunRecord { + cloned := src + cloned.PatchApplied = append(json.RawMessage(nil), src.PatchApplied...) + return cloned +} + +func stringPointer(value string) *string { + return &value +} diff --git a/internal/hooks/types.go b/internal/hooks/types.go new file mode 100644 index 000000000..01c561353 --- /dev/null +++ b/internal/hooks/types.go @@ -0,0 +1,258 @@ +package hooks + +import ( + "encoding/json" + "fmt" + "strings" + "time" +) + +// HookSource identifies where a hook was declared. +type HookSource int + +const ( + HookSourceNative HookSource = iota + HookSourceConfig + HookSourceAgentDefinition + HookSourceSkill +) + +var hookSourceNames = map[HookSource]string{ + HookSourceNative: "native", + HookSourceConfig: "config", + HookSourceAgentDefinition: "agent_definition", + HookSourceSkill: "skill", +} + +// HookSkillSource captures the existing skill-registry precedence without +// importing internal/skills into the hooks base package. +type HookSkillSource string + +const ( + HookSkillSourceBundled HookSkillSource = "bundled" + HookSkillSourceMarketplace HookSkillSource = "marketplace" + HookSkillSourceUser HookSkillSource = "user" + HookSkillSourceAdditional HookSkillSource = "additional" + HookSkillSourceWorkspace HookSkillSource = "workspace" +) + +// String returns the stable text form for the hook source. +func (s HookSource) String() string { + name, ok := hookSourceNames[s] + if !ok { + return "" + } + return name +} + +// MarshalText encodes the source as a string. 
+func (s HookSource) MarshalText() ([]byte, error) { + if err := s.Validate(); err != nil { + return nil, err + } + return []byte(s.String()), nil +} + +// UnmarshalText decodes the source from a string value. +func (s *HookSource) UnmarshalText(text []byte) error { + value := strings.TrimSpace(string(text)) + for source, name := range hookSourceNames { + if value == name { + *s = source + return nil + } + } + return fmt.Errorf("hooks: invalid hook source %q", value) +} + +// Validate ensures the source is one of the documented values. +func (s HookSource) Validate() error { + if _, ok := hookSourceNames[s]; !ok { + return fmt.Errorf("hooks: invalid hook source %d", s) + } + return nil +} + +// Validate ensures the skill source is one of the documented values when set. +func (s HookSkillSource) Validate() error { + switch s { + case "": + return nil + case HookSkillSourceBundled, + HookSkillSourceMarketplace, + HookSkillSourceUser, + HookSkillSourceAdditional, + HookSkillSourceWorkspace: + return nil + default: + return fmt.Errorf("hooks: invalid hook skill source %q", s) + } +} + +// HookMode controls whether a hook runs inline or in the background. +type HookMode string + +const ( + HookModeSync HookMode = "sync" + HookModeAsync HookMode = "async" +) + +// Validate ensures the mode is supported. +func (m HookMode) Validate() error { + switch m { + case HookModeSync, HookModeAsync: + return nil + default: + return fmt.Errorf("hooks: invalid hook mode %q", m) + } +} + +// HookRunOutcome classifies the result of one hook execution. +type HookRunOutcome string + +const ( + HookRunOutcomeApplied HookRunOutcome = "applied" + HookRunOutcomeDenied HookRunOutcome = "denied" + HookRunOutcomeFailed HookRunOutcome = "failed" + HookRunOutcomeSkipped HookRunOutcome = "skipped" + HookRunOutcomeDropped HookRunOutcome = "dropped" + HookRunOutcomeRejected HookRunOutcome = "rejected" +) + +// Validate ensures the outcome is one of the documented execution results. 
+func (o HookRunOutcome) Validate() error { + switch o { + case HookRunOutcomeApplied, + HookRunOutcomeDenied, + HookRunOutcomeFailed, + HookRunOutcomeSkipped, + HookRunOutcomeDropped, + HookRunOutcomeRejected: + return nil + default: + return fmt.Errorf("hooks: invalid hook run outcome %q", o) + } +} + +// HookMatcher narrows when a hook is eligible to run. +type HookMatcher struct { + AgentName string `json:"agent_name,omitempty" yaml:"agent_name,omitempty"` + AgentType string `json:"agent_type,omitempty" yaml:"agent_type,omitempty"` + WorkspaceID string `json:"workspace_id,omitempty" yaml:"workspace_id,omitempty"` + WorkspaceRoot string `json:"workspace_root,omitempty" yaml:"workspace_root,omitempty"` + SessionType string `json:"session_type,omitempty" yaml:"session_type,omitempty"` + InputClass string `json:"input_class,omitempty" yaml:"input_class,omitempty"` + ACPEventType string `json:"acp_event_type,omitempty" yaml:"acp_event_type,omitempty"` + TurnID string `json:"turn_id,omitempty" yaml:"turn_id,omitempty"` + ToolName string `json:"tool_name,omitempty" yaml:"tool_name,omitempty"` + ToolNamespace string `json:"tool_namespace,omitempty" yaml:"tool_namespace,omitempty"` + ToolReadOnly *bool `json:"tool_read_only,omitempty" yaml:"tool_read_only,omitempty"` + DecisionClass string `json:"decision_class,omitempty" yaml:"decision_class,omitempty"` + MessageRole string `json:"message_role,omitempty" yaml:"message_role,omitempty"` + MessageDeltaType string `json:"message_delta_type,omitempty" yaml:"message_delta_type,omitempty"` + CompactionReason string `json:"compaction_reason,omitempty" yaml:"compaction_reason,omitempty"` + CompactionStrategy string `json:"compaction_strategy,omitempty" yaml:"compaction_strategy,omitempty"` +} + +// HookDecl is the declarative record supplied by config, agent definitions, or skills. 
+type HookDecl struct { + Name string `json:"name" yaml:"name"` + Event HookEvent `json:"event" yaml:"event"` + Source HookSource `json:"source" yaml:"source"` + Mode HookMode `json:"mode,omitempty" yaml:"mode,omitempty"` + Required bool `json:"required,omitempty" yaml:"required,omitempty"` + Priority int `json:"priority,omitempty" yaml:"priority,omitempty"` + PrioritySet bool `json:"-" yaml:"-"` + Timeout time.Duration `json:"timeout,omitempty" yaml:"timeout,omitempty"` + Matcher HookMatcher `json:"matcher,omitempty" yaml:"matcher,omitempty"` + ExecutorKind HookExecutorKind `json:"executor_kind,omitempty" yaml:"executor_kind,omitempty"` + Command string `json:"command,omitempty" yaml:"command,omitempty"` + Args []string `json:"args,omitempty" yaml:"args,omitempty"` + Env map[string]string `json:"env,omitempty" yaml:"env,omitempty"` + Metadata map[string]string `json:"metadata,omitempty" yaml:"metadata,omitempty"` + SkillSource HookSkillSource `json:"-" yaml:"-"` +} + +// RegisteredHook is the normalized hook ready for dispatch. +type RegisteredHook struct { + Name string + Event HookEvent + Source HookSource + Mode HookMode + Required bool + Priority int + Timeout time.Duration + Matcher HookMatcher + Executor Executor + Metadata map[string]string +} + +// Validate ensures the registered hook satisfies the task-01 invariants. 
+func (h RegisteredHook) Validate() error { + if strings.TrimSpace(h.Name) == "" { + return fmt.Errorf("hooks: hook name is required") + } + if err := h.Event.Validate(); err != nil { + return err + } + if err := h.Source.Validate(); err != nil { + return err + } + if err := h.Mode.Validate(); err != nil { + return err + } + if h.Required && h.Mode != HookModeSync { + return fmt.Errorf("hooks: required hook %q must use sync mode", h.Name) + } + if h.Mode == HookModeSync && !h.Event.SyncEligible() { + return fmt.Errorf("hooks: event %q does not allow sync hooks", h.Event) + } + if h.Timeout < 0 { + return fmt.Errorf("hooks: hook %q timeout must be non-negative", h.Name) + } + return nil +} + +// ResolvedHook is the registry snapshot record with executor binding attached. +type ResolvedHook struct { + RegisteredHook + Decl HookDecl +} + +// Validate ensures the resolved hook is internally consistent. +func (h ResolvedHook) Validate() error { + if err := h.RegisteredHook.Validate(); err != nil { + return err + } + if h.Executor == nil { + return fmt.Errorf("hooks: resolved hook %q executor is required", h.Name) + } + if strings.TrimSpace(h.Decl.Name) == "" { + return nil + } + if err := h.Decl.SkillSource.Validate(); err != nil { + return err + } + if h.Decl.ExecutorKind != "" && h.Executor.Kind() != h.Decl.ExecutorKind { + return fmt.Errorf("hooks: resolved hook %q executor kind %q does not match declaration %q", h.Name, h.Executor.Kind(), h.Decl.ExecutorKind) + } + if h.Decl.Name != h.Name { + return fmt.Errorf("hooks: resolved hook %q does not match declaration %q", h.Name, h.Decl.Name) + } + return nil +} + +// HookRunRecord captures one hook execution for observability and audit. 
+type HookRunRecord struct { + HookName string `json:"hook_name"` + Event HookEvent `json:"event"` + Source HookSource `json:"source"` + Mode HookMode `json:"mode"` + Duration time.Duration `json:"duration"` + Outcome HookRunOutcome `json:"outcome"` + DispatchDepth int `json:"dispatch_depth"` + PatchApplied json.RawMessage `json:"patch_applied,omitempty"` + Error string `json:"error,omitempty"` + Required bool `json:"required,omitempty"` + RecordedAt time.Time `json:"recorded_at"` +} diff --git a/internal/hooks/types_test.go b/internal/hooks/types_test.go new file mode 100644 index 000000000..7835975b2 --- /dev/null +++ b/internal/hooks/types_test.go @@ -0,0 +1,222 @@ +package hooks + +import ( + "context" + "encoding/json" + "errors" + "testing" + "time" +) + +type stubExecutor struct { + kind HookExecutorKind +} + +func (e stubExecutor) Kind() HookExecutorKind { + return e.kind +} + +func (e stubExecutor) Execute(_ context.Context, _ RegisteredHook, _ []byte) ([]byte, error) { + return nil, nil +} + +func TestHookSourceOrderingAndJSON(t *testing.T) { + t.Parallel() + + if HookSourceNative >= HookSourceConfig || + HookSourceConfig >= HookSourceAgentDefinition || + HookSourceAgentDefinition >= HookSourceSkill { + t.Fatalf("unexpected HookSource ordering: native=%d config=%d agent_definition=%d skill=%d", + HookSourceNative, HookSourceConfig, HookSourceAgentDefinition, HookSourceSkill) + } + + data, err := json.Marshal(HookSourceAgentDefinition) + if err != nil { + t.Fatalf("json.Marshal(HookSourceAgentDefinition) error = %v", err) + } + if string(data) != `"agent_definition"` { + t.Fatalf("json.Marshal(HookSourceAgentDefinition) = %s, want %q", string(data), `"agent_definition"`) + } + + var decoded HookSource + if err := json.Unmarshal(data, &decoded); err != nil { + t.Fatalf("json.Unmarshal(HookSource) error = %v", err) + } + if decoded != HookSourceAgentDefinition { + t.Fatalf("decoded HookSource = %v, want %v", decoded, HookSourceAgentDefinition) + } +} + +func 
TestHookSourceInvalid(t *testing.T) { + t.Parallel() + + if got := HookSource(42).String(); got != "" { + t.Fatalf("HookSource(42).String() = %q, want empty string", got) + } + if _, err := HookSource(42).MarshalText(); err == nil { + t.Fatal("HookSource(42).MarshalText() error = nil, want non-nil") + } +} + +func TestHookSkillSourceValidate(t *testing.T) { + t.Parallel() + + if err := HookSkillSourceWorkspace.Validate(); err != nil { + t.Fatalf("HookSkillSourceWorkspace.Validate() error = %v, want nil", err) + } + if err := HookSkillSource("remote").Validate(); err == nil { + t.Fatal("invalid HookSkillSource.Validate() error = nil, want non-nil") + } +} + +func TestHookModeAndExecutorKindValidate(t *testing.T) { + t.Parallel() + + if err := HookModeSync.Validate(); err != nil { + t.Fatalf("HookModeSync.Validate() error = %v, want nil", err) + } + if err := HookMode("later").Validate(); err == nil { + t.Fatal("invalid HookMode.Validate() error = nil, want non-nil") + } + + if err := HookExecutorSubprocess.Validate(); err != nil { + t.Fatalf("HookExecutorSubprocess.Validate() error = %v, want nil", err) + } + if err := HookExecutorKind("socket").Validate(); !errors.Is(err, ErrInvalidHookExecutorKind) { + t.Fatalf("invalid HookExecutorKind.Validate() error = %v, want ErrInvalidHookExecutorKind", err) + } +} + +func TestRegisteredHookValidate(t *testing.T) { + t.Parallel() + + base := RegisteredHook{ + Name: "test-hook", + Event: HookSessionPreCreate, + Source: HookSourceConfig, + Mode: HookModeSync, + Required: false, + Priority: 500, + Timeout: 5 * time.Second, + } + + tests := []struct { + name string + hook RegisteredHook + wantErr bool + }{ + { + name: "valid sync hook", + hook: base, + wantErr: false, + }, + { + name: "required async hook fails", + hook: func() RegisteredHook { + hook := base + hook.Mode = HookModeAsync + hook.Required = true + return hook + }(), + wantErr: true, + }, + { + name: "sync async-only event fails", + hook: func() RegisteredHook { + 
hook := base + hook.Event = HookMessageDelta + return hook + }(), + wantErr: true, + }, + { + name: "negative timeout fails", + hook: func() RegisteredHook { + hook := base + hook.Timeout = -time.Second + return hook + }(), + wantErr: true, + }, + { + name: "invalid source fails", + hook: func() RegisteredHook { + hook := base + hook.Source = HookSource(99) + return hook + }(), + wantErr: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + err := tt.hook.Validate() + if tt.wantErr && err == nil { + t.Fatal("RegisteredHook.Validate() error = nil, want non-nil") + } + if !tt.wantErr && err != nil { + t.Fatalf("RegisteredHook.Validate() error = %v, want nil", err) + } + }) + } +} + +func TestResolvedHookValidate(t *testing.T) { + t.Parallel() + + hook := ResolvedHook{ + RegisteredHook: RegisteredHook{ + Name: "resolved-hook", + Event: HookToolPreCall, + Source: HookSourceNative, + Mode: HookModeSync, + Executor: stubExecutor{kind: HookExecutorNative}, + }, + Decl: HookDecl{Name: "other-name"}, + } + + if err := hook.Validate(); err == nil { + t.Fatal("ResolvedHook.Validate() error = nil, want non-nil") + } +} + +func TestResolvedHookValidateSuccess(t *testing.T) { + t.Parallel() + + hook := ResolvedHook{ + RegisteredHook: RegisteredHook{ + Name: "resolved-hook", + Event: HookToolPreCall, + Source: HookSourceNative, + Mode: HookModeSync, + Executor: stubExecutor{kind: HookExecutorNative}, + }, + Decl: HookDecl{Name: "resolved-hook", ExecutorKind: HookExecutorNative}, + } + + if err := hook.Validate(); err != nil { + t.Fatalf("ResolvedHook.Validate() error = %v, want nil", err) + } +} + +func TestResolvedHookValidateRequiresExecutor(t *testing.T) { + t.Parallel() + + hook := ResolvedHook{ + RegisteredHook: RegisteredHook{ + Name: "resolved-hook", + Event: HookToolPreCall, + Source: HookSourceNative, + Mode: HookModeSync, + }, + Decl: HookDecl{Name: "resolved-hook", ExecutorKind: HookExecutorNative}, + } + + 
if err := hook.Validate(); err == nil { + t.Fatal("ResolvedHook.Validate() error = nil, want non-nil") + } +} diff --git a/internal/observe/hooks_test.go b/internal/observe/hooks_test.go new file mode 100644 index 000000000..5b557842b --- /dev/null +++ b/internal/observe/hooks_test.go @@ -0,0 +1,280 @@ +package observe + +import ( + "context" + "io" + "log/slog" + "os" + "path/filepath" + "testing" + "time" + + aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" + "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/store/sessiondb" + "github.com/pedronauck/agh/internal/testutil" +) + +func TestObserverAttachHooksAndQueryHookCatalog(t *testing.T) { + t.Parallel() + + h := newHarness(t) + if entries, err := h.observer.QueryHookCatalog(testutil.Context(t), hookspkg.CatalogFilter{}); err != nil || entries != nil { + t.Fatalf("QueryHookCatalog(before attach) = (%#v, %v), want (nil, nil)", entries, err) + } + + source := &stubHookCatalogSource{ + entries: []hookspkg.CatalogEntry{{ + Order: 1, + Name: "catalog-hook", + Event: hookspkg.HookSessionPostCreate, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + }}, + } + h.observer.AttachHooks(source) + + entries, err := h.observer.QueryHookCatalog(testutil.Context(t), hookspkg.CatalogFilter{ + WorkspaceID: h.workspaceID, + AgentName: "coder", + }) + if err != nil { + t.Fatalf("QueryHookCatalog() error = %v", err) + } + if got, want := len(entries), 1; got != want { + t.Fatalf("len(entries) = %d, want %d", got, want) + } + if entries[0].Name != "catalog-hook" { + t.Fatalf("entries[0].Name = %q, want catalog-hook", entries[0].Name) + } + if source.lastFilter.WorkspaceID != h.workspaceID || source.lastFilter.AgentName != "coder" { + t.Fatalf("lastFilter = %#v", source.lastFilter) + } +} + +func TestObserverWriteHookRecordAndQueryHookRuns(t *testing.T) { + t.Parallel() + + h := newHarness(t) + sessionID := "sess-hook-audit" + 
db := openObserverHookSessionDB(t, h.home, sessionID) + closeObserverHookSessionDB(t, db) + + recordedAt := time.Date(2026, 4, 9, 19, 0, 0, 0, time.UTC) + record := hookspkg.HookRunRecord{ + HookName: "permission-audit", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Duration: 20 * time.Millisecond, + Outcome: hookspkg.HookRunOutcomeDenied, + DispatchDepth: 2, + PatchApplied: []byte(`{"decision":"deny","reason":"policy"}`), + Required: true, + RecordedAt: recordedAt, + } + + if err := h.observer.WriteHookRecord(testutil.Context(t), sessionID, record); err != nil { + t.Fatalf("WriteHookRecord() error = %v", err) + } + + records, err := h.observer.QueryHookRuns(testutil.Context(t), store.HookRunQuery{ + SessionID: sessionID, + Event: hookspkg.HookPermissionRequest.String(), + }) + if err != nil { + t.Fatalf("QueryHookRuns() error = %v", err) + } + if got, want := len(records), 1; got != want { + t.Fatalf("len(records) = %d, want %d", got, want) + } + if records[0].HookName != "permission-audit" { + t.Fatalf("records[0].HookName = %q, want permission-audit", records[0].HookName) + } + if string(records[0].PatchApplied) != `{"decision":"deny","reason":"policy"}` { + t.Fatalf("records[0].PatchApplied = %s, want deny patch", records[0].PatchApplied) + } +} + +func TestObserverHookRunQueriesHandleMissingDBAndEvents(t *testing.T) { + t.Parallel() + + h := newHarness(t) + record := hookspkg.HookRunRecord{ + HookName: "missing-db", + Event: hookspkg.HookSessionPostCreate, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + DispatchDepth: 1, + RecordedAt: time.Date(2026, 4, 9, 19, 5, 0, 0, time.UTC), + } + + if err := h.observer.WriteHookRecord(testutil.Context(t), "missing-session", record); err != nil { + t.Fatalf("WriteHookRecord(missing) error = %v", err) + } + records, err := h.observer.QueryHookRuns(testutil.Context(t), 
store.HookRunQuery{SessionID: "missing-session"}) + if err != nil { + t.Fatalf("QueryHookRuns(missing) error = %v", err) + } + if records != nil { + t.Fatalf("records = %#v, want nil for missing session DB", records) + } + + events, err := h.observer.QueryHookEvents(testutil.Context(t), hookspkg.EventFilter{}) + if err != nil { + t.Fatalf("QueryHookEvents() error = %v", err) + } + if got, want := len(events), 27; got != want { + t.Fatalf("len(events) = %d, want %d", got, want) + } +} + +func TestObserverHookOptionsUseCustomSourcesAndStores(t *testing.T) { + t.Parallel() + + h := newHarness(t) + source := &stubHookCatalogSource{ + entries: []hookspkg.CatalogEntry{{ + Order: 1, + Name: "option-hook", + Event: hookspkg.HookInputPreSubmit, + Source: hookspkg.HookSourceSkill, + Mode: hookspkg.HookModeSync, + }}, + } + storeHandle := &stubHookRunStore{ + records: []hookspkg.HookRunRecord{{ + HookName: "from-opener", + Event: hookspkg.HookPromptPostAssemble, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + DispatchDepth: 1, + RecordedAt: time.Date(2026, 4, 9, 19, 10, 0, 0, time.UTC), + }}, + } + sessionID := "sess-option-store" + + observer, err := New(testutil.Context(t), + WithRegistry(h.registry), + WithHomePaths(h.home), + WithWorkspaceResolver(fakeObserveWorkspaceResolver{}), + WithHookCatalogSource(source), + WithHookStoreOpener(func(_ context.Context, gotSessionID string, path string) (HookRunStore, error) { + storeHandle.lastSessionID = gotSessionID + storeHandle.lastPath = path + return storeHandle, nil + }), + WithLogger(slog.New(slog.NewTextHandler(io.Discard, nil))), + ) + if err != nil { + t.Fatalf("New(custom hook options) error = %v", err) + } + + path := observer.hookDBPath(sessionID) + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("MkdirAll(%q) error = %v", filepath.Dir(path), err) + } + if err := os.WriteFile(path, []byte("placeholder"), 0o644); err != nil { + 
t.Fatalf("WriteFile(%q) error = %v", path, err) + } + + entries, err := observer.QueryHookCatalog(testutil.Context(t), hookspkg.CatalogFilter{AgentName: "coder"}) + if err != nil { + t.Fatalf("QueryHookCatalog(custom) error = %v", err) + } + if got, want := len(entries), 1; got != want { + t.Fatalf("len(entries) = %d, want %d", got, want) + } + + records, err := observer.QueryHookRuns(testutil.Context(t), store.HookRunQuery{SessionID: sessionID}) + if err != nil { + t.Fatalf("QueryHookRuns(custom opener) error = %v", err) + } + if got, want := len(records), 1; got != want { + t.Fatalf("len(records) = %d, want %d", got, want) + } + if storeHandle.lastSessionID != sessionID || storeHandle.lastPath != path { + t.Fatalf("custom opener saw session=%q path=%q", storeHandle.lastSessionID, storeHandle.lastPath) + } + + written := hookspkg.HookRunRecord{ + HookName: "written-via-opener", + Event: hookspkg.HookInputPreSubmit, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + DispatchDepth: 1, + RecordedAt: time.Date(2026, 4, 9, 19, 11, 0, 0, time.UTC), + } + if err := observer.WriteHookRecord(testutil.Context(t), sessionID, written); err != nil { + t.Fatalf("WriteHookRecord(custom opener) error = %v", err) + } + if got, want := len(storeHandle.written), 1; got != want { + t.Fatalf("len(written) = %d, want %d", got, want) + } + if storeHandle.written[0].HookName != "written-via-opener" { + t.Fatalf("written[0].HookName = %q, want written-via-opener", storeHandle.written[0].HookName) + } + if !storeHandle.closed { + t.Fatal("custom hook store Close() was not called") + } +} + +type stubHookCatalogSource struct { + entries []hookspkg.CatalogEntry + lastFilter hookspkg.CatalogFilter + returnError error +} + +func (s *stubHookCatalogSource) Catalog(filter hookspkg.CatalogFilter) ([]hookspkg.CatalogEntry, error) { + s.lastFilter = filter + if s.returnError != nil { + return nil, s.returnError + } + return 
append([]hookspkg.CatalogEntry(nil), s.entries...), nil +} + +type stubHookRunStore struct { + records []hookspkg.HookRunRecord + written []hookspkg.HookRunRecord + lastSessionID string + lastPath string + closed bool +} + +func (s *stubHookRunStore) RecordHookRun(_ context.Context, record hookspkg.HookRunRecord) error { + s.written = append(s.written, record) + return nil +} + +func (s *stubHookRunStore) QueryHookRuns(_ context.Context, _ store.HookRunQuery) ([]hookspkg.HookRunRecord, error) { + return append([]hookspkg.HookRunRecord(nil), s.records...), nil +} + +func (s *stubHookRunStore) Close(context.Context) error { + s.closed = true + return nil +} + +func openObserverHookSessionDB(t *testing.T, homePaths aghconfig.HomePaths, sessionID string) *sessiondb.SessionDB { + t.Helper() + + db, err := sessiondb.OpenSessionDB(testutil.Context(t), sessionID, store.SessionDBFile(filepath.Join(homePaths.SessionsDir, sessionID))) + if err != nil { + t.Fatalf("OpenSessionDB(%q) error = %v", sessionID, err) + } + return db +} + +func closeObserverHookSessionDB(t *testing.T, db *sessiondb.SessionDB) { + t.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := db.Close(ctx); err != nil { + t.Fatalf("SessionDB.Close() error = %v", err) + } +} diff --git a/internal/observe/observer.go b/internal/observe/observer.go index d263c0960..6fe41f113 100644 --- a/internal/observe/observer.go +++ b/internal/observe/observer.go @@ -6,15 +6,19 @@ import ( "errors" "fmt" "log/slog" + "os" + "path/filepath" "strings" "sync" "time" "github.com/pedronauck/agh/internal/acp" aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/session" "github.com/pedronauck/agh/internal/store" "github.com/pedronauck/agh/internal/store/globaldb" + "github.com/pedronauck/agh/internal/store/sessiondb" "github.com/pedronauck/agh/internal/version" workspacepkg 
"github.com/pedronauck/agh/internal/workspace" ) @@ -47,6 +51,21 @@ type PermissionModeResolver func(ctx context.Context, agentName, workspaceID str // VersionSource returns the current daemon build metadata. type VersionSource func() version.Info +// HookCatalogSource provides resolved hook catalog views from the live runtime. +type HookCatalogSource interface { + Catalog(filter hookspkg.CatalogFilter) ([]hookspkg.CatalogEntry, error) +} + +// HookRunStore is the session-scoped storage surface used for hook run audits. +type HookRunStore interface { + RecordHookRun(context.Context, hookspkg.HookRunRecord) error + QueryHookRuns(context.Context, store.HookRunQuery) ([]hookspkg.HookRunRecord, error) + Close(context.Context) error +} + +// HookStoreOpener opens the per-session store used for hook run audit queries. +type HookStoreOpener func(ctx context.Context, sessionID string, path string) (HookRunStore, error) + // Option customizes Observer construction. type Option func(*Observer) @@ -70,6 +89,8 @@ type Observer struct { logger *slog.Logger versionSource VersionSource sessions map[string]observedSession + hookCatalogSource HookCatalogSource + openHookStore HookStoreOpener } var _ session.Notifier = (*Observer)(nil) @@ -138,6 +159,20 @@ func WithVersionSource(source VersionSource) Option { } } +// WithHookCatalogSource injects the runtime hook catalog source used by hook introspection. +func WithHookCatalogSource(source HookCatalogSource) Option { + return func(observer *Observer) { + observer.hookCatalogSource = source + } +} + +// WithHookStoreOpener overrides the per-session hook run store opener, mainly for tests. +func WithHookStoreOpener(opener HookStoreOpener) Option { + return func(observer *Observer) { + observer.openHookStore = opener + } +} + // New constructs an Observer and opens the global AGH database when needed. 
func New(ctx context.Context, opts ...Option) (*Observer, error) { if ctx == nil { @@ -185,6 +220,11 @@ func New(ctx context.Context, opts ...Option) (*Observer, error) { if observer.resolvePermissionMode == nil { observer.resolvePermissionMode = defaultPermissionModeResolver(observer.homePaths, observer.workspaceResolver) } + if observer.openHookStore == nil { + observer.openHookStore = func(ctx context.Context, sessionID string, path string) (HookRunStore, error) { + return sessiondb.OpenSessionDB(ctx, sessionID, path) + } + } if observer.registry == nil { if err := aghconfig.EnsureHomeLayout(observer.homePaths); err != nil { @@ -201,6 +241,59 @@ func New(ctx context.Context, opts ...Option) (*Observer, error) { return observer, nil } +// AttachHooks swaps in the live hook catalog source after the hook runtime is built. +func (o *Observer) AttachHooks(source HookCatalogSource) { + if o == nil { + return + } + o.mu.Lock() + defer o.mu.Unlock() + o.hookCatalogSource = source +} + +func (o *Observer) hookDBPath(sessionID string) string { + return store.SessionDBFile(filepath.Join(o.homePaths.SessionsDir, strings.TrimSpace(sessionID))) +} + +func (o *Observer) openHookRunStore(ctx context.Context, sessionID string) (HookRunStore, func() error, error) { + if o == nil { + return nil, nil, errors.New("observe: observer is required") + } + if ctx == nil { + return nil, nil, errors.New("observe: hook run context is required") + } + + target := strings.TrimSpace(sessionID) + if target == "" { + return nil, nil, errors.New("observe: session id is required") + } + + dbPath := o.hookDBPath(target) + if _, err := os.Stat(dbPath); err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil, os.ErrNotExist + } + return nil, nil, fmt.Errorf("observe: stat hook database for %q: %w", target, err) + } + + openStore := o.openHookStore + if openStore == nil { + return nil, nil, errors.New("observe: hook store opener is required") + } + + storeHandle, err := openStore(ctx, 
target, dbPath) + if err != nil { + return nil, nil, fmt.Errorf("observe: open hook database for %q: %w", target, err) + } + + cleanup := func() error { + closeCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + return storeHandle.Close(closeCtx) + } + return storeHandle, cleanup, nil +} + // Close flushes and closes the backing global registry. func (o *Observer) Close(ctx context.Context) error { return o.registry.Close(ctx) @@ -246,7 +339,13 @@ func (o *Observer) OnSessionStopped(ctx context.Context, sess *session.Session) } // OnAgentEvent records one lightweight cross-session event summary and any derived aggregates. -func (o *Observer) OnAgentEvent(ctx context.Context, sessionID string, event acp.AgentEvent) { +func (o *Observer) OnAgentEvent(ctx context.Context, sessionID string, payload any) { + event, ok := normalizeObservedAgentEvent(payload) + if !ok { + o.logger.Warn("observe: skipped unsupported agent event payload", "session_id", strings.TrimSpace(sessionID)) + return + } + id := strings.TrimSpace(sessionID) if id == "" { o.logger.Warn("observe: skipped agent event with empty session id", "event_type", event.Type) @@ -324,6 +423,20 @@ func (o *Observer) OnAgentEvent(ctx context.Context, sessionID string, event acp } } +func normalizeObservedAgentEvent(payload any) (acp.AgentEvent, bool) { + switch event := payload.(type) { + case acp.AgentEvent: + return event, true + case *acp.AgentEvent: + if event == nil { + return acp.AgentEvent{}, false + } + return *event, true + default: + return acp.AgentEvent{}, false + } +} + func (o *Observer) trackSession(id string, snapshot observedSession) { o.mu.Lock() defer o.mu.Unlock() diff --git a/internal/observe/query.go b/internal/observe/query.go index 3c8f68e9f..947669077 100644 --- a/internal/observe/query.go +++ b/internal/observe/query.go @@ -2,7 +2,12 @@ package observe import ( "context" + "errors" + "fmt" + "os" + "strings" + hookspkg 
"github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/store" ) @@ -20,3 +25,80 @@ func (o *Observer) QueryTokenStats(ctx context.Context, query store.TokenStatsQu func (o *Observer) QueryPermissionLog(ctx context.Context, query store.PermissionLogQuery) ([]store.PermissionLogEntry, error) { return o.registry.ListPermissionLog(ctx, query) } + +// QueryHookCatalog returns the resolved hook catalog for the supplied filter. +func (o *Observer) QueryHookCatalog(ctx context.Context, filter hookspkg.CatalogFilter) ([]hookspkg.CatalogEntry, error) { + if ctx == nil { + return nil, errors.New("observe: hook catalog context is required") + } + + o.mu.RLock() + source := o.hookCatalogSource + o.mu.RUnlock() + if source == nil { + return nil, nil + } + + return source.Catalog(filter) +} + +// QueryHookRuns returns persisted per-session hook execution records. +func (o *Observer) QueryHookRuns(ctx context.Context, query store.HookRunQuery) ([]hookspkg.HookRunRecord, error) { + if ctx == nil { + return nil, errors.New("observe: hook runs context is required") + } + if err := query.Validate(); err != nil { + return nil, err + } + if event := strings.TrimSpace(query.Event); event != "" { + if err := hookspkg.HookEvent(event).Validate(); err != nil { + return nil, err + } + } + + storeHandle, cleanup, err := o.openHookRunStore(ctx, query.SessionID) + switch { + case err == nil: + case errors.Is(err, os.ErrNotExist): + return nil, nil + default: + return nil, err + } + defer func() { + _ = cleanup() + }() + + records, err := storeHandle.QueryHookRuns(ctx, query) + if err != nil { + return nil, fmt.Errorf("observe: query hook runs for %q: %w", strings.TrimSpace(query.SessionID), err) + } + return records, nil +} + +// QueryHookEvents returns the supported hook taxonomy metadata. 
+func (o *Observer) QueryHookEvents(_ context.Context, filter hookspkg.EventFilter) ([]hookspkg.EventDescriptor, error) {
+	// Neither the context nor any observer state is consulted; the hooks package
+	// filters its own event descriptors and the returned error is always nil.
+	return hookspkg.FilterEventDescriptors(filter), nil
+}
+
+// WriteHookRecord persists one hook execution record when the session database already exists.
+//
+// A nil ctx is rejected. When openHookRunStore reports os.ErrNotExist the
+// record is dropped and nil is returned; any other open error is returned
+// unchanged. A failed write is wrapped with the trimmed session id.
+//
+// The store is always closed before returning. A close failure is reported to
+// the caller (joined with the write error when both fail) instead of being
+// discarded, so a write whose store did not close cleanly is not reported as
+// a success.
+func (o *Observer) WriteHookRecord(ctx context.Context, sessionID string, record hookspkg.HookRunRecord) (err error) {
+	if ctx == nil {
+		return errors.New("observe: write hook record context is required")
+	}
+
+	storeHandle, cleanup, err := o.openHookRunStore(ctx, sessionID)
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			return nil
+		}
+		return err
+	}
+	// err is a named result so the deferred close can surface its own failure.
+	defer func() {
+		if closeErr := cleanup(); closeErr != nil {
+			err = errors.Join(err, fmt.Errorf("observe: close hook database for %q: %w", strings.TrimSpace(sessionID), closeErr))
+		}
+	}()
+
+	if err := storeHandle.RecordHookRun(ctx, record); err != nil {
+		return fmt.Errorf("observe: write hook record for %q: %w", strings.TrimSpace(sessionID), err)
+	}
+	return nil
+}
diff --git a/internal/session/interfaces.go b/internal/session/interfaces.go
index a520fa867..a4aaba405 100644
--- a/internal/session/interfaces.go
+++ b/internal/session/interfaces.go
@@ -8,6 +8,7 @@ import (
 
 	"github.com/pedronauck/agh/internal/acp"
 	aghconfig "github.com/pedronauck/agh/internal/config"
+	hookspkg "github.com/pedronauck/agh/internal/hooks"
 	skillspkg "github.com/pedronauck/agh/internal/skills"
 	"github.com/pedronauck/agh/internal/store"
 	workspacepkg "github.com/pedronauck/agh/internal/workspace"
@@ -151,7 +152,33 @@ type EventRecorder = store.EventRecorder
 type Notifier interface {
 	OnSessionCreated(ctx context.Context, session *Session)
 	OnSessionStopped(ctx context.Context, session *Session)
-	OnAgentEvent(ctx context.Context, sessionID string, event acp.AgentEvent)
+	OnAgentEvent(ctx context.Context, sessionID string, event any)
+}
+
+// HookDispatcher exposes the typed hook dispatch surface consumed directly by
+// the session manager.
+type HookDispatcher interface {
+	// Every method takes the payload for one hook event and returns a payload of
+	// the same type together with an error.
+
+	// Session lifecycle.
+	DispatchSessionPreCreate(context.Context, hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error)
+	DispatchSessionPostCreate(context.Context, hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error)
+	DispatchSessionPreResume(context.Context, hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error)
+	DispatchSessionPostResume(context.Context, hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error)
+	DispatchSessionPreStop(context.Context, hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error)
+	DispatchSessionPostStop(context.Context, hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error)
+	// Input and prompt assembly.
+	DispatchInputPreSubmit(context.Context, hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error)
+	DispatchPromptPostAssemble(context.Context, hookspkg.PromptPayload) (hookspkg.PromptPayload, error)
+	// Event recording.
+	DispatchEventPreRecord(context.Context, hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error)
+	DispatchEventPostRecord(context.Context, hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error)
+	// Agent process lifecycle.
+	DispatchAgentPreStart(context.Context, hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error)
+	DispatchAgentSpawned(context.Context, hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error)
+	DispatchAgentCrashed(context.Context, hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error)
+	DispatchAgentStopped(context.Context, hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error)
+	// Turns and streamed messages.
+	DispatchTurnStart(context.Context, hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error)
+	DispatchTurnEnd(context.Context, hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error)
+	DispatchMessageStart(context.Context, hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error)
+	DispatchMessageDelta(context.Context, hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error)
+	DispatchMessageEnd(context.Context, hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error)
+	// Context compaction.
+	DispatchContextPreCompact(context.Context, hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error)
+	DispatchContextPostCompact(context.Context, hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error)
 }
 
 // PromptAssembler assembles the prompt context for a new session start.
diff --git a/internal/session/manager.go b/internal/session/manager.go
index 04fbe6175..539a39de0 100644
--- a/internal/session/manager.go
+++ b/internal/session/manager.go
@@ -62,6 +62,7 @@ type Manager struct {
 	logger        *slog.Logger
 	driver        AgentDriver
 	notifier      Notifier
+	hooks         HookDispatcher
 	skillRegistry SkillRegistry
 	mcpResolver   MCPResolver
 	homePaths     aghconfig.HomePaths
@@ -111,6 +112,14 @@ func WithNotifier(notifier Notifier) Option {
 	}
 }
 
+// WithHookDispatcher injects the typed hook dispatch surface used by the
+// session manager for lifecycle and runtime hook points.
+//
+// The option only stores the dispatcher on Manager.hooks. While that field is
+// nil the manager's dispatch helpers return early, so leaving the option out
+// keeps hook dispatch disabled.
+func WithHookDispatcher(dispatcher HookDispatcher) Option {
+	return func(manager *Manager) {
+		manager.hooks = dispatcher
+	}
+}
+
 // WithSkillRegistry injects the active-skill registry used during session start.
func WithSkillRegistry(registry SkillRegistry) Option { return func(manager *Manager) { diff --git a/internal/session/manager_helpers.go b/internal/session/manager_helpers.go index 892a31fb9..99fda1373 100644 --- a/internal/session/manager_helpers.go +++ b/internal/session/manager_helpers.go @@ -11,25 +11,26 @@ import ( "time" aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/store" workspacepkg "github.com/pedronauck/agh/internal/workspace" ) -func (m *Manager) startupPrompt(ctx context.Context, agentName string, agent aghconfig.AgentDef, workspace workspacepkg.ResolvedWorkspace) (string, error) { +func (m *Manager) startupPrompt(ctx context.Context, sessionCtx hookspkg.SessionContext, agent aghconfig.AgentDef, workspace workspacepkg.ResolvedWorkspace) (string, error) { prompt := strings.TrimSpace(agent.Prompt) if m.assembler == nil { - return prompt, nil + return m.dispatchPromptPostAssemble(ctx, sessionCtx, prompt) } assembledPrompt, err := m.assembler.Assemble(ctx, agent, workspace) if err != nil { - return "", fmt.Errorf("session: assemble prompt for %q: %w", agentName, err) + return "", fmt.Errorf("session: assemble prompt for %q: %w", agent.Name, err) } if strings.TrimSpace(assembledPrompt) == "" { - return prompt, nil + assembledPrompt = prompt } - return strings.TrimSpace(assembledPrompt), nil + return m.dispatchPromptPostAssemble(ctx, sessionCtx, strings.TrimSpace(assembledPrompt)) } func (m *Manager) startPermissions(sessionType SessionType, configured string) aghconfig.PermissionMode { @@ -64,7 +65,7 @@ func (m *Manager) writeMeta(session *Session) error { return nil } -func (m *Manager) activateAndWatch(ctx context.Context, session *Session, proc *AgentProcess) error { +func (m *Manager) activateAndWatch(ctx context.Context, session *Session, proc *AgentProcess, resolved aghconfig.ResolvedAgent, postEvent hookspkg.HookEvent) error { now := m.now() if err := 
session.activate(now); err != nil { return err @@ -78,6 +79,13 @@ func (m *Manager) activateAndWatch(ctx context.Context, session *Session, proc * return errors.Join(err, rollbackErr) } + m.dispatchAgentSpawned(ctx, session, proc, resolved) + switch postEvent { + case hookspkg.HookSessionPostCreate: + m.dispatchSessionPostCreate(ctx, session) + case hookspkg.HookSessionPostResume: + m.dispatchSessionPostResume(ctx, session) + } if m.notifier != nil { m.notifier.OnSessionCreated(ctx, session) } diff --git a/internal/session/manager_hooks.go b/internal/session/manager_hooks.go new file mode 100644 index 000000000..1e76cf11a --- /dev/null +++ b/internal/session/manager_hooks.go @@ -0,0 +1,794 @@ +package session + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + "github.com/kballard/go-shellquote" + "github.com/pedronauck/agh/internal/acp" + aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" + "github.com/pedronauck/agh/internal/store" +) + +const ( + hookInputClassUserMessage = acp.EventTypeUserMessage + hookInputClassStartup = "startup_prompt" + + hookMessageRoleAssistant = "assistant" + + hookMessageDeltaTypeFull = "full" + hookMessageDeltaTypeText = "text" + hookMessageDeltaTypeThought = "thought" +) + +type promptTurnDispatchState struct { + session *Session + turnID string + inputClass string + userMessage string + messageSeq int + turnEnded bool + openMessage *promptMessageDispatchState +} + +type promptMessageDispatchState struct { + id string + role string + text strings.Builder + lastRaw json.RawMessage +} + +func newPromptTurnDispatchState(session *Session, turnID string, inputClass string, userMessage string) *promptTurnDispatchState { + return &promptTurnDispatchState{ + session: session, + turnID: strings.TrimSpace(turnID), + inputClass: strings.TrimSpace(inputClass), + userMessage: userMessage, + } +} + +func (m *Manager) dispatchSessionPreCreate(ctx 
context.Context, opts CreateOpts) (CreateOpts, error) { + if m == nil || m.hooks == nil { + return opts, nil + } + + payload, err := m.hooks.DispatchSessionPreCreate(ctx, hookspkg.SessionPreCreatePayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookSessionPreCreate, + Timestamp: m.now(), + }, + SessionContext: hookspkg.SessionContext{ + SessionName: strings.TrimSpace(opts.Name), + SessionType: string(normalizeSessionType(opts.Type)), + AgentName: strings.TrimSpace(opts.AgentName), + WorkspaceID: strings.TrimSpace(opts.Workspace), + Workspace: strings.TrimSpace(opts.WorkspacePath), + State: string(StateStarting), + }, + }) + if err != nil { + return CreateOpts{}, fmt.Errorf("session: dispatch session.pre_create: %w", err) + } + + next := opts + next.Name = strings.TrimSpace(payload.SessionName) + next.Type = normalizeSessionType(SessionType(strings.TrimSpace(payload.SessionType))) + next.AgentName = strings.TrimSpace(payload.AgentName) + + workspaceID := strings.TrimSpace(payload.WorkspaceID) + workspacePath := strings.TrimSpace(payload.Workspace) + switch { + case workspaceID != "" && workspacePath != "": + return CreateOpts{}, errors.New("session: session.pre_create produced both workspace id and workspace path") + case workspaceID != "": + next.Workspace = workspaceID + next.WorkspacePath = "" + case workspacePath != "": + next.Workspace = "" + next.WorkspacePath = workspacePath + default: + next.Workspace = "" + next.WorkspacePath = "" + } + + return next, nil +} + +func (m *Manager) dispatchSessionPreResume(ctx context.Context, meta store.SessionMeta) (store.SessionMeta, error) { + if m == nil || m.hooks == nil { + return meta, nil + } + + payload, err := m.hooks.DispatchSessionPreResume(ctx, hookspkg.SessionPreResumePayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookSessionPreResume, + Timestamp: m.now(), + }, + SessionContext: hookspkg.SessionContext{ + SessionID: strings.TrimSpace(meta.ID), + SessionName: 
strings.TrimSpace(meta.Name), + SessionType: string(normalizeSessionType(SessionType(meta.SessionType))), + AgentName: strings.TrimSpace(meta.AgentName), + WorkspaceID: strings.TrimSpace(meta.WorkspaceID), + ACPSessionID: strings.TrimSpace(derefString(meta.ACPSessionID)), + State: strings.TrimSpace(meta.State), + CreatedAt: meta.CreatedAt, + UpdatedAt: meta.UpdatedAt, + }, + }) + if err != nil { + return store.SessionMeta{}, fmt.Errorf("session: dispatch session.pre_resume: %w", err) + } + + next := meta + next.Name = strings.TrimSpace(payload.SessionName) + next.AgentName = strings.TrimSpace(payload.AgentName) + next.WorkspaceID = strings.TrimSpace(payload.WorkspaceID) + next.SessionType = string(normalizeSessionType(SessionType(strings.TrimSpace(payload.SessionType)))) + return next, nil +} + +func (m *Manager) dispatchSessionPostCreate(ctx context.Context, session *Session) { + m.dispatchSessionLifecycleObservation(ctx, session, hookspkg.HookSessionPostCreate) +} + +func (m *Manager) dispatchSessionPostResume(ctx context.Context, session *Session) { + m.dispatchSessionLifecycleObservation(ctx, session, hookspkg.HookSessionPostResume) +} + +func (m *Manager) dispatchSessionPreStop(ctx context.Context, session *Session) error { + if m == nil || m.hooks == nil || session == nil { + return nil + } + ctx = hookDispatchContext(ctx, session) + + payload, err := m.hooks.DispatchSessionPreStop(ctx, hookSessionLifecyclePayload(session, hookspkg.HookSessionPreStop, m.now())) + if err != nil { + return fmt.Errorf("session: dispatch session.pre_stop: %w", err) + } + + session.applyHookSessionContext(payload.SessionContext, m.now()) + return nil +} + +func (m *Manager) dispatchSessionPostStop(ctx context.Context, session *Session) { + m.dispatchSessionLifecycleObservation(ctx, session, hookspkg.HookSessionPostStop) +} + +func (m *Manager) dispatchSessionLifecycleObservation(ctx context.Context, session *Session, event hookspkg.HookEvent) { + if m == nil || m.hooks == nil || 
session == nil { + return + } + ctx = hookDispatchContext(ctx, session) + + payload := hookSessionLifecyclePayload(session, event, m.now()) + var err error + switch event { + case hookspkg.HookSessionPostCreate: + _, err = m.hooks.DispatchSessionPostCreate(ctx, hookspkg.SessionPostCreatePayload(payload)) + case hookspkg.HookSessionPostResume: + _, err = m.hooks.DispatchSessionPostResume(ctx, hookspkg.SessionPostResumePayload(payload)) + case hookspkg.HookSessionPostStop: + _, err = m.hooks.DispatchSessionPostStop(ctx, hookspkg.SessionPostStopPayload(payload)) + default: + return + } + if err != nil { + m.warnHookDispatch(ctx, session, event, err) + } +} + +func (m *Manager) dispatchInputPreSubmit(ctx context.Context, session *Session, turnID string, message string) (string, error) { + if m == nil || m.hooks == nil { + return message, nil + } + ctx = hookDispatchContext(ctx, session) + + payload, err := m.hooks.DispatchInputPreSubmit(ctx, hookspkg.InputPreSubmitPayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookInputPreSubmit, + Timestamp: m.now(), + }, + SessionContext: hookSessionContext(session), + TurnContext: hookspkg.TurnContext{TurnID: strings.TrimSpace(turnID)}, + InputClass: hookInputClassUserMessage, + Message: message, + }) + if err != nil { + return "", fmt.Errorf("session: dispatch input.pre_submit: %w", err) + } + + return payload.Message, nil +} + +func (m *Manager) dispatchPromptPostAssemble(ctx context.Context, sessionCtx hookspkg.SessionContext, prompt string) (string, error) { + if m == nil || m.hooks == nil { + return prompt, nil + } + + payload, err := m.hooks.DispatchPromptPostAssemble(ctx, hookspkg.PromptPayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookPromptPostAssemble, + Timestamp: m.now(), + }, + SessionContext: sessionCtx, + InputClass: hookInputClassStartup, + Prompt: prompt, + }) + if err != nil { + return "", fmt.Errorf("session: dispatch prompt.post_assemble: %w", err) + } + + return 
strings.TrimSpace(payload.Prompt), nil +} + +func (m *Manager) dispatchTurnStart(ctx context.Context, state *promptTurnDispatchState) error { + if m == nil || m.hooks == nil || state == nil { + return nil + } + ctx = hookDispatchContext(ctx, state.session) + + _, err := m.hooks.DispatchTurnStart(ctx, hookspkg.TurnStartPayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookTurnStart, + Timestamp: m.now(), + }, + SessionContext: hookSessionContext(state.session), + TurnContext: hookspkg.TurnContext{TurnID: state.turnID}, + InputClass: state.inputClass, + UserMessage: state.userMessage, + }) + if err != nil { + return fmt.Errorf("session: dispatch turn.start: %w", err) + } + + return nil +} + +func (m *Manager) dispatchTurnEnd(ctx context.Context, state *promptTurnDispatchState, eventTime time.Time) { + if state == nil || state.turnEnded { + return + } + state.turnEnded = true + if m == nil || m.hooks == nil { + return + } + ctx = hookDispatchContext(ctx, state.session) + + _, err := m.hooks.DispatchTurnEnd(ctx, hookspkg.TurnEndPayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookTurnEnd, + Timestamp: hookTimestamp(m.now(), eventTime), + }, + SessionContext: hookSessionContext(state.session), + TurnContext: hookspkg.TurnContext{TurnID: state.turnID}, + InputClass: state.inputClass, + UserMessage: state.userMessage, + }) + if err != nil { + m.warnHookDispatch(ctx, state.session, hookspkg.HookTurnEnd, err) + } +} + +func (m *Manager) preparePromptEvent(ctx context.Context, state *promptTurnDispatchState, event acp.AgentEvent) acp.AgentEvent { + if state == nil { + return event + } + + role, deltaType, isMessage := hookMessageDetails(event.Type) + if !isMessage { + m.finishPromptMessage(ctx, state, event.Timestamp) + return event + } + + if state.openMessage == nil { + event = m.dispatchMessageStart(ctx, state, event, role) + } + + if state.openMessage == nil { + return event + } + + state.openMessage.text.WriteString(event.Text) + 
state.openMessage.lastRaw = cloneSessionRawMessage(event.Raw) + m.dispatchMessageDelta(ctx, state, event, deltaType) + return event +} + +func (m *Manager) dispatchMessageStart(ctx context.Context, state *promptTurnDispatchState, event acp.AgentEvent, role string) acp.AgentEvent { + if state == nil { + return event + } + + state.messageSeq++ + message := &promptMessageDispatchState{ + id: nextPromptMessageID(state.turnID, state.messageSeq), + role: strings.TrimSpace(role), + } + state.openMessage = message + if m == nil || m.hooks == nil { + return event + } + ctx = hookDispatchContext(ctx, state.session) + + payload, err := m.hooks.DispatchMessageStart(ctx, hookspkg.MessageStartPayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookMessageStart, + Timestamp: hookTimestamp(m.now(), event.Timestamp), + }, + SessionContext: hookSessionContext(state.session), + TurnContext: hookspkg.TurnContext{TurnID: state.turnID}, + MessageID: message.id, + Role: message.role, + DeltaType: hookMessageDeltaTypeFull, + Text: event.Text, + Raw: cloneSessionRawMessage(event.Raw), + }) + if err != nil { + m.warnHookDispatch(ctx, state.session, hookspkg.HookMessageStart, err) + return event + } + + message.role = strings.TrimSpace(payload.Role) + event.Text = payload.Text + return event +} + +func (m *Manager) dispatchMessageDelta(ctx context.Context, state *promptTurnDispatchState, event acp.AgentEvent, deltaType string) { + if m == nil || m.hooks == nil || state == nil || state.openMessage == nil { + return + } + ctx = hookDispatchContext(ctx, state.session) + + _, err := m.hooks.DispatchMessageDelta(ctx, hookspkg.MessageDeltaPayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: hookspkg.HookMessageDelta, + Timestamp: hookTimestamp(m.now(), event.Timestamp), + }, + SessionContext: hookSessionContext(state.session), + TurnContext: hookspkg.TurnContext{TurnID: state.turnID}, + MessageID: state.openMessage.id, + Role: state.openMessage.role, + DeltaType: 
strings.TrimSpace(deltaType),
+		Text:           event.Text,
+		Raw:            cloneSessionRawMessage(event.Raw),
+	})
+	if err != nil {
+		m.warnHookDispatch(ctx, state.session, hookspkg.HookMessageDelta, err)
+	}
+}
+
+// finishPromptMessage closes the turn's open message, if there is one, and
+// dispatches HookMessageEnd with the text accumulated on the message state and
+// a copy of its last raw payload.
+//
+// The open message is cleared even when no dispatcher is configured. The hook
+// timestamp is eventTime when it is non-zero, otherwise m.now(). The returned
+// payload is discarded and a dispatch error is only logged.
+func (m *Manager) finishPromptMessage(ctx context.Context, state *promptTurnDispatchState, eventTime time.Time) {
+	if state == nil || state.openMessage == nil {
+		return
+	}
+
+	message := state.openMessage
+	state.openMessage = nil
+	if m == nil || m.hooks == nil {
+		return
+	}
+	ctx = hookDispatchContext(ctx, state.session)
+
+	_, err := m.hooks.DispatchMessageEnd(ctx, hookspkg.MessageEndPayload{
+		PayloadBase: hookspkg.PayloadBase{
+			Event:     hookspkg.HookMessageEnd,
+			Timestamp: hookTimestamp(m.now(), eventTime),
+		},
+		SessionContext: hookSessionContext(state.session),
+		TurnContext:    hookspkg.TurnContext{TurnID: state.turnID},
+		MessageID:      message.id,
+		Role:           message.role,
+		DeltaType:      hookMessageDeltaTypeFull,
+		Text:           message.text.String(),
+		Raw:            cloneSessionRawMessage(message.lastRaw),
+	})
+	if err != nil {
+		m.warnHookDispatch(ctx, state.session, hookspkg.HookMessageEnd, err)
+	}
+}
+
+// dispatchEventPreRecord dispatches HookEventPreRecord for event. content is
+// forwarded as json.RawMessage without being validated here. The returned
+// payload is discarded and a dispatch error is only logged. It is a no-op
+// when no dispatcher is configured.
+func (m *Manager) dispatchEventPreRecord(ctx context.Context, session *Session, event acp.AgentEvent, content string) {
+	if m == nil || m.hooks == nil {
+		return
+	}
+	ctx = hookDispatchContext(ctx, session)
+
+	_, err := m.hooks.DispatchEventPreRecord(ctx, hookspkg.EventPreRecordPayload{
+		PayloadBase: hookspkg.PayloadBase{
+			Event:     hookspkg.HookEventPreRecord,
+			Timestamp: hookTimestamp(m.now(), event.Timestamp),
+		},
+		SessionContext: hookSessionContext(session),
+		TurnContext:    hookspkg.TurnContext{TurnID: strings.TrimSpace(event.TurnID)},
+		RecordType:     strings.TrimSpace(event.Type),
+		Content:        json.RawMessage(content),
+	})
+	if err != nil {
+		m.warnHookDispatch(ctx, session, hookspkg.HookEventPreRecord, err)
+	}
+}
+
+// runContextCompaction wraps one context compaction in the pre/post compact hooks.
+//
+// It builds a ContextPreCompactPayload from the arguments (strings trimmed,
+// contextBlocks cloned), lets the dispatcher rewrite it when one is
+// configured, and hands the result to compact. The payload compact returns is
+// then completed: Event is forced to HookContextPostCompact, a zero Timestamp
+// becomes the current time, a blank SessionID or TurnID restores the whole
+// pre-compaction SessionContext or TurnContext, a blank Reason or Strategy is
+// copied from the pre payload, and nil ContextBlocks are replaced by a clone
+// of the pre-compaction blocks.
+//
+// A nil compact, a failed pre-compact dispatch, or an error from compact
+// aborts with a zero payload. A failed post-compact dispatch is only logged.
+// A nil Manager is tolerated: hooks are skipped and the UTC wall clock is used.
+func (m *Manager) runContextCompaction(
+	ctx context.Context,
+	session *Session,
+	turnID string,
+	reason string,
+	strategy string,
+	summary string,
+	contextBlocks []hookspkg.ContextBlock,
+	compact func(context.Context, hookspkg.ContextPreCompactPayload) (hookspkg.ContextPostCompactPayload, error),
+) (hookspkg.ContextPostCompactPayload, error) {
+	if compact == nil {
+		return hookspkg.ContextPostCompactPayload{}, errors.New("session: context compactor is required")
+	}
+
+	// Read the clock on every call. The method value time.Now().UTC would bind
+	// to a single instant captured here, so the post-compaction timestamp would
+	// repeat the pre-compaction one.
+	now := func() time.Time { return time.Now().UTC() }
+	if m != nil {
+		now = m.now
+	}
+	ctx = hookDispatchContext(ctx, session)
+
+	prePayload := hookspkg.ContextPreCompactPayload{
+		PayloadBase: hookspkg.PayloadBase{
+			Event:     hookspkg.HookContextPreCompact,
+			Timestamp: now(),
+		},
+		SessionContext: hookSessionContext(session),
+		TurnContext:    hookspkg.TurnContext{TurnID: strings.TrimSpace(turnID)},
+		Reason:         strings.TrimSpace(reason),
+		Strategy:       strings.TrimSpace(strategy),
+		Summary:        strings.TrimSpace(summary),
+		ContextBlocks:  cloneSessionContextBlocks(contextBlocks),
+	}
+
+	var err error
+	if m != nil && m.hooks != nil {
+		prePayload, err = m.hooks.DispatchContextPreCompact(ctx, prePayload)
+		if err != nil {
+			return hookspkg.ContextPostCompactPayload{}, fmt.Errorf("session: dispatch context.pre_compact: %w", err)
+		}
+	}
+
+	postPayload, err := compact(ctx, prePayload)
+	if err != nil {
+		return hookspkg.ContextPostCompactPayload{}, err
+	}
+
+	// Fill whatever the compactor left unset from the pre-compaction payload.
+	postPayload.Event = hookspkg.HookContextPostCompact
+	if postPayload.Timestamp.IsZero() {
+		postPayload.Timestamp = now()
+	}
+	if strings.TrimSpace(postPayload.SessionID) == "" {
+		postPayload.SessionContext = prePayload.SessionContext
+	}
+	if strings.TrimSpace(postPayload.TurnID) == "" {
+		postPayload.TurnContext = prePayload.TurnContext
+	}
+	if strings.TrimSpace(postPayload.Reason) == "" {
+		postPayload.Reason = prePayload.Reason
+	}
+	if strings.TrimSpace(postPayload.Strategy) == "" {
+		postPayload.Strategy = prePayload.Strategy
+	}
+	if postPayload.ContextBlocks == nil {
+		postPayload.ContextBlocks = cloneSessionContextBlocks(prePayload.ContextBlocks)
+	}
+
+	if m != nil && m.hooks != nil {
+		if _, err := m.hooks.DispatchContextPostCompact(ctx, postPayload); err != nil {
+			m.warnHookDispatch(ctx, session, hookspkg.HookContextPostCompact, err)
+		}
+	}
+
+	return postPayload, nil
+}
+
+// dispatchEventPostRecord dispatches HookEventPostRecord for event. content is
+// forwarded as json.RawMessage without being validated here. The returned
+// payload is discarded and a dispatch error is only logged. It is a no-op
+// when no dispatcher is configured.
+func (m *Manager) dispatchEventPostRecord(ctx context.Context, session *Session, event acp.AgentEvent, content string) {
+	if m == nil || m.hooks == nil {
+		return
+	}
+	ctx = hookDispatchContext(ctx, session)
+
+	_, err := m.hooks.DispatchEventPostRecord(ctx, hookspkg.EventPostRecordPayload{
+		PayloadBase: hookspkg.PayloadBase{
+			Event:     hookspkg.HookEventPostRecord,
+			Timestamp: hookTimestamp(m.now(), event.Timestamp),
+		},
+		SessionContext: hookSessionContext(session),
+		TurnContext:    hookspkg.TurnContext{TurnID: strings.TrimSpace(event.TurnID)},
+		RecordType:     strings.TrimSpace(event.Type),
+		Content:        json.RawMessage(content),
+	})
+	if err != nil {
+		m.warnHookDispatch(ctx, session, hookspkg.HookEventPostRecord, err)
+	}
+}
+
+// dispatchAgentPreStart dispatches HookAgentPreStart before the agent process
+// is started and applies the hook's answer to the start options.
+//
+// opts.Command is split into a command and its arguments for the payload. On
+// success a copy of opts is returned with Command rebuilt from the payload's
+// Command and Args, and Cwd replaced by the payload's trimmed Cwd. A dispatch
+// error aborts with zero StartOpts. Without a dispatcher opts is returned
+// unchanged.
+func (m *Manager) dispatchAgentPreStart(ctx context.Context, session *Session, resolved aghconfig.ResolvedAgent, opts acp.StartOpts) (acp.StartOpts, error) {
+	if m == nil || m.hooks == nil {
+		return opts, nil
+	}
+	ctx = hookDispatchContext(ctx, session)
+
+	command, args := splitCommand(opts.Command)
+	payload, err := m.hooks.DispatchAgentPreStart(ctx, hookspkg.AgentPreStartPayload{
+		PayloadBase: hookspkg.PayloadBase{
+			Event:     hookspkg.HookAgentPreStart,
+			Timestamp: m.now(),
+		},
+		SessionContext: hookSessionContext(session),
+		Command:        command,
+		Args:           args,
+		Cwd:            strings.TrimSpace(opts.Cwd),
+		Provider:       strings.TrimSpace(resolved.Provider),
+		Model:          strings.TrimSpace(resolved.Model),
+	})
+	if err != nil {
+		return acp.StartOpts{}, fmt.Errorf("session: dispatch agent.pre_start: %w", err)
+	}
+
+	next := opts
+	next.Command = joinCommand(payload.Command, payload.Args)
+	next.Cwd = strings.TrimSpace(payload.Cwd)
+	return next, nil
+}
+
+func (m *Manager) dispatchAgentSpawned(ctx context.Context, session *Session, proc *AgentProcess, resolved
aghconfig.ResolvedAgent) { + m.dispatchAgentObservation(ctx, session, proc, resolved, nil, hookspkg.HookAgentSpawned) +} + +func (m *Manager) dispatchAgentCrashed(ctx context.Context, session *Session, proc *AgentProcess, waitErr error) { + m.dispatchAgentObservation(ctx, session, proc, aghconfig.ResolvedAgent{}, waitErr, hookspkg.HookAgentCrashed) +} + +func (m *Manager) dispatchAgentStopped(ctx context.Context, session *Session, proc *AgentProcess, waitErr error) { + m.dispatchAgentObservation(ctx, session, proc, aghconfig.ResolvedAgent{}, waitErr, hookspkg.HookAgentStopped) +} + +func (m *Manager) dispatchAgentObservation(ctx context.Context, session *Session, proc *AgentProcess, resolved aghconfig.ResolvedAgent, waitErr error, event hookspkg.HookEvent) { + if m == nil || m.hooks == nil { + return + } + ctx = hookDispatchContext(ctx, session) + + command, args := agentCommandAndArgs(proc) + payload := hookspkg.AgentLifecyclePayload{ + PayloadBase: hookspkg.PayloadBase{ + Event: event, + Timestamp: m.now(), + }, + SessionContext: hookSessionContext(session), + Command: command, + Args: args, + Cwd: agentCwd(proc), + PID: agentPID(proc), + Provider: strings.TrimSpace(resolved.Provider), + Model: strings.TrimSpace(resolved.Model), + } + if waitErr != nil { + payload.Error = waitErr.Error() + } + + var err error + switch event { + case hookspkg.HookAgentSpawned: + _, err = m.hooks.DispatchAgentSpawned(ctx, hookspkg.AgentSpawnedPayload(payload)) + case hookspkg.HookAgentCrashed: + _, err = m.hooks.DispatchAgentCrashed(ctx, hookspkg.AgentCrashedPayload(payload)) + case hookspkg.HookAgentStopped: + _, err = m.hooks.DispatchAgentStopped(ctx, hookspkg.AgentStoppedPayload(payload)) + default: + return + } + if err != nil { + m.warnHookDispatch(ctx, session, event, err) + } +} + +func hookSessionLifecyclePayload(session *Session, event hookspkg.HookEvent, timestamp time.Time) hookspkg.SessionLifecyclePayload { + return hookspkg.SessionLifecyclePayload{ + PayloadBase: 
hookspkg.PayloadBase{ + Event: event, + Timestamp: timestamp, + }, + SessionContext: hookSessionContext(session), + } +} + +func hookSessionContext(session *Session) hookspkg.SessionContext { + if session == nil { + return hookspkg.SessionContext{} + } + + info := session.Info() + if info == nil { + return hookspkg.SessionContext{} + } + + return hookspkg.SessionContext{ + SessionID: strings.TrimSpace(info.ID), + SessionName: strings.TrimSpace(info.Name), + SessionType: string(info.Type), + AgentName: strings.TrimSpace(info.AgentName), + WorkspaceID: strings.TrimSpace(info.WorkspaceID), + Workspace: strings.TrimSpace(info.Workspace), + ACPSessionID: strings.TrimSpace(info.ACPSessionID), + State: string(info.State), + CreatedAt: info.CreatedAt, + UpdatedAt: info.UpdatedAt, + } +} + +func (s *Session) applyHookSessionContext(payload hookspkg.SessionContext, now time.Time) { + if s == nil { + return + } + + s.mu.Lock() + defer s.mu.Unlock() + + s.Name = strings.TrimSpace(payload.SessionName) + s.AgentName = strings.TrimSpace(payload.AgentName) + s.WorkspaceID = strings.TrimSpace(payload.WorkspaceID) + s.Workspace = strings.TrimSpace(payload.Workspace) + s.Type = normalizeSessionType(SessionType(strings.TrimSpace(payload.SessionType))) + if !now.IsZero() { + s.UpdatedAt = now + } +} + +func hookTimestamp(now time.Time, eventTime time.Time) time.Time { + if !eventTime.IsZero() { + return eventTime + } + return now +} + +func splitCommand(command string) (string, []string) { + trimmed := strings.TrimSpace(command) + if trimmed == "" { + return "", nil + } + + parts, err := shellquote.Split(trimmed) + if err != nil || len(parts) == 0 { + return trimmed, nil + } + + return parts[0], append([]string(nil), parts[1:]...) 
+} + +func joinCommand(command string, args []string) string { + trimmed := strings.TrimSpace(command) + if trimmed == "" { + return "" + } + if len(args) == 0 { + return trimmed + } + + parts := make([]string, 0, len(args)+1) + parts = append(parts, trimmed) + for _, arg := range args { + if item := strings.TrimSpace(arg); item != "" { + parts = append(parts, item) + } + } + return shellquote.Join(parts...) +} + +func agentCommandAndArgs(proc *AgentProcess) (string, []string) { + if proc == nil { + return "", nil + } + if len(proc.Args) > 0 { + return strings.TrimSpace(proc.Command), append([]string(nil), proc.Args...) + } + return splitCommand(proc.Command) +} + +func agentCwd(proc *AgentProcess) string { + if proc == nil { + return "" + } + return strings.TrimSpace(proc.Cwd) +} + +func agentPID(proc *AgentProcess) int { + if proc == nil { + return 0 + } + return proc.PID +} + +func (m *Manager) warnHookDispatch(ctx context.Context, session *Session, event hookspkg.HookEvent, err error) { + if err == nil { + return + } + if ctx == nil { + ctx = context.Background() + } + + m.sessionLogger(session).WarnContext( + ctx, + "session: hook dispatch failed", + "hook_event", event.String(), + "error", err, + ) +} + +func hookMessageDetails(eventType string) (string, string, bool) { + switch strings.TrimSpace(eventType) { + case acp.EventTypeAgentMessage: + return hookMessageRoleAssistant, hookMessageDeltaTypeText, true + case acp.EventTypeThought: + return hookMessageRoleAssistant, hookMessageDeltaTypeThought, true + default: + return "", "", false + } +} + +func nextPromptMessageID(turnID string, sequence int) string { + base := strings.TrimSpace(turnID) + if base == "" { + base = "msg" + } + if sequence <= 0 { + return base + "-message" + } + return fmt.Sprintf("%s-message-%d", base, sequence) +} + +func cloneSessionRawMessage(raw json.RawMessage) json.RawMessage { + if len(raw) == 0 { + return nil + } + return append(json.RawMessage(nil), raw...) 
+} + +func cloneSessionContextBlocks(blocks []hookspkg.ContextBlock) []hookspkg.ContextBlock { + if len(blocks) == 0 { + return nil + } + + cloned := make([]hookspkg.ContextBlock, 0, len(blocks)) + for _, block := range blocks { + cloned = append(cloned, hookspkg.ContextBlock{ + Kind: strings.TrimSpace(block.Kind), + Text: block.Text, + Metadata: cloneStringMap(block.Metadata), + }) + } + return cloned +} + +func cloneStringMap(src map[string]string) map[string]string { + if len(src) == 0 { + return nil + } + + cloned := make(map[string]string, len(src)) + for key, value := range src { + cloned[key] = value + } + return cloned +} + +func hookDispatchContext(ctx context.Context, session *Session) context.Context { + if ctx == nil || session == nil { + return ctx + } + + writer, ok := session.recorderHandle().(hookspkg.HookRunWriter) + if !ok || writer == nil { + return ctx + } + + return hookspkg.WithHookRunWriter(ctx, writer) +} diff --git a/internal/session/manager_hooks_test.go b/internal/session/manager_hooks_test.go new file mode 100644 index 000000000..df6f37567 --- /dev/null +++ b/internal/session/manager_hooks_test.go @@ -0,0 +1,884 @@ +package session + +import ( + "context" + "errors" + "io" + "log/slog" + "strings" + "testing" + "time" + + "github.com/pedronauck/agh/internal/acp" + aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" + "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/testutil" +) + +func TestCreateFailsWhenSessionPreCreateDenied(t *testing.T) { + t.Parallel() + + hooks := newNativeHookDispatcher(t, + []hookspkg.HookDecl{{ + Name: "deny-create", + Event: hookspkg.HookSessionPreCreate, + Mode: hookspkg.HookModeSync, + ExecutorKind: hookspkg.HookExecutorNative, + }}, + map[string]hookspkg.Executor{ + "deny-create": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.SessionPreCreatePayload) 
(hookspkg.SessionCreatePatch, error) { + return hookspkg.SessionCreatePatch{ + ControlPatch: hookspkg.ControlPatch{ + Deny: true, + DenyReason: "blocked", + }, + }, nil + }), + }, + ) + + h := newHarness(t, WithHookDispatcher(hooks)) + _, err := h.manager.Create(testutil.Context(t), CreateOpts{ + AgentName: "coder", + Workspace: h.workspaceID, + }) + if err == nil { + t.Fatal("Create() error = nil, want pre-create denial") + } + if len(h.manager.List()) != 0 { + t.Fatalf("List() = %d active sessions, want 0", len(h.manager.List())) + } + if got := h.notifier.createdCount(); got != 0 { + t.Fatalf("created notifications = %d, want 0", got) + } +} + +func TestCreateUsesPatchedSessionPreCreatePayload(t *testing.T) { + t.Parallel() + + const patchedName = "patched-session" + sessionName := patchedName + hooks := newNativeHookDispatcher(t, + []hookspkg.HookDecl{{ + Name: "patch-create", + Event: hookspkg.HookSessionPreCreate, + Mode: hookspkg.HookModeSync, + ExecutorKind: hookspkg.HookExecutorNative, + }}, + map[string]hookspkg.Executor{ + "patch-create": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.SessionPreCreatePayload) (hookspkg.SessionCreatePatch, error) { + sessionType := string(SessionTypeDream) + return hookspkg.SessionCreatePatch{ + SessionName: &sessionName, + SessionType: &sessionType, + }, nil + }), + }, + ) + + h := newHarness(t, WithHookDispatcher(hooks)) + session, err := h.manager.Create(testutil.Context(t), CreateOpts{ + AgentName: "coder", + Name: "original", + Workspace: h.workspaceID, + Type: SessionTypeUser, + }) + if err != nil { + t.Fatalf("Create() error = %v", err) + } + t.Cleanup(func() { + _ = h.manager.Stop(testutil.Context(t), session.ID) + }) + + if got := session.Info().Name; got != patchedName { + t.Fatalf("session name = %q, want %q", got, patchedName) + } + if got := session.Info().Type; got != SessionTypeDream { + t.Fatalf("session type = %q, want %q", got, SessionTypeDream) + } + if got 
:= h.driver.startCalls[0].Permissions; got != aghconfig.PermissionModeApproveAll { + t.Fatalf("start permissions = %q, want %q", got, aghconfig.PermissionModeApproveAll) + } +} + +func TestPostCreateHookFiresAfterSessionActive(t *testing.T) { + t.Parallel() + + payloadCh := make(chan hookspkg.SessionPostCreatePayload, 1) + hooks := newNativeHookDispatcher(t, + []hookspkg.HookDecl{{ + Name: "observe-post-create", + Event: hookspkg.HookSessionPostCreate, + Mode: hookspkg.HookModeAsync, + ExecutorKind: hookspkg.HookExecutorNative, + }}, + map[string]hookspkg.Executor{ + "observe-post-create": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, payload hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePatch, error) { + payloadCh <- payload + return hookspkg.SessionPostCreatePatch{}, nil + }), + }, + ) + + h := newHarness(t, WithHookDispatcher(hooks)) + session := createSession(t, h) + t.Cleanup(func() { + _ = h.manager.Stop(testutil.Context(t), session.ID) + }) + + select { + case payload := <-payloadCh: + if payload.SessionID != session.ID { + t.Fatalf("payload.SessionID = %q, want %q", payload.SessionID, session.ID) + } + if payload.State != string(StateActive) { + t.Fatalf("payload.State = %q, want %q", payload.State, StateActive) + } + if payload.ACPSessionID == "" { + t.Fatal("payload.ACPSessionID = empty, want active ACP session id") + } + case <-time.After(time.Second): + t.Fatal("timed out waiting for session.post_create hook") + } +} + +func TestResumeUsesPatchedPreResumePayloadAndFiresPostResume(t *testing.T) { + t.Parallel() + + h := newHarness(t) + session := createSession(t, h) + if err := h.manager.Stop(testutil.Context(t), session.ID); err != nil { + t.Fatalf("Stop() error = %v", err) + } + + const patchedName = "resumed-patched" + postResumeCh := make(chan hookspkg.SessionPostResumePayload, 1) + dispatcher := &spyHookDispatcher{ + dispatchSessionPreResumeFn: func(_ context.Context, payload 
hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) { + payload.SessionName = patchedName + return payload, nil + }, + dispatchSessionPostResumeFn: func(_ context.Context, payload hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error) { + postResumeCh <- payload + return payload, nil + }, + } + + h.manager = newManagerWithHarness(t, h, WithHookDispatcher(dispatcher)) + resumed, err := h.manager.Resume(testutil.Context(t), session.ID) + if err != nil { + t.Fatalf("Resume() error = %v", err) + } + t.Cleanup(func() { + _ = h.manager.Stop(testutil.Context(t), resumed.ID) + }) + + if got := resumed.Info().Name; got != patchedName { + t.Fatalf("resumed name = %q, want %q", got, patchedName) + } + + select { + case payload := <-postResumeCh: + if payload.SessionID != resumed.ID { + t.Fatalf("payload.SessionID = %q, want %q", payload.SessionID, resumed.ID) + } + if payload.State != string(StateActive) { + t.Fatalf("payload.State = %q, want %q", payload.State, StateActive) + } + case <-time.After(time.Second): + t.Fatal("timed out waiting for session.post_resume hook") + } +} + +func TestPromptUsesPatchedInputMessage(t *testing.T) { + t.Parallel() + + hooks := newNativeHookDispatcher(t, + []hookspkg.HookDecl{{ + Name: "patch-input", + Event: hookspkg.HookInputPreSubmit, + Mode: hookspkg.HookModeSync, + ExecutorKind: hookspkg.HookExecutorNative, + }}, + map[string]hookspkg.Executor{ + "patch-input": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPatch, error) { + message := "patched message" + return hookspkg.InputPreSubmitPatch{Message: &message}, nil + }), + }, + ) + + h := newHarness(t, WithHookDispatcher(hooks)) + session := createSession(t, h) + t.Cleanup(func() { + _ = h.manager.Stop(testutil.Context(t), session.ID) + }) + + eventsCh, err := h.manager.Prompt(testutil.Context(t), session.ID, "original message") + if err != nil { + 
t.Fatalf("Prompt() error = %v", err) + } + _ = collectEvents(t, eventsCh) + + if got := h.driver.promptCalls[0].Message; got != "patched message" { + t.Fatalf("prompt message = %q, want %q", got, "patched message") + } + + stored, err := session.recorderHandle().Query(testutil.Context(t), store.EventQuery{}) + if err != nil { + t.Fatalf("Query() error = %v", err) + } + if len(stored) == 0 || !strings.Contains(stored[0].Content, `"text":"patched message"`) { + t.Fatalf("stored user message content = %q, want patched text", stored[0].Content) + } +} + +func TestCreateUsesPatchedPrompt(t *testing.T) { + t.Parallel() + + hooks := newNativeHookDispatcher(t, + []hookspkg.HookDecl{{ + Name: "patch-prompt", + Event: hookspkg.HookPromptPostAssemble, + Mode: hookspkg.HookModeSync, + ExecutorKind: hookspkg.HookExecutorNative, + }}, + map[string]hookspkg.Executor{ + "patch-prompt": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.PromptPayload) (hookspkg.PromptPatch, error) { + prompt := "patched system prompt" + return hookspkg.PromptPatch{Prompt: &prompt}, nil + }), + }, + ) + + h := newHarness(t, WithHookDispatcher(hooks)) + session := createSession(t, h) + t.Cleanup(func() { + _ = h.manager.Stop(testutil.Context(t), session.ID) + }) + + if got := h.driver.startCalls[0].SystemPrompt; got != "patched system prompt" { + t.Fatalf("start system prompt = %q, want %q", got, "patched system prompt") + } +} + +func TestAgentCrashedHookFiresOnProcessCrash(t *testing.T) { + t.Parallel() + + payloadCh := make(chan hookspkg.AgentCrashedPayload, 1) + hooks := newNativeHookDispatcher(t, + []hookspkg.HookDecl{{ + Name: "observe-agent-crash", + Event: hookspkg.HookAgentCrashed, + Mode: hookspkg.HookModeAsync, + ExecutorKind: hookspkg.HookExecutorNative, + }}, + map[string]hookspkg.Executor{ + "observe-agent-crash": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, payload hookspkg.AgentCrashedPayload) 
(hookspkg.AgentCrashedPatch, error) { + payloadCh <- payload + return hookspkg.AgentCrashedPatch{}, nil + }), + }, + ) + + h := newHarness(t, WithHookDispatcher(hooks)) + session := createSession(t, h) + + h.driver.lastProcess().crash(errors.New("boom"), "stderr trace") + waitForCondition(t, "session stop after crash", func() bool { + _, ok := h.manager.Get(session.ID) + return !ok + }) + + select { + case payload := <-payloadCh: + if payload.SessionID != session.ID { + t.Fatalf("payload.SessionID = %q, want %q", payload.SessionID, session.ID) + } + if payload.Error != "boom" { + t.Fatalf("payload.Error = %q, want %q", payload.Error, "boom") + } + case <-time.After(time.Second): + t.Fatal("timed out waiting for agent.crashed hook") + } +} + +func TestRecordEventDispatchesAroundPersistence(t *testing.T) { + t.Parallel() + + order := make([]string, 0, 3) + dispatcher := &spyHookDispatcher{ + dispatchEventPreRecordFn: func(_ context.Context, payload hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) { + order = append(order, "pre:"+payload.RecordType) + return payload, nil + }, + dispatchEventPostRecordFn: func(_ context.Context, payload hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) { + order = append(order, "post:"+payload.RecordType) + return payload, nil + }, + } + h := newHarness(t, WithHookDispatcher(dispatcher)) + + recorder := &orderedRecorder{ + onRecord: func(event store.SessionEvent) { + order = append(order, "record:"+event.Type) + }, + } + now := h.manager.now() + session := &Session{ + ID: "sess-event", + AgentName: "coder", + WorkspaceID: h.workspaceID, + Workspace: h.workspace, + Type: SessionTypeUser, + State: StateActive, + CreatedAt: now, + UpdatedAt: now, + recorder: recorder, + } + + err := h.manager.recordEvent(testutil.Context(t), session, acp.AgentEvent{ + Type: acp.EventTypeDone, + TurnID: "turn-1", + Timestamp: now, + Text: "done", + }) + if err != nil { + t.Fatalf("recordEvent() error = %v", 
err) + } + + want := []string{"pre:done", "record:done", "post:done"} + if !testutil.EqualStringSlices(order, want) { + t.Fatalf("dispatch order = %#v, want %#v", order, want) + } +} + +func TestPromptDispatchesTurnAndMessageHooksAtACPBoundaries(t *testing.T) { + t.Parallel() + + order := make([]string, 0, 5) + var ( + turnStartPayload hookspkg.TurnStartPayload + messageStartPayload hookspkg.MessageStartPayload + messageDeltaPayload hookspkg.MessageDeltaPayload + messageEndPayload hookspkg.MessageEndPayload + turnEndPayload hookspkg.TurnEndPayload + ) + + dispatcher := &spyHookDispatcher{ + dispatchTurnStartFn: func(_ context.Context, payload hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) { + order = append(order, "turn.start") + turnStartPayload = payload + return payload, nil + }, + dispatchMessageStartFn: func(_ context.Context, payload hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) { + order = append(order, "message.start") + messageStartPayload = payload + return payload, nil + }, + dispatchMessageDeltaFn: func(_ context.Context, payload hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) { + order = append(order, "message.delta") + messageDeltaPayload = payload + return payload, nil + }, + dispatchMessageEndFn: func(_ context.Context, payload hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) { + order = append(order, "message.end") + messageEndPayload = payload + return payload, nil + }, + dispatchTurnEndFn: func(_ context.Context, payload hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) { + order = append(order, "turn.end") + turnEndPayload = payload + return payload, nil + }, + } + + h := newHarness(t, WithHookDispatcher(dispatcher)) + session := createSession(t, h) + t.Cleanup(func() { + _ = h.manager.Stop(testutil.Context(t), session.ID) + }) + + eventsCh, err := h.manager.Prompt(testutil.Context(t), session.ID, "hello") + if err != nil { + t.Fatalf("Prompt() error = %v", err) + } 
+ events := collectEvents(t, eventsCh) + if len(events) != 2 { + t.Fatalf("len(events) = %d, want 2", len(events)) + } + + wantOrder := []string{"turn.start", "message.start", "message.delta", "message.end", "turn.end"} + if !testutil.EqualStringSlices(order, wantOrder) { + t.Fatalf("hook order = %#v, want %#v", order, wantOrder) + } + + if turnStartPayload.UserMessage != "hello" { + t.Fatalf("turn.start user message = %q, want %q", turnStartPayload.UserMessage, "hello") + } + if turnStartPayload.TurnID == "" { + t.Fatal("turn.start turn id = empty, want populated turn id") + } + if turnStartPayload.InputClass != hookInputClassUserMessage { + t.Fatalf("turn.start input class = %q, want %q", turnStartPayload.InputClass, hookInputClassUserMessage) + } + if messageStartPayload.MessageID == "" { + t.Fatal("message.start message id = empty, want populated message id") + } + if messageStartPayload.Role != hookMessageRoleAssistant { + t.Fatalf("message.start role = %q, want %q", messageStartPayload.Role, hookMessageRoleAssistant) + } + if messageStartPayload.DeltaType != hookMessageDeltaTypeFull { + t.Fatalf("message.start delta type = %q, want %q", messageStartPayload.DeltaType, hookMessageDeltaTypeFull) + } + if messageStartPayload.Text != "reply" { + t.Fatalf("message.start text = %q, want %q", messageStartPayload.Text, "reply") + } + if messageDeltaPayload.MessageID != messageStartPayload.MessageID { + t.Fatalf("message.delta message id = %q, want %q", messageDeltaPayload.MessageID, messageStartPayload.MessageID) + } + if messageDeltaPayload.DeltaType != hookMessageDeltaTypeText { + t.Fatalf("message.delta delta type = %q, want %q", messageDeltaPayload.DeltaType, hookMessageDeltaTypeText) + } + if messageEndPayload.MessageID != messageStartPayload.MessageID { + t.Fatalf("message.end message id = %q, want %q", messageEndPayload.MessageID, messageStartPayload.MessageID) + } + if messageEndPayload.Text != "reply" { + t.Fatalf("message.end text = %q, want %q", 
messageEndPayload.Text, "reply") + } + if turnEndPayload.TurnID != turnStartPayload.TurnID { + t.Fatalf("turn.end turn id = %q, want %q", turnEndPayload.TurnID, turnStartPayload.TurnID) + } +} + +func TestMessageStartPatchUpdatesFirstAssistantChunk(t *testing.T) { + t.Parallel() + + patched := "patched reply" + hooks := newNativeHookDispatcher(t, + []hookspkg.HookDecl{{ + Name: "patch-message-start", + Event: hookspkg.HookMessageStart, + Mode: hookspkg.HookModeSync, + ExecutorKind: hookspkg.HookExecutorNative, + }}, + map[string]hookspkg.Executor{ + "patch-message-start": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.MessageStartPayload) (hookspkg.MessageStartPatch, error) { + return hookspkg.MessageStartPatch{Text: &patched}, nil + }), + }, + ) + + h := newHarness(t, WithHookDispatcher(hooks)) + session := createSession(t, h) + t.Cleanup(func() { + _ = h.manager.Stop(testutil.Context(t), session.ID) + }) + + eventsCh, err := h.manager.Prompt(testutil.Context(t), session.ID, "hello") + if err != nil { + t.Fatalf("Prompt() error = %v", err) + } + events := collectEvents(t, eventsCh) + if len(events) != 2 { + t.Fatalf("len(events) = %d, want 2", len(events)) + } + if events[0].Text != patched { + t.Fatalf("first event text = %q, want %q", events[0].Text, patched) + } + + stored, err := session.recorderHandle().Query(testutil.Context(t), store.EventQuery{}) + if err != nil { + t.Fatalf("Query() error = %v", err) + } + if len(stored) < 2 { + t.Fatalf("stored events = %d, want at least 2", len(stored)) + } + if !strings.Contains(stored[1].Content, patched) { + t.Fatalf("stored assistant content = %q, want patched reply", stored[1].Content) + } +} + +func TestMessageDeltaAsyncHooksDoNotBlockPromptStreaming(t *testing.T) { + t.Parallel() + + started := make(chan struct{}, 1) + release := make(chan struct{}) + + hooks := hookspkg.NewHooks( + hookspkg.WithLogger(slog.New(slog.NewTextHandler(io.Discard, nil))), + 
hookspkg.WithAsyncWorkerCount(1), + hookspkg.WithAsyncQueueCapacity(1), + hookspkg.WithNativeDeclarations([]hookspkg.HookDecl{{ + Name: "observe-message-delta", + Event: hookspkg.HookMessageDelta, + Mode: hookspkg.HookModeAsync, + ExecutorKind: hookspkg.HookExecutorNative, + }}), + hookspkg.WithExecutorResolver(func(decl hookspkg.HookDecl) (hookspkg.Executor, error) { + if strings.TrimSpace(decl.Name) != "observe-message-delta" { + return nil, errors.New("unexpected hook name") + } + return hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPatch, error) { + select { + case started <- struct{}{}: + default: + } + <-release + return hookspkg.MessageDeltaPatch{}, nil + }), nil + }), + ) + if err := hooks.Rebuild(testutil.Context(t)); err != nil { + t.Fatalf("Rebuild() error = %v", err) + } + t.Cleanup(hooks.Close) + + h := newHarness(t, WithHookDispatcher(hooks)) + session := createSession(t, h) + t.Cleanup(func() { + _ = h.manager.Stop(testutil.Context(t), session.ID) + }) + + eventsCh, err := h.manager.Prompt(testutil.Context(t), session.ID, "hello") + if err != nil { + t.Fatalf("Prompt() error = %v", err) + } + + select { + case event, ok := <-eventsCh: + if !ok { + t.Fatal("first prompt event channel read closed early, want agent message") + } + if event.Type != acp.EventTypeAgentMessage { + t.Fatalf("first prompt event type = %q, want %q", event.Type, acp.EventTypeAgentMessage) + } + case <-time.After(250 * time.Millisecond): + t.Fatal("timed out waiting for first prompt event; message.delta hook blocked streaming") + } + + select { + case event, ok := <-eventsCh: + if !ok { + t.Fatal("second prompt event channel read closed early, want done event") + } + if event.Type != acp.EventTypeDone { + t.Fatalf("second prompt event type = %q, want %q", event.Type, acp.EventTypeDone) + } + case <-time.After(250 * time.Millisecond): + t.Fatal("timed out waiting for done event; 
message.delta hook blocked prompt completion") + } + + select { + case _, ok := <-eventsCh: + if ok { + t.Fatal("prompt event channel still open after done event") + } + case <-time.After(250 * time.Millisecond): + t.Fatal("timed out waiting for prompt event channel close") + } + + select { + case <-started: + case <-time.After(time.Second): + t.Fatal("timed out waiting for async message.delta hook to start") + } + close(release) +} + +func TestContextCompactionDispatchesHooksAndUsesPatchedParams(t *testing.T) { + t.Parallel() + + session := &Session{ + ID: "sess-context", + AgentName: "coder", + Workspace: "/tmp/workspace", + WorkspaceID: "ws-context", + Type: SessionTypeUser, + State: StateActive, + } + + var ( + prePayload hookspkg.ContextPreCompactPayload + compactPayload hookspkg.ContextPreCompactPayload + postPayload hookspkg.ContextPostCompactPayload + ) + + dispatcher := &spyHookDispatcher{ + dispatchContextPreCompactFn: func(_ context.Context, payload hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) { + prePayload = payload + patchedReason := "token_limit" + patchedStrategy := "summary" + payload.Reason = patchedReason + payload.Strategy = patchedStrategy + payload.ContextBlocks = []hookspkg.ContextBlock{{Kind: "summary", Text: "patched"}} + return payload, nil + }, + dispatchContextPostCompactFn: func(_ context.Context, payload hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) { + postPayload = payload + return payload, nil + }, + } + + h := newHarness(t, WithHookDispatcher(dispatcher)) + result, err := h.manager.runContextCompaction( + testutil.Context(t), + session, + "turn-compact", + "manual", + "noop", + "", + []hookspkg.ContextBlock{{Kind: "note", Text: "before"}}, + func(_ context.Context, payload hookspkg.ContextPreCompactPayload) (hookspkg.ContextPostCompactPayload, error) { + compactPayload = payload + return hookspkg.ContextPostCompactPayload{ + Summary: "after", + ContextBlocks: 
[]hookspkg.ContextBlock{{Kind: "summary", Text: "after"}}, + }, nil + }, + ) + if err != nil { + t.Fatalf("runContextCompaction() error = %v", err) + } + + if prePayload.Reason != "manual" || prePayload.Strategy != "noop" { + t.Fatalf("pre-compaction payload = %#v, want original reason/strategy", prePayload) + } + if compactPayload.Reason != "token_limit" || compactPayload.Strategy != "summary" { + t.Fatalf("compaction payload = %#v, want patched reason/strategy", compactPayload) + } + if len(compactPayload.ContextBlocks) != 1 || compactPayload.ContextBlocks[0].Text != "patched" { + t.Fatalf("compaction context blocks = %#v, want patched blocks", compactPayload.ContextBlocks) + } + if postPayload.Summary != "after" { + t.Fatalf("post-compaction summary = %q, want %q", postPayload.Summary, "after") + } + if postPayload.Reason != "token_limit" || postPayload.Strategy != "summary" { + t.Fatalf("post-compaction reason/strategy = %#v, want patched values", postPayload) + } + if result.Summary != "after" { + t.Fatalf("result summary = %q, want %q", result.Summary, "after") + } +} + +func newNativeHookDispatcher(t *testing.T, decls []hookspkg.HookDecl, executors map[string]hookspkg.Executor) *hookspkg.Hooks { + t.Helper() + + hooks := hookspkg.NewHooks( + hookspkg.WithLogger(slog.New(slog.NewTextHandler(io.Discard, nil))), + hookspkg.WithNativeDeclarations(decls), + hookspkg.WithExecutorResolver(func(decl hookspkg.HookDecl) (hookspkg.Executor, error) { + executor := executors[strings.TrimSpace(decl.Name)] + if executor == nil { + return nil, errors.New("missing native executor") + } + return executor, nil + }), + ) + if err := hooks.Rebuild(testutil.Context(t)); err != nil { + t.Fatalf("Rebuild() error = %v", err) + } + t.Cleanup(hooks.Close) + return hooks +} + +type spyHookDispatcher struct { + dispatchSessionPreCreateFn func(context.Context, hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) + dispatchSessionPostCreateFn func(context.Context, 
hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) + dispatchSessionPreResumeFn func(context.Context, hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) + dispatchSessionPostResumeFn func(context.Context, hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error) + dispatchSessionPreStopFn func(context.Context, hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) + dispatchSessionPostStopFn func(context.Context, hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) + dispatchInputPreSubmitFn func(context.Context, hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) + dispatchPromptPostAssembleFn func(context.Context, hookspkg.PromptPayload) (hookspkg.PromptPayload, error) + dispatchEventPreRecordFn func(context.Context, hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) + dispatchEventPostRecordFn func(context.Context, hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) + dispatchAgentPreStartFn func(context.Context, hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) + dispatchAgentSpawnedFn func(context.Context, hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) + dispatchAgentCrashedFn func(context.Context, hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error) + dispatchAgentStoppedFn func(context.Context, hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) + dispatchTurnStartFn func(context.Context, hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) + dispatchTurnEndFn func(context.Context, hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) + dispatchMessageStartFn func(context.Context, hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) + dispatchMessageDeltaFn func(context.Context, hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) + dispatchMessageEndFn func(context.Context, 
hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) + dispatchContextPreCompactFn func(context.Context, hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) + dispatchContextPostCompactFn func(context.Context, hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) +} + +func (s *spyHookDispatcher) DispatchSessionPreCreate(ctx context.Context, payload hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) { + if s.dispatchSessionPreCreateFn != nil { + return s.dispatchSessionPreCreateFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchSessionPostCreate(ctx context.Context, payload hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) { + if s.dispatchSessionPostCreateFn != nil { + return s.dispatchSessionPostCreateFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchSessionPreResume(ctx context.Context, payload hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) { + if s.dispatchSessionPreResumeFn != nil { + return s.dispatchSessionPreResumeFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchSessionPostResume(ctx context.Context, payload hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error) { + if s.dispatchSessionPostResumeFn != nil { + return s.dispatchSessionPostResumeFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchSessionPreStop(ctx context.Context, payload hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) { + if s.dispatchSessionPreStopFn != nil { + return s.dispatchSessionPreStopFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchSessionPostStop(ctx context.Context, payload hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) { + if s.dispatchSessionPostStopFn != nil { + return 
s.dispatchSessionPostStopFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchInputPreSubmit(ctx context.Context, payload hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) { + if s.dispatchInputPreSubmitFn != nil { + return s.dispatchInputPreSubmitFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchPromptPostAssemble(ctx context.Context, payload hookspkg.PromptPayload) (hookspkg.PromptPayload, error) { + if s.dispatchPromptPostAssembleFn != nil { + return s.dispatchPromptPostAssembleFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchEventPreRecord(ctx context.Context, payload hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) { + if s.dispatchEventPreRecordFn != nil { + return s.dispatchEventPreRecordFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchEventPostRecord(ctx context.Context, payload hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) { + if s.dispatchEventPostRecordFn != nil { + return s.dispatchEventPostRecordFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchAgentPreStart(ctx context.Context, payload hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) { + if s.dispatchAgentPreStartFn != nil { + return s.dispatchAgentPreStartFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchAgentSpawned(ctx context.Context, payload hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) { + if s.dispatchAgentSpawnedFn != nil { + return s.dispatchAgentSpawnedFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchAgentCrashed(ctx context.Context, payload hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error) { + if s.dispatchAgentCrashedFn != nil { + return s.dispatchAgentCrashedFn(ctx, payload) + } + return payload, nil +} + +func (s 
*spyHookDispatcher) DispatchAgentStopped(ctx context.Context, payload hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) { + if s.dispatchAgentStoppedFn != nil { + return s.dispatchAgentStoppedFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchTurnStart(ctx context.Context, payload hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) { + if s.dispatchTurnStartFn != nil { + return s.dispatchTurnStartFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchTurnEnd(ctx context.Context, payload hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) { + if s.dispatchTurnEndFn != nil { + return s.dispatchTurnEndFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchMessageStart(ctx context.Context, payload hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) { + if s.dispatchMessageStartFn != nil { + return s.dispatchMessageStartFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchMessageDelta(ctx context.Context, payload hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) { + if s.dispatchMessageDeltaFn != nil { + return s.dispatchMessageDeltaFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchMessageEnd(ctx context.Context, payload hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) { + if s.dispatchMessageEndFn != nil { + return s.dispatchMessageEndFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchContextPreCompact(ctx context.Context, payload hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) { + if s.dispatchContextPreCompactFn != nil { + return s.dispatchContextPreCompactFn(ctx, payload) + } + return payload, nil +} + +func (s *spyHookDispatcher) DispatchContextPostCompact(ctx context.Context, payload hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) 
{ + if s.dispatchContextPostCompactFn != nil { + return s.dispatchContextPostCompactFn(ctx, payload) + } + return payload, nil +} + +type orderedRecorder struct { + onRecord func(store.SessionEvent) + events []store.SessionEvent +} + +func (r *orderedRecorder) Record(_ context.Context, event store.SessionEvent) error { + r.events = append(r.events, event) + if r.onRecord != nil { + r.onRecord(event) + } + return nil +} + +func (r *orderedRecorder) RecordTokenUsage(context.Context, store.TokenUsage) error { + return nil +} + +func (r *orderedRecorder) Query(context.Context, store.EventQuery) ([]store.SessionEvent, error) { + return append([]store.SessionEvent(nil), r.events...), nil +} + +func (r *orderedRecorder) History(context.Context, store.EventQuery) ([]store.TurnHistory, error) { + return nil, nil +} + +func (r *orderedRecorder) Close(context.Context) error { + return nil +} diff --git a/internal/session/manager_integration_test.go b/internal/session/manager_integration_test.go index 3342b4f78..4a70b309c 100644 --- a/internal/session/manager_integration_test.go +++ b/internal/session/manager_integration_test.go @@ -3,9 +3,14 @@ package session import ( + "context" + "errors" + "sync" "testing" + "time" "github.com/pedronauck/agh/internal/acp" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/store" "github.com/pedronauck/agh/internal/store/sessiondb" "github.com/pedronauck/agh/internal/testutil" @@ -116,3 +121,237 @@ func TestManagerIntegrationUsesRealSQLitePerSessionDB(t *testing.T) { t.Fatal("Query(reopen) returned 0 events, want persisted rows") } } + +func TestManagerIntegrationFullLifecycleHooksFireInOrder(t *testing.T) { + var ( + mu sync.Mutex + order []string + ) + record := func(entry string) { + mu.Lock() + defer mu.Unlock() + order = append(order, entry) + } + + dispatcher := &spyHookDispatcher{ + dispatchSessionPreCreateFn: func(_ context.Context, payload hookspkg.SessionPreCreatePayload) 
(hookspkg.SessionPreCreatePayload, error) { + record("session.pre_create") + return payload, nil + }, + dispatchPromptPostAssembleFn: func(_ context.Context, payload hookspkg.PromptPayload) (hookspkg.PromptPayload, error) { + record("prompt.post_assemble") + return payload, nil + }, + dispatchAgentPreStartFn: func(_ context.Context, payload hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) { + record("agent.pre_start") + return payload, nil + }, + dispatchAgentSpawnedFn: func(_ context.Context, payload hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) { + record("agent.spawned") + return payload, nil + }, + dispatchSessionPostCreateFn: func(_ context.Context, payload hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) { + record("session.post_create") + return payload, nil + }, + dispatchInputPreSubmitFn: func(_ context.Context, payload hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) { + record("input.pre_submit") + return payload, nil + }, + dispatchTurnStartFn: func(_ context.Context, payload hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) { + record("turn.start") + return payload, nil + }, + dispatchTurnEndFn: func(_ context.Context, payload hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) { + record("turn.end") + return payload, nil + }, + dispatchMessageStartFn: func(_ context.Context, payload hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) { + record("message.start") + return payload, nil + }, + dispatchMessageDeltaFn: func(_ context.Context, payload hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) { + record("message.delta:" + payload.DeltaType) + return payload, nil + }, + dispatchMessageEndFn: func(_ context.Context, payload hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) { + record("message.end") + return payload, nil + }, + dispatchEventPreRecordFn: func(_ context.Context, payload 
hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) { + record("event.pre_record:" + payload.RecordType) + return payload, nil + }, + dispatchEventPostRecordFn: func(_ context.Context, payload hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) { + record("event.post_record:" + payload.RecordType) + return payload, nil + }, + dispatchSessionPreStopFn: func(_ context.Context, payload hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) { + record("session.pre_stop") + return payload, nil + }, + dispatchAgentStoppedFn: func(_ context.Context, payload hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) { + record("agent.stopped") + return payload, nil + }, + dispatchSessionPostStopFn: func(_ context.Context, payload hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) { + record("session.post_stop") + return payload, nil + }, + } + + h := newHarness(t, WithHookDispatcher(dispatcher)) + + session := createSession(t, h) + eventsCh, err := h.manager.Prompt(testutil.Context(t), session.ID, "hello") + if err != nil { + t.Fatalf("Prompt() error = %v", err) + } + _ = collectEvents(t, eventsCh) + if err := h.manager.Stop(testutil.Context(t), session.ID); err != nil { + t.Fatalf("Stop() error = %v", err) + } + + want := []string{ + "session.pre_create", + "prompt.post_assemble", + "agent.pre_start", + "agent.spawned", + "session.post_create", + "input.pre_submit", + "turn.start", + "event.pre_record:user_message", + "event.post_record:user_message", + "message.start", + "message.delta:text", + "event.pre_record:agent_message", + "event.post_record:agent_message", + "message.end", + "event.pre_record:done", + "event.post_record:done", + "turn.end", + "session.pre_stop", + "event.pre_record:session_stopped", + "event.post_record:session_stopped", + "agent.stopped", + "session.post_stop", + } + + mu.Lock() + got := append([]string(nil), order...) 
+ mu.Unlock() + if !testutil.EqualStringSlices(got, want) { + t.Fatalf("hook order = %#v, want %#v", got, want) + } +} + +func TestManagerIntegrationContextCompactionUsesPatchedParams(t *testing.T) { + reason := "patched-reason" + strategy := "patched-strategy" + postSeen := make(chan hookspkg.ContextPostCompactPayload, 1) + + hooks := newNativeHookDispatcher(t, + []hookspkg.HookDecl{ + { + Name: "context-pre", + Event: hookspkg.HookContextPreCompact, + Mode: hookspkg.HookModeSync, + ExecutorKind: hookspkg.HookExecutorNative, + }, + { + Name: "context-post", + Event: hookspkg.HookContextPostCompact, + Mode: hookspkg.HookModeAsync, + ExecutorKind: hookspkg.HookExecutorNative, + }, + }, + map[string]hookspkg.Executor{ + "context-pre": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, payload hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPatch, error) { + return hookspkg.ContextPreCompactPatch{ + Reason: &reason, + Strategy: &strategy, + }, nil + }), + "context-post": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, payload hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPatch, error) { + postSeen <- payload + return hookspkg.ContextPostCompactPatch{}, nil + }), + }, + ) + + h := newHarness(t, WithHookDispatcher(hooks)) + session := createSession(t, h) + t.Cleanup(func() { + _ = h.manager.Stop(testutil.Context(t), session.ID) + }) + + var seen hookspkg.ContextPreCompactPayload + result, err := h.manager.runContextCompaction( + testutil.Context(t), + session, + "turn-context", + "manual", + "noop", + "", + nil, + func(_ context.Context, payload hookspkg.ContextPreCompactPayload) (hookspkg.ContextPostCompactPayload, error) { + seen = payload + return hookspkg.ContextPostCompactPayload{ + Summary: "after", + }, nil + }, + ) + if err != nil { + t.Fatalf("runContextCompaction() error = %v", err) + } + if seen.Reason != reason || seen.Strategy != strategy { + 
t.Fatalf("compactor saw reason/strategy = %q/%q, want %q/%q", seen.Reason, seen.Strategy, reason, strategy) + } + if result.Reason != reason || result.Strategy != strategy { + t.Fatalf("result reason/strategy = %q/%q, want %q/%q", result.Reason, result.Strategy, reason, strategy) + } + select { + case payload := <-postSeen: + if payload.Reason != reason || payload.Strategy != strategy { + t.Fatalf("post hook saw reason/strategy = %q/%q, want %q/%q", payload.Reason, payload.Strategy, reason, strategy) + } + case <-time.After(time.Second): + t.Fatal("timed out waiting for context.post_compact hook") + } +} + +func TestManagerIntegrationPreStopRequiredHookErrorPreventsCleanStop(t *testing.T) { + hooks := newNativeHookDispatcher(t, + []hookspkg.HookDecl{{ + Name: "required-pre-stop", + Event: hookspkg.HookSessionPreStop, + Mode: hookspkg.HookModeSync, + Required: true, + ExecutorKind: hookspkg.HookExecutorNative, + }}, + map[string]hookspkg.Executor{ + "required-pre-stop": hookspkg.NewTypedNativeExecutor(func(_ context.Context, _ hookspkg.RegisteredHook, _ hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPatch, error) { + return hookspkg.SessionPreStopPatch{}, errors.New("required hook failed") + }), + }, + ) + + h := newHarness(t, WithHookDispatcher(hooks)) + session := createSession(t, h) + + err := h.manager.Stop(testutil.Context(t), session.ID) + if err == nil { + t.Fatal("Stop() error = nil, want required pre-stop hook failure") + } + if got := session.Info().State; got != StateActive { + t.Fatalf("session state after failed Stop() = %q, want %q", got, StateActive) + } + if _, ok := h.manager.Get(session.ID); !ok { + t.Fatalf("Get(%q) = missing, want active session after failed stop", session.ID) + } + + h.manager.hooks = nil + if cleanupErr := h.manager.Stop(testutil.Context(t), session.ID); cleanupErr != nil { + t.Fatalf("cleanup Stop() error = %v", cleanupErr) + } +} diff --git a/internal/session/manager_lifecycle.go 
b/internal/session/manager_lifecycle.go index 349b5200b..ce755db0a 100644 --- a/internal/session/manager_lifecycle.go +++ b/internal/session/manager_lifecycle.go @@ -10,6 +10,7 @@ import ( "github.com/pedronauck/agh/internal/acp" aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/store" workspacepkg "github.com/pedronauck/agh/internal/workspace" ) @@ -20,6 +21,11 @@ func (m *Manager) Create(ctx context.Context, opts CreateOpts) (_ *Session, err return nil, errors.New("session: create context is required") } + opts, err = m.dispatchSessionPreCreate(ctx, opts) + if err != nil { + return nil, err + } + resolvedWorkspace, err := m.resolveCreateWorkspace(ctx, opts) if err != nil { return nil, err @@ -34,7 +40,21 @@ func (m *Manager) Create(ctx context.Context, opts CreateOpts) (_ *Session, err if err != nil { return nil, fmt.Errorf("session: resolve workspace agent %q: %w", agentName, err) } - startupPrompt, err := m.startupPrompt(ctx, agentName, agentDef, resolvedWorkspace) + + sessionID := strings.TrimSpace(m.newSessionID()) + if sessionID == "" { + return nil, errors.New("session: session id generator returned empty id") + } + + startupPrompt, err := m.startupPrompt(ctx, hookspkg.SessionContext{ + SessionID: sessionID, + SessionName: strings.TrimSpace(opts.Name), + SessionType: string(normalizeSessionType(opts.Type)), + AgentName: strings.TrimSpace(agentName), + WorkspaceID: strings.TrimSpace(resolvedWorkspace.ID), + Workspace: strings.TrimSpace(resolvedWorkspace.RootDir), + State: string(StateStarting), + }, agentDef, resolvedWorkspace) if err != nil { return nil, err } @@ -50,11 +70,6 @@ func (m *Manager) Create(ctx context.Context, opts CreateOpts) (_ *Session, err return nil, err } - sessionID := strings.TrimSpace(m.newSessionID()) - if sessionID == "" { - return nil, errors.New("session: session id generator returned empty id") - } - if err := m.reserve(sessionID, 
m.effectiveMaxSessions(resolvedWorkspace.Config)); err != nil { return nil, err } @@ -100,11 +115,7 @@ func (m *Manager) Create(ctx context.Context, opts CreateOpts) (_ *Session, err recorder: recorder, } - if err := m.writeMeta(session); err != nil { - return nil, err - } - - proc, err = m.driver.Start(ctx, acp.StartOpts{ + startOpts := acp.StartOpts{ AgentName: resolved.Name, Command: resolved.Command, Cwd: resolvedWorkspace.RootDir, @@ -112,12 +123,22 @@ func (m *Manager) Create(ctx context.Context, opts CreateOpts) (_ *Session, err MCPServers: startMCPServers, Permissions: m.startPermissions(session.Type, resolved.Permissions), SystemPrompt: resolved.Prompt, - }) + } + startOpts, err = m.dispatchAgentPreStart(ctx, session, resolved, startOpts) + if err != nil { + return nil, err + } + + if err := m.writeMeta(session); err != nil { + return nil, err + } + + proc, err = m.driver.Start(ctx, startOpts) if err != nil { return nil, fmt.Errorf("session: start agent for %q: %w", sessionID, err) } - if err := m.activateAndWatch(ctx, session, proc); err != nil { + if err := m.activateAndWatch(ctx, session, proc, resolved, hookspkg.HookSessionPostCreate); err != nil { return nil, err } @@ -134,6 +155,9 @@ func (m *Manager) Stop(ctx context.Context, id string) error { if err != nil { return err } + if err := m.dispatchSessionPreStop(ctx, session); err != nil { + return err + } writeMeta, promptSetupDone, err := session.prepareStop(m.now()) if err != nil { @@ -187,6 +211,10 @@ func (m *Manager) Resume(ctx context.Context, id string) (_ *Session, err error) if err != nil { return nil, fmt.Errorf("session: read session meta %q: %w", metaPath, err) } + meta, err = m.dispatchSessionPreResume(ctx, meta) + if err != nil { + return nil, err + } resolvedWorkspace, err := m.resolveResumeWorkspace(ctx, meta) if err != nil { @@ -197,7 +225,18 @@ func (m *Manager) Resume(ctx context.Context, id string) (_ *Session, err error) if err != nil { return nil, fmt.Errorf("session: resolve 
workspace agent %q: %w", meta.AgentName, err) } - startupPrompt, err := m.startupPrompt(ctx, meta.AgentName, agentDef, resolvedWorkspace) + startupPrompt, err := m.startupPrompt(ctx, hookspkg.SessionContext{ + SessionID: strings.TrimSpace(meta.ID), + SessionName: strings.TrimSpace(meta.Name), + SessionType: string(normalizeSessionType(SessionType(meta.SessionType))), + AgentName: strings.TrimSpace(meta.AgentName), + WorkspaceID: strings.TrimSpace(resolvedWorkspace.ID), + Workspace: strings.TrimSpace(resolvedWorkspace.RootDir), + ACPSessionID: strings.TrimSpace(derefString(meta.ACPSessionID)), + State: string(StateStarting), + CreatedAt: meta.CreatedAt, + UpdatedAt: m.now(), + }, agentDef, resolvedWorkspace) if err != nil { return nil, err } @@ -257,11 +296,7 @@ func (m *Manager) Resume(ctx context.Context, id string) (_ *Session, err error) recorder: recorder, } - if err := m.writeMeta(session); err != nil { - return nil, err - } - - proc, err = m.driver.Start(ctx, acp.StartOpts{ + startOpts := acp.StartOpts{ AgentName: resolved.Name, Command: resolved.Command, Cwd: resolvedWorkspace.RootDir, @@ -270,12 +305,22 @@ func (m *Manager) Resume(ctx context.Context, id string) (_ *Session, err error) Permissions: m.startPermissions(session.Type, resolved.Permissions), SystemPrompt: resolved.Prompt, ResumeSessionID: derefString(meta.ACPSessionID), - }) + } + startOpts, err = m.dispatchAgentPreStart(ctx, session, resolved, startOpts) + if err != nil { + return nil, err + } + + if err := m.writeMeta(session); err != nil { + return nil, err + } + + proc, err = m.driver.Start(ctx, startOpts) if err != nil { return nil, fmt.Errorf("session: resume agent for %q: %w", meta.ID, err) } - if err := m.activateAndWatch(ctx, session, proc); err != nil { + if err := m.activateAndWatch(ctx, session, proc, resolved, hookspkg.HookSessionPostResume); err != nil { return nil, err } @@ -336,6 +381,8 @@ func (m *Manager) finalizeStopped(ctx context.Context, session *Session, waitErr } if 
waitErr != nil { + m.dispatchAgentCrashed(ctx, session, session.processHandle(), waitErr) + stderr := "" if proc := session.processHandle(); proc != nil { stderr = proc.Stderr() @@ -375,6 +422,8 @@ func (m *Manager) finalizeStopped(ctx context.Context, session *Session, waitErr m.notifier.OnAgentEvent(ctx, session.ID, normalizedStop) } + m.dispatchAgentStopped(ctx, session, session.processHandle(), waitErr) + if recorder := session.recorderHandle(); recorder != nil { func() { closeCtx, cancel := context.WithTimeout(context.Background(), defaultLifecycleTimeout) @@ -394,6 +443,7 @@ func (m *Manager) finalizeStopped(ctx context.Context, session *Session, waitErr } m.remove(session.ID) + m.dispatchSessionPostStop(ctx, session) if m.notifier != nil { m.notifier.OnSessionStopped(ctx, session) } diff --git a/internal/session/manager_prompt.go b/internal/session/manager_prompt.go index 8be8b481f..dee77fa58 100644 --- a/internal/session/manager_prompt.go +++ b/internal/session/manager_prompt.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "strings" + "time" "github.com/pedronauck/agh/internal/acp" "github.com/pedronauck/agh/internal/store" @@ -32,6 +33,15 @@ func (m *Manager) Prompt(ctx context.Context, id string, msg string) (<-chan acp turnID = newID("turn") } + message, err = m.dispatchInputPreSubmit(ctx, session, turnID, message) + if err != nil { + return nil, err + } + turnState := newPromptTurnDispatchState(session, turnID, hookInputClassUserMessage, message) + if err := m.dispatchTurnStart(ctx, turnState); err != nil { + return nil, err + } + proc, err := session.beginPromptSetup() if err != nil { return nil, err @@ -58,7 +68,7 @@ func (m *Manager) Prompt(ctx context.Context, id string, msg string) (<-chan acp out := make(chan acp.AgentEvent, m.promptBufSize) // pumpPrompt terminates when the driver closes the source channel or the request context ends. 
- go m.pumpPrompt(ctx, session, turnID, source, out) + go m.pumpPrompt(ctx, session, turnState, source, out) return out, nil } @@ -100,8 +110,12 @@ func (m *Manager) ApprovePermission(ctx context.Context, id string, req acp.Appr return nil } -func (m *Manager) pumpPrompt(ctx context.Context, session *Session, turnID string, source <-chan acp.AgentEvent, out chan<- acp.AgentEvent) { +func (m *Manager) pumpPrompt(ctx context.Context, session *Session, turnState *promptTurnDispatchState, source <-chan acp.AgentEvent, out chan<- acp.AgentEvent) { defer close(out) + defer func() { + m.finishPromptMessage(ctx, turnState, time.Time{}) + m.dispatchTurnEnd(ctx, turnState, time.Time{}) + }() for { var ( @@ -117,9 +131,10 @@ func (m *Manager) pumpPrompt(ctx context.Context, session *Session, turnID strin } } - normalized := m.normalizeEvent(session, turnID, event) + normalized := m.normalizeEvent(session, turnState.turnID, event) + normalized = m.preparePromptEvent(ctx, turnState, normalized) if err := m.recordEvent(ctx, session, normalized); err != nil { - m.sessionLogger(session).Warn("session: record prompt event failed", "turn_id", turnID, "error", err) + m.sessionLogger(session).Warn("session: record prompt event failed", "turn_id", turnState.turnID, "error", err) } if m.notifier != nil { m.notifier.OnAgentEvent(ctx, session.ID, normalized) @@ -130,6 +145,11 @@ func (m *Manager) pumpPrompt(ctx context.Context, session *Session, turnID strin case <-ctx.Done(): return } + + if normalized.Type == acp.EventTypeDone || normalized.Type == acp.EventTypeError { + m.dispatchTurnEnd(ctx, turnState, normalized.Timestamp) + return + } } } @@ -161,6 +181,8 @@ func (m *Manager) recordEvent(ctx context.Context, session *Session, event acp.A return err } + m.dispatchEventPreRecord(ctx, session, event, payload) + if err := recorder.Record(ctx, store.SessionEvent{ TurnID: event.TurnID, Type: event.Type, @@ -190,6 +212,8 @@ func (m *Manager) recordEvent(ctx context.Context, session 
*Session, event acp.A } } + m.dispatchEventPostRecord(ctx, session, event, payload) + return nil } diff --git a/internal/session/manager_test.go b/internal/session/manager_test.go index 46682581a..477f73ef7 100644 --- a/internal/session/manager_test.go +++ b/internal/session/manager_test.go @@ -16,6 +16,7 @@ import ( "github.com/pedronauck/agh/internal/acp" aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" skillspkg "github.com/pedronauck/agh/internal/skills" "github.com/pedronauck/agh/internal/store" "github.com/pedronauck/agh/internal/store/sessiondb" @@ -246,7 +247,7 @@ func TestActivateAndWatchUpdatesStateAndStartsWatcher(t *testing.T) { t.Fatalf("Start() error = %v", err) } - if err := h.manager.activateAndWatch(testutil.Context(t), session, proc); err != nil { + if err := h.manager.activateAndWatch(testutil.Context(t), session, proc, aghconfig.ResolvedAgent{Name: "coder"}, hookspkg.HookSessionPostCreate); err != nil { t.Fatalf("activateAndWatch() error = %v", err) } @@ -337,7 +338,7 @@ func TestActivateAndWatchRollsBackOnMetaWriteFailure(t *testing.T) { t.Fatalf("Start() error = %v", err) } - if err := h.manager.activateAndWatch(testutil.Context(t), session, proc); err == nil { + if err := h.manager.activateAndWatch(testutil.Context(t), session, proc, aghconfig.ResolvedAgent{Name: "coder"}, hookspkg.HookSessionPostCreate); err == nil { t.Fatal("activateAndWatch() error = nil, want non-nil") } if _, ok := h.manager.Get(session.ID); ok { @@ -394,7 +395,7 @@ func TestPumpPromptReturnsWhenContextIsCanceledWhileWaitingForSource(t *testing. 
done := make(chan struct{}) go func() { defer close(done) - h.manager.pumpPrompt(ctx, nil, "turn-1", source, out) + h.manager.pumpPrompt(ctx, nil, newPromptTurnDispatchState(nil, "turn-1", hookInputClassUserMessage, ""), source, out) }() cancel() @@ -1384,10 +1385,14 @@ func (n *fakeNotifier) OnSessionStopped(_ context.Context, session *Session) { n.order = append(n.order, "stopped:"+session.ID) } -func (n *fakeNotifier) OnAgentEvent(_ context.Context, sessionID string, event acp.AgentEvent) { +func (n *fakeNotifier) OnAgentEvent(_ context.Context, sessionID string, event any) { n.mu.Lock() defer n.mu.Unlock() - n.events[sessionID] = append(n.events[sessionID], event) + agentEvent, ok := event.(acp.AgentEvent) + if !ok { + return + } + n.events[sessionID] = append(n.events[sessionID], agentEvent) } func (n *fakeNotifier) createdCount() int { diff --git a/internal/skills/hook_decl.go b/internal/skills/hook_decl.go new file mode 100644 index 000000000..cd09b45bb --- /dev/null +++ b/internal/skills/hook_decl.go @@ -0,0 +1,65 @@ +package skills + +import ( + "fmt" + "strings" + + hookspkg "github.com/pedronauck/agh/internal/hooks" +) + +func refreshSkillHookDecls(skill *Skill) { + if skill == nil || len(skill.Hooks) == 0 { + return + } + + normalized := make([]hookspkg.HookDecl, len(skill.Hooks)) + for idx, decl := range skill.Hooks { + normalized[idx] = normalizeSkillHookDecl(skill, decl, idx, len(skill.Hooks)) + } + + skill.Hooks = normalized +} + +func normalizeSkillHookDecl(skill *Skill, decl hookspkg.HookDecl, index int, total int) hookspkg.HookDecl { + normalized := decl + if strings.TrimSpace(normalized.Name) == "" { + normalized.Name = skillHookName(skill, index, total) + } + normalized.Source = hookspkg.HookSourceSkill + normalized.SkillSource = skillHookSource(skillSource(skill)) + return normalized +} + +func skillHookName(skill *Skill, index int, total int) string { + base := skillIdentifier(skill) + if total <= 1 { + return base + } + + return 
fmt.Sprintf("%s#%d", base, index+1) +} + +func skillHookSource(source SkillSource) hookspkg.HookSkillSource { + switch source { + case SourceBundled: + return hookspkg.HookSkillSourceBundled + case SourceMarketplace: + return hookspkg.HookSkillSourceMarketplace + case SourceUser: + return hookspkg.HookSkillSourceUser + case SourceAdditional: + return hookspkg.HookSkillSourceAdditional + case SourceWorkspace: + return hookspkg.HookSkillSourceWorkspace + default: + return "" + } +} + +func skillSource(skill *Skill) SkillSource { + if skill == nil { + return SourceUser + } + + return skill.Source +} diff --git a/internal/skills/hook_process_unix.go b/internal/skills/hook_process_unix.go deleted file mode 100644 index 27c088530..000000000 --- a/internal/skills/hook_process_unix.go +++ /dev/null @@ -1,40 +0,0 @@ -//go:build !windows - -package skills - -import ( - "errors" - "os/exec" - "syscall" -) - -func configureHookCommand(cmd *exec.Cmd) { - if cmd == nil { - return - } - if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &syscall.SysProcAttr{} - } - cmd.SysProcAttr.Setpgid = true -} - -func terminateHookCommand(cmd *exec.Cmd) error { - return signalHookCommand(cmd, syscall.SIGTERM) -} - -func killHookCommand(cmd *exec.Cmd) error { - return signalHookCommand(cmd, syscall.SIGKILL) -} - -func signalHookCommand(cmd *exec.Cmd, sig syscall.Signal) error { - if cmd == nil || cmd.Process == nil || cmd.Process.Pid <= 0 { - return nil - } - if err := syscall.Kill(-cmd.Process.Pid, sig); err != nil { - if errors.Is(err, syscall.ESRCH) { - return nil - } - return err - } - return nil -} diff --git a/internal/skills/hook_process_windows.go b/internal/skills/hook_process_windows.go deleted file mode 100644 index 69646258e..000000000 --- a/internal/skills/hook_process_windows.go +++ /dev/null @@ -1,32 +0,0 @@ -//go:build windows - -package skills - -import ( - "errors" - "os" - "os/exec" -) - -func configureHookCommand(_ *exec.Cmd) {} - -func terminateHookCommand(cmd *exec.Cmd) 
error { - return signalHookCommand(cmd, os.Kill) -} - -func killHookCommand(cmd *exec.Cmd) error { - return signalHookCommand(cmd, os.Kill) -} - -func signalHookCommand(cmd *exec.Cmd, sig os.Signal) error { - if cmd == nil || cmd.Process == nil { - return nil - } - if err := cmd.Process.Signal(sig); err != nil { - if errors.Is(err, os.ErrProcessDone) { - return nil - } - return err - } - return nil -} diff --git a/internal/skills/hooks.go b/internal/skills/hooks.go deleted file mode 100644 index 298c26e75..000000000 --- a/internal/skills/hooks.go +++ /dev/null @@ -1,401 +0,0 @@ -package skills - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - "log/slog" - "os" - "os/exec" - "sort" - "strings" - "time" - - aghconfig "github.com/pedronauck/agh/internal/config" -) - -const ( - defaultHookTimeout = 5 * time.Second - hookCaptureLimitBytes = 8 * 1024 - hookCaptureTruncateNote = "...[truncated]" - hookShutdownGracePeriod = 250 * time.Millisecond -) - -var hookEnvAllowlist = []string{ - "COMSPEC", - "HOME", - "LANG", - "LC_ALL", - "LC_CTYPE", - "LOGNAME", - "PATH", - "PATHEXT", - "SHELL", - "SYSTEMROOT", - "TEMP", - "TERM", - "TMP", - "TMPDIR", - "USER", - "USERPROFILE", -} - -// HookRunner dispatches subprocess hooks for skill lifecycle events. -type HookRunner struct { - allowedMarketplaceHooks []string - logger *slog.Logger -} - -// HookPayload is the JSON payload written to hook stdin. -type HookPayload struct { - SessionID string `json:"session_id"` - AgentName string `json:"agent_name"` - Workspace string `json:"workspace"` - Event string `json:"event"` -} - -// HookResult captures the outcome of a single hook execution. -type HookResult struct { - SkillName string - Event HookEvent - Output string - Error error - Duration time.Duration -} - -// NewHookRunner constructs a HookRunner with the supplied config and logger. 
-func NewHookRunner(cfg aghconfig.SkillsConfig, logger *slog.Logger) *HookRunner { - if logger == nil { - logger = slog.Default() - } - - return &HookRunner{ - allowedMarketplaceHooks: cloneStrings(cfg.AllowedMarketplaceHooks), - logger: logger, - } -} - -// RunHooks executes all hooks matching the given event, in precedence order. -func (hr *HookRunner) RunHooks(ctx context.Context, event HookEvent, skills []*Skill, payload HookPayload) []HookResult { - if len(skills) == 0 { - return nil - } - if hr == nil { - hr = NewHookRunner(aghconfig.SkillsConfig{}, nil) - } - if hr.logger == nil { - hr.logger = slog.Default() - } - - ordered := orderSkillsForHooks(skills, event) - if len(ordered) == 0 { - return nil - } - - allowedMarketplace := marketplaceAllowlist(hr.allowedMarketplaceHooks) - payload.Event = string(event) - results := make([]HookResult, 0) - for _, skill := range ordered { - if !marketplaceSkillAllowed(skill, allowedMarketplace) { - hr.logger.Warn( - "blocked hook", - "skill_name", skill.Meta.Name, - "event", event, - "source", skillSourceName(skill.Source), - ) - continue - } - for _, hook := range skill.Hooks { - if hook.Event != event { - continue - } - - result := hr.runHook(ctx, skill, hook, payload) - results = append(results, result) - } - } - - if len(results) == 0 { - return nil - } - - return results -} - -func (hr *HookRunner) runHook(ctx context.Context, skill *Skill, hook HookDecl, payload HookPayload) HookResult { - result := HookResult{ - SkillName: skillName(skill), - Event: hook.Event, - } - - started := time.Now() - - if strings.TrimSpace(hook.Command) == "" { - result.Error = errors.New("hook command is required") - result.Duration = time.Since(started) - hr.logHookFailure(skill, hook, result, nil, nil) - return result - } - - stdinPayload, err := json.Marshal(payload) - if err != nil { - result.Error = fmt.Errorf("marshal hook payload: %w", err) - result.Duration = time.Since(started) - hr.logHookFailure(skill, hook, result, nil, nil) 
- return result - } - - timeout := hookTimeout(hook.Timeout) - hookCtx, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - - cmd := exec.Command(hook.Command, hook.Args...) - configureHookCommand(cmd) - cmd.Dir = hookCommandDir(skill) - cmd.Stdin = bytes.NewReader(stdinPayload) - cmd.Env = hookProcessEnv(hook.Env) - - stdout := newHookCapture() - stderr := newHookCapture() - cmd.Stdout = stdout - cmd.Stderr = stderr - - err = runHookCommand(hookCtx, cmd) - result.Output = stdout.String() - if err == nil { - result.Duration = time.Since(started) - return result - } - - result.Error = hookRunError(hookCtx, timeout, err, stderr) - result.Duration = time.Since(started) - hr.logHookFailure(skill, hook, result, stdout, stderr) - return result -} - -func runHookCommand(ctx context.Context, cmd *exec.Cmd) error { - if err := cmd.Start(); err != nil { - return err - } - - waitCh := make(chan error, 1) - go func() { - waitCh <- cmd.Wait() - }() - - select { - case err := <-waitCh: - return err - case <-ctx.Done(): - _ = terminateHookCommand(cmd) - timer := time.NewTimer(hookShutdownGracePeriod) - defer timer.Stop() - - select { - case err := <-waitCh: - return err - case <-timer.C: - _ = killHookCommand(cmd) - return <-waitCh - } - } -} - -func (hr *HookRunner) logHookFailure(skill *Skill, hook HookDecl, result HookResult, stdout hookCapture, stderr hookCapture) { - hr.logger.Warn( - "hook execution failed", - "skill_name", result.SkillName, - "event", hook.Event, - "source", skillSourceName(skill.Source), - "command", hook.Command, - "duration", result.Duration, - "stdout", hookCaptureSummary(stdout), - "stderr", hookCaptureSummary(stderr), - "error", result.Error, - ) -} - -func orderSkillsForHooks(skills []*Skill, event HookEvent) []*Skill { - ordered := make([]*Skill, 0, len(skills)) - for _, skill := range skills { - if !skillHasHookEvent(skill, event) { - continue - } - ordered = append(ordered, skill) - } - - sort.SliceStable(ordered, func(i, j int) bool { 
- left := ordered[i] - right := ordered[j] - if left.Source != right.Source { - return left.Source < right.Source - } - - leftKey := strings.ToLower(skillName(left)) - rightKey := strings.ToLower(skillName(right)) - if leftKey != rightKey { - return leftKey < rightKey - } - - leftName := skillName(left) - rightName := skillName(right) - if leftName != rightName { - return leftName < rightName - } - - return left.FilePath < right.FilePath - }) - - return ordered -} - -func skillHasHookEvent(skill *Skill, event HookEvent) bool { - if skill == nil { - return false - } - - for _, hook := range skill.Hooks { - if hook.Event == event { - return true - } - } - - return false -} - -func skillName(skill *Skill) string { - if skill == nil { - return "" - } - - return strings.TrimSpace(skill.Meta.Name) -} - -func hookCommandDir(skill *Skill) string { - if skill == nil { - return "" - } - - return strings.TrimSpace(skill.Dir) -} - -func hookTimeout(timeout time.Duration) time.Duration { - if timeout > 0 { - return timeout - } - - return defaultHookTimeout -} - -func hookProcessEnv(env map[string]string) []string { - merged := make(map[string]string, len(hookEnvAllowlist)+len(env)) - for _, key := range hookEnvAllowlist { - if value, ok := os.LookupEnv(key); ok { - merged[key] = value - } - } - for key, value := range env { - merged[key] = value - } - - keys := make([]string, 0, len(merged)) - for key := range merged { - keys = append(keys, key) - } - sort.Strings(keys) - - values := make([]string, 0, len(keys)) - for _, key := range keys { - values = append(values, key+"="+merged[key]) - } - - return values -} - -func hookRunError(ctx context.Context, timeout time.Duration, err error, stderr hookCapture) error { - if errors.Is(ctx.Err(), context.DeadlineExceeded) { - return fmt.Errorf("hook timed out after %s: %w", timeout, ctx.Err()) - } - if errors.Is(ctx.Err(), context.Canceled) { - return fmt.Errorf("hook canceled: %w", ctx.Err()) - } - - if stderr.Len() == 0 { - return 
fmt.Errorf("hook command failed: %w", err) - } - - return fmt.Errorf("hook command failed: %w (%s)", err, hookCaptureSummary(stderr)) -} - -type hookCapture interface { - Write([]byte) (int, error) - String() string - Len() int - Truncated() bool -} - -type limitedHookCapture struct { - buf bytes.Buffer - truncated bool -} - -func newHookCapture() *limitedHookCapture { - return &limitedHookCapture{} -} - -func (c *limitedHookCapture) Write(p []byte) (int, error) { - if c == nil { - return len(p), nil - } - - remaining := hookCaptureLimitBytes - c.buf.Len() - switch { - case remaining <= 0: - c.truncated = true - case len(p) > remaining: - _, _ = c.buf.Write(p[:remaining]) - c.truncated = true - default: - _, _ = c.buf.Write(p) - } - - return len(p), nil -} - -func (c *limitedHookCapture) String() string { - if c == nil { - return "" - } - - value := c.buf.String() - if !c.truncated { - return value - } - - return value + hookCaptureTruncateNote -} - -func (c *limitedHookCapture) Len() int { - if c == nil { - return 0 - } - - return c.buf.Len() -} - -func (c *limitedHookCapture) Truncated() bool { - return c != nil && c.truncated -} - -func hookCaptureSummary(capture hookCapture) string { - if capture == nil || capture.Len() == 0 { - return "redacted output (0 bytes)" - } - if capture.Truncated() { - return fmt.Sprintf("redacted output (%d+ bytes, truncated)", capture.Len()) - } - - return fmt.Sprintf("redacted output (%d bytes)", capture.Len()) -} diff --git a/internal/skills/hooks_test.go b/internal/skills/hooks_test.go deleted file mode 100644 index 7556abe5d..000000000 --- a/internal/skills/hooks_test.go +++ /dev/null @@ -1,538 +0,0 @@ -package skills - -import ( - "bytes" - "encoding/json" - "log/slog" - "os" - "path/filepath" - "strconv" - "strings" - "testing" - "time" - - aghconfig "github.com/pedronauck/agh/internal/config" - "github.com/pedronauck/agh/internal/procutil" -) - -func TestHookRunnerRunHooksReturnsEmptyForNoSkills(t *testing.T) { - t.Parallel() 
- - runner, logs := newHookRunnerForTest() - if got := runner.RunHooks(t.Context(), HookSessionCreated, nil, HookPayload{}); len(got) != 0 { - t.Fatalf("RunHooks(nil) len = %d, want 0", len(got)) - } - if got := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{}, HookPayload{}); len(got) != 0 { - t.Fatalf("RunHooks(empty) len = %d, want 0", len(got)) - } - if logs.Len() != 0 { - t.Fatalf("logs = %q, want empty logs", logs.String()) - } -} - -func TestHookRunnerRunHooksFiltersEventAndCapturesPayload(t *testing.T) { - t.Parallel() - - runner, logs := newHookRunnerForTest() - script := hookScriptPath(t) - - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - newSkillWithHook("created-skill", SourceUser, HookDecl{ - Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{script}, - Env: map[string]string{ - "HOOK_TEST_OUTPUT_MODE": "combined", - "HOOK_TEST_CUSTOM_ENV": "custom-value", - }, - }), - newSkillWithHook("stopped-skill", SourceUser, HookDecl{ - Event: HookSessionStopped, - Command: "/bin/sh", - Args: []string{script}, - Env: map[string]string{ - "HOOK_TEST_OUTPUT": "should-not-run", - }, - }), - }, HookPayload{ - SessionID: "session-123", - AgentName: "codex", - Workspace: "/tmp/workspace", - Event: "ignored-input-event", - }) - - if len(results) != 1 { - t.Fatalf("RunHooks() len = %d, want 1", len(results)) - } - - result := results[0] - if result.SkillName != "created-skill" { - t.Fatalf("result.SkillName = %q, want %q", result.SkillName, "created-skill") - } - if result.Event != HookSessionCreated { - t.Fatalf("result.Event = %q, want %q", result.Event, HookSessionCreated) - } - if result.Error != nil { - t.Fatalf("result.Error = %v, want nil", result.Error) - } - if result.Duration <= 0 { - t.Fatalf("result.Duration = %s, want > 0", result.Duration) - } - - payloadJSON, envValue, ok := strings.Cut(result.Output, "|") - if !ok { - t.Fatalf("result.Output = %q, want payload|env", result.Output) - } - if envValue != 
"custom-value" { - t.Fatalf("env value = %q, want %q", envValue, "custom-value") - } - - var payload HookPayload - if err := json.Unmarshal([]byte(payloadJSON), &payload); err != nil { - t.Fatalf("json.Unmarshal(%q): %v", payloadJSON, err) - } - - if payload.SessionID != "session-123" { - t.Fatalf("payload.SessionID = %q, want %q", payload.SessionID, "session-123") - } - if payload.AgentName != "codex" { - t.Fatalf("payload.AgentName = %q, want %q", payload.AgentName, "codex") - } - if payload.Workspace != "/tmp/workspace" { - t.Fatalf("payload.Workspace = %q, want %q", payload.Workspace, "/tmp/workspace") - } - if payload.Event != string(HookSessionCreated) { - t.Fatalf("payload.Event = %q, want %q", payload.Event, HookSessionCreated) - } - if logs.Len() != 0 { - t.Fatalf("logs = %q, want empty logs", logs.String()) - } -} - -func TestHookRunnerRunHooksOrdersBySourceAndSkillName(t *testing.T) { - t.Parallel() - - runner, logs := newHookRunnerForTest(aghconfig.SkillsConfig{ - AllowedMarketplaceHooks: []string{"@test/marketplace-skill"}, - }) - script := hookScriptPath(t) - - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - newSkillWithHook("workspace-skill", SourceWorkspace, hookOutput(script, "workspace-skill")), - newSkillWithHook("beta-user", SourceUser, hookOutput(script, "beta-user")), - newSkillWithHook("bundled-skill", SourceBundled, hookOutput(script, "bundled-skill")), - newSkillWithHook("marketplace-skill", SourceMarketplace, hookOutput(script, "marketplace-skill")), - newSkillWithHook("additional-skill", SourceAdditional, hookOutput(script, "additional-skill")), - newSkillWithHook("alpha-user", SourceUser, hookOutput(script, "alpha-user")), - }, HookPayload{}) - - got := make([]string, 0, len(results)) - for _, result := range results { - got = append(got, result.Output) - if result.Error != nil { - t.Fatalf("result for %q error = %v, want nil", result.SkillName, result.Error) - } - } - - want := []string{ - "bundled-skill", - 
"marketplace-skill", - "alpha-user", - "beta-user", - "additional-skill", - "workspace-skill", - } - if strings.Join(got, ",") != strings.Join(want, ",") { - t.Fatalf("hook order = %#v, want %#v", got, want) - } - if logs.Len() != 0 { - t.Fatalf("logs = %q, want empty logs", logs.String()) - } -} - -func TestHookRunnerRunHooksRequiresDedicatedMarketplaceHookConsent(t *testing.T) { - t.Parallel() - - runner, logs := newHookRunnerForTest(aghconfig.SkillsConfig{ - AllowedMarketplaceMCP: []string{"@test/marketplace-skill"}, - }) - script := hookScriptPath(t) - - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - newSkillWithHook("marketplace-skill", SourceMarketplace, hookOutput(script, "marketplace-skill")), - }, HookPayload{}) - - if len(results) != 0 { - t.Fatalf("RunHooks() len = %d, want marketplace hook blocked without hook consent", len(results)) - } - - output := logs.String() - if !strings.Contains(output, "blocked hook") { - t.Fatalf("logs = %q, want blocked hook warning", output) - } -} - -func TestHookRunnerRunHooksUsesProvenanceSlugForMarketplaceConsent(t *testing.T) { - t.Parallel() - - runner, logs := newHookRunnerForTest(aghconfig.SkillsConfig{ - AllowedMarketplaceHooks: []string{"@registry/real-skill"}, - }) - script := hookScriptPath(t) - - skill := newSkillWithHook("spoofed-display-name", SourceMarketplace, hookOutput(script, "approved")) - skill.Provenance = &Provenance{ - Hash: "hash-real-skill", - Registry: "clawhub", - Slug: "@registry/real-skill", - } - - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{skill}, HookPayload{}) - if len(results) != 1 { - t.Fatalf("RunHooks() len = %d, want 1 approved marketplace hook", len(results)) - } - if got, want := results[0].Output, "approved"; got != want { - t.Fatalf("results[0].Output = %q, want %q", got, want) - } - if logs.Len() != 0 { - t.Fatalf("logs = %q, want empty logs", logs.String()) - } -} - -func 
TestHookRunnerRunHooksBlocksMarketplaceHooksWithoutConsent(t *testing.T) { - t.Parallel() - - runner, logs := newHookRunnerForTest() - script := hookScriptPath(t) - - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - newSkillWithHook("marketplace-skill", SourceMarketplace, hookOutput(script, "marketplace-skill")), - newSkillWithHook("user-skill", SourceUser, hookOutput(script, "user-skill")), - }, HookPayload{}) - - if len(results) != 1 { - t.Fatalf("RunHooks() len = %d, want 1 allowed hook result", len(results)) - } - if got, want := results[0].SkillName, "user-skill"; got != want { - t.Fatalf("results[0].SkillName = %q, want %q", got, want) - } - if got, want := results[0].Output, "user-skill"; got != want { - t.Fatalf("results[0].Output = %q, want %q", got, want) - } - - output := logs.String() - if !strings.Contains(output, "blocked hook") { - t.Fatalf("logs = %q, want blocked hook warning", output) - } - if !strings.Contains(output, "skill_name=marketplace-skill") { - t.Fatalf("logs = %q, want blocked marketplace skill name", output) - } -} - -func TestHookRunnerRunHooksUsesSkillDirForRelativeFileAccess(t *testing.T) { - t.Parallel() - - skillDir := t.TempDir() - relativeFile := filepath.Join(skillDir, "message.txt") - if err := os.WriteFile(relativeFile, []byte("from-skill-dir"), 0o644); err != nil { - t.Fatalf("WriteFile(%q) error = %v", relativeFile, err) - } - - runner, logs := newHookRunnerForTest() - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - { - Meta: SkillMeta{ - Name: "relative-file-skill", - Description: "test skill", - }, - Dir: skillDir, - Source: SourceUser, - Hooks: []HookDecl{{ - Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{"-c", "cat message.txt"}, - }}, - }, - }, HookPayload{}) - - if len(results) != 1 { - t.Fatalf("RunHooks() len = %d, want 1", len(results)) - } - if results[0].Error != nil { - t.Fatalf("results[0].Error = %v, want nil", results[0].Error) - } - if got, 
want := results[0].Output, "from-skill-dir"; got != want { - t.Fatalf("results[0].Output = %q, want %q", got, want) - } - if logs.Len() != 0 { - t.Fatalf("logs = %q, want empty logs", logs.String()) - } -} - -func TestHookRunnerRunHooksFailsOpenOnHookError(t *testing.T) { - t.Parallel() - - runner, logs := newHookRunnerForTest() - script := hookScriptPath(t) - - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - newSkillWithHook("failing-skill", SourceUser, HookDecl{ - Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{script}, - Env: map[string]string{ - "HOOK_TEST_OUTPUT": "before-exit", - "HOOK_TEST_EXIT_CODE": "7", - "HOOK_TEST_STDERR": "hook failed", - }, - }), - newSkillWithHook("after-failure", SourceWorkspace, hookOutput(script, "after-failure")), - }, HookPayload{}) - - if len(results) != 2 { - t.Fatalf("RunHooks() len = %d, want 2", len(results)) - } - if results[0].Error == nil { - t.Fatal("results[0].Error = nil, want hook failure") - } - if !strings.Contains(results[0].Output, "before-exit") { - t.Fatalf("results[0].Output = %q, want captured stdout", results[0].Output) - } - if results[1].Error != nil { - t.Fatalf("results[1].Error = %v, want nil", results[1].Error) - } - if results[1].Output != "after-failure" { - t.Fatalf("results[1].Output = %q, want %q", results[1].Output, "after-failure") - } - - output := logs.String() - if !strings.Contains(output, "level=WARN") { - t.Fatalf("logs = %q, want warn log", output) - } - if !strings.Contains(output, "skill_name=failing-skill") { - t.Fatalf("logs = %q, want failing skill name", output) - } - if !strings.Contains(output, "event=on_session_created") { - t.Fatalf("logs = %q, want event field", output) - } - if strings.Contains(output, "before-exit") { - t.Fatalf("logs = %q, want stdout redacted from failure logs", output) - } - if strings.Contains(output, "hook failed") { - t.Fatalf("logs = %q, want stderr redacted from failure logs", output) - } - if 
!strings.Contains(output, "redacted output") { - t.Fatalf("logs = %q, want redacted output summary", output) - } -} - -func TestHookRunnerRunHooksTimesOut(t *testing.T) { - t.Parallel() - - runner, logs := newHookRunnerForTest() - script := hookScriptPath(t) - - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - newSkillWithHook("timeout-skill", SourceUser, HookDecl{ - Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{script}, - Timeout: 250 * time.Millisecond, - Env: map[string]string{ - "HOOK_TEST_BUSY_LOOP": "1", - }, - }), - }, HookPayload{}) - - if len(results) != 1 { - t.Fatalf("RunHooks() len = %d, want 1", len(results)) - } - if results[0].Error == nil { - t.Fatal("results[0].Error = nil, want timeout error") - } - if !strings.Contains(results[0].Error.Error(), "timed out") { - t.Fatalf("results[0].Error = %v, want timeout message", results[0].Error) - } - - output := logs.String() - if !strings.Contains(output, "level=WARN") { - t.Fatalf("logs = %q, want warn log", output) - } - if !strings.Contains(output, "skill_name=timeout-skill") { - t.Fatalf("logs = %q, want timeout skill name", output) - } -} - -func TestHookRunnerRunHooksTerminatesDescendantProcessesOnTimeout(t *testing.T) { - skillDir := t.TempDir() - pidFile := filepath.Join(skillDir, "child.pid") - scriptPath := filepath.Join(skillDir, "spawn-child.sh") - script := strings.Join([]string{ - "#!/bin/sh", - "set -eu", - "/bin/sh -c 'while :; do :; done' &", - "child=$!", - "printf '%s' \"$child\" > \"$HOOK_TEST_CHILD_PID_FILE\"", - "while :; do :; done", - }, "\n") - if err := os.WriteFile(scriptPath, []byte(script), 0o755); err != nil { - t.Fatalf("WriteFile(%q) error = %v", scriptPath, err) - } - - runner, _ := newHookRunnerForTest() - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - { - Meta: SkillMeta{ - Name: "descendant-cleanup-skill", - Description: "test skill", - }, - Dir: skillDir, - Source: SourceUser, - Hooks: []HookDecl{{ - 
Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{scriptPath}, - Timeout: 150 * time.Millisecond, - Env: map[string]string{ - "HOOK_TEST_CHILD_PID_FILE": pidFile, - }, - }}, - }, - }, HookPayload{}) - - if len(results) != 1 { - t.Fatalf("RunHooks() len = %d, want 1", len(results)) - } - if results[0].Error == nil { - t.Fatal("results[0].Error = nil, want timeout error") - } - - pidBytes, err := os.ReadFile(pidFile) - if err != nil { - t.Fatalf("ReadFile(%q) error = %v", pidFile, err) - } - pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))) - if err != nil { - t.Fatalf("Atoi(%q) error = %v", strings.TrimSpace(string(pidBytes)), err) - } - - deadline := time.Now().Add(2 * time.Second) - for time.Now().Before(deadline) { - if !procutil.Alive(pid) { - return - } - time.Sleep(10 * time.Millisecond) - } - - t.Fatalf("child process %d still alive after hook timeout cleanup", pid) -} - -func TestHookRunnerRunHooksDoesNotInheritAmbientEnvironment(t *testing.T) { - t.Setenv("HOOK_TEST_AMBIENT_SECRET", "ambient-secret") - - runner, logs := newHookRunnerForTest() - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - newSkillWithHook("ambient-env-skill", SourceUser, HookDecl{ - Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{"-c", `printf '%s' "${HOOK_TEST_AMBIENT_SECRET:-}"`}, - }), - }, HookPayload{}) - - if len(results) != 1 { - t.Fatalf("RunHooks() len = %d, want 1", len(results)) - } - if got := results[0].Output; got != "" { - t.Fatalf("results[0].Output = %q, want ambient secret to be absent", got) - } - if logs.Len() != 0 { - t.Fatalf("logs = %q, want empty logs", logs.String()) - } -} - -func TestHookRunnerRunHooksCapsCapturedOutput(t *testing.T) { - t.Parallel() - - runner, logs := newHookRunnerForTest() - script := hookScriptPath(t) - outputValue := strings.Repeat("x", hookCaptureLimitBytes+128) - - results := runner.RunHooks(t.Context(), HookSessionCreated, []*Skill{ - newSkillWithHook("chatty-skill", 
SourceUser, HookDecl{ - Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{script}, - Env: map[string]string{ - "HOOK_TEST_OUTPUT": outputValue, - "HOOK_TEST_STDERR": outputValue, - "HOOK_TEST_EXIT_CODE": "7", - }, - }), - }, HookPayload{}) - - if len(results) != 1 { - t.Fatalf("RunHooks() len = %d, want 1", len(results)) - } - if !strings.Contains(results[0].Output, hookCaptureTruncateNote) { - t.Fatalf("results[0].Output = %q, want truncation marker", results[0].Output) - } - - output := logs.String() - if strings.Contains(output, outputValue[:64]) { - t.Fatalf("logs = %q, want large hook output redacted", output) - } - if !strings.Contains(output, "redacted output") { - t.Fatalf("logs = %q, want redacted output summary", output) - } -} - -func newHookRunnerForTest(cfgs ...aghconfig.SkillsConfig) (*HookRunner, *bytes.Buffer) { - var logs bytes.Buffer - logger := slog.New(slog.NewTextHandler(&logs, nil)) - cfg := aghconfig.SkillsConfig{} - if len(cfgs) > 0 { - cfg = cfgs[0] - } - return NewHookRunner(cfg, logger), &logs -} - -func newSkillWithHook(name string, source SkillSource, hook HookDecl) *Skill { - skill := &Skill{ - Meta: SkillMeta{ - Name: name, - Description: "test skill", - }, - Source: source, - Hooks: []HookDecl{hook}, - } - if source == SourceMarketplace { - skill.Provenance = &Provenance{ - Hash: "hash-" + name, - Registry: "clawhub", - Slug: "@test/" + name, - } - } - return skill -} - -func hookOutput(scriptPath string, output string) HookDecl { - return HookDecl{ - Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{scriptPath}, - Env: map[string]string{ - "HOOK_TEST_OUTPUT": output, - }, - } -} - -func hookScriptPath(t *testing.T) string { - t.Helper() - - path, err := filepath.Abs("testdata/hooks/driver.sh") - if err != nil { - t.Fatalf("filepath.Abs(): %v", err) - } - - return path -} diff --git a/internal/skills/loader.go b/internal/skills/loader.go index 0557d6e73..da77bc4eb 100644 --- a/internal/skills/loader.go 
+++ b/internal/skills/loader.go @@ -1,6 +1,7 @@ package skills import ( + "bytes" "errors" "fmt" "io/fs" @@ -13,6 +14,7 @@ import ( "github.com/pedronauck/agh/internal/filesnap" "github.com/pedronauck/agh/internal/frontmatter" + hookspkg "github.com/pedronauck/agh/internal/hooks" "gopkg.in/yaml.v3" ) @@ -95,6 +97,7 @@ func parseSkillDocument(filePath string, dir string, content []byte, source Skil if err := parseAGHMetadata(skill); err != nil { return nil, "", fmt.Errorf("skills: parse %q metadata.agh: %w", filePath, err) } + refreshSkillHookDecls(skill) if skill.Meta.Description == "" { slog.Warn("skills: parsed skill without description", "path", filePath, "name", skill.Meta.Name) } @@ -287,14 +290,25 @@ func parseMCPServerDecls(skill *Skill, raw any) []MCPServerDecl { return slices.Clip(servers) } -func parseHookDecls(skill *Skill, raw any) ([]HookDecl, error) { +type parsedSkillHookDecl struct { + Event string `yaml:"event"` + Command string `yaml:"command"` + Args []string `yaml:"args,omitempty"` + Timeout time.Duration `yaml:"timeout,omitempty"` + Env map[string]string `yaml:"env,omitempty"` + Mode hookspkg.HookMode `yaml:"mode,omitempty"` + Priority *int `yaml:"priority,omitempty"` + Matcher hookspkg.HookMatcher `yaml:"matcher,omitempty"` +} + +func parseHookDecls(skill *Skill, raw any) ([]hookspkg.HookDecl, error) { items, ok := raw.([]any) if !ok { warnAGHMetadata(skill, "skills: malformed metadata.agh.hooks field", "type", fmt.Sprintf("%T", raw)) return nil, nil } - hooks := make([]HookDecl, 0, len(items)) + hooks := make([]hookspkg.HookDecl, 0, len(items)) for idx, item := range items { entry, ok := item.(map[string]any) if !ok { @@ -302,18 +316,54 @@ func parseHookDecls(skill *Skill, raw any) ([]HookDecl, error) { continue } - event := HookEvent(strings.TrimSpace(stringValue(entry["event"]))) + decoded, err := decodeSkillHookDecl(entry) + if err != nil { + return nil, fmt.Errorf( + "skills: invalid metadata.agh.hooks entry for %q at index %d: %w", + 
skillIdentifier(skill), + idx, + err, + ) + } + + eventValue := strings.TrimSpace(decoded.Event) + event := hookspkg.HookEvent(eventValue) + if replacement, ok := legacyHookEventReplacement(eventValue); ok { + return nil, fmt.Errorf( + "skills: invalid metadata.agh.hooks entry for %q at index %d: hook event %q was removed; use %q", + skillIdentifier(skill), + idx, + eventValue, + replacement, + ) + } if !validHookEvent(event) { - warnAGHMetadata(skill, "skills: invalid metadata.agh.hooks entry", "index", idx, "reason", "unknown event", "event", event) - continue + return nil, fmt.Errorf( + "skills: invalid metadata.agh.hooks entry for %q at index %d: unknown hook event %q", + skillIdentifier(skill), + idx, + eventValue, + ) } - hook := HookDecl{ - Event: event, - Command: strings.TrimSpace(stringValue(entry["command"])), - Args: stringSliceValue(skill, "metadata.agh.hooks", idx, "args", entry["args"]), - Env: stringMapValue(skill, "metadata.agh.hooks", idx, "env", entry["env"]), - Timeout: durationValue(skill, "metadata.agh.hooks", idx, "timeout", entry["timeout"]), + mode := decoded.Mode + if mode == "" { + mode = hookspkg.HookModeAsync + } + hook := normalizeSkillHookDecl(skill, hookspkg.HookDecl{ + Name: skillHookName(skill, idx, len(items)), + Event: event, + Mode: mode, + Priority: 0, + Timeout: decoded.Timeout, + Matcher: decoded.Matcher, + Command: strings.TrimSpace(decoded.Command), + Args: append([]string(nil), decoded.Args...), + Env: cloneStringMap(decoded.Env), + PrioritySet: decoded.Priority != nil, + }, idx, len(items)) + if decoded.Priority != nil { + hook.Priority = *decoded.Priority } if hook.Command == "" { return nil, fmt.Errorf( @@ -322,6 +372,14 @@ func parseHookDecls(skill *Skill, raw any) ([]HookDecl, error) { idx, ) } + if err := hookspkg.ValidateHookDecl(hook); err != nil { + return nil, fmt.Errorf( + "skills: invalid metadata.agh.hooks entry for %q at index %d: %w", + skillIdentifier(skill), + idx, + err, + ) + } hooks = append(hooks, 
hook) } @@ -333,12 +391,35 @@ func parseHookDecls(skill *Skill, raw any) ([]HookDecl, error) { return slices.Clip(hooks), nil } -func validHookEvent(event HookEvent) bool { - switch event { - case HookSessionCreated, HookSessionStopped: - return true +func decodeSkillHookDecl(entry map[string]any) (parsedSkillHookDecl, error) { + var decoded parsedSkillHookDecl + + payload, err := yaml.Marshal(entry) + if err != nil { + return parsedSkillHookDecl{}, fmt.Errorf("encode hook declaration: %w", err) + } + + decoder := yaml.NewDecoder(bytes.NewReader(payload)) + decoder.KnownFields(true) + if err := decoder.Decode(&decoded); err != nil { + return parsedSkillHookDecl{}, fmt.Errorf("decode hook declaration: %w", err) + } + + return decoded, nil +} + +func validHookEvent(event hookspkg.HookEvent) bool { + return event.Validate() == nil +} + +func legacyHookEventReplacement(event string) (hookspkg.HookEvent, bool) { + switch strings.TrimSpace(event) { + case "on_session_created": + return hookspkg.HookSessionPostCreate, true + case "on_session_stopped": + return hookspkg.HookSessionPostStop, true default: - return false + return "", false } } @@ -427,26 +508,6 @@ func stringMapValue(skill *Skill, scope string, index int, field string, raw any return values } -func durationValue(skill *Skill, scope string, index int, field string, raw any) time.Duration { - if raw == nil { - return 0 - } - - value, ok := raw.(string) - if !ok { - warnAGHMetadata(skill, "skills: malformed metadata duration field", "scope", scope, "index", index, "field", field, "type", fmt.Sprintf("%T", raw)) - return 0 - } - - parsed, err := time.ParseDuration(strings.TrimSpace(value)) - if err != nil { - warnAGHMetadata(skill, "skills: invalid metadata duration value", "scope", scope, "index", index, "field", field, "value", value, "error", err) - return 0 - } - - return parsed -} - func warnAGHMetadata(skill *Skill, message string, args ...any) { attrs := make([]any, 0, len(args)+4) if skill != nil { diff 
--git a/internal/skills/loader_test.go b/internal/skills/loader_test.go index 7495a0f8b..8885882fa 100644 --- a/internal/skills/loader_test.go +++ b/internal/skills/loader_test.go @@ -16,6 +16,7 @@ import ( "github.com/pedronauck/agh/internal/filesnap" "github.com/pedronauck/agh/internal/frontmatter" + hookspkg "github.com/pedronauck/agh/internal/hooks" ) func TestParseSkillContentValidCases(t *testing.T) { @@ -328,7 +329,7 @@ func TestParseSkillFileParsesAGHMetadataFixtures(t *testing.T) { name string fixture string wantMCP []MCPServerDecl - wantHook []HookDecl + wantHook []hookspkg.HookDecl }{ { name: "mcp servers only", @@ -346,14 +347,17 @@ func TestParseSkillFileParsesAGHMetadataFixtures(t *testing.T) { { name: "hooks only", fixture: "hooks-only", - wantHook: []HookDecl{{ - Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{"-c", "echo ready"}, - Timeout: 5 * time.Second, - Env: map[string]string{ - "HOOK_ENV": "enabled", - }, + wantHook: []hookspkg.HookDecl{{ + Name: "hooks-only", + Event: hookspkg.HookSessionPostCreate, + Source: hookspkg.HookSourceSkill, + Mode: hookspkg.HookModeAsync, + Priority: 0, + Timeout: 5 * time.Second, + Command: "/bin/sh", + Args: []string{"-c", "echo ready"}, + Env: map[string]string{"HOOK_ENV": "enabled"}, + SkillSource: hookspkg.HookSkillSourceBundled, }}, }, { @@ -367,14 +371,17 @@ func TestParseSkillFileParsesAGHMetadataFixtures(t *testing.T) { "REPO_ROOT": "${REPO_ROOT}", }, }}, - wantHook: []HookDecl{{ - Event: HookSessionStopped, - Command: "/usr/bin/env", - Args: []string{"bash", "-lc", "echo cleanup"}, - Timeout: 30 * time.Second, - Env: map[string]string{ - "PHASE": "stop", - }, + wantHook: []hookspkg.HookDecl{{ + Name: "combined", + Event: hookspkg.HookSessionPostStop, + Source: hookspkg.HookSourceSkill, + Mode: hookspkg.HookModeAsync, + Priority: 0, + Timeout: 30 * time.Second, + Command: "/usr/bin/env", + Args: []string{"bash", "-lc", "echo cleanup"}, + Env: map[string]string{"PHASE": "stop"}, + 
SkillSource: hookspkg.HookSkillSourceBundled, }}, }, { @@ -418,7 +425,7 @@ func TestParseBundledSkillParsesAGHMetadata(t *testing.T) { if len(skill.MCPServers) != 1 || skill.MCPServers[0].Name != "git" { t.Fatalf("parseBundledSkill() MCPServers = %#v, want populated git server", skill.MCPServers) } - if len(skill.Hooks) != 1 || skill.Hooks[0].Event != HookSessionStopped { + if len(skill.Hooks) != 1 || skill.Hooks[0].Event != hookspkg.HookSessionPostStop { t.Fatalf("parseBundledSkill() Hooks = %#v, want populated stop hook", skill.Hooks) } @@ -484,27 +491,16 @@ func TestParseSkillFileRejectsInvalidMCPServerEntriesWithWarnings(t *testing.T) } } -func TestParseSkillFileRejectsUnknownHookEventsWithWarnings(t *testing.T) { - original := slog.Default() - var logs bytes.Buffer - slog.SetDefault(slog.New(slog.NewTextHandler(&logs, nil))) - t.Cleanup(func() { - slog.SetDefault(original) - }) - +func TestParseSkillFileRejectsUnknownHookEvents(t *testing.T) { skill, err := ParseSkillFile(loaderFixturePath("invalid-hook")) - if err != nil { - t.Fatalf("ParseSkillFile() error = %v", err) - } - - if skill.Hooks != nil { - t.Fatalf("ParseSkillFile() Hooks = %#v, want nil", skill.Hooks) + if err == nil { + t.Fatal("ParseSkillFile() error = nil, want unknown hook event failure") } - if !strings.Contains(logs.String(), "reason=\"unknown event\"") { - t.Fatalf("expected unknown event warning in logs, got %q", logs.String()) + if skill != nil { + t.Fatalf("ParseSkillFile() skill = %#v, want nil on invalid hook event", skill) } - if !strings.Contains(logs.String(), "event=on_session_started") { - t.Fatalf("expected invalid event value in logs, got %q", logs.String()) + if !strings.Contains(err.Error(), `unknown hook event "foo.bar"`) { + t.Fatalf("ParseSkillFile() error = %v, want unknown event detail", err) } } @@ -523,6 +519,130 @@ func TestParseSkillFileRejectsHooksMissingCommand(t *testing.T) { } } +func TestParseSkillFileRejectsLegacyHookEventsWithReplacement(t *testing.T) { + 
t.Parallel() + + root := t.TempDir() + path := writeSkillFile(t, root, filepath.Join("legacy-hook", skillFileName), strings.Join([]string{ + "---", + "name: legacy-hook", + "description: Legacy hook names are rejected", + "metadata:", + " agh:", + " hooks:", + " - event: on_session_created", + " command: /bin/echo", + "---", + "body", + }, "\n")) + + _, err := ParseSkillFile(path) + if err == nil { + t.Fatal("ParseSkillFile() error = nil, want legacy hook event failure") + } + if !strings.Contains(err.Error(), `hook event "on_session_created" was removed; use "session.post_create"`) { + t.Fatalf("ParseSkillFile() error = %v, want replacement guidance", err) + } +} + +func TestParseSkillFileParsesHookOptionalFields(t *testing.T) { + t.Parallel() + + root := t.TempDir() + path := writeSkillFile(t, root, filepath.Join("hook-options", skillFileName), strings.Join([]string{ + "---", + "name: hook-options", + "description: Hook with optional fields", + "metadata:", + " agh:", + " hooks:", + " - event: session.post_create", + " command: /bin/echo", + " mode: sync", + " priority: 7", + " matcher:", + " agent_name: codex", + " workspace_id: ws-1", + "---", + "body", + }, "\n")) + + skill, err := ParseSkillFile(path) + if err != nil { + t.Fatalf("ParseSkillFile() error = %v", err) + } + + want := hookspkg.HookDecl{ + Name: "hook-options", + Event: hookspkg.HookSessionPostCreate, + Source: hookspkg.HookSourceSkill, + Mode: hookspkg.HookModeSync, + Priority: 7, + PrioritySet: true, + Command: "/bin/echo", + Matcher: hookspkg.HookMatcher{ + AgentName: "codex", + WorkspaceID: "ws-1", + }, + SkillSource: hookspkg.HookSkillSourceBundled, + } + if got := skill.Hooks; !reflect.DeepEqual(got, []hookspkg.HookDecl{want}) { + t.Fatalf("ParseSkillFile() Hooks mismatch\nwant: %#v\ngot: %#v", []hookspkg.HookDecl{want}, got) + } +} + +func TestParseSkillFileDefaultsMinimalHookFields(t *testing.T) { + t.Parallel() + + root := t.TempDir() + path := writeSkillFile(t, root, 
filepath.Join("hook-defaults", skillFileName), strings.Join([]string{ + "---", + "name: hook-defaults", + "description: Minimal hook declaration", + "metadata:", + " agh:", + " hooks:", + " - event: session.post_create", + " command: /bin/echo", + "---", + "body", + }, "\n")) + + skill, err := ParseSkillFile(path) + if err != nil { + t.Fatalf("ParseSkillFile() error = %v", err) + } + if len(skill.Hooks) != 1 { + t.Fatalf("len(skill.Hooks) = %d, want 1", len(skill.Hooks)) + } + hook := skill.Hooks[0] + if hook.Mode != hookspkg.HookModeAsync { + t.Fatalf("hook.Mode = %q, want %q", hook.Mode, hookspkg.HookModeAsync) + } + if hook.Priority != 0 { + t.Fatalf("hook.Priority = %d, want 0", hook.Priority) + } + if hook.PrioritySet { + t.Fatal("hook.PrioritySet = true, want false for default priority") + } + if hook.Source != hookspkg.HookSourceSkill { + t.Fatalf("hook.Source = %q, want skill source", hook.Source) + } + if hook.Name != "hook-defaults" { + t.Fatalf("hook.Name = %q, want %q", hook.Name, "hook-defaults") + } +} + +func TestSkillHooksFieldUsesInternalHooksDeclarations(t *testing.T) { + t.Parallel() + + got := reflect.TypeOf(Skill{}.Hooks) + want := reflect.TypeOf([]hookspkg.HookDecl(nil)) + if got != want { + t.Fatalf("reflect.TypeOf(Skill{}.Hooks) = %v, want %v", got, want) + } +} + func TestScanDirectoryHonorsDepthAndSkips(t *testing.T) { t.Parallel() diff --git a/internal/skills/registry.go b/internal/skills/registry.go index c839d7741..654a74df8 100644 --- a/internal/skills/registry.go +++ b/internal/skills/registry.go @@ -15,6 +15,7 @@ import ( "time" "github.com/pedronauck/agh/internal/filesnap" + hookspkg "github.com/pedronauck/agh/internal/hooks" workspacepkg "github.com/pedronauck/agh/internal/workspace" ) @@ -292,6 +293,7 @@ func (r *Registry) loadWorkspaceSkills(ctx context.Context, paths []workspaceSki return nil, err } skill.Source = path.source + refreshSkillHookDecls(skill) if !r.processSkill(skills, skill, content, disabledSkills) { continue } 
@@ -392,6 +394,7 @@ func (r *Registry) assignSourceAndProvenance(skill *Skill, source SkillSource) e skill.Source = source if source != SourceUser { + refreshSkillHookDecls(skill) return nil } @@ -400,6 +403,7 @@ func (r *Registry) assignSourceAndProvenance(skill *Skill, source SkillSource) e return err } if !hasSidecar { + refreshSkillHookDecls(skill) return nil } @@ -411,6 +415,7 @@ func (r *Registry) assignSourceAndProvenance(skill *Skill, source SkillSource) e skill.Source = SourceMarketplace skill.Provenance = provenance skill.InstalledFrom = strings.TrimSpace(provenance.Slug) + refreshSkillHookDecls(skill) return nil } @@ -701,7 +706,20 @@ func cloneSkill(skill *Skill) *Skill { clone := *skill clone.Meta = cloneSkillMeta(skill.Meta) clone.MCPServers = cloneMCPServerDecls(skill.MCPServers) - clone.Hooks = cloneHookDecls(skill.Hooks) + if len(skill.Hooks) > 0 { + clone.Hooks = make([]hookspkg.HookDecl, 0, len(skill.Hooks)) + for idx, decl := range skill.Hooks { + cloned := decl + cloned.Args = append([]string(nil), decl.Args...) 
+ cloned.Env = cloneStringMap(decl.Env) + cloned.Metadata = cloneStringMap(decl.Metadata) + if decl.Matcher.ToolReadOnly != nil { + value := *decl.Matcher.ToolReadOnly + cloned.Matcher.ToolReadOnly = &value + } + clone.Hooks = append(clone.Hooks, normalizeSkillHookDecl(skill, cloned, idx, len(skill.Hooks))) + } + } clone.Provenance = cloneProvenance(skill.Provenance) return &clone @@ -759,25 +777,6 @@ func cloneMCPServerDecls(decls []MCPServerDecl) []MCPServerDecl { return clone } -func cloneHookDecls(decls []HookDecl) []HookDecl { - if decls == nil { - return nil - } - - clone := make([]HookDecl, len(decls)) - for i, decl := range decls { - clone[i] = HookDecl{ - Event: decl.Event, - Command: decl.Command, - Args: append([]string(nil), decl.Args...), - Timeout: decl.Timeout, - Env: cloneStringMap(decl.Env), - } - } - - return clone -} - func cloneStringMap(input map[string]string) map[string]string { if input == nil { return nil diff --git a/internal/skills/registry_test.go b/internal/skills/registry_test.go index 7c1161861..34fc7893a 100644 --- a/internal/skills/registry_test.go +++ b/internal/skills/registry_test.go @@ -14,6 +14,7 @@ import ( "testing/fstest" "time" + hookspkg "github.com/pedronauck/agh/internal/hooks" workspacepkg "github.com/pedronauck/agh/internal/workspace" ) @@ -874,14 +875,16 @@ func TestSkillTypesSupportMarketplaceDeclarations(t *testing.T) { "ROOT": "/workspace", }, } - hook := HookDecl{ - Event: HookSessionCreated, - Command: "/bin/sh", - Args: []string{"-c", "echo ready"}, - Timeout: 5 * time.Second, - Env: map[string]string{ - "HOOK_ENV": "enabled", - }, + hook := hookspkg.HookDecl{ + Name: "marketplace-skill", + Event: hookspkg.HookSessionPostCreate, + Source: hookspkg.HookSourceSkill, + Mode: hookspkg.HookModeAsync, + Command: "/bin/sh", + Args: []string{"-c", "echo ready"}, + Timeout: 5 * time.Second, + Env: map[string]string{"HOOK_ENV": "enabled"}, + SkillSource: hookspkg.HookSkillSourceMarketplace, } provenance := Provenance{ 
Hash: "abc123", @@ -893,7 +896,8 @@ func TestSkillTypesSupportMarketplaceDeclarations(t *testing.T) { skill := Skill{ Meta: SkillMeta{Name: "marketplace-skill", Description: "Marketplace skill"}, MCPServers: []MCPServerDecl{mcp}, - Hooks: []HookDecl{hook}, + Source: SourceMarketplace, + Hooks: []hookspkg.HookDecl{hook}, Provenance: &provenance, InstalledFrom: "@author/skill", } @@ -910,14 +914,17 @@ func TestSkillTypesSupportMarketplaceDeclarations(t *testing.T) { if skill.MCPServers[0].Env["ROOT"] != "/workspace" { t.Fatalf("MCPServers[0].Env[ROOT] = %q, want %q", skill.MCPServers[0].Env["ROOT"], "/workspace") } - if skill.Hooks[0].Event != HookSessionCreated { - t.Fatalf("Hooks[0].Event = %q, want %q", skill.Hooks[0].Event, HookSessionCreated) + if skill.Hooks[0].Event != hookspkg.HookSessionPostCreate { + t.Fatalf("Hooks[0].Event = %q, want %q", skill.Hooks[0].Event, hookspkg.HookSessionPostCreate) + } + if string(skill.Hooks[0].Event) != "session.post_create" { + t.Fatalf("Hooks[0].Event = %q, want %q", skill.Hooks[0].Event, "session.post_create") } - if string(HookSessionCreated) != "on_session_created" { - t.Fatalf("HookSessionCreated = %q, want %q", HookSessionCreated, "on_session_created") + if skill.Hooks[0].Source != hookspkg.HookSourceSkill { + t.Fatalf("Hooks[0].Source = %q, want %q", skill.Hooks[0].Source, hookspkg.HookSourceSkill) } - if string(HookSessionStopped) != "on_session_stopped" { - t.Fatalf("HookSessionStopped = %q, want %q", HookSessionStopped, "on_session_stopped") + if skill.Hooks[0].SkillSource != hookspkg.HookSkillSourceMarketplace { + t.Fatalf("Hooks[0].SkillSource = %q, want %q", skill.Hooks[0].SkillSource, hookspkg.HookSkillSourceMarketplace) } if skill.Hooks[0].Timeout != 5*time.Second { t.Fatalf("Hooks[0].Timeout = %s, want %s", skill.Hooks[0].Timeout, 5*time.Second) @@ -962,7 +969,8 @@ func TestCloneSkillDeepCopiesExtendedFields(t *testing.T) { installedAt := time.Date(2026, 4, 7, 9, 30, 0, 0, time.UTC) original := &Skill{ - Meta: 
SkillMeta{Name: "clone", Description: "Clone extended fields"}, + Meta: SkillMeta{Name: "clone", Description: "Clone extended fields"}, + Source: SourceWorkspace, MCPServers: []MCPServerDecl{{ Name: "server", Command: "cmd", @@ -971,14 +979,16 @@ func TestCloneSkillDeepCopiesExtendedFields(t *testing.T) { "ROOT": "/tmp/original", }, }}, - Hooks: []HookDecl{{ - Event: HookSessionStopped, - Command: "hook", - Args: []string{"cleanup"}, - Timeout: time.Second, - Env: map[string]string{ - "PHASE": "stop", - }, + Hooks: []hookspkg.HookDecl{{ + Name: "clone", + Event: hookspkg.HookSessionPostStop, + Source: hookspkg.HookSourceSkill, + Mode: hookspkg.HookModeAsync, + Command: "hook", + Args: []string{"cleanup"}, + Timeout: time.Second, + Env: map[string]string{"PHASE": "stop"}, + SkillSource: hookspkg.HookSkillSourceWorkspace, }}, Provenance: &Provenance{ Hash: "hash-original", diff --git a/internal/skills/testdata/loader/combined/SKILL.md b/internal/skills/testdata/loader/combined/SKILL.md index 4c6570df0..739d056a5 100644 --- a/internal/skills/testdata/loader/combined/SKILL.md +++ b/internal/skills/testdata/loader/combined/SKILL.md @@ -11,7 +11,7 @@ metadata: env: REPO_ROOT: "${REPO_ROOT}" hooks: - - event: on_session_stopped + - event: session.post_stop command: /usr/bin/env args: - bash diff --git a/internal/skills/testdata/loader/hooks-only/SKILL.md b/internal/skills/testdata/loader/hooks-only/SKILL.md index f00a69479..23fe186e7 100644 --- a/internal/skills/testdata/loader/hooks-only/SKILL.md +++ b/internal/skills/testdata/loader/hooks-only/SKILL.md @@ -4,7 +4,7 @@ description: Skill with hook metadata only metadata: agh: hooks: - - event: on_session_created + - event: session.post_create command: /bin/sh args: - -c diff --git a/internal/skills/testdata/loader/invalid-hook-command/SKILL.md b/internal/skills/testdata/loader/invalid-hook-command/SKILL.md index b3c4d2efc..2897701c0 100644 --- a/internal/skills/testdata/loader/invalid-hook-command/SKILL.md +++ 
b/internal/skills/testdata/loader/invalid-hook-command/SKILL.md @@ -4,7 +4,7 @@ description: Skill with missing hook command metadata: agh: hooks: - - event: on_session_created + - event: session.post_create args: - invalid --- diff --git a/internal/skills/testdata/loader/invalid-hook/SKILL.md b/internal/skills/testdata/loader/invalid-hook/SKILL.md index 6c119055b..15c133703 100644 --- a/internal/skills/testdata/loader/invalid-hook/SKILL.md +++ b/internal/skills/testdata/loader/invalid-hook/SKILL.md @@ -4,7 +4,7 @@ description: Skill with invalid hook event metadata: agh: hooks: - - event: on_session_started + - event: foo.bar command: /bin/echo args: - invalid diff --git a/internal/skills/types.go b/internal/skills/types.go index 3eba10524..2d5eeb5d6 100644 --- a/internal/skills/types.go +++ b/internal/skills/types.go @@ -5,6 +5,8 @@ package skills import ( "io/fs" "time" + + hookspkg "github.com/pedronauck/agh/internal/hooks" ) // SkillMeta maps YAML frontmatter fields per the AgentSkills spec. @@ -23,7 +25,7 @@ type Skill struct { FilePath string Enabled bool MCPServers []MCPServerDecl - Hooks []HookDecl + Hooks []hookspkg.HookDecl Provenance *Provenance InstalledFrom string } @@ -52,23 +54,6 @@ type MCPServerDecl struct { Env map[string]string `yaml:"env,omitempty"` } -// HookDecl declares a lifecycle hook in skill frontmatter. -type HookDecl struct { - Event HookEvent `yaml:"event"` - Command string `yaml:"command"` - Args []string `yaml:"args,omitempty"` - Timeout time.Duration `yaml:"timeout,omitempty"` - Env map[string]string `yaml:"env,omitempty"` -} - -// HookEvent identifies when a hook fires. -type HookEvent string - -const ( - HookSessionCreated HookEvent = "on_session_created" - HookSessionStopped HookEvent = "on_session_stopped" -) - // Provenance stores marketplace install metadata for a skill. 
type Provenance struct { Hash string `json:"hash"` diff --git a/internal/skills/watcher.go b/internal/skills/watcher.go index 190a1dd53..85f505dc5 100644 --- a/internal/skills/watcher.go +++ b/internal/skills/watcher.go @@ -34,6 +34,8 @@ type Watcher struct { roots []string logger *slog.Logger + afterRefresh func(context.Context) error + mu sync.Mutex initialized bool snapshots map[string]filesnap.Snapshot @@ -70,6 +72,15 @@ func newWatcher(registry globalRefresher, interval time.Duration, roots []string } } +// SetAfterRefresh installs an optional callback that runs after a successful +// registry refresh and before watcher snapshots are committed. +func (w *Watcher) SetAfterRefresh(callback func(context.Context) error) { + if w == nil { + return + } + w.afterRefresh = callback +} + // Start runs the polling loop until the provided context is cancelled. func (w *Watcher) Start(ctx context.Context) { if ctx == nil { @@ -121,6 +132,11 @@ func (w *Watcher) pollOnce(ctx context.Context) error { return fmt.Errorf("skills: refresh global registry: %w", err) } } + if w.afterRefresh != nil { + if err := w.afterRefresh(ctx); err != nil { + return fmt.Errorf("skills: run watcher refresh callback: %w", err) + } + } w.commitSnapshots(snapshots) return nil diff --git a/internal/store/sessiondb/hook_runs_test.go b/internal/store/sessiondb/hook_runs_test.go new file mode 100644 index 000000000..41da88173 --- /dev/null +++ b/internal/store/sessiondb/hook_runs_test.go @@ -0,0 +1,235 @@ +package sessiondb + +import ( + "encoding/json" + "testing" + "time" + + hookspkg "github.com/pedronauck/agh/internal/hooks" + "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/testutil" +) + +func TestSessionDBRecordHookRunPersistsSecurityPatchFields(t *testing.T) { + t.Parallel() + + sessionDB := openTestSessionDB(t, "sess-hook-run") + recordedAt := time.Date(2026, 4, 9, 18, 0, 0, 0, time.UTC) + patch := json.RawMessage(`{"decision":"deny","reason":"policy"}`) + + 
if err := sessionDB.RecordHookRun(testutil.Context(t), hookspkg.HookRunRecord{ + HookName: "permission-audit", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Duration: 25 * time.Millisecond, + Outcome: hookspkg.HookRunOutcomeDenied, + DispatchDepth: 2, + PatchApplied: patch, + Error: "denied by policy", + Required: true, + RecordedAt: recordedAt, + }); err != nil { + t.Fatalf("RecordHookRun() error = %v", err) + } + + records, err := sessionDB.QueryHookRuns(testutil.Context(t), store.HookRunQuery{}) + if err != nil { + t.Fatalf("QueryHookRuns() error = %v", err) + } + if got, want := len(records), 1; got != want { + t.Fatalf("len(records) = %d, want %d", got, want) + } + + record := records[0] + if record.HookName != "permission-audit" { + t.Fatalf("record.HookName = %q, want permission-audit", record.HookName) + } + if record.Event != hookspkg.HookPermissionRequest { + t.Fatalf("record.Event = %q, want %q", record.Event, hookspkg.HookPermissionRequest) + } + if record.Source != hookspkg.HookSourceConfig { + t.Fatalf("record.Source = %q, want %q", record.Source, hookspkg.HookSourceConfig) + } + if record.Mode != hookspkg.HookModeSync { + t.Fatalf("record.Mode = %q, want %q", record.Mode, hookspkg.HookModeSync) + } + if record.Duration != 25*time.Millisecond { + t.Fatalf("record.Duration = %s, want 25ms", record.Duration) + } + if record.Outcome != hookspkg.HookRunOutcomeDenied { + t.Fatalf("record.Outcome = %q, want %q", record.Outcome, hookspkg.HookRunOutcomeDenied) + } + if record.DispatchDepth != 2 { + t.Fatalf("record.DispatchDepth = %d, want 2", record.DispatchDepth) + } + if string(record.PatchApplied) != string(patch) { + t.Fatalf("record.PatchApplied = %s, want %s", record.PatchApplied, patch) + } + if record.Error != "denied by policy" { + t.Fatalf("record.Error = %q, want denied by policy", record.Error) + } + if !record.Required { + t.Fatal("record.Required = false, want true") + } + if 
!record.RecordedAt.Equal(recordedAt) { + t.Fatalf("record.RecordedAt = %s, want %s", record.RecordedAt, recordedAt) + } +} + +func TestSessionDBQueryHookRunsFiltersByEvent(t *testing.T) { + t.Parallel() + + sessionDB := openTestSessionDB(t, "sess-hook-filter") + records := []hookspkg.HookRunRecord{ + { + HookName: "prompt-hook", + Event: hookspkg.HookPromptPostAssemble, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + DispatchDepth: 1, + RecordedAt: time.Date(2026, 4, 9, 18, 1, 0, 0, time.UTC), + }, + { + HookName: "permission-hook", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeDenied, + DispatchDepth: 1, + RecordedAt: time.Date(2026, 4, 9, 18, 2, 0, 0, time.UTC), + }, + } + + for _, record := range records { + if err := sessionDB.RecordHookRun(testutil.Context(t), record); err != nil { + t.Fatalf("RecordHookRun(%q) error = %v", record.HookName, err) + } + } + + filtered, err := sessionDB.QueryHookRuns(testutil.Context(t), store.HookRunQuery{Event: hookspkg.HookPermissionRequest.String()}) + if err != nil { + t.Fatalf("QueryHookRuns(filtered) error = %v", err) + } + if got, want := len(filtered), 1; got != want { + t.Fatalf("len(filtered) = %d, want %d", got, want) + } + if filtered[0].HookName != "permission-hook" { + t.Fatalf("filtered[0].HookName = %q, want permission-hook", filtered[0].HookName) + } +} + +func TestSessionDBQueryHookRunsFiltersByOutcome(t *testing.T) { + t.Parallel() + + sessionDB := openTestSessionDB(t, "sess-hook-outcome") + records := []hookspkg.HookRunRecord{ + { + HookName: "applied-hook", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + RecordedAt: time.Date(2026, 4, 9, 18, 3, 0, 0, time.UTC), + }, + { + HookName: "failed-hook", + Event: hookspkg.HookPermissionRequest, + 
Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeFailed, + RecordedAt: time.Date(2026, 4, 9, 18, 4, 0, 0, time.UTC), + }, + } + + for _, record := range records { + if err := sessionDB.RecordHookRun(testutil.Context(t), record); err != nil { + t.Fatalf("RecordHookRun(%q) error = %v", record.HookName, err) + } + } + + filtered, err := sessionDB.QueryHookRuns(testutil.Context(t), store.HookRunQuery{Outcome: hookspkg.HookRunOutcomeFailed}) + if err != nil { + t.Fatalf("QueryHookRuns(filtered) error = %v", err) + } + if got, want := len(filtered), 1; got != want { + t.Fatalf("len(filtered) = %d, want %d", got, want) + } + if filtered[0].HookName != "failed-hook" { + t.Fatalf("filtered[0].HookName = %q, want failed-hook", filtered[0].HookName) + } +} + +func TestSessionDBQueryHookRunsAppliesEventOutcomeSinceAndLimitInAscendingOrder(t *testing.T) { + t.Parallel() + + sessionDB := openTestSessionDB(t, "sess-hook-combined") + records := []hookspkg.HookRunRecord{ + { + HookName: "ignore-other-event", + Event: hookspkg.HookPromptPostAssemble, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + RecordedAt: time.Date(2026, 4, 9, 18, 0, 0, 0, time.UTC), + }, + { + HookName: "permission-old", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + RecordedAt: time.Date(2026, 4, 9, 18, 1, 0, 0, time.UTC), + }, + { + HookName: "permission-denied", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeDenied, + RecordedAt: time.Date(2026, 4, 9, 18, 2, 0, 0, time.UTC), + }, + { + HookName: "permission-recent-a", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + RecordedAt: 
time.Date(2026, 4, 9, 18, 3, 0, 0, time.UTC), + }, + { + HookName: "permission-recent-b", + Event: hookspkg.HookPermissionRequest, + Source: hookspkg.HookSourceConfig, + Mode: hookspkg.HookModeSync, + Outcome: hookspkg.HookRunOutcomeApplied, + RecordedAt: time.Date(2026, 4, 9, 18, 4, 0, 0, time.UTC), + }, + } + + for _, record := range records { + if err := sessionDB.RecordHookRun(testutil.Context(t), record); err != nil { + t.Fatalf("RecordHookRun(%q) error = %v", record.HookName, err) + } + } + + filtered, err := sessionDB.QueryHookRuns(testutil.Context(t), store.HookRunQuery{ + Event: hookspkg.HookPermissionRequest.String(), + Outcome: hookspkg.HookRunOutcomeApplied, + Since: time.Date(2026, 4, 9, 18, 0, 30, 0, time.UTC), + Limit: 2, + }) + if err != nil { + t.Fatalf("QueryHookRuns(filtered) error = %v", err) + } + if got, want := len(filtered), 2; got != want { + t.Fatalf("len(filtered) = %d, want %d", got, want) + } + if filtered[0].HookName != "permission-recent-a" || filtered[1].HookName != "permission-recent-b" { + t.Fatalf("filtered = %#v, want ascending last-two applied permission hooks", filtered) + } + if !filtered[0].RecordedAt.Before(filtered[1].RecordedAt) { + t.Fatalf("filtered order = %s then %s, want ascending chronology", filtered[0].RecordedAt, filtered[1].RecordedAt) + } +} diff --git a/internal/store/sessiondb/session_db.go b/internal/store/sessiondb/session_db.go index 1bbb30fb7..459deaf2a 100644 --- a/internal/store/sessiondb/session_db.go +++ b/internal/store/sessiondb/session_db.go @@ -3,6 +3,7 @@ package sessiondb import ( "context" "database/sql" + "encoding/json" "errors" "fmt" "strings" @@ -10,6 +11,7 @@ import ( "sync/atomic" "time" + hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/store" ) @@ -47,6 +49,22 @@ var sessionSchemaStatements = []string{ timestamp TEXT NOT NULL );`, `CREATE INDEX IF NOT EXISTS idx_usage_timestamp ON token_usage(timestamp);`, + `CREATE TABLE IF NOT EXISTS hook_runs ( + 
id TEXT PRIMARY KEY, + hook_name TEXT NOT NULL, + event TEXT NOT NULL, + source TEXT NOT NULL, + mode TEXT NOT NULL, + duration_ns INTEGER NOT NULL, + outcome TEXT NOT NULL, + dispatch_depth INTEGER NOT NULL, + patch_applied TEXT, + error TEXT, + required INTEGER NOT NULL DEFAULT 0, + recorded_at TEXT NOT NULL + );`, + `CREATE INDEX IF NOT EXISTS idx_hook_runs_event ON hook_runs(event);`, + `CREATE INDEX IF NOT EXISTS idx_hook_runs_recorded_at ON hook_runs(recorded_at);`, } const ( @@ -60,6 +78,7 @@ type sessionWriteKind int const ( sessionWriteEvent sessionWriteKind = iota + 1 sessionWriteUsage + sessionWriteHookRun ) type sessionWriteRequest struct { @@ -67,6 +86,7 @@ type sessionWriteRequest struct { kind sessionWriteKind event store.SessionEvent usage store.TokenUsage + hook hookspkg.HookRunRecord result chan error } @@ -200,6 +220,89 @@ func (s *SessionDB) RecordTokenUsage(ctx context.Context, usage store.TokenUsage }) } +// RecordHookRun stores one hook execution audit record in the per-session store. +func (s *SessionDB) RecordHookRun(ctx context.Context, record hookspkg.HookRunRecord) error { + if s == nil { + return errors.New("store: session database is required") + } + if ctx == nil { + return errors.New("store: record hook run context is required") + } + + return s.enqueueWrite(ctx, sessionWriteRequest{ + ctx: ctx, + kind: sessionWriteHookRun, + hook: cloneHookRunRecord(record), + result: make(chan error, 1), + }) +} + +// QueryHookRuns returns persisted hook execution records filtered by the supplied options. 
+func (s *SessionDB) QueryHookRuns(ctx context.Context, query store.HookRunQuery) ([]hookspkg.HookRunRecord, error) { + if s == nil { + return nil, errors.New("store: session database is required") + } + if ctx == nil { + return nil, errors.New("store: query hook runs context is required") + } + if err := query.Validate(); err != nil { + return nil, err + } + if strings.TrimSpace(query.SessionID) != "" && strings.TrimSpace(query.SessionID) != s.sessionID { + return nil, fmt.Errorf("store: hook run query session id %q does not match session database %q", query.SessionID, s.sessionID) + } + if event := strings.TrimSpace(query.Event); event != "" { + if err := hookspkg.HookEvent(event).Validate(); err != nil { + return nil, err + } + } + if query.Outcome != "" { + if err := query.Outcome.Validate(); err != nil { + return nil, err + } + } + + baseQuery := `SELECT rowid, hook_name, event, source, mode, duration_ns, outcome, dispatch_depth, patch_applied, error, required, recorded_at FROM hook_runs` + where, args := store.BuildClauses( + store.StringClause("event", query.Event), + store.StringClause("outcome", string(query.Outcome)), + store.TimeClause("recorded_at", ">=", query.Since), + ) + baseQuery = store.AppendWhere(baseQuery, where) + + sqlQuery := baseQuery + if query.Limit > 0 { + sqlQuery = `SELECT rowid, hook_name, event, source, mode, duration_ns, outcome, dispatch_depth, patch_applied, error, required, recorded_at + FROM (` + baseQuery + ` ORDER BY recorded_at DESC, rowid DESC LIMIT ?) AS recent_hook_runs + ORDER BY recorded_at ASC, rowid ASC` + args = append(args, query.Limit) + } else { + sqlQuery += " ORDER BY recorded_at ASC, rowid ASC" + } + + rows, err := s.db.QueryContext(ctx, sqlQuery, args...) 
+ if err != nil { + return nil, fmt.Errorf("store: query hook runs: %w", err) + } + defer func() { + _ = rows.Close() + }() + + records := make([]hookspkg.HookRunRecord, 0) + for rows.Next() { + record, scanErr := s.scanHookRunRecord(rows) + if scanErr != nil { + return nil, scanErr + } + records = append(records, record) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("store: iterate hook runs: %w", err) + } + + return records, nil +} + // Query returns events filtered by the supplied options. func (s *SessionDB) Query(ctx context.Context, query store.EventQuery) ([]store.SessionEvent, error) { if s == nil { @@ -384,6 +487,8 @@ func (s *SessionDB) executeWrite(req sessionWriteRequest) error { return s.writeEvent(req.ctx, req.event) case sessionWriteUsage: return s.writeTokenUsage(req.ctx, req.usage) + case sessionWriteHookRun: + return s.writeHookRun(req.ctx, req.hook) default: return fmt.Errorf("store: unsupported session write kind %d", req.kind) } @@ -462,6 +567,49 @@ func (s *SessionDB) writeTokenUsage(ctx context.Context, usage store.TokenUsage) return nil } +func (s *SessionDB) writeHookRun(ctx context.Context, record hookspkg.HookRunRecord) error { + if strings.TrimSpace(record.HookName) == "" { + return errors.New("store: hook run hook name is required") + } + if err := record.Event.Validate(); err != nil { + return err + } + if err := record.Source.Validate(); err != nil { + return err + } + if err := record.Mode.Validate(); err != nil { + return err + } + if record.RecordedAt.IsZero() { + record.RecordedAt = s.now() + } + + id := store.NewID("hook") + if _, err := s.db.ExecContext( + ctx, + `INSERT INTO hook_runs ( + id, hook_name, event, source, mode, duration_ns, outcome, dispatch_depth, + patch_applied, error, required, recorded_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + id, + record.HookName, + record.Event.String(), + record.Source.String(), + string(record.Mode), + record.Duration.Nanoseconds(), + 
string(record.Outcome), + record.DispatchDepth, + store.NullableString(rawJSONText(record.PatchApplied)), + store.NullableString(record.Error), + boolToSQLite(record.Required), + store.FormatTimestamp(record.RecordedAt), + ); err != nil { + return fmt.Errorf("store: insert hook run: %w", err) + } + + return nil +} + func (s *SessionDB) scanSessionEvent(scanner rowScanner) (store.SessionEvent, error) { var ( event store.SessionEvent @@ -488,6 +636,63 @@ func (s *SessionDB) scanSessionEvent(scanner rowScanner) (store.SessionEvent, er return event, nil } +func (s *SessionDB) scanHookRunRecord(scanner rowScanner) (hookspkg.HookRunRecord, error) { + var ( + record hookspkg.HookRunRecord + rowID int64 + event string + source string + mode string + durationNS int64 + outcome string + patchApplied sql.NullString + recordError sql.NullString + required int64 + recordedAtRaw string + ) + + if err := scanner.Scan( + &rowID, + &record.HookName, + &event, + &source, + &mode, + &durationNS, + &outcome, + &record.DispatchDepth, + &patchApplied, + &recordError, + &required, + &recordedAtRaw, + ); err != nil { + return hookspkg.HookRunRecord{}, fmt.Errorf("store: scan hook run: %w", err) + } + + record.Event = hookspkg.HookEvent(strings.TrimSpace(event)) + if err := record.Event.Validate(); err != nil { + return hookspkg.HookRunRecord{}, err + } + if err := record.Source.UnmarshalText([]byte(strings.TrimSpace(source))); err != nil { + return hookspkg.HookRunRecord{}, err + } + record.Mode = hookspkg.HookMode(strings.TrimSpace(mode)) + record.Duration = time.Duration(durationNS) + record.Outcome = hookspkg.HookRunOutcome(strings.TrimSpace(outcome)) + record.Required = required != 0 + record.Error = strings.TrimSpace(recordError.String) + if patchApplied.Valid && strings.TrimSpace(patchApplied.String) != "" { + record.PatchApplied = json.RawMessage(patchApplied.String) + } + + recordedAt, err := store.ParseTimestamp(recordedAtRaw) + if err != nil { + return hookspkg.HookRunRecord{}, 
err + } + record.RecordedAt = recordedAt + _ = rowID + return cloneHookRunRecord(record), nil +} + func currentMaxSequence(ctx context.Context, db *sql.DB) (int64, error) { var sequence int64 if err := db.QueryRowContext(ctx, "SELECT COALESCE(MAX(sequence), 0) FROM events").Scan(&sequence); err != nil { @@ -526,3 +731,27 @@ func openSessionSQLite(ctx context.Context, path string) (*sql.DB, error) { return store.EnsureSchema(ctx, db, sessionSchemaStatements) }) } + +func rawJSONText(raw json.RawMessage) string { + return strings.TrimSpace(string(raw)) +} + +func boolToSQLite(value bool) int64 { + if value { + return 1 + } + return 0 +} + +func cloneHookRunRecord(src hookspkg.HookRunRecord) hookspkg.HookRunRecord { + cloned := src + cloned.PatchApplied = cloneRawJSON(src.PatchApplied) + return cloned +} + +func cloneRawJSON(src json.RawMessage) json.RawMessage { + if len(src) == 0 { + return nil + } + return append(json.RawMessage(nil), src...) +} diff --git a/internal/store/types.go b/internal/store/types.go index ad99c6ec0..8a19d69b1 100644 --- a/internal/store/types.go +++ b/internal/store/types.go @@ -3,6 +3,8 @@ package store import ( "fmt" "time" + + hookspkg "github.com/pedronauck/agh/internal/hooks" ) // SessionEvent is a persisted event row for a single AGH session. @@ -58,6 +60,25 @@ type TurnHistory struct { Events []SessionEvent } +// HookRunQuery filters persisted per-session hook run records. +type HookRunQuery struct { + SessionID string + Event string + Outcome hookspkg.HookRunOutcome + Since time.Time + Limit int +} + +// Validate ensures the query uses sane bounds. +func (q HookRunQuery) Validate() error { + if q.Outcome != "" { + if err := q.Outcome.Validate(); err != nil { + return err + } + } + return requirePositiveLimit(q.Limit, "hook run limit") +} + // TokenUsage captures per-turn usage data reported by an ACP provider. 
type TokenUsage struct { TurnID string diff --git a/internal/workspace/clone.go b/internal/workspace/clone.go index c86f75362..f1c662cb4 100644 --- a/internal/workspace/clone.go +++ b/internal/workspace/clone.go @@ -3,6 +3,7 @@ package workspace import ( aghconfig "github.com/pedronauck/agh/internal/config" "github.com/pedronauck/agh/internal/filesnap" + hookspkg "github.com/pedronauck/agh/internal/hooks" ) func cloneSnapshots(snapshots map[string]filesnap.Snapshot) map[string]filesnap.Snapshot { @@ -55,9 +56,15 @@ func cloneConfig(src aghconfig.Config) aghconfig.Config { Log: src.Log, Memory: src.Memory, Skills: aghconfig.SkillsConfig{ - Enabled: src.Skills.Enabled, - DisabledSkills: append([]string(nil), src.Skills.DisabledSkills...), - PollInterval: src.Skills.PollInterval, + Enabled: src.Skills.Enabled, + DisabledSkills: append([]string(nil), src.Skills.DisabledSkills...), + PollInterval: src.Skills.PollInterval, + AllowedMarketplaceMCP: append([]string(nil), src.Skills.AllowedMarketplaceMCP...), + AllowedMarketplaceHooks: append([]string(nil), src.Skills.AllowedMarketplaceHooks...), + Marketplace: src.Skills.Marketplace, + }, + Hooks: aghconfig.HooksConfig{ + Declarations: cloneHookDecls(src.Hooks.Declarations), }, } } @@ -98,6 +105,7 @@ func cloneAgentDefs(src []aghconfig.AgentDef) []aghconfig.AgentDef { Tools: append([]string(nil), agent.Tools...), Permissions: agent.Permissions, MCPServers: cloneMCPServers(agent.MCPServers), + Hooks: cloneHookDecls(agent.Hooks), Prompt: agent.Prompt, }) } @@ -142,3 +150,28 @@ func cloneStringMap(src map[string]string) map[string]string { } return cloned } + +func cloneHookDecls(src []hookspkg.HookDecl) []hookspkg.HookDecl { + if len(src) == 0 { + return nil + } + + cloned := make([]hookspkg.HookDecl, 0, len(src)) + for _, decl := range src { + cloned = append(cloned, cloneHookDecl(decl)) + } + + return cloned +} + +func cloneHookDecl(src hookspkg.HookDecl) hookspkg.HookDecl { + cloned := src + cloned.Args = 
append([]string(nil), src.Args...) + cloned.Env = cloneStringMap(src.Env) + cloned.Metadata = cloneStringMap(src.Metadata) + if src.Matcher.ToolReadOnly != nil { + value := *src.Matcher.ToolReadOnly + cloned.Matcher.ToolReadOnly = &value + } + return cloned +}