diff --git a/.agents/skills/kodebase/SKILL.md b/.agents/skills/kodebase/SKILL.md new file mode 100644 index 000000000..d78ca90b2 --- /dev/null +++ b/.agents/skills/kodebase/SKILL.md @@ -0,0 +1,251 @@ +--- +name: kodebase +description: Generates Obsidian knowledge vaults from source code repositories, inspects code metrics such as complexity, coupling, blast radius, dead code, and circular dependencies, indexes vault content for hybrid retrieval, and searches indexed vaults with lexical or vector queries. Use when analyzing a codebase for code quality, architecture health, symbol relationships, or code smells. Use when the task mentions kodebase, code vault, code knowledge base, code graph analysis, or code metrics inspection. Do not use for general code review, linting, formatting, building Go projects, or writing application code. +--- + +# Kodebase CLI + +## Prerequisites + +1. Verify the kodebase binary is available: + ``` + kodebase version + ``` +2. For search and index commands, verify QMD is installed: + ``` + qmd --version + ``` + If missing, install with `npm install -g @tobilu/qmd`. +3. Supported source languages: TypeScript (`.ts`), TSX (`.tsx`), JavaScript (`.js`), JSX (`.jsx`), Go (`.go`). + +## Workflow Overview + +Kodebase operates as a pipeline. The `generate` command must run before any other command. + +**Workflow A -- Code Analysis (no QMD required):** +``` +kodebase generate --> kodebase inspect +``` + +**Workflow B -- Full Pipeline (requires QMD):** +``` +kodebase generate --> kodebase index --> kodebase search +``` + +The vault is stored at `/.kodebase/vault//` by default. Later commands auto-discover this vault by walking up from the current working directory. 
+ +## Command Dispatch + +Map the user's intent to the correct command: + +| Intent | Command | +|--------|---------| +| Analyze a repository for the first time | `kodebase generate <path> --progress never` | +| Find code smells | `kodebase inspect smells --format json` | +| Find dead exports and orphan files | `kodebase inspect dead-code --format json` | +| Rank functions by complexity | `kodebase inspect complexity --format json` | +| Find high-impact symbols (blast radius) | `kodebase inspect blast-radius --min 5 --format json` | +| Find unstable files (coupling) | `kodebase inspect coupling --unstable --format json` | +| Find circular imports | `kodebase inspect circular-deps --format json` | +| Look up a specific symbol | `kodebase inspect symbol <name> --format json` | +| Look up a specific file | `kodebase inspect file <path> --format json` | +| Find what depends on X (incoming refs) | `kodebase inspect backlinks <name-or-path> --format json` | +| Find what X depends on (outgoing deps) | `kodebase inspect deps <name-or-path> --format json` | +| Search the codebase knowledge | `kodebase search "<query>" --format json` | +| Index vault for search | `kodebase index` | + +## Step 1: Generate the Vault + +Run the generate command to create the knowledge vault from source code. + +``` +kodebase generate <path> --progress never +``` + +Always use `--progress never` in agent contexts to prevent TTY progress bars from corrupting stdout. + +Parse the JSON output from stdout to extract key values: +- `topicSlug` -- the topic identifier for later commands +- `vaultPath` -- absolute path to the vault root +- `topicPath` -- absolute path to the topic directory +- `filesScanned`, `filesParsed`, `symbolsExtracted` -- summary statistics +- `diagnostics` -- check for warnings or errors + +Stderr carries structured stage logs. Do not treat stderr content as failure evidence. 
+ +Key flags: +- `--output ` -- override vault root location +- `--topic ` -- override the topic slug +- `--include ` -- re-include paths that would otherwise be ignored (repeatable) +- `--exclude ` -- exclude additional paths from scanning (repeatable) +- `--semantic` -- enable semantic analysis when adapters support it + +Read `references/cli-generate.md` for the full flag table and output schema. + +## Step 2: Inspect the Vault + +Run inspect subcommands to analyze code quality and architecture. + +**Shared flags for all inspect subcommands:** +- `--format json` -- always use JSON for programmatic parsing +- `--vault ` -- explicit vault root (omit to auto-discover from cwd) +- `--topic ` -- explicit topic slug (omit if only one topic exists) + +### Tabular Subcommands + +These return a list of rows sorted by the primary metric: + +1. **smells** -- List symbols and files with detected code smells. + ``` + kodebase inspect smells --format json + kodebase inspect smells --type high-complexity --format json + ``` + +2. **dead-code** -- List dead exports (symbols with no incoming references) and orphan files (unreachable files). + ``` + kodebase inspect dead-code --format json + ``` + +3. **complexity** -- Rank functions/methods by cyclomatic complexity. Default top 20. + ``` + kodebase inspect complexity --format json + kodebase inspect complexity --top 50 --format json + ``` + +4. **blast-radius** -- Rank symbols by transitive dependent count. + ``` + kodebase inspect blast-radius --format json + kodebase inspect blast-radius --min 10 --top 20 --format json + ``` + +5. **coupling** -- Rank files by instability (Ce / (Ca + Ce)). + ``` + kodebase inspect coupling --format json + kodebase inspect coupling --unstable --format json + ``` + +6. **circular-deps** -- List files participating in circular import chains. Returns a message row if no cycles exist. 
+ ``` + kodebase inspect circular-deps --format json + ``` + +### Detail Lookup Subcommands + +These return field-value pairs for a single matched entity: + +7. **symbol \<name\>** -- Case-insensitive substring match. Returns detail fields for a single match, or a summary table for multiple matches. + ``` + kodebase inspect symbol parseConfig --format json + ``` + +8. **file \<path\>** -- Exact source path lookup. Use the source-relative path as stored in vault frontmatter. + ``` + kodebase inspect file src/config.ts --format json + ``` + +### Relation Subcommands + +These return relation edges (`target_path`, `type`, `confidence`): + +9. **backlinks \<name-or-path\>** -- Incoming references. Accepts a symbol name or file path. + ``` + kodebase inspect backlinks parseConfig --format json + ``` + +10. **deps \<name-or-path\>** -- Outgoing dependencies. Accepts a symbol name or file path. + ``` + kodebase inspect deps src/config.ts --format json + ``` + +Read `references/cli-inspect.md` for all column schemas and flag details. + +## Step 3: Index the Vault (Optional) + +Index the vault content into QMD for search. This step requires QMD on PATH. + +``` +kodebase index +``` + +The command is idempotent: it checks whether the collection already exists and chooses `add` (create) or `update` (refresh) automatically. + +Key flags: +- `--embed` (default true) -- run embedding after syncing files +- `--force-embed` -- force re-embedding all documents +- `--context <text>` -- attach human context to improve search relevance +- `--name <collection>` -- override the derived collection name + +Parse the JSON output for: +- `updateResult.indexed` -- number of documents indexed +- `status.totalDocuments` -- total documents in the collection +- `status.hasVectorIndex` -- whether vector search is available +- `embedResult` -- embedding summary (null if `--embed=false`) + +Read `references/cli-search-index.md` for the full output schema. + +## Step 4: Search the Vault (Optional) + +Search indexed vault content with QMD. 
Requires a prior `kodebase index` run. + +``` +kodebase search "<query>" --format json +``` + +**Search modes:** +- Hybrid (default) -- combines lexical and vector search +- Lexical (`--lex`) -- BM25 keyword search only +- Vector (`--vec`) -- embedding-based semantic search + +The `--lex` and `--vec` flags are mutually exclusive. Omit both for hybrid mode. + +Key flags: +- `--limit N` (default 10) -- maximum results +- `--min-score N` -- minimum relevance threshold +- `--full` -- return full document content instead of snippets +- `--all` -- return all matches above the minimum score + +Output columns: `path`, `score`, `preview`. + +Read `references/cli-search-index.md` for full details and example invocations. + +## Output Format Selection + +All `inspect` and `search` commands support `--format`: +- **json** -- always use for programmatic parsing +- **table** -- human-readable aligned columns (default) +- **tsv** -- tab-separated for piping to Unix tools + +The `generate` and `index` commands always output JSON to stdout. + +Read `references/output-formats.md` for format examples and empty result handling. + +## Error Handling + +Common errors and recovery: + +| Error | Recovery | +|-------|----------| +| `unable to find a vault from <cwd>` | Run `kodebase generate <path>` first | +| `QMD is not available to kodebase` | Run `npm install -g @tobilu/qmd` | +| `no topics were found` | Run `kodebase generate` to populate the vault | +| `multiple topics were found` | Re-run with `--topic <slug>` | +| `no symbols matched "<query>"` | Use `inspect smells` or `inspect complexity` to discover valid names | +| `no file matched "<path>"` | Use exact source-relative path from vault frontmatter | + +Read `references/error-handling.md` for the full error catalog with causes and recovery steps. 
+ +## Constraints + +### MUST DO +- Run `kodebase generate` before any inspect, search, or index command +- Use `--format json` when parsing output programmatically +- Use `--progress never` when running `generate` in a non-interactive context +- Parse stdout only for command output; treat stderr as diagnostics +- Use the `topicSlug` from generate output for subsequent `--topic` flags + +### MUST NOT DO +- Pass both `--lex` and `--vec` to `search` +- Pass `--force-embed` with `--embed=false` to `index` +- Treat stderr content as failure evidence for `generate` +- Assume vault location without running `generate` or checking for `.kodebase/vault/` +- Use `./`-prefixed paths like `./src/config.ts` for `inspect file` -- use `src/config.ts` instead diff --git a/.agents/skills/kodebase/references/cli-generate.md b/.agents/skills/kodebase/references/cli-generate.md new file mode 100644 index 000000000..62e1b7c94 --- /dev/null +++ b/.agents/skills/kodebase/references/cli-generate.md @@ -0,0 +1,115 @@ +# Generate Command Reference + +## Usage + +``` +kodebase generate <path> [flags] +``` + +The `<path>` argument is the root directory of the source repository to analyze (required). + +## Flags + +| Flag | Type | Default | Description | +|------|------|---------|-------------| +| `--output` | string | `""` | Vault root where the generated topic will be written. 
Defaults to `/.kodebase/vault` | +| `--topic` | string | `""` | Override the generated topic slug (derived from directory name if omitted) | +| `--title` | string | `""` | Override the generated topic title | +| `--domain` | string | `""` | Override the generated topic domain | +| `--include` | string[] | `nil` | Re-include a path pattern that would otherwise be ignored; repeatable | +| `--exclude` | string[] | `nil` | Exclude an additional path pattern from scanning; repeatable | +| `--semantic` | bool | `false` | Enable semantic analysis when the underlying adapters support it | +| `--progress` | string | `auto` | Progress rendering mode: `auto`, `always`, or `never` | +| `--log-format` | string | `text` | Stderr event format: `text` or `json` | + +## Non-Interactive Usage + +When invoking from an agent context, always set `--progress never` to prevent TTY progress bars from corrupting stdout output. + +``` +kodebase generate /path/to/repo --progress never +``` + +## Pipeline Stages + +The generate pipeline executes these stages in order: + +1. **scan** -- Discover source files by language +2. **select_adapters** -- Choose language parsers (tree-sitter for TS/JS, Go parser) +3. **parse** -- Extract AST nodes, symbols, and relations +4. **normalize** -- Merge per-file graphs into a unified snapshot, resolve imports +5. **metrics** -- Compute complexity, coupling, blast radius, dead code, smells +6. **render** -- Generate markdown documents and Base definitions +7. **write** -- Persist vault files to disk + +## Supported Languages + +| Language | Extensions | Adapter | +|----------|-----------|---------| +| TypeScript | `.ts` | tree-sitter | +| TSX | `.tsx` | tree-sitter | +| JavaScript | `.js` | tree-sitter | +| JSX | `.jsx` | tree-sitter | +| Go | `.go` | tree-sitter | + +## Output Schema (GenerationSummary) + +The command writes JSON to stdout. 
Parse the following fields: + +``` +{ + "command": string, // always "generate" + "rootPath": string, // absolute path to the analyzed repository + "vaultPath": string, // absolute path to the vault root + "topicPath": string, // absolute path to the topic directory + "topicSlug": string, // topic identifier (use for --topic in later commands) + "filesScanned": int, // total files discovered + "filesParsed": int, // files successfully parsed + "filesSkipped": int, // files skipped (unsupported or excluded) + "symbolsExtracted": int, // total symbols extracted + "relationsEmitted": int, // total relation edges + "rawDocumentsWritten": int, // per-file markdown documents + "wikiDocumentsWritten": int, // concept wiki articles + "indexDocumentsWritten": int, // index pages + "timings": { + "scanMillis": int, + "selectAdaptersMillis": int, + "parseMillis": int, + "normalizeMillis": int, + "metricsMillis": int, + "renderMillis": int, + "writeMillis": int, + "totalMillis": int + }, + "diagnostics": [ // structured warnings/errors + { + "code": string, + "severity": "warning" | "error", + "stage": "scan" | "parse" | "render" | "write" | "validate", + "message": string, + "filePath": string?, + "language": string?, + "detail": string? 
+ } + ] +} +``` + +## Vault Structure + +After generation, the vault directory contains: + +``` +<vault-path>/<topic-slug>/ + raw-codebase/ # One markdown file per source file with frontmatter and code + wiki-concept/ # Compiled concept articles + wiki-index/ # Index pages for navigation + *.base # Obsidian Base view definitions (YAML) + CLAUDE.md # Topic marker file +``` + +## Default Path Derivation + +- If `--output` is omitted: vault path defaults to `<path>/.kodebase/vault` +- If `--topic` is omitted: topic slug is derived from the repository directory name +- Full topic path: `<vault-path>/<topic-slug>/` diff --git a/.agents/skills/kodebase/references/cli-inspect.md b/.agents/skills/kodebase/references/cli-inspect.md new file mode 100644 index 000000000..e86ac9eb9 --- /dev/null +++ b/.agents/skills/kodebase/references/cli-inspect.md @@ -0,0 +1,281 @@ +# Inspect Command Reference + +## Usage + +``` +kodebase inspect <subcommand> [flags] +``` + +## Shared Flags (All Subcommands) + +| Flag | Type | Default | Description | +|------|------|---------|-------------| +| `--format` | string | `table` | Output format: `table`, `json`, or `tsv` | +| `--vault` | string | `""` | Vault root path (auto-discovered from cwd if omitted) | +| `--topic` | string | `""` | Topic slug inside the vault (auto-detected if only one topic exists) | + +## Vault Auto-Discovery + +When `--vault` is omitted, the CLI walks up from the current working directory looking for `.kodebase/vault/`. If `--topic` is omitted and only one topic exists, it is selected automatically. If multiple topics exist, the command fails with an error listing available slugs. + +--- + +## Subcommands + +### 1. smells + +List symbols and files with detected code smells. 
+ +``` +kodebase inspect smells [--type ] [--format json] +``` + +**Flags:** `--type` (string) -- filter to a specific smell type (e.g., `long-function`, `high-complexity`, `dead-export`, `orphan-file`, `god-file`) + +**Output Columns:** + +| Column | Type | Description | +|--------|------|-------------| +| `kind` | string | `"symbol"` or `"file"` | +| `name` | string | Symbol name or file source path | +| `source_path` | string | Source-relative file path | +| `symbol_kind` | string | Symbol kind (empty for files) | +| `smells` | string[] | List of detected smell types | + +--- + +### 2. dead-code + +List dead exports and orphan files. + +``` +kodebase inspect dead-code [--format json] +``` + +**Output Columns:** + +| Column | Type | Description | +|--------|------|-------------| +| `kind` | string | `"symbol"` or `"file"` | +| `name` | string | Symbol name or file source path | +| `source_path` | string | Source-relative file path | +| `symbol_kind` | string | Symbol kind (empty for files) | +| `reason` | string | `"dead-export"` or `"orphan-file"` | +| `smells` | string[] | List of detected smell types | + +--- + +### 3. complexity + +Rank functions by cyclomatic complexity (descending). + +``` +kodebase inspect complexity [--top N] [--format json] +``` + +**Flags:** `--top` (int, default 20) -- maximum number of rows to return + +**Output Columns:** + +| Column | Type | Description | +|--------|------|-------------| +| `symbol_name` | string | Function or method name | +| `symbol_kind` | string | `"function"` or `"method"` | +| `source_path` | string | Source-relative file path | +| `cyclomatic_complexity` | int | Cyclomatic complexity score | +| `loc` | int | Lines of code | +| `blast_radius` | int | Transitive dependents count | +| `smells` | string[] | Detected smell types | + +--- + +### 4. blast-radius + +Rank symbols by blast radius (how many symbols transitively depend on a given symbol). 
+ +``` +kodebase inspect blast-radius [--min N] [--top N] [--format json] +``` + +**Flags:** +- `--min` (int, default 0) -- minimum blast radius threshold +- `--top` (int, default 0) -- maximum rows to return (0 = all) + +**Output Columns:** + +| Column | Type | Description | +|--------|------|-------------| +| `symbol_name` | string | Symbol name | +| `source_path` | string | Source-relative file path | +| `blast_radius` | int | Count of unique transitive dependents | +| `centrality` | float | Betweenness centrality score (0-1) | +| `external_reference_count` | int | References from outside the symbol's module | +| `smells` | string[] | Detected smell types | + +--- + +### 5. coupling + +Rank files by instability (Martin coupling metric). + +``` +kodebase inspect coupling [--unstable] [--format json] +``` + +**Flags:** `--unstable` (bool) -- only show files with instability > 0.5 + +**Output Columns:** + +| Column | Type | Description | +|--------|------|-------------| +| `source_path` | string | Source-relative file path | +| `afferent_coupling` | int | Files that import this file (Ca) | +| `efferent_coupling` | int | Files this file imports (Ce) | +| `instability` | float | Ce / (Ca + Ce); 1.0 = completely unstable | +| `has_circular_dependency` | bool | Participates in a circular import chain | +| `smells` | string[] | Detected smell types | + +--- + +### 6. symbol \ + +Lookup symbols by case-insensitive substring match. 
+ +``` +kodebase inspect symbol [--format json] +``` + +**Behavior:** +- **No matches:** Returns error with suggestion to use `inspect smells` or `inspect complexity` +- **Single match:** Returns detailed field-value pairs (see detail output below) +- **Multiple matches:** Returns summary table + +**Summary Table Columns** (multiple matches): + +| Column | Type | Description | +|--------|------|-------------| +| `symbol_name` | string | Symbol name | +| `symbol_kind` | string | Symbol kind | +| `source_path` | string | Source-relative file path | +| `start_line` | int | Start line in source | +| `language` | string | Source language | +| `smells` | string[] | Detected smell types | + +**Detail Fields** (single match): + +| Field | Type | +|-------|------| +| `relative_path` | string | +| `symbol_name` | string | +| `symbol_kind` | string | +| `source_path` | string | +| `language` | string | +| `exported` | bool | +| `start_line` | int | +| `end_line` | int | +| `signature` | string | +| `loc` | int | +| `blast_radius` | int | +| `centrality` | float | +| `cyclomatic_complexity` | int | +| `external_reference_count` | int | +| `is_dead_export` | bool | +| `is_long_function` | bool | +| `smells` | string[] | +| `outgoing_relations` | relation[] | +| `backlinks` | relation[] | + +Each relation entry has: `target_path` (string), `type` (string: imports|calls|references), `confidence` (string: semantic|syntactic). + +--- + +### 7. file \ + +Lookup a file by its exact source path. 
+ +``` +kodebase inspect file [--format json] +``` + +**Detail Fields:** + +| Field | Type | +|-------|------| +| `relative_path` | string | +| `source_path` | string | +| `language` | string | +| `symbol_count` | int | +| `symbols` | string[] (name + kind pairs) | +| `afferent_coupling` | int | +| `efferent_coupling` | int | +| `instability` | float | +| `is_orphan_file` | bool | +| `is_god_file` | bool | +| `has_circular_dependency` | bool | +| `smells` | string[] | +| `outgoing_relations` | relation[] | +| `backlinks` | relation[] | + +--- + +### 8. backlinks \ + +Show incoming references for a symbol or file. + +``` +kodebase inspect backlinks [--format json] +``` + +**Entity Resolution:** Tries exact file path match first, falls back to single symbol name match. + +**Output Columns:** + +| Column | Type | Description | +|--------|------|-------------| +| `target_path` | string | Path of the referencing entity | +| `type` | string | Relation type: `imports`, `calls`, `references` | +| `confidence` | string | `semantic` or `syntactic` | + +--- + +### 9. deps \ + +Show outgoing dependencies for a symbol or file. + +``` +kodebase inspect deps [--format json] +``` + +**Entity Resolution:** Same as backlinks (file path first, then symbol name). + +**Output Columns:** + +| Column | Type | Description | +|--------|------|-------------| +| `target_path` | string | Path of the dependency | +| `type` | string | Relation type: `imports`, `calls`, `references` | +| `confidence` | string | `semantic` or `syntactic` | + +--- + +### 10. circular-deps + +List files that participate in circular dependencies. 
+ +``` +kodebase inspect circular-deps [--format json] +``` + +**Behavior:** +- If cycles exist: returns a table of participating files +- If no cycles: returns `{"message": "no circular dependencies found"}` + +**Output Columns** (when cycles exist): + +| Column | Type | Description | +|--------|------|-------------| +| `source_path` | string | Source-relative file path | +| `afferent_coupling` | int | Files that import this file | +| `efferent_coupling` | int | Files this file imports | +| `instability` | float | Coupling instability metric | +| `smells` | string[] | Detected smell types | diff --git a/.agents/skills/kodebase/references/cli-search-index.md b/.agents/skills/kodebase/references/cli-search-index.md new file mode 100644 index 000000000..d2c110f3f --- /dev/null +++ b/.agents/skills/kodebase/references/cli-search-index.md @@ -0,0 +1,165 @@ +# Search and Index Command Reference + +Both commands require the QMD binary on PATH. Install with `npm install -g @tobilu/qmd`. + +--- + +## Search Command + +### Usage + +``` +kodebase search <query> [flags] +``` + +The `<query>` argument is the search text (required, non-empty). 
+ +### Flags + +| Flag | Type | Default | Description | +|------|------|---------|-------------| +| `--lex` | bool | `false` | Use BM25 keyword search only | +| `--vec` | bool | `false` | Use vector similarity search only | +| `--limit` | int | `10` | Maximum number of results to return | +| `--min-score` | float | `0` | Minimum score threshold for returned matches | +| `--full` | bool | `false` | Show the full matched document content instead of snippets | +| `--all` | bool | `false` | Return all matches above the minimum score threshold | +| `--collection` | string | `""` | Use an explicit QMD collection name instead of deriving from the topic | +| `--format` | string | `table` | Output format: `table`, `json`, or `tsv` | +| `--vault` | string | `""` | Vault root path (used when deriving the collection name) | +| `--topic` | string | `""` | Topic slug (used when deriving the collection name) | + +### Search Modes + +| Mode | Flag | QMD Command | Description | +|------|------|-------------|-------------| +| Hybrid | (default) | `query` | Combines lexical and vector search | +| Lexical | `--lex` | `search` | BM25 keyword search only | +| Vector | `--vec` | `vsearch` | Embedding-based semantic search | + +The `--lex` and `--vec` flags are mutually exclusive. Omit both for hybrid mode. + +### Output Columns + +| Column | Type | Description | +|--------|------|-------------| +| `path` | string | Vault-relative path of the matched document | +| `score` | float | Relevance score | +| `preview` | string | Snippet of matched content (or full content if `--full` is set) | + +### Collection Name Derivation + +When `--collection` is omitted, the collection name is derived from the topic slug: +1. Resolve the vault and topic (same logic as inspect commands) +2. 
Use the `topicSlug` as the collection name + +### Example Invocations + +```bash +# Hybrid search (default) +kodebase search "authentication middleware" --format json + +# Lexical search with higher result limit +kodebase search "parseConfig" --lex --limit 20 --format json + +# Vector search with score threshold +kodebase search "error handling patterns" --vec --min-score 0.5 --format json + +# Full document content +kodebase search "auth" --full --format json + +# Explicit collection name +kodebase search "auth" --collection my-project --format json +``` + +--- + +## Index Command + +### Usage + +``` +kodebase index [flags] +``` + +Alias: `kodebase index-vault` + +### Flags + +| Flag | Type | Default | Description | +|------|------|---------|-------------| +| `--vault` | string | `""` | Vault root path | +| `--topic` | string | `""` | Topic slug inside the vault | +| `--name` | string | `""` | Override the derived QMD collection name | +| `--embed` | bool | `true` | Run embedding after syncing files | +| `--force-embed` | bool | `false` | Force re-embedding all documents | +| `--context` | string | `""` | Attach human-written collection context to improve search relevance | + +### Idempotent Behavior + +The index command is idempotent. It checks `qmd status` first and selects the operation: +- If the collection already exists: performs an **update** (syncs changes) +- If the collection does not exist: performs an **add** (creates and populates) + +Run `kodebase index` repeatedly without side effects. 
+ +### Output Schema (indexResultPayload) + +``` +{ + "collectionName": string, // QMD collection name (= topic slug or --name override) + "embedRequested": bool, // whether --embed was true + "embedResult": { // present only if embedding was performed + "docsProcessed": int, + "chunksEmbedded": int, + "errors": int, + "durationMs": int + }, + "forceEmbed": bool, // whether --force-embed was set + "status": { + "collection": { // null if collection was just created + "name": string, + "path": string, + "pattern": string, + "documents": int, + "lastUpdated": string + }, + "hasVectorIndex": bool, + "needsEmbedding": int, + "totalDocuments": int + }, + "topicPath": string, // absolute path to the topic directory + "topicSlug": string, // topic identifier + "updateResult": { + "collections": int, + "indexed": int, + "updated": int, + "unchanged": int, + "removed": int, + "needsEmbedding": int + }, + "vaultPath": string // absolute path to the vault root +} +``` + +### Example Invocations + +```bash +# Index with default settings (embed enabled) +kodebase index + +# Index with custom context for search relevance +kodebase index --context "React application with Redux state management" + +# Force re-embedding all documents +kodebase index --force-embed + +# Index without embedding (sync files only) +kodebase index --embed=false + +# Index with explicit vault and topic +kodebase index --vault /path/to/vault --topic my-project + +# Index with custom collection name +kodebase index --name custom-collection +``` diff --git a/.agents/skills/kodebase/references/error-handling.md b/.agents/skills/kodebase/references/error-handling.md new file mode 100644 index 000000000..ffe44ed6f --- /dev/null +++ b/.agents/skills/kodebase/references/error-handling.md @@ -0,0 +1,59 @@ +# Error Handling Reference + +Categorized error messages from kodebase CLI with causes and recovery steps. 
+ +## Vault Resolution Errors + +These occur when `inspect`, `search`, or `index` cannot locate a vault or topic. + +| Error Message | Cause | Recovery | +|---------------|-------|----------| +| `unable to find a vault from . walked up looking for .kodebase/vault/` | No `.kodebase/vault/` directory exists above the working directory | Run `kodebase generate ` first to create the vault | +| `Vault path was not found or is not a directory: ` | The `--vault` flag points to a nonexistent path | Verify the vault path exists and is a directory | +| `no topics were found in . expected child directories containing CLAUDE.md` | The vault directory exists but contains no generated topics | Run `kodebase generate ` to populate the vault | +| `multiple topics were found in : , ` | The vault contains more than one topic and no `--topic` flag was provided | Re-run the command with `--topic ` to select one | +| `topic name is required when topic is specified` | The `--topic` flag was provided but with an empty or whitespace-only value | Provide a non-empty topic slug | +| `Topic path was not found or is not a directory: ` | The `--topic` slug does not match any directory in the vault | Check available topic slugs inside the vault directory | + +## Inspect Lookup Errors + +These occur when `inspect symbol`, `inspect file`, `inspect backlinks`, or `inspect deps` cannot resolve the target entity. 
+ +| Error Message | Cause | Recovery | +|---------------|-------|----------| +| `no symbols matched ""` | No symbol name contains the query as a case-insensitive substring | Use `kodebase inspect smells` or `kodebase inspect complexity` to discover valid symbol names | +| `multiple symbols matched "": , ` | More than one symbol matched the query | Re-run with a more specific query string | +| `no file matched ""` | No file in the vault has the given `source_path` value | Use the exact source-relative path as stored in vault frontmatter (e.g., `src/config.ts` not `./src/config.ts`) | +| `no symbol or file matched ""` | The query matched neither a file source path nor a symbol name | Re-run with a specific symbol name or an exact source path | + +## QMD Errors + +These occur when `search` or `index` cannot communicate with the QMD binary. + +| Error Message | Cause | Recovery | +|---------------|-------|----------| +| `: QMD is not available to kodebase. Install it with 'npm install -g @tobilu/qmd' and ensure 'qmd' is on PATH` | The `qmd` binary was not found on the system PATH | Run `npm install -g @tobilu/qmd` and verify with `qmd --version` | +| `: ` | QMD returned an error during execution | Read the stderr diagnostics from QMD for details; common causes include missing collections or corrupted index files | + +## Flag Validation Errors + +These occur before any command execution when flag combinations are invalid. + +| Error Message | Cause | Recovery | +|---------------|-------|----------| +| `choose at most one search mode flag: --lex or --vec` | Both `--lex` and `--vec` were provided to `search` | Use only one mode selector, or omit both for hybrid mode | +| `--force-embed cannot be used together with --embed=false` | Contradictory embedding flags on `index` | Remove `--force-embed` or set `--embed=true` | +| `--limit must be >= 1. 
received ` | The `--limit` flag on `search` was set to zero or negative | Provide a positive integer for `--limit` | +| `--min-score must be >= 0. received ` | The `--min-score` flag on `search` was set to a negative value | Provide a non-negative value for `--min-score` | +| `--top must be >= 1. received ` | The `--top` flag on `inspect complexity` was set to zero or negative | Provide a positive integer for `--top` | +| `--min must be >= 0. received ` | The `--min` flag on `inspect blast-radius` was set to negative | Provide a non-negative integer for `--min` | +| `invalid --format "": expected one of "table", "json", "tsv"` | An unsupported format string was provided | Use `table`, `json`, or `tsv` | + +## General Errors + +| Error Message | Cause | Recovery | +|---------------|-------|----------| +| `a search query is required` | Empty or whitespace-only query passed to `search` | Provide a non-empty search query string | +| `a symbol name is required` | Empty query passed to `inspect symbol` | Provide a non-empty symbol name | +| `a file path is required` | Empty path passed to `inspect file` | Provide a non-empty source path | +| `a symbol name or file path is required` | Empty query passed to `inspect backlinks` or `inspect deps` | Provide a non-empty symbol name or file path | diff --git a/.agents/skills/kodebase/references/output-formats.md b/.agents/skills/kodebase/references/output-formats.md new file mode 100644 index 000000000..443ced26d --- /dev/null +++ b/.agents/skills/kodebase/references/output-formats.md @@ -0,0 +1,169 @@ +# Output Format Reference + +All `inspect` and `search` commands support three output formats via `--format`. 
+ +## Format Selection + +| Format | Flag | Use Case | +|--------|------|----------| +| table | `--format table` | Human-readable display (default) | +| json | `--format json` | Programmatic parsing by agents | +| tsv | `--format tsv` | Piping to Unix tools | + +Always use `--format json` when parsing output programmatically. + +## Inspect Output (Tabular Commands) + +Tabular inspect commands (`smells`, `dead-code`, `complexity`, `blast-radius`, `coupling`, `circular-deps`) return rows with typed columns. + +### JSON Example (`inspect complexity --top 2 --format json`) + +```json +[ + { + "symbol_name": "parseConfig", + "symbol_kind": "function", + "source_path": "src/config.ts", + "cyclomatic_complexity": 12, + "loc": 45, + "blast_radius": 8, + "smells": ["high-complexity"] + }, + { + "symbol_name": "resolveImports", + "symbol_kind": "function", + "source_path": "src/resolver.ts", + "cyclomatic_complexity": 9, + "loc": 32, + "blast_radius": 5, + "smells": [] + } +] +``` + +### TSV Example + +``` +symbol_name symbol_kind source_path cyclomatic_complexity loc blast_radius smells +parseConfig function src/config.ts 12 45 8 high-complexity +resolveImports function src/resolver.ts 9 32 5 +``` + +## Inspect Output (Detail Commands) + +Detail commands (`symbol`, `file`) return field-value pairs when a single entity matches. 
+ +### JSON Example (`inspect symbol parseConfig --format json`) + +```json +[ + {"field": "symbol_name", "value": "parseConfig"}, + {"field": "symbol_kind", "value": "function"}, + {"field": "source_path", "value": "src/config.ts"}, + {"field": "loc", "value": 45}, + {"field": "blast_radius", "value": 8}, + {"field": "smells", "value": ["high-complexity"]}, + {"field": "outgoing_relations", "value": [ + {"target_path": "src/utils.ts", "type": "imports", "confidence": "syntactic"} + ]}, + {"field": "backlinks", "value": [ + {"target_path": "src/main.ts", "type": "calls", "confidence": "semantic"} + ]} +] +``` + +## Generate Output + +`generate` always outputs JSON to stdout (no `--format` flag). + +```json +{ + "command": "generate", + "rootPath": "/path/to/repo", + "vaultPath": "/path/to/repo/.kodebase/vault", + "topicPath": "/path/to/repo/.kodebase/vault/my-project", + "topicSlug": "my-project", + "filesScanned": 120, + "filesParsed": 95, + "filesSkipped": 25, + "symbolsExtracted": 430, + "relationsEmitted": 1200, + "rawDocumentsWritten": 95, + "wikiDocumentsWritten": 12, + "indexDocumentsWritten": 5, + "timings": { + "scanMillis": 45, + "selectAdaptersMillis": 2, + "parseMillis": 1200, + "normalizeMillis": 80, + "metricsMillis": 150, + "renderMillis": 300, + "writeMillis": 200, + "totalMillis": 1977 + }, + "diagnostics": [] +} +``` + +## Search Output + +### JSON Example (`search "auth middleware" --format json`) + +```json +[ + { + "path": "raw-codebase/src/auth/middleware.md", + "score": 0.89, + "preview": "Authentication middleware that validates JWT tokens..." + } +] +``` + +## Index Output + +`index` always outputs JSON to stdout (no `--format` flag). 
+ +```json +{ + "collectionName": "my-project", + "embedRequested": true, + "embedResult": { + "docsProcessed": 95, + "chunksEmbedded": 320, + "errors": 0, + "durationMs": 4500 + }, + "forceEmbed": false, + "status": { + "collection": { + "name": "my-project", + "path": "qmd://collections/my-project", + "pattern": "", + "documents": 95, + "lastUpdated": "2026-04-10T12:00:00Z" + }, + "hasVectorIndex": true, + "needsEmbedding": 0, + "totalDocuments": 95 + }, + "topicPath": "/path/to/vault/my-project", + "topicSlug": "my-project", + "updateResult": { + "collections": 1, + "indexed": 95, + "updated": 0, + "unchanged": 0, + "removed": 0, + "needsEmbedding": 95 + }, + "vaultPath": "/path/to/vault" +} +``` + +## Empty Results + +| Format | Empty Output | +|--------|-------------| +| json | `[]` | +| table | `No results.` followed by newline | +| tsv | Header row only (no data rows) | diff --git a/.codex/plans/2026-04-10-kb-refac-full-sweep.md b/.codex/plans/2026-04-10-kb-refac-full-sweep.md new file mode 100644 index 000000000..658ed8c03 --- /dev/null +++ b/.codex/plans/2026-04-10-kb-refac-full-sweep.md @@ -0,0 +1,50 @@ +# KB Refactor Full Sweep + +## Summary + +- Execute the entire `kb-refac` scope in phased checkpoints, not a single big-bang batch. +- Treat the techspec as the scope authority, but treat the live codebase as the implementation authority for exact file sizes, helper names, and stale counts. +- Fix root causes: composition-root concentration, duplicated session-start flow, repetitive ACP dispatch, oversized hook and transport surfaces, dead exported API surface, and repeated workspace primitives. +- Keep runtime behavior and wire formats stable throughout; each phase closes only after its own verification gate and without temporary bridge code left behind. + +## Key Changes + +1. Phase 1: safe cleanup and task artifact sync + - Remove confirmed dead exports and unused shadcn UI components listed in the techspec. 
+ - Unexport test-only production helpers and replace callers with direct constructors or same-package helpers instead of keeping test-facing production API. + - Create the missing `kb-refac` ADR files referenced by the techspec and update the techspec only when implementation proves a claim stale. +2. Phase 2: session lifecycle dedup + - Extract one private `startSession` pipeline shared by create and resume. + - Keep `Create` and `Resume` limited to source-specific preamble logic that prepares a `sessionStartSpec`. +3. Phase 3: ACP and daemon orchestration + - Replace ACP inbound switch dispatch with a typed registry plus a small decode/execute helper. + - Split ACP driver startup into subprocess spawn, ACP connection initialization, and session negotiation helpers. + - Refactor daemon construction into `applyDefaults()` plus boot phases backed by a `bootState` and cleanup stack. +4. Phase 4: hook boundary reduction + - Split hook dispatch implementation by responsibility before changing interfaces. + - Replace the single 21-method session hook dependency with grouped domain subinterfaces collected in a hook-set container injected into the session manager. + - Provide no-op group defaults so tests only implement the groups they exercise. +5. Phase 5: transport, registry, and shared value objects + - Split the large CLI skill, HTTP server, and skills registry files by concern while keeping package boundaries and external entrypoints unchanged. + - Flatten session SSE streaming into helpers for backlog replay, polling, and stop-event emission. + - Move CLI SSE decoding into a neutral shared package. + - Introduce a tiny neutral workspace-reference package reused across payload types without changing external JSON field names. + +## Interfaces And Types + +- Replace the current aggregate session hook dependency with grouped internal interfaces plus a hook-set container. +- Add private orchestration types such as `sessionStartSpec` and `bootState`. 
+- Add a neutral shared workspace-reference value object reused across payload types without changing external JSON field names. + +## Test Plan + +- End every phase with targeted package tests first, then `make verify`. +- Run `make test-integration` after phases that change daemon, session, hooks, ACP startup, or API transport behavior. +- Verify create vs resume parity, failed-start cleanup, ACP invalid-params and method-not-found handling, daemon boot cleanup ordering, hook mutation and denial behavior, HTTP route coverage parity, SSE replay/poll/stop behavior, and JSON compatibility for embedded workspace refs. +- Verify frontend cleanup with `make web-test` and `make web-build` after unused UI component removal. + +## Assumptions + +- Scope is the full sweep from the techspec, including unused UI cleanup and task-artifact synchronization. +- Delivery is phased; no phase is complete if it leaves behind transitional bridge code. +- The missing `kb-refac` ADR references should be created unless implementation proves one obsolete. diff --git a/.compozy/tasks/ext-architecture/_examples.md b/.compozy/tasks/ext-architecture/_examples.md new file mode 100644 index 000000000..65f8547ae --- /dev/null +++ b/.compozy/tasks/ext-architecture/_examples.md @@ -0,0 +1,824 @@ +# Extension Architecture — High-Level API Examples + +Examples showing how extension authors interact with AGH using subprocess extensions (Go and TypeScript). + +--- + +## 1. Subprocess Hook Extension (Go) + +A content safety validator that blocks prompts containing secrets. 
+ +### Manifest (`extension.toml`) + +```toml +[extension] +name = "secret-guard" +version = "0.1.0" +description = "Blocks prompts that contain API keys or secrets" +min_agh_version = "0.5.0" + +[capabilities] +provides = ["content.validate"] + +[[resources.hooks]] +name = "secret-guard-hook" +event = "input.pre_submit" +mode = "sync" +executor.kind = "subprocess" +executor.command = "./bin/secret-guard" +executor.args = ["--hook", "input_pre_submit"] + +[subprocess] +command = "./bin/secret-guard" +args = ["serve"] + +[security] +capabilities = ["message.read"] +``` + +### Extension Code (Go) + +```go +package main + +import ( + "context" + "strings" + + agh "github.com/anthropics/agh/sdk/go" // placeholder — final module path TBD +) + +func main() { + ext := agh.NewExtension(agh.ExtensionConfig{ + Name: "secret-guard", + Version: "0.1.0", + }) + + patterns := []string{"sk-", "AKIA", "ghp_", "-----BEGIN RSA"} + + ext.Handle("execute_hook", func(ctx context.Context, params agh.HookPayload) (*agh.HookResult, error) { + for _, pat := range patterns { + if strings.Contains(params.Message, pat) { + return &agh.HookResult{ + Allow: false, + Reason: fmt.Sprintf("Message contains a potential secret (pattern: %s)", pat), + }, nil + } + } + return &agh.HookResult{Allow: true}, nil + }) + + ext.Start() +} +``` + +### Build & Install + +```bash +# Build +go build -o bin/secret-guard . + +# Install +agh extension install ./secret-guard/ +``` + +--- + +## 2. Subprocess Extension — Memory Backend (Go) + +A pgvector-backed memory backend that replaces AGH's default SQLite memory. 
+ +### Manifest (`extension.toml`) + +```toml +[extension] +name = "pgvector-memory" +version = "0.2.0" +description = "PostgreSQL pgvector semantic memory backend" +type = "subprocess" +min_agh_version = "0.5.0" + +[capabilities] +provides = ["memory.backend"] + +[actions] +requires = [ + "sessions/list", + "sessions/events", +] + +[subprocess] +command = "./bin/agh-ext-pgvector" +args = ["serve"] +health_check_interval = "30s" +shutdown_timeout = "10s" + +[subprocess.env] +DATABASE_URL = "{{env:PGVECTOR_DATABASE_URL}}" + +[resources] +skills = ["skills/"] + +[security] +capabilities = ["memory.read", "memory.write", "session.read"] +``` + +### Extension Code (Go) + +```go +package main + +import ( + "context" + "log" + + agh "github.com/anthropics/agh/sdk/go" // placeholder — final module path TBD +) + +func main() { + ext := agh.NewExtension(agh.ExtensionConfig{ + Name: "pgvector-memory", + Version: "0.2.0", + Capabilities: []string{"memory.backend"}, + }) + + db := connectPgvector(os.Getenv("DATABASE_URL")) + + // Handle memory/store — AGH calls this when storing a memory + ext.Handle("memory/store", func(ctx context.Context, params agh.StoreParams) (*agh.StoreResult, error) { + embedding := embed(params.Content) + err := db.Insert(ctx, params.Key, params.Content, embedding, params.Tags) + if err != nil { + return nil, fmt.Errorf("pgvector store: %w", err) + } + return &agh.StoreResult{}, nil + }) + + // Handle memory/recall — AGH calls this when searching memory + ext.Handle("memory/recall", func(ctx context.Context, params agh.RecallParams) (*agh.RecallResult, error) { + embedding := embed(params.Query) + rows, err := db.Search(ctx, embedding, params.Limit) + if err != nil { + return nil, fmt.Errorf("pgvector recall: %w", err) + } + entries := make([]agh.MemoryEntry, len(rows)) + for i, r := range rows { + entries[i] = agh.MemoryEntry{Key: r.Key, Content: r.Content, Score: r.Score} + } + return &agh.RecallResult{Entries: entries}, nil + }) + + // Use Host API 
— read session events for context enrichment + ext.OnReady(func(host *agh.HostAPI) { + sessions, _ := host.Sessions.List(context.Background()) + log.Printf("pgvector-memory connected. %d active sessions.", len(sessions)) + }) + + // Handle health checks + ext.Handle("health_check", func(ctx context.Context, _ any) (*agh.HealthResult, error) { + err := db.Ping(ctx) + if err != nil { + return &agh.HealthResult{Healthy: false, Message: err.Error()}, nil + } + return &agh.HealthResult{Healthy: true}, nil + }) + + ext.Start() // Blocks, reads stdin, writes stdout +} +``` + +--- + +## 3. Subprocess Extension — Memory Backend (TypeScript) + +The same pgvector backend, but in TypeScript using `@agh/extension-sdk`. + +### Manifest (`extension.toml`) + +```toml +[extension] +name = "pgvector-memory-ts" +version = "0.2.0" +description = "PostgreSQL pgvector memory backend (TypeScript)" +type = "subprocess" +min_agh_version = "0.5.0" + +[capabilities] +provides = ["memory.backend"] + +[actions] +requires = ["sessions/list"] + +[subprocess] +command = "node" +args = ["dist/index.js"] +health_check_interval = "30s" + +[subprocess.env] +DATABASE_URL = "{{env:PGVECTOR_DATABASE_URL}}" + +[security] +capabilities = ["memory.read", "memory.write", "session.read"] +``` + +### Extension Code (TypeScript) + +```typescript +import { Extension, HostAPI, StoreParams, RecallParams } from '@agh/extension-sdk'; +import { PgVector } from './pgvector'; + +const ext = new Extension({ + name: 'pgvector-memory-ts', + version: '0.2.0', + capabilities: { provides: ['memory.backend'] }, + actions: { requires: ['sessions/list'] }, +}); + +const db = new PgVector(process.env.DATABASE_URL!); + +// Handle memory/store +ext.handle('memory/store', async (ctx, params: StoreParams) => { + const embedding = await embed(params.content); + await db.insert(params.key, params.content, embedding, params.tags); + return {}; +}); + +// Handle memory/recall +ext.handle('memory/recall', async (ctx, params: 
RecallParams) => { + const embedding = await embed(params.query); + const rows = await db.search(embedding, params.limit ?? 10); + return { + entries: rows.map(r => ({ + key: r.key, + content: r.content, + score: r.score, + })), + }; +}); + +// Use Host API on startup +ext.onReady(async (host: HostAPI) => { + const sessions = await host.sessions.list(); + console.error(`pgvector-memory-ts connected. ${sessions.length} active sessions.`); +}); + +// Health check +ext.handle('health_check', async () => { + const ok = await db.ping(); + return { healthy: ok }; +}); + +ext.start(); +``` + +--- + +## 4. Subprocess Hook Extension (TypeScript) + +A prompt enhancer that adds workspace context to every prompt. + +### Manifest (`extension.toml`) + +```toml +[extension] +name = "prompt-enhancer" +version = "0.1.0" +description = "Adds workspace context to every prompt" +min_agh_version = "0.5.0" + +[capabilities] +provides = ["prompt.provider"] + +[[resources.hooks]] +name = "workspace-context" +event = "prompt.post_assemble" +mode = "sync" +executor.kind = "subprocess" +executor.command = "node" +executor.args = ["dist/index.js", "--hook", "prompt_post_assemble"] + +[subprocess] +command = "node" +args = ["dist/index.js", "serve"] + +[security] +capabilities = ["message.read", "message.write"] +``` + +### Extension Code (TypeScript) + +```typescript +import { Extension } from '@agh/extension-sdk'; + +const ext = new Extension({ + name: 'prompt-enhancer', + version: '0.1.0', + capabilities: { provides: ['prompt.provider'] }, +}); + +ext.handle('execute_hook', async (ctx, params: { + session_id: string; + agent_name: string; + workspace_root: string; + prompt: string; +}) => { + return { + updated_prompt: `[Workspace: ${params.workspace_root}]\n\n${params.prompt}`, + }; +}); + +ext.start(); +``` + +--- + +## 5. Subprocess Extension — Observe Exporter + +Exports AGH events to OpenTelemetry. 
+ +### Manifest (`extension.toml`) + +```toml +[extension] +name = "otel-exporter" +version = "1.0.0" +description = "Export AGH events to OpenTelemetry" +type = "subprocess" +min_agh_version = "0.5.0" + +[capabilities] +provides = ["observe.exporter"] + +[actions] +requires = ["observe/events", "observe/health", "sessions/list"] + +[subprocess] +command = "./bin/agh-ext-otel" +health_check_interval = "15s" + +[subprocess.env] +OTEL_ENDPOINT = "{{env:OTEL_ENDPOINT}}" +OTEL_SERVICE_NAME = "agh" + +[security] +capabilities = ["observe.read", "session.read"] +``` + +### Extension Code (Go) + +```go +package main + +import ( + "context" + "time" + + agh "github.com/anthropics/agh/sdk/go" // placeholder — final module path TBD + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" +) + +func main() { + ext := agh.NewExtension(agh.ExtensionConfig{ + Name: "otel-exporter", + Version: "1.0.0", + Capabilities: []string{"observe.exporter"}, + }) + + exporter := initOTelExporter(os.Getenv("OTEL_ENDPOINT")) + + // Periodic poll for new events via Host API + ext.OnReady(func(ctx context.Context, host *agh.HostAPI) { + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + var lastSeen time.Time + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + events, _ := host.Observe.Events(ctx, agh.EventsQuery{ + Since: &lastSeen, + Limit: 100, + }) + for _, ev := range events { + exporter.Export(ctx, toOTelSpan(ev)) + lastSeen = ev.Timestamp + } + } + } + }) + + ext.Handle("health_check", func(ctx context.Context, _ any) (*agh.HealthResult, error) { + return &agh.HealthResult{Healthy: exporter.IsConnected()}, nil + }) + + ext.Start() +} +``` + +--- + +## 6. Extension Bundling Resources + +An extension that bundles skills, agent definitions, and hook declarations together — like a Claude Code plugin. 
+ +### Directory Structure + +``` +my-devops-pack/ + extension.toml + skills/ + k8s-troubleshoot.md # Skill: Kubernetes debugging + terraform-review.md # Skill: Terraform plan review + agents/ + devops-agent.md # Agent definition with custom system prompt + bin/ + agh-ext-devops # Subprocess binary (optional) +``` + +### Manifest (`extension.toml`) + +```toml +[extension] +name = "devops-pack" +version = "1.0.0" +description = "DevOps skills, agent, and incident response hooks" +type = "subprocess" +min_agh_version = "0.5.0" + +[resources] +skills = ["skills/"] +agents = ["agents/"] + +[[resources.hooks]] +name = "incident-notifier" +event = "session.post_stop" +mode = "async" +executor.kind = "subprocess" +executor.command = "./bin/agh-ext-devops" +executor.args = ["--hook", "incident-notify"] + +[[resources.hooks]] +name = "cost-guard" +event = "session.pre_create" +mode = "sync" +executor.kind = "subprocess" +executor.command = "./bin/agh-ext-devops" +executor.args = ["--hook", "cost-check"] + +[resources.mcp_servers] +[resources.mcp_servers.kubectl] +command = "mcp-kubectl" +args = ["--context", "production"] + +[capabilities] +provides = ["prompt.provider"] + +[actions] +requires = ["sessions/list", "sessions/events", "observe/health"] + +[subprocess] +command = "./bin/agh-ext-devops" +args = ["serve"] + +[security] +capabilities = ["session.read", "observe.read"] +``` + +### What happens on `agh extension install ./my-devops-pack/` + +``` +1. DISCOVER → Found extension.toml in ./my-devops-pack/ +2. PARSE → Manifest parsed: name=devops-pack, type=subprocess +3. VALIDATE → Version check OK, checksum verified, capabilities valid +4. REGISTER → + ├── Skills: k8s-troubleshoot.md, terraform-review.md → skills.Registry + ├── Agents: devops-agent.md → config.AgentDef resolution + ├── Hooks: incident-notifier, cost-guard → hooks.DeclarationProvider + └── MCP: kubectl server → MCPResolver +5. INITIALIZE → Launch ./bin/agh-ext-devops serve → handshake +6. 
ACTIVATE → Extension live. Hooks dispatching. Host API available. +``` + +--- + +## 7. Host API Usage Patterns + +### Pattern: Channel Adapter (extension creates sessions from external messages) + +```typescript +import { Extension, HostAPI } from '@agh/extension-sdk'; +import { SlackClient } from './slack'; + +const ext = new Extension({ + name: 'slack-adapter', + version: '0.1.0', + capabilities: { provides: [] }, + actions: { requires: ['sessions/create', 'sessions/prompt', 'sessions/stop'] }, +}); + +const slack = new SlackClient(process.env.SLACK_TOKEN!); + +ext.onReady(async (host: HostAPI) => { + // Listen for Slack messages + slack.onMessage(async (msg) => { + // Create a session per thread + const threadKey = `slack-${msg.channel}-${msg.thread_ts}`; + let sessionId = activeThreads.get(threadKey); + + if (!sessionId) { + // Create new AGH session + const result = await host.sessions.create({ + agent: 'default', + workspace: '/path/to/project', + }); + sessionId = result.session_id; + activeThreads.set(threadKey, sessionId); + } + + // Send the Slack message as a prompt + await host.sessions.prompt({ + session_id: sessionId, + message: msg.text, + }); + + // Stream events back to Slack + const events = await host.sessions.events({ + session_id: sessionId, + limit: 50, + }); + for (const ev of events) { + if (ev.type === 'message' && ev.data.role === 'assistant') { + await slack.reply(msg.channel, msg.thread_ts, ev.data.text); + } + } + }); +}); + +ext.start(); +``` + +### Pattern: Scheduled Task (extension creates sessions on a timer) + +```go +ext.OnReady(func(host *agh.HostAPI) { + // Every day at 9am, run a standup summary + scheduler.Every("0 9 * * *", func() { + result, err := host.Sessions.Create(ctx, agh.CreateParams{ + Agent: "standup-summarizer", + Prompt: "Summarize yesterday's git activity and open PRs", + }) + if err != nil { + log.Printf("standup session failed: %v", err) + return + } + log.Printf("standup session created: %s", result.SessionID) 
+ }) +}) +``` + +### Pattern: Memory Enrichment (extension reads sessions to enrich memory) + +```go +ext.OnReady(func(host *agh.HostAPI) { + // Periodically scan completed sessions and extract learnings + ticker := time.NewTicker(1 * time.Hour) + go func() { + for range ticker.C { + sessions, _ := host.Sessions.List(ctx) + for _, s := range sessions { + if s.State != "stopped" { continue } + + events, _ := host.Sessions.Events(ctx, agh.EventsQuery{ + SessionID: s.ID, + Type: "tool_call", + }) + + insights := extractInsights(events) + for i, insight := range insights { + host.Memory.Store(ctx, agh.StoreParams{ + Key: fmt.Sprintf("learning-%s-%d", s.ID, i), + Content: insight, + Tags: []string{"auto-extracted", "session-learning"}, + }) + } + } + } + }() +}) +``` + +--- + +## 8. CLI Interaction + +```bash +# List installed extensions +$ agh extension list +NAME VERSION TYPE STATE TIER CAPABILITIES +secret-guard 0.1.0 subprocess active content.validate +pgvector-memory 0.2.0 subprocess active memory.backend +otel-exporter 1.0.0 subprocess active observe.exporter +devops-pack 1.0.0 subprocess active prompt.provider + +# Install from local path +$ agh extension install ./my-extension/ +✓ Manifest parsed: my-extension v0.1.0 (subprocess) +✓ Checksum verified +✓ Resources registered: 2 skills, 1 agent, 3 hooks +✓ Extension installed + +# Install from git URL +$ agh extension install github.com/user/agh-ext-pgvector@v0.2.0 +✓ Cloned and verified +✓ Extension installed + +# Disable without uninstalling +$ agh extension disable pgvector-memory +✓ pgvector-memory disabled (subprocess stopped) + +# Re-enable +$ agh extension enable pgvector-memory +✓ pgvector-memory enabled (subprocess started, handshake OK) + +# Check extension health +$ agh extension status pgvector-memory +Name: pgvector-memory +Version: 0.2.0 +Type: subprocess +State: active +PID: 42891 +Uptime: 2h 15m +Health: healthy (last check 12s ago) +Capabilities: memory.backend +Actions: sessions/list, 
memory/store, memory/recall +Resources: 2 skills +``` + +--- + +## 9. Testing Extensions + +### Unit Testing (TypeScript) + +```typescript +import { TestHarness } from '@agh/extension-sdk/testing'; +import { describe, it, expect } from 'vitest'; + +describe('pgvector-memory', () => { + const harness = new TestHarness(); + + // Mock Host API responses + harness.mockHostAPI('sessions/list', () => [ + { id: 'sess-1', name: 'test', agent: 'claude', state: 'active' }, + ]); + + it('stores and recalls memory', async () => { + const ext = await harness.loadExtension('./dist/index.js'); + + // Simulate AGH calling memory/store + const storeResult = await harness.call('memory/store', { + key: 'test-key', + content: 'The deploy script is at /scripts/deploy.sh', + tags: ['project-knowledge'], + }); + expect(storeResult).toEqual({}); + + // Simulate AGH calling memory/recall + const recallResult = await harness.call('memory/recall', { + query: 'where is the deploy script?', + limit: 5, + }); + expect(recallResult.entries).toHaveLength(1); + expect(recallResult.entries[0].content).toContain('deploy.sh'); + }); + + it('rejects unauthorized Host API calls', async () => { + const ext = await harness.loadExtension('./dist/index.js', { + capabilities: ['memory.read'], // no memory.write + }); + + // Extension tries to call memory/store via Host API + await expect( + harness.simulateHostAPICall('memory/store', { key: 'x', content: 'y' }) + ).rejects.toThrow('capability_denied: memory.write'); + }); +}); +``` + +### Integration Testing (Go) + +```go +func TestSubprocessExtensionEndToEnd(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + // Build and install test extension + extDir := filepath.Join(dir, "test-ext") + installTestExtension(t, extDir, TestExtManifest{ + Name: "secret-guard", + Capabilities: []string{"content.validate"}, + }) + + // Create extension manager and start + registry := extension.NewRegistry(globalDB) + mgr := extension.NewManager(registry, 
extension.WithLogger(testLogger)) + require.NoError(t, mgr.Start(t.Context())) + defer mgr.Stop(t.Context()) + + // Verify extension loaded and handshake completed + info, err := mgr.Get("secret-guard") + require.NoError(t, err) + assert.Equal(t, "active", info.State) + + // Dispatch a hook via the existing hook system + payload := hooks.InputPreSubmitPayload{ + Message: "my key is sk-abc123", + } + result, err := hookDispatcher.DispatchInputPreSubmit(t.Context(), payload) + require.NoError(t, err) + + // Verify the hook blocked the message + assert.Contains(t, result.DenyReason, "secret") +} +``` + +--- + +## 10. JSON-RPC Protocol Example (Raw) + +What the bidirectional communication looks like on the wire between AGH and a subprocess extension. This matches the normative protocol spec in `_protocol.md`. + +``` +── AGH → Extension (initialize handshake) ────────────────────── +{"jsonrpc":"2.0","id":1,"method":"initialize","params":{ + "protocol_version":"1", + "supported_protocol_versions":["1"], + "agh_version":"0.5.0", + "extension":{"name":"pgvector-memory","version":"0.2.0","source_tier":"user"}, + "capabilities":{ + "provides":["memory.backend"], + "granted_actions":["sessions/list","sessions/events"], + "granted_security":["memory.read","memory.write","session.read"] + }, + "methods":{ + "daemon_requests":["execute_hook","health_check","shutdown"], + "extension_services":["memory/store","memory/recall","memory/forget"] + }, + "runtime":{ + "health_check_interval_ms":30000, + "health_check_timeout_ms":5000, + "shutdown_timeout_ms":10000, + "default_hook_timeout_ms":5000 + } +}} + +── Extension → AGH (initialize response) ────────────────────── +{"jsonrpc":"2.0","id":1,"result":{ + "protocol_version":"1", + "extension_info":{"name":"pgvector-memory","version":"0.2.0","sdk_name":"@agh/extension-sdk","sdk_version":"0.1.0"}, + "accepted_capabilities":{ + "provides":["memory.backend"], + "actions":["sessions/list","sessions/events"], + 
"security":["memory.read","memory.write","session.read"] + }, + "implemented_methods":["memory/store","memory/recall","memory/forget","health_check","shutdown"], + "supported_hook_events":[], + "supports":{"health_check":true,"provide_tools":false} +}} + +── AGH → Extension (daemon calls memory/store) ──────────────── +{"jsonrpc":"2.0","id":2,"method":"memory/store","params":{ + "key":"user-pref-timezone", + "content":"User prefers UTC-3 (São Paulo)", + "scope":"workspace", + "tags":["user-preference"] +}} + +── Extension → AGH (store response) ─────────────────────────── +{"jsonrpc":"2.0","id":2,"result":{}} + +── Extension → AGH (extension calls Host API) ───────────────── +{"jsonrpc":"2.0","id":100,"method":"sessions/list","params":{}} + +── AGH → Extension (Host API response) ──────────────────────── +{"jsonrpc":"2.0","id":100,"result":[ + {"id":"sess-abc","name":"debug-session","agent":"claude","state":"active"} +]} + +── AGH → Extension (health check) ───────────────────────────── +{"jsonrpc":"2.0","id":3,"method":"health_check","params":{}} + +── Extension → AGH (healthy) ────────────────────────────────── +{"jsonrpc":"2.0","id":3,"result":{"healthy":true,"message":"","details":{}}} + +── AGH → Extension (shutdown) ───────────────────────────────── +{"jsonrpc":"2.0","id":4,"method":"shutdown","params":{ + "reason":"daemon_shutdown", + "deadline_ms":10000 +}} + +── Extension → AGH (ack and exit) ───────────────────────────── +{"jsonrpc":"2.0","id":4,"result":{"acknowledged":true}} +``` diff --git a/.compozy/tasks/ext-architecture/_protocol.md b/.compozy/tasks/ext-architecture/_protocol.md new file mode 100644 index 000000000..6efa81e13 --- /dev/null +++ b/.compozy/tasks/ext-architecture/_protocol.md @@ -0,0 +1,846 @@ +# Extension Subprocess Protocol Specification + +## Status + +Draft + +## Date + +2026-04-10 + +## Purpose + +This document is the **normative wire-level contract** for AGH subprocess extensions. 
+It complements: + +- `_techspec.md` for architecture, package model, and Host API inventory +- `_examples.md` for illustrative flows and SDK usage +- ADR-003, ADR-004, and ADR-005 for security, lifecycle, and package-model decisions + +If another document conflicts with this file on **transport framing, lifecycle, handshake fields, error codes, or method-direction semantics**, this file wins. + +This specification applies to **persistent extension subprocesses** managed by the Extension Manager. It does **not** describe the legacy one-shot hook subprocess executor used by `internal/hooks/executor_subprocess.go`. + +--- + +## 1. Transport + +### 1.1 Base transport + +- The protocol uses **JSON-RPC 2.0** over the subprocess `stdin`/`stdout` streams. +- Messages are encoded as **UTF-8 JSON**, **one JSON object per line**. +- `stdout` is reserved for protocol frames only. +- Human-readable logs and diagnostics must go to `stderr`. +- Blank lines on `stdout` must be ignored. +- JSON-RPC batch requests are **not supported** in v1. +- Method names beginning with `rpc.` are reserved and must not be used. +- **JSON-RPC notifications** (requests without an `id` field) are **not supported** in v1. All messages must be requests or responses with an `id`. Receivers must ignore notifications silently. +- The transport is **fully multiplexed**. Both peers may have multiple outstanding requests simultaneously. Responses may arrive in any order. Peers must correlate responses by `id`. + +### 1.2 Framing rules + +- Each line must contain exactly one JSON-RPC request or response object. +- Peers must ignore unknown fields for forward compatibility. +- Per-message encoded size must not exceed **10 MiB**. Messages exceeding this limit must be rejected; the receiver should close the transport connection. + +### 1.3 Request identifiers + +- AGH will use positive integer IDs. +- Extensions may use positive integer IDs or string IDs. 
+- Fractional numeric IDs must not be used. + +### 1.4 Time encoding + +- All timestamps are serialized as RFC3339Nano UTC strings, matching Go `time.Time` JSON encoding. + +### 1.5 JSON encoding rules + +- Struct fields tagged `omitempty` are omitted when zero-valued. +- Fields represented as `json.RawMessage` on the Go side must be serialized as embedded JSON values, not quoted strings. +- Unknown object members must be ignored unless the receiving method explicitly forbids them. + +--- + +## 2. Roles and Method Directions + +AGH is the **connection initiator** because it launches the subprocess, but after initialization the transport is **bidirectional peer-to-peer JSON-RPC**. Either side may originate requests. + +### 2.1 Method families + +| Direction | Family | Canonical names | +|---|---|---| +| AGH -> Extension | Base lifecycle methods | `initialize`, `execute_hook`, `health_check`, `shutdown`, `provide_tools` | +| Extension -> AGH | Host API actions | `sessions/*`, `memory/*`, `observe/*`, `skills/*` | +| AGH -> Extension | Extension service methods | Capability-specific methods such as `memory/store`, `memory/recall`, `memory/forget` when the extension provides `memory.backend` | + +### 2.2 Naming conventions + +- Base lifecycle methods use **snake_case**. +- Host API and extension service methods use **slash-separated** RPC names. +- Hook event names use **dotted** identifiers such as `turn.start` and `tool.pre_call`. + +### 2.3 Direction disambiguates ownership + +Some method names may appear in both directions. + +Example: + +- `memory/store` sent **AGH -> Extension** means "invoke the extension's `memory.backend` implementation" +- `memory/store` sent **Extension -> AGH** means "call AGH's Host API memory store" + +This is valid because the transport is bidirectional. SDKs must expose these surfaces separately so that implementers do not confuse: + +- `host.memory.store(...)` +- `extension.handle("memory/store", ...)` + +--- + +## 3. 
Connection Lifecycle + +The connection lifecycle has five phases: + +1. **Spawn**: AGH starts the extension process and connects `stdin`/`stdout`. +2. **Initialize**: AGH sends `initialize`; the extension accepts or rejects the session contract. +3. **Ready**: Both peers may exchange operational requests. +4. **Draining**: A shutdown has been initiated; no new work should be accepted. +5. **Stopped**: The process exits and the transport closes. + +### 3.1 Pre-ready rules + +- Before `initialize` succeeds, the only valid request is `initialize`. +- Any other request before readiness must fail with `-32003 not_initialized`. +- AGH must not route hooks, Host API actions, or capability service calls before readiness. + +### 3.2 Ready transition + +There is **no separate `initialized` notification in v1**. +The connection enters **Ready** immediately after: + +1. `initialize` returns success +2. AGH verifies the selected protocol version +3. AGH verifies the returned capability/method contract is a subset of the granted contract + +### 3.3 Restart semantics + +When an extension subprocess is restarted (due to crash recovery, manual re-enable, or daemon restart): + +- A fresh subprocess is spawned. There is **no connection resumption** in v1. +- AGH must send a new `initialize` handshake from scratch. +- The extension must not assume any state from a previous session persists. +- AGH may re-register the extension's resources (hooks, skills) during the new initialization. + +### 3.4 Draining rules + +- After `shutdown` starts, the peer must stop accepting new operational requests. +- New requests during draining must fail with `-32004 shutdown_in_progress`. +- Responses for already accepted in-flight requests may still be delivered until the process exits. + +--- + +## 4. 
Initialize Handshake + +The `initialize` handshake establishes: + +- protocol version compatibility +- runtime grants derived from the manifest and source-tier policy +- the method surfaces that may be used in this session +- runtime intervals and deadlines + +### 4.1 Initialize request + +AGH must send `initialize` as the first request. + +```json +{ + "jsonrpc": "2.0", + "id": 1, + "method": "initialize", + "params": { + "protocol_version": "1", + "supported_protocol_versions": ["1"], + "agh_version": "0.5.0", + "extension": { + "name": "pgvector-memory", + "version": "0.2.0", + "source_tier": "user" + }, + "capabilities": { + "provides": ["memory.backend"], + "granted_actions": ["sessions/list", "sessions/events"], + "granted_security": ["memory.read", "memory.write", "session.read"] + }, + "methods": { + "daemon_requests": ["execute_hook", "health_check", "shutdown"], + "extension_services": ["memory/store", "memory/recall", "memory/forget"] + }, + "runtime": { + "health_check_interval_ms": 30000, + "health_check_timeout_ms": 5000, + "shutdown_timeout_ms": 10000, + "default_hook_timeout_ms": 5000 + } + } +} +``` + +### 4.2 Initialize request fields + +| Field | Type | Required | Meaning | +|---|---|---|---| +| `protocol_version` | string | yes | AGH's preferred protocol version | +| `supported_protocol_versions` | array | yes | Ordered list of versions AGH can speak | +| `agh_version` | string | yes | Daemon semver version string for diagnostics and compatibility checks (informational only) | +| `extension.name` | string | yes | Manifest name AGH loaded | +| `extension.version` | string | yes | Manifest version AGH loaded | +| `extension.source_tier` | string | yes | Source trust tier such as `bundled`, `user`, `workspace`, or `marketplace` | +| `capabilities.provides` | array | yes | Capability interfaces AGH expects this extension to provide | +| `capabilities.granted_actions` | array | yes | Host API methods this connection is authorized to call | +| 
`capabilities.granted_security` | array | yes | Security grants enforced at dispatch and Host API boundaries | +| `methods.daemon_requests` | array | yes | Base AGH -> extension methods available for this session | +| `methods.extension_services` | array | yes | Capability service methods AGH may call on the extension | +| `runtime.health_check_interval_ms` | integer | yes | Periodic probe interval | +| `runtime.health_check_timeout_ms` | integer | yes | Per-probe timeout | +| `runtime.shutdown_timeout_ms` | integer | yes | Graceful shutdown deadline before signal escalation | +| `runtime.default_hook_timeout_ms` | integer | yes | Default timeout when a hook declaration omits one | + +### 4.3 Initialize response + +The extension must answer with the selected version and the accepted session contract. + +```json +{ + "jsonrpc": "2.0", + "id": 1, + "result": { + "protocol_version": "1", + "extension_info": { + "name": "pgvector-memory", + "version": "0.2.0", + "sdk_name": "@agh/extension-sdk", + "sdk_version": "0.1.0" + }, + "accepted_capabilities": { + "provides": ["memory.backend"], + "actions": ["sessions/list", "sessions/events"], + "security": ["memory.read", "memory.write", "session.read"] + }, + "implemented_methods": ["memory/store", "memory/recall", "memory/forget", "health_check", "shutdown"], + "supported_hook_events": ["prompt.post_assemble", "turn.start", "turn.end"], + "supports": { + "health_check": true, + "provide_tools": false + } + } +} +``` + +### 4.4 Initialize response rules + +- `protocol_version` must be one of the versions AGH offered. +- `accepted_capabilities.actions` must be a subset of `capabilities.granted_actions`. +- `accepted_capabilities.security` must be a subset of `capabilities.granted_security`. +- `accepted_capabilities.provides` must be a subset of `capabilities.provides`. +- `implemented_methods` must include every method required by the accepted `provides` contract. 
+- `supported_hook_events` must not advertise events outside AGH's known hook taxonomy. +- `supports.provide_tools=false` means AGH must treat `provide_tools` as unavailable for the session. + +### 4.5 Capability negotiation semantics + +Capability negotiation happens in two stages: + +1. **Static declaration** in the manifest: + - `capabilities.provides` + - `actions.requires` + - `security.capabilities` +2. **Runtime grant** in `initialize`: + - AGH applies source-tier policy and startup validation + - AGH sends the effective grants in the request + - the extension either accepts them or rejects the session + +If the extension requires capabilities that were not granted, it must reject initialization with `-32001 capability_denied`. + +Example error: + +```json +{ + "jsonrpc": "2.0", + "id": 1, + "error": { + "code": -32001, + "message": "Capability denied", + "data": { + "missing_actions": ["sessions/events"], + "missing_security": ["memory.write"] + } + } +} +``` + +### 4.6 Generic initialization failure + +If the extension cannot initialize for application-level reasons (e.g., database unreachable, missing config), it must return `-32603 internal error` with structured `data`: + +```json +{ + "jsonrpc": "2.0", + "id": 1, + "error": { + "code": -32603, + "message": "Internal error", + "data": { + "reason": "database_unreachable", + "detail": "Failed to connect to pgvector at localhost:5432" + } + } +} +``` + +### 4.7 Version mismatch + +Unsupported protocol versions must use standard JSON-RPC `-32602 invalid params`, following the same pattern MCP uses for initialization failures. + +Example: + +```json +{ + "jsonrpc": "2.0", + "id": 1, + "error": { + "code": -32602, + "message": "Invalid params", + "data": { + "reason": "unsupported_protocol_version", + "requested": "2", + "supported_protocol_versions": ["1"] + } + } +} +``` + +--- + +## 5. 
Operational Requests + +### 5.1 Base methods + +The following AGH -> extension methods are part of the base protocol in v1: + +| Method | Required | Purpose | +|---|---|---| +| `execute_hook` | yes | Dispatch one hook invocation with a typed payload | +| `health_check` | yes | Probe liveness/readiness of the running extension | +| `shutdown` | yes | Begin graceful drain and exit | +| `provide_tools` | optional | Request tool definitions when negotiated | + +### 5.2 Host API methods + +The canonical Host API method inventory (Extension -> AGH): + +| Method | Capability | +|---|---| +| `sessions/list` | `session.read` | +| `sessions/create` | `session.write` | +| `sessions/prompt` | `session.write` | +| `sessions/stop` | `session.write` | +| `sessions/status` | `session.read` | +| `sessions/events` | `session.read` | +| `memory/recall` | `memory.read` | +| `memory/store` | `memory.write` | +| `memory/forget` | `memory.write` | +| `observe/health` | `observe.read` | +| `observe/events` | `observe.read` | +| `skills/list` | `skills.read` | + +See `_techspec.md` Host API section for parameter and result schemas. + +This protocol file adds the normative rules around: + +- authorization: every call checked against `granted_actions` (method-level) AND `granted_security` (family-level). Both must be satisfied. `granted_actions` is the fine-grained allowlist; `granted_security` is the coarse-grained family gate. +- error codes: unauthorized calls return `-32001 capability_denied` +- timeout behavior: Host API calls use the daemon's default request timeout +- rate limiting: per-extension rate limits return `-32002 rate_limited` +- startup gating: calls before `initialize` return `-32003 not_initialized` +- shutdown gating: calls during drain return `-32004 shutdown_in_progress` + +### 5.3 Capability service methods + +Capability service methods are AGH -> extension requests enabled by `capabilities.provides`. 
+In v1, the only normatively grounded service surface is the memory backend family shown in `_techspec.md` and `_examples.md`: + +- `memory/store` +- `memory/recall` +- `memory/forget` + +The wire framing, timeouts, and error rules for those calls are identical to any other operational JSON-RPC request. + +### 5.4 `provide_tools` (optional) + +When an extension declares `supports.provide_tools: true` during initialization, AGH may call `provide_tools` to request tool definitions. + +**Request:** +```json +{"jsonrpc":"2.0","id":10,"method":"provide_tools","params":{}} +``` + +**Response:** +```json +{"jsonrpc":"2.0","id":10,"result":{ + "tools":[ + { + "name":"pgvector_search", + "description":"Semantic search over stored memories", + "input_schema":{"type":"object","properties":{"query":{"type":"string"}},"required":["query"]}, + "read_only":true + } + ] +}} +``` + +Tool definitions follow the `Tool` struct defined in `_techspec.md`. AGH may cache the result and re-request periodically or after extension restart. + +--- + +## 6. Hook Dispatch: `execute_hook` + +`execute_hook` is the canonical AGH -> extension hook invocation method. + +### 6.1 Request shape + +```json +{ + "jsonrpc": "2.0", + "id": 42, + "method": "execute_hook", + "params": { + "invocation_id": "hook-01JRFV8A2M0N6H7R6P6D7M0E7F", + "hook": { + "name": "workspace-context", + "event": "prompt.post_assemble", + "mode": "sync", + "required": false, + "timeout_ms": 5000, + "source": "extension", + "metadata": { + "extension_name": "prompt-enhancer" + } + }, + "payload": { + "event": "prompt.post_assemble", + "timestamp": "2026-04-10T14:03:00.123456Z", + "session_id": "sess_123", + "turn_id": "turn_456", + "prompt": "Explain the current failing test.", + "context_blocks": [] + } + } +} +``` + +### 6.2 Request fields + +| Field | Type | Required | Meaning | +|---|---|---|---| +| `invocation_id` | string | yes | Opaque identifier for one hook invocation. 
Extensions must not parse or rely on its internal structure. | +| `hook.name` | string | yes | Resolved hook declaration name | +| `hook.event` | string | yes | Canonical hook event name | +| `hook.mode` | `sync` or `async` | yes | Dispatch mode selected by AGH | +| `hook.required` | boolean | yes | Whether a failure blocks the pipeline | +| `hook.timeout_ms` | integer | yes | Effective timeout AGH will enforce for this invocation | +| `hook.source` | string | yes | Human-readable source label for telemetry and diagnostics | +| `hook.metadata` | map<string, string> | no | Optional extension-specific key-value metadata copied from declaration/runtime. Values are always strings. | +| `payload` | object | yes | Event-specific payload object | + +### 6.3 Response shape + +```json +{ + "jsonrpc": "2.0", + "id": 42, + "result": { + "patch": { + "prompt": "Explain the current failing test. Also mention the workspace README." + } + } +} +``` + +### 6.4 Response rules + +- A successful response must return an object. +- `{}` means **no-op**. +- `{"patch": {}}` also means **no-op**. +- `patch` must match the event's patch schema. +- If the extension has nothing to change, it should prefer `{}`. + +### 6.5 Hook payload serialization + +AGH serializes hook payloads exactly according to the JSON field names already defined by the runtime hook types. 
+ +#### Event matrix + +| Event(s) | Payload schema | Patch schema | Sync eligible | Mutable | +|---|---|---|---|---| +| `session.pre_create` | `SessionPreCreatePayload` | `SessionCreatePatch` | yes | yes | +| `session.post_create`, `session.pre_resume`, `session.post_resume`, `session.pre_stop`, `session.post_stop` | `SessionLifecyclePayload` | `SessionCreatePatch` | yes | observe-only | +| `input.pre_submit` | `InputPreSubmitPayload` | `InputPreSubmitPatch` | yes | yes | +| `prompt.post_assemble` | `PromptPayload` | `PromptPatch` | yes | yes | +| `event.pre_record`, `event.post_record` | `EventRecordPayload` | `EventRecordPatch` | no | observe-only | +| `agent.pre_start` | `AgentPreStartPayload` | `AgentStartPatch` | yes | yes | +| `agent.spawned`, `agent.crashed`, `agent.stopped` | `AgentLifecyclePayload` | `AgentLifecyclePatch` | yes | observe-only | +| `turn.start`, `turn.end` | `TurnPayload` | `TurnPatch` | yes | observe-only | +| `message.start` | `MessagePayload` | `MessagePatch` | yes | yes | +| `message.delta` | `MessagePayload` | `MessagePatch` | no | observe-only | +| `message.end` | `MessagePayload` | `MessagePatch` | yes | yes | +| `tool.pre_call` | `ToolPreCallPayload` | `ToolCallPatch` | yes | yes | +| `tool.post_call` | `ToolPostCallPayload` | `ToolResultPatch` | yes | yes | +| `tool.post_error` | `ToolPostErrorPayload` | `ToolResultPatch` | yes | yes | +| `permission.request` | `PermissionRequestPayload` | `PermissionRequestPatch` | yes | yes | +| `permission.resolved` | `PermissionResolutionPayload` | `PermissionResolvedPatch` | no | observe-only | +| `permission.denied` | `PermissionResolutionPayload` | `PermissionDeniedPatch` | no | observe-only | +| `context.pre_compact`, `context.post_compact` | `ContextCompactPayload` | `ContextCompactionPatch` | yes | yes | + +**Mutable** = returned patches are applied to the live pipeline payload. **Observe-only** = patches are accepted but only recorded for telemetry; they do not mutate the pipeline. 
Extensions returning patches for observe-only events should expect no visible effect. + +### 6.6 Sync versus async semantics + +- **Sync hooks** are on the request's critical path. AGH waits for the JSON-RPC response and may apply the returned patch. +- **Async hooks** are out of band. AGH may dispatch them only after the sync phase has succeeded. +- Returned patches from **async hooks are ignored for live mutation in v1**. AGH may retain them for telemetry/debug only. +- Async hook failures must not block the originating runtime event. +- If AGH cannot enqueue an async hook locally because of backpressure, the invocation is dropped locally and no `execute_hook` request is sent. + +### 6.7 Deny semantics + +Patch types that embed a `deny` / `deny_reason` surface may block an operation only when: + +- the invocation is `sync` +- the event is semantically blockable + +Invalid deny attempts are treated as patch rejection by AGH. + +### 6.8 Failure semantics + +- If a **required sync hook** returns a JSON-RPC error or times out, AGH must fail the pipeline. +- If a **non-required sync hook** fails, AGH records the failure and continues. +- If a patch is structurally valid JSON but semantically invalid for that event, AGH marks it as `rejected`. +- Patch rejection is a daemon-side semantic outcome, not a second JSON-RPC error response. + +--- + +## 7. Health Protocol: `health_check` + +`health_check` is the AGH-specific liveness/readiness probe used for persistent extension subprocesses. v1 keeps this method instead of adopting MCP `ping` because AGH needs structured health state, not only round-trip reachability. 
+ +### 7.1 Request + +```json +{ + "jsonrpc": "2.0", + "id": 90, + "method": "health_check", + "params": {} +} +``` + +### 7.2 Response + +```json +{ + "jsonrpc": "2.0", + "id": 90, + "result": { + "healthy": true, + "message": "", + "details": { + "active_requests": 0, + "queue_depth": 0 + } + } +} +``` + +### 7.3 Response fields + +| Field | Type | Required | Meaning | +|---|---|---|---| +| `healthy` | boolean | yes | Whether the extension considers itself ready to serve requests | +| `message` | string | no | Human-readable summary for diagnostics | +| `details` | object | no | Optional structured metrics such as queue depth or active requests | + +### 7.4 Probe policy + +- Default interval is the manifest's `health_check_interval`, or **30s** if omitted. +- Default timeout is **5s**. +- A transport timeout, disconnect, or JSON-RPC error counts as a failed probe. +- `healthy: false` counts as a failed probe and includes the extension's self-reported reason. + +### 7.5 Unhealthy threshold + +AGH marks the extension **unhealthy** when either condition occurs: + +1. one successful response explicitly returns `healthy: false` +2. two consecutive probes fail because of timeout, disconnect, or JSON-RPC error + +When an extension becomes unhealthy, AGH must: + +1. stop routing new requests to it +2. log the failure +3. begin shutdown/restart recovery as defined by the Extension Manager + +--- + +## 8. Graceful Shutdown: `shutdown` + +`shutdown` is AGH's cooperative drain request. It exists in addition to OS signals. + +### 8.1 Request + +```json +{ + "jsonrpc": "2.0", + "id": 99, + "method": "shutdown", + "params": { + "reason": "daemon_shutdown", + "deadline_ms": 10000 + } +} +``` + +### 8.2 Response + +```json +{ + "jsonrpc": "2.0", + "id": 99, + "result": { + "acknowledged": true + } +} +``` + +### 8.3 Shutdown rules + +- The extension must answer `shutdown` promptly. +- After answering, it must stop accepting new operational requests. 
+- It may complete in-flight work until `deadline_ms` expires. +- After the `shutdown` response is received, AGH should close the extension's `stdin` to signal that no more requests will arrive. +- It should then close its protocol streams and exit cleanly with status `0`. + +### 8.4 Signal escalation + +If the process does not exit after the cooperative shutdown deadline: + +1. AGH ensures the extension's `stdin` is closed. +2. AGH sends `SIGTERM` to the managed process group on Unix, or the platform-equivalent process termination on Windows. +3. AGH waits a short post-signal grace period. +4. If the process is still alive, AGH sends `SIGKILL` on Unix, or the platform-equivalent forced termination on Windows. + +### 8.5 Default timing + +- Default graceful shutdown deadline is the manifest's `shutdown_timeout`, or **10s** if omitted. +- The post-`SIGTERM` grace period is implementation-defined but should be short and bounded. + +--- + +## 9. Error Model + +The protocol uses JSON-RPC 2.0 error objects. 
+ +### 9.1 Standard JSON-RPC errors + +| Code | Message | Use | +|---|---|---| +| `-32700` | `Parse error` | Invalid JSON on the wire | +| `-32600` | `Invalid request` | Invalid JSON-RPC envelope | +| `-32601` | `Method not found` | The receiving peer does not implement the method | +| `-32602` | `Invalid params` | Invalid method parameters, including unsupported protocol version during `initialize` | +| `-32603` | `Internal error` | Unhandled receiver-side failure | + +### 9.2 AGH-defined server errors + +| Code | Message | Use | +|---|---|---| +| `-32001` | `Capability denied` | Method/event/security grant not authorized for this session | +| `-32002` | `Rate limited` | Local backpressure or explicit per-extension rate limit | +| `-32003` | `Not initialized` | Request arrived before successful `initialize` | +| `-32004` | `Shutdown in progress` | Receiver is draining and will not accept new work | + +### 9.3 `Method not found` versus `Capability denied` + +Use `-32601 method not found` when: + +- the receiver does not recognize the method string at all +- the method is optional and was never implemented on that peer + +Use `-32001 capability denied` when: + +- the method exists, but the caller was not granted that action +- the hook/event family exists, but was not negotiated for this session +- the source-tier policy removed the grant even though the manifest requested it + +### 9.4 Error data + +Errors should include structured `data` when helpful. 
+ +#### Capability denied + +```json +{ + "code": -32001, + "message": "Capability denied", + "data": { + "method": "sessions/create", + "required": ["session.write"], + "granted": ["session.read"] + } +} +``` + +#### Rate limited + +```json +{ + "code": -32002, + "message": "Rate limited", + "data": { + "scope": "host_api.sessions/create", + "retry_after_ms": 1000, + "limit": 10, + "burst": 20 + } +} +``` + +#### Not initialized + +```json +{ + "code": -32003, + "message": "Not initialized", + "data": { + "allowed_methods": ["initialize"] + } +} +``` + +#### Shutdown in progress + +```json +{ + "code": -32004, + "message": "Shutdown in progress", + "data": { + "deadline_ms": 10000 + } +} +``` + +### 9.5 Transport failures versus JSON-RPC errors + +The following are **transport failures**, not JSON-RPC error responses: + +- peer disconnects before a response arrives +- probe/request timeouts +- OS-level process termination + +Callers must treat these as failed requests and apply the lifecycle/recovery rules from this specification. + +--- + +## 10. Rate Limiting and Backpressure + +AGH may protect Host API surfaces with per-extension rate limits. + +### 10.1 Receiver behavior + +- When a peer is willing to reject and retry later, it should return `-32002 rate_limited`. +- `data.retry_after_ms` should be present whenever the receiver can estimate a retry delay. + +### 10.2 Caller behavior + +- Callers should not immediately retry a `rate_limited` request. +- SDKs should expose `retry_after_ms` to extension authors. + +### 10.3 Async hook backpressure + +AGH's internal async hook queue is local implementation detail, but v1 defines the observable contract: + +- queue saturation before wire send results in a **local drop** +- a local drop does not generate a JSON-RPC request +- a local drop should be recorded as hook outcome `dropped` + +--- + +## 11. Protocol Versioning + +### 11.1 Version token + +- v1 uses the exact string `"1"`. 
+- Protocol versions are exact-match string tokens, not numeric comparisons. + +### 11.2 Negotiation + +- AGH sends its preferred version in `protocol_version`. +- AGH also sends all supported versions in `supported_protocol_versions`. +- The extension must either: + - return a supported `protocol_version` in the response + - or reject initialization with `-32602 invalid params` and include `supported_protocol_versions` + +### 11.3 Forward compatibility + +Within the same protocol version: + +- receivers must ignore unknown fields +- optional fields may be added +- new optional methods may be added if they are negotiated explicitly during initialization + +A new protocol version is required when: + +- a required field is removed or renamed +- method semantics change incompatibly +- an existing success/error contract changes incompatibly + +### 11.4 AGH version versus protocol version + +`agh_version` and `protocol_version` are separate: + +- `agh_version` identifies the daemon build +- `protocol_version` identifies the subprocess wire contract + +Extensions must not infer protocol compatibility from `agh_version` alone. + +--- + +## 12. 
Conformance Rules + +An extension is v1-conformant only if it satisfies all of the following: + +- speaks JSON-RPC 2.0 over line-delimited UTF-8 JSON on `stdin`/`stdout` +- emits protocol frames only on `stdout` +- implements `initialize`, `health_check`, and `shutdown` +- implements `execute_hook` if it accepts hook execution +- honors negotiated action/security grants +- returns standard JSON-RPC errors for envelope/params failures +- returns AGH custom errors for capability/rate-limit/lifecycle gating failures +- exits cooperatively after `shutdown`, or tolerates signal escalation + +AGH is v1-conformant only if it satisfies all of the following: + +- sends `initialize` first +- never routes operational requests before readiness +- enforces granted actions/security at the Host API boundary +- enforces hook dispatch deadlines +- marks extensions unhealthy according to this spec +- performs cooperative shutdown before signal escalation + +--- + +## 13. Notes for Follow-up Docs + +This file resolves several ambiguities that should later be aligned back into `_techspec.md` and `_examples.md`: + +- persistent extension subprocesses are distinct from the one-shot hook subprocess executor +- method direction determines ownership when names overlap, especially `memory/*` +- async `execute_hook` responses do not mutate live runtime payloads in v1 +- unsupported protocol versions use `-32602 invalid params`, not a custom code diff --git a/.compozy/tasks/ext-architecture/_tasks.md b/.compozy/tasks/ext-architecture/_tasks.md new file mode 100644 index 000000000..a98c47659 --- /dev/null +++ b/.compozy/tasks/ext-architecture/_tasks.md @@ -0,0 +1,17 @@ +# Extension Architecture — Task List + +## Tasks + +| # | Title | Status | Complexity | Dependencies | +|---|-------|--------|------------|--------------| +| 01 | Minimal Tool struct and ToolProvider interface | pending | low | — | +| 02 | Shared subprocess lifecycle package | pending | high | — | +| 03 | Extension manifest 
parser (TOML and JSON) | pending | medium | — | +| 04 | Capability checker and source-trust tiers | pending | medium | task_03 | +| 05 | Extension registry (SQLite) | pending | medium | task_03, task_04 | +| 06 | Extension Manager (lifecycle orchestrator) | pending | high | task_02, task_04, task_05 | +| 07 | Host API handler (bidirectional JSON-RPC) | pending | high | task_04, task_06 | +| 08 | Daemon boot integration | pending | medium | task_06 | +| 09 | CLI commands (list, install, enable, disable) | pending | medium | task_05, task_06 | +| 10 | TypeScript SDK (@agh/extension-sdk) | pending | high | task_06, task_07 | +| 11 | Reference extensions (Go and TypeScript) | pending | medium | task_06, task_07, task_10 | diff --git a/.compozy/tasks/ext-architecture/_techspec.md b/.compozy/tasks/ext-architecture/_techspec.md new file mode 100644 index 000000000..de539b4f4 --- /dev/null +++ b/.compozy/tasks/ext-architecture/_techspec.md @@ -0,0 +1,612 @@ +# TechSpec: Extension Architecture + +## Executive Summary + +AGH's extension architecture enables third-party developers to extend the daemon's capabilities through a **two-tier execution model**: Go-native interfaces (L1) for first-party compiled-in code, and JSON-RPC subprocess (L3) for all third-party extensions in **Go or TypeScript**. A WASM tier (L2) is designed as a future seam but deferred until hook latency or sandbox requirements justify it. + +Extensions are modeled as **three-dimensional packages** that bundle **resources** (agents, skills, hooks, MCP configs), provide **capabilities** (agent drivers, memory backends, observe exporters), and perform **actions** via a bidirectional Host API (create sessions, manage memory, query events). + +The architecture builds on AGH's existing infrastructure: the 27-event hook system with typed dispatch provides the extension dispatch layer, and the ACP subprocess pattern provides the L3 prototype. The primary technical trade-off is **power vs. 
security surface** — a rich bidirectional Host API enables extensions to drive complex workflows (channel adapters, scheduled tasks, memory enrichment) but requires capability-scoped security at the Host API boundary to prevent extensions from exceeding their declared privileges. + +Key adjustments from council debate: capability-scoped security at the Host API boundary (not just process isolation), a minimal `Tool` struct to ground the existing hook tool dispatch, and daemon-context failure recovery for headless extension execution. + +--- + +## System Architecture + +### Component Overview + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ AGH Daemon (Go) │ +│ │ +│ ┌──────────────────┐ ┌──────────────────┐ ┌───────────────┐ │ +│ │ Extension │ │ Hook System │ │ Session │ │ +│ │ Manager │ │ (existing) │ │ Manager │ │ +│ │ │ │ │ │ (existing) │ │ +│ │ - Registry │ │ - 27 events │ │ │ │ +│ │ - Manifest load │ │ - Typed dispatch│ │ - AgentDriver│ │ +│ │ - Lifecycle mgmt │ │ - Executors: │ │ - Lifecycle │ │ +│ │ - Capability │ │ native, │ │ - Events │ │ +│ │ enforcement │ │ subprocess, │ │ │ │ +│ │ - Host API │ │ wasm (stub) │ │ │ │ +│ └────────┬─────────┘ └────────┬─────────┘ └──────┬──────┘ │ +│ │ │ │ │ +│ ┌────────┴──────────────────────┴─────────────────────┴──────┐ │ +│ │ Extension Tiers │ │ +│ │ │ │ +│ │ ┌─────────────┐ ┌─────────────────┐ ┌────────────────┐ │ │ +│ │ │ L1: Go │ │ L2: WASM │ │ L3: Subprocess │ │ │ +│ │ │ Native │ │ (future seam) │ │ (JSON-RPC) │ │ │ +│ │ │ │ │ │ │ │ │ │ +│ │ │ Compiled-in │ │ Stub exists. │ │ Out-of-process │ │ │ +│ │ │ interfaces │ │ Implement when │ │ bidirectional │ │ │ +│ │ │ [EXISTS] │ │ needed. 
│ │ Host API │ │ │ +│ │ └─────────────┘ └─────────────────┘ └────────────────┘ │ │ +│ └─────────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Host API (bidirectional) │ │ +│ │ sessions/* │ memory/* │ skills/* │ observe/* │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Data Flow + +1. **Install**: `agh extension install ` → parse manifest → validate capabilities → copy resources → register in extension registry +2. **Boot**: Daemon starts → Extension Manager loads enabled extensions → launch subprocesses → capability negotiation handshake +3. **Runtime**: Hook dispatched → Extension Manager routes to subprocess executor → capability check → execute → return result +4. **Host API call**: Extension sends JSON-RPC request → capability check → execute on daemon → return result +5. **Shutdown**: Daemon stops → Extension Manager sends shutdown to all subprocesses → wait with timeout → SIGKILL stragglers + +--- + +## Implementation Design + +### Core Interfaces + +**Extension Manager** — the orchestrator that lives in `internal/extension/`: + +```go +// internal/extension/manager.go +type Manager struct { + mu sync.RWMutex + registry *Registry + subprocesses map[string]*subprocess.Process + capChecker *CapabilityChecker + hostAPI *HostAPIHandler + logger *slog.Logger +} + +func NewManager(registry *Registry, opts ...Option) *Manager +func (m *Manager) Start(ctx context.Context) error +func (m *Manager) Stop(ctx context.Context) error +func (m *Manager) Get(name string) (*Extension, error) +func (m *Manager) List() []ExtensionInfo +``` + +**Extension manifest** — parsed from `extension.toml` or `extension.json`: + +```go +// internal/extension/manifest.go +type Manifest struct { + Name string `toml:"name"` + Version string `toml:"version"` + Description string `toml:"description"` + 
MinAGH string `toml:"min_agh_version"` + Resources ResourcesConfig `toml:"resources"` + Capabilities CapabilitiesConfig `toml:"capabilities"` + Actions ActionsConfig `toml:"actions"` + Subprocess SubprocessConfig `toml:"subprocess"` + Security SecurityConfig `toml:"security"` +} +``` + +**Subprocess extension** — generalized from ACP: + +```go +// internal/subprocess/process.go +type Process struct { + cmd *exec.Cmd + rpc *jsonrpc2.Conn + caps NegotiatedCapabilities + health HealthState + logger *slog.Logger +} + +func Launch(ctx context.Context, cfg LaunchConfig) (*Process, error) +func (p *Process) Call(ctx context.Context, method string, + params, result any) error +func (p *Process) Shutdown(ctx context.Context) error +``` + +**Capability Checker** — enforces ADR-003: + +```go +// internal/extension/capability.go +type CapabilityChecker struct { + grants map[string]CapabilityGrant + tiers map[ExtensionSource][]string +} + +func (c *CapabilityChecker) Check(extName, capability string) error +func (c *CapabilityChecker) CheckHostAPI(extName, method string) error +``` + +**Minimal Tool struct** — grounds the hook tool dispatch: + +```go +// internal/tools/tool.go +type Tool struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema json.RawMessage `json:"input_schema"` + ReadOnly bool `json:"read_only"` + Source ToolSource `json:"source"` +} + +type ToolProvider interface { + Tools(ctx context.Context) ([]Tool, error) +} +``` + +### Data Models + +**Extension Registry** — persisted in global DB: + +| Field | Type | Description | +|---|---|---| +| `name` | TEXT PK | Extension unique identifier | +| `version` | TEXT | Semver version | +| `source` | TEXT | "bundled" \| "user" \| "workspace" \| "marketplace" | +| `enabled` | BOOLEAN | Whether extension is active | +| `manifest_path` | TEXT | Path to manifest file | +| `installed_at` | TIMESTAMP | Installation time | +| `capabilities` | TEXT (JSON) | Declared capabilities | +| 
`actions` | TEXT (JSON) | Declared Host API actions | +| `checksum` | TEXT | SHA-256 of extension artifact | + +**Extension Manifest** (TOML example): + +```toml +[extension] +name = "pgvector-memory" +version = "0.2.1" +description = "PostgreSQL pgvector memory backend for AGH" +min_agh_version = "0.5.0" + +[resources] +skills = ["skills/"] +agents = ["agents/"] +hooks = [] +mcp_servers = [] + +[capabilities] +provides = ["memory.backend"] + +[actions] +requires = [ + "sessions/list", + "sessions/events", + "memory/store", + "memory/recall", +] + +[subprocess] +command = "agh-ext-pgvector" +args = ["--config", "{{config_dir}}/pgvector.toml"] +health_check_interval = "30s" +shutdown_timeout = "10s" + +[subprocess.env] +PGVECTOR_URL = "{{env:PGVECTOR_URL}}" + +[security] +capabilities = [ + "memory.read", + "memory.write", + "session.read", +] +``` + +**Extension Manifest** (JSON alternative): + +```json +{ + "extension": { + "name": "pgvector-memory", + "version": "0.2.1", + "description": "PostgreSQL pgvector memory backend for AGH", + "min_agh_version": "0.5.0" + }, + "resources": { + "skills": ["skills/"], + "agents": ["agents/"] + }, + "capabilities": { + "provides": ["memory.backend"] + }, + "actions": { + "requires": ["sessions/list", "sessions/events", "memory/store", "memory/recall"] + }, + "subprocess": { + "command": "agh-ext-pgvector", + "args": ["--config", "{{config_dir}}/pgvector.toml"] + }, + "security": { + "capabilities": ["memory.read", "memory.write", "session.read"] + } +} +``` + +### Host API (Subprocess Extensions ↔ AGH) + +Bidirectional JSON-RPC 2.0 over stdio. 
Extensions call these methods on the daemon: + +**Session Methods:** + +| Method | Params | Result | Capability | +|---|---|---|---| +| `sessions/list` | `{workspace?: string}` | `[{id, name, agent, state, created_at}]` | `session.read` | +| `sessions/create` | `{agent, prompt?, workspace?}` | `{session_id}` | `session.write` | +| `sessions/prompt` | `{session_id, message}` | `{turn_id}` | `session.write` | +| `sessions/stop` | `{session_id}` | `{}` | `session.write` | +| `sessions/status` | `{session_id}` | `{state, agent, started_at, ...}` | `session.read` | +| `sessions/events` | `{session_id, limit?, offset?}` | `[{type, timestamp, data}]` | `session.read` | + +**Memory Methods:** + +| Method | Params | Result | Capability | +|---|---|---|---| +| `memory/recall` | `{query, scope?, limit?}` | `[{key, content, score}]` | `memory.read` | +| `memory/store` | `{key, content, scope?, tags?}` | `{}` | `memory.write` | +| `memory/forget` | `{key, scope?}` | `{}` | `memory.write` | + +**Observe Methods:** + +| Method | Params | Result | Capability | +|---|---|---|---| +| `observe/health` | `{}` | `{uptime, sessions, extensions, ...}` | `observe.read` | +| `observe/events` | `{session_id?, type?, since?, limit?}` | `[{type, timestamp, data}]` | `observe.read` | + +**Skills Methods:** + +| Method | Params | Result | Capability | +|---|---|---|---| +| `skills/list` | `{workspace?}` | `[{name, description, source}]` | `skills.read` | + +**AGH → Extension Methods:** + +| Method | Description | +|---|---| +| `initialize` | Capability negotiation handshake | +| `execute_hook` | Dispatch a hook event to the extension | +| `provide_tools` | Request tool definitions from extension | +| `health_check` | Liveness probe | +| `shutdown` | Graceful shutdown request | + +### Extension Loading Pipeline + +Six-phase pipeline (inspired by OpenClaw, validated across 5/6 harnesses): + +``` +1. DISCOVER → Scan extension directories, find manifests +2. 
PARSE → Read extension.toml/json, validate schema (no code execution) +3. VALIDATE → Check version compatibility, verify checksums, validate capabilities +4. REGISTER → Copy resources (skills, agents, hooks) into AGH registries +5. INITIALIZE → Launch subprocesses, perform handshake, negotiate capabilities +6. ACTIVATE → Extension is live, hooks dispatch to it, Host API available +``` + +Each phase can fail independently with clear error messages. + +--- + +## Integration Points + +### Daemon Composition Root (`internal/daemon/boot.go`) + +The Extension Manager is initialized between the hooks system and the servers: + +``` +Phase 4: Skills Registry (existing) +Phase 5: Global Registry (existing) +Phase 8: Session Manager (existing) +Phase 9: Hooks System (existing) + ↓ +Phase 9.5: Extension Manager (NEW) + - Load extension registry from global DB + - Discover installed extensions + - Parse manifests + - Register resources (skills, agents, hooks into existing registries) + - Launch subprocess extensions + - Capability negotiation + - Inject extension-provided hook declarations into hooks.Rebuild() + ↓ +Phase 10: Skills Watcher (existing) +Phase 11: Servers (existing) +``` + +### Hook System Integration + +Extensions provide hook declarations through a new `DeclarationProvider`: + +```go +// internal/daemon/hooks_bridge.go (extend existing) +func extensionDeclarationProvider(extMgr *extension.Manager) hooks.DeclarationProvider { + return func(ctx context.Context) ([]hooks.HookDecl, error) { + return extMgr.HookDeclarations(ctx) + } +} +``` + +Extension subprocess hooks use the existing `SubprocessExecutor` — no new executor kind needed. The extension manifest declares hooks with `executor.kind = "subprocess"` pointing to the extension binary. 
+ +### ACP Integration + +Extract shared subprocess lifecycle from `internal/acp/client.go` into `internal/subprocess/`: + +``` +internal/subprocess/ + process.go — Launch, Call, Shutdown, health monitoring + transport.go — JSON-RPC framing over stdin/stdout + handshake.go — Initialize/capability negotiation + signals.go — Graceful shutdown with signal escalation + +internal/acp/ + client.go — ACP-specific methods (imports internal/subprocess) + handlers.go — ACP inbound handlers (existing) + +internal/extension/ + subprocess.go — Extension-specific methods (imports internal/subprocess) + host_api.go — Host API handler (inbound from extensions) +``` + +--- + +## Impact Analysis + +| Component | Impact Type | Description and Risk | Required Action | +|---|---|---|---| +| `internal/acp/client.go` | Modified | Extract subprocess lifecycle into shared package | Medium risk — refactor existing working code | +| `internal/daemon/boot.go` | Modified | Add Extension Manager initialization phase | Low risk — additive phase in boot sequence | +| `internal/daemon/hooks_bridge.go` | Modified | Add extension declaration provider | Low risk — extends existing patterns | +| `internal/extension/` | New | Extension Manager, Registry, manifest loading, capability enforcement, Host API | New package — core of this techspec | +| `internal/subprocess/` | New | Shared subprocess lifecycle extracted from ACP | New package — refactor, not new functionality | +| `internal/tools/` | New | Minimal Tool struct and ToolProvider interface | New package — ~200 LOC | +| `internal/store/globaldb/` | Modified | Add extension registry table | Low risk — new table, no changes to existing tables | +| `internal/cli/` | Modified | Add `agh extension list/install/enable/disable` commands | Additive CLI commands | + +AGH's ACP layer uses `coder/acp-go-sdk` for JSON-RPC framing (ACP-specific). 
The extension protocol needs its own JSON-RPC framing — evaluate `sourcegraph/jsonrpc2` or a lightweight custom implementation. Only subprocess lifecycle (spawn, signals, health) is extractable from ACP. + +--- + +## TypeScript SDK + +### Package Structure + +``` +@agh/extension-sdk/ + src/ + extension.ts — Main Extension class + transport.ts — StdioTransport (JSON-RPC over stdin/stdout) + host-api.ts — Typed Host API client (sessions, memory, etc.) + types.ts — TypeScript types matching AGH contracts + capabilities.ts — Capability declaration helpers + contracts/ + memory-backend.ts — Type definitions for memory.backend + agent-driver.ts — Type definitions for agent.driver + observe-exporter.ts — Type definitions for observe.exporter + session-hooks.ts — Type definitions for hook payloads + testing/ + mock-transport.ts — In-memory transport for unit tests + harness.ts — Test harness simulating AGH host + bin/ + create-extension.ts — CLI scaffolding: `npx @agh/create-extension` + templates/ + hook-subprocess/ — Starter template for hook extension + memory-backend/ — Starter template for memory backend +``` + +### Core API + +```typescript +import { Extension, HostAPI } from '@agh/extension-sdk'; + +const ext = new Extension({ + name: 'my-memory-backend', + version: '0.1.0', + capabilities: { provides: ['memory.backend'] }, + actions: { requires: ['sessions/list'] }, +}); + +// Handle daemon → extension calls +ext.handle('memory/store', async (ctx, params: StoreParams) => { + await db.insert(params.key, params.content); + return { success: true }; +}); + +ext.handle('memory/recall', async (ctx, params: RecallParams) => { + const results = await db.search(params.query, params.limit); + return { entries: results }; +}); + +// Call Host API (extension → daemon) +ext.onReady(async (host: HostAPI) => { + const sessions = await host.sessions.list(); + console.error(`Connected. 
${sessions.length} active sessions.`); +}); + +ext.start(); // Reads stdin, writes stdout +``` + +### Test Harness + +```typescript +import { TestHarness } from '@agh/extension-sdk/testing'; + +const harness = new TestHarness(); +harness.mockHostAPI('sessions/list', () => [ + { id: 'sess-1', name: 'test', agent: 'claude', state: 'active' }, +]); + +const ext = harness.loadExtension('./my-extension'); +const result = await harness.call('memory/store', { + key: 'test', content: 'hello', +}); +expect(result.success).toBe(true); +``` + +--- + +## Testing Approach + +### Unit Tests + +- **Extension Manager**: Mock subprocess launcher. Test lifecycle (start, stop, restart). Test capability enforcement (authorized vs unauthorized calls). +- **Manifest Parser**: Table-driven tests for TOML and JSON manifests. Test validation (missing fields, invalid versions, unknown capabilities). Test both formats produce identical `Manifest` structs. +- **Capability Checker**: Test all source-trust tier combinations. Test wildcard grants. Test unauthorized access returns typed errors. +- **Host API Handler**: Test each method with authorized and unauthorized callers. Test parameter validation. Test error responses. +- **Tool struct**: Test `ToolProvider` interface. Test tool serialization matches hook `ToolCallRef` payloads. +- **Subprocess lifecycle**: Test launch, handshake, health check, graceful shutdown, crash recovery. + +### Integration Tests + +- **End-to-end subprocess extension**: Install a test subprocess extension → daemon boots → handshake completes → extension calls Host API → verify results. +- **Extension lifecycle**: Install → enable → daemon restart → verify extension reloads → disable → verify extension stops → uninstall → verify cleanup. +- **Capability enforcement**: Install extension with limited capabilities → attempt unauthorized Host API call → verify rejection with typed error. 
+- **Resource registration**: Install extension with skills and agents → verify they appear in skills registry and agent definitions. +- **Host API bidirectional**: Extension creates session via Host API → session runs → extension reads events back. + +--- + +## Development Sequencing + +### Build Order + +1. **`internal/tools/` — Minimal Tool struct + ToolProvider** — no dependencies. ~200 LOC. Grounds the hook tool dispatch that already exists. + +2. **`internal/subprocess/` — Shared subprocess lifecycle** — no dependencies on step 1. Extract from `internal/acp/client.go`: process launch, JSON-RPC framing, handshake, graceful shutdown, health monitoring. + +3. **`internal/extension/manifest.go` — Manifest parser** — no dependencies on prior steps. Parse `extension.toml` and `extension.json`. Validate schema. Produce `Manifest` struct. + +4. **`internal/extension/capability.go` — Capability checker** — depends on step 3 (reads capabilities from manifest). Source-trust tier enforcement. Dispatch-time and Host API checks. + +5. **`internal/extension/registry.go` — Extension registry** — depends on steps 3, 4. SQLite table in global DB. CRUD operations. Enabled/disabled state. + +6. **`internal/extension/manager.go` — Extension Manager** — depends on steps 2, 4, 5. Orchestrates lifecycle: discover → parse → validate → register → initialize → activate. + +7. **`internal/extension/host_api.go` — Host API handler** — depends on steps 4, 6. JSON-RPC method handlers for sessions/\*, memory/\*, observe/\*, skills/\*. Capability-checked. + +8. **`internal/daemon/boot.go` — Daemon integration** — depends on step 6. Add Extension Manager phase to boot sequence. Wire declaration provider. + +9. **`internal/cli/extension.go` — CLI commands** — depends on steps 5, 6. `agh extension list`, `install`, `enable`, `disable`. + +10. **`@agh/extension-sdk` — TypeScript SDK** — depends on steps 6, 7. npm package with Extension class, StdioTransport, Host API client, test harness. + +11. 
**Reference extensions** — depends on steps 6, 7. Two working examples: one Go subprocess extension, one TypeScript subprocess extension. + +### Technical Dependencies + +- **JSON-RPC library**: ACP uses `coder/acp-go-sdk` (ACP-specific). Extension subprocess protocol needs its own framing — evaluate `sourcegraph/jsonrpc2` (419 importers, MIT, bidirectional over any `io.ReadWriteCloser`) or lightweight custom implementation. +- **Node.js 18+**: Required for TypeScript SDK development and testing. + +--- + +## Monitoring and Observability + +### Key Metrics + +- `agh_extensions_loaded{name, state}` — Gauge of loaded extensions by state +- `agh_extension_hook_duration_ms{extension, event}` — Histogram of hook execution time +- `agh_extension_host_api_calls{extension, method, status}` — Counter of Host API calls +- `agh_extension_subprocess_restarts{extension}` — Counter of subprocess restart events +- `agh_extension_capability_denied{extension, capability}` — Counter of denied capability checks + +### Log Events + +| Event | Level | Fields | +|---|---|---| +| Extension loaded | INFO | name, version, capabilities | +| Extension failed to load | ERROR | name, error, phase (discover/parse/validate/initialize) | +| Subprocess crashed | ERROR | name, exit_code, stderr_tail, restart_count | +| Host API call | DEBUG | extension, method, duration_ms, status | +| Capability denied | WARN | extension, capability, method, source_tier | +| Extension handshake completed | INFO | name, negotiated_capabilities, latency_ms | +| Extension shutdown | INFO | name, reason (graceful/timeout/killed), uptime | + +### Health Endpoint + +Extend `GET /api/observe/health` to include extension status: + +```json +{ + "extensions": { + "loaded": 2, + "healthy": 2, + "unhealthy": 0, + "details": [ + {"name": "pgvector-memory", "state": "active", "uptime": "2h15m", "pid": 42891}, + {"name": "otel-exporter", "state": "active", "uptime": "2h14m", "pid": 42903} + ] + } +} +``` + +--- + +## Technical 
Considerations + +### Key Decisions + +1. **Two-tier model now, WASM later** (ADR-001): L1 Go-native + L3 subprocess covers Go and TypeScript. WASM stub remains as future seam — implement when hook latency is a measured bottleneck or sandbox is required for marketplace extensions. +2. **Capability-scoped security** (ADR-003): Per-extension capability grants enforced at Host API boundary, not just process isolation. Marketplace extensions restricted by default. +3. **Generalized ACP** (ADR-004): Shared subprocess lifecycle avoids code duplication between agents and extensions. +4. **Three-dimensional package model** (ADR-005): Resources + capabilities + actions maps to different security scopes and loading phases. +5. **Dual manifest format**: TOML primary (consistent with AGH config), JSON as fallback (for TypeScript/npm ecosystem). Loader tries `extension.toml` first. +6. **Minimal new Go dependencies**: Subprocess lifecycle extracted from ACP. JSON-RPC framing requires evaluation of `sourcegraph/jsonrpc2` or custom implementation since ACP uses the ACP-specific `coder/acp-go-sdk`. + +### Known Risks + +| Risk | Likelihood | Mitigation | +|---|---|---| +| Host API contract changes break extensions | Medium | Version the protocol. Extensions declare `min_agh_version`. | +| Capability model too restrictive | Medium | Start with permissive defaults (`*` for bundled/user/workspace). Tighten based on real usage. | +| Subprocess hook latency accumulates | Low | Most hooks are async. Sync hooks can run in parallel. WASM seam exists for future optimization. | +| TypeScript SDK maintenance burden | Medium | Generate types from Go contracts. Minimize hand-written code. | +| ACP refactor breaks existing functionality | Medium | Extract incrementally. ACP integration tests must pass at every step. | + +### Daemon-Context Failure Modes + +Unlike CLI tools, AGH is a headless daemon. 
Extension failures must be recoverable without user intervention: + +| Failure | Detection | Recovery | +|---|---|---| +| Subprocess crash | `waitForExit` goroutine detects exit | Auto-restart with exponential backoff (1s, 2s, 4s, 8s, max 60s). After 5 consecutive failures, disable extension and log ERROR. | +| Subprocess hangs | Health check timeout | SIGTERM → wait 10s → SIGKILL. Restart with backoff. | +| Subprocess Host API abuse | Rate limiting per extension | Return `rate_limited` error. Log WARN. | +| Extension install corruption | Checksum mismatch at load time | Refuse to load. Log ERROR with expected vs actual checksum. | +| Handshake failure | Timeout during initialize | Extension not activated. Log ERROR. Retry on next daemon boot. | + +### Future Seams (Documented, Not Implemented) + +| Seam | Trigger to Implement | Integration Point | +|---|---|---| +| **L2 WASM tier** (Extism) | Measured hook latency bottleneck or marketplace sandbox requirement | `internal/hooks/executor_wasm_stub.go` — fill existing stub | +| **Tool Registry** (BM25, namespacing) | Extension authors need tool registration | `internal/tools/` — extend minimal Tool struct | +| **Channel adapters** | Demand for Slack/Discord/Telegram integration | Extension capability + Host API `sessions/create` | +| **Cron scheduler** | Demand for scheduled agent runs | New `internal/cron/` package, exposed as extension capability | +| **API route extensions** | Extensions need custom HTTP endpoints | Dynamic route registration in Gin | +| **CLI command extensions** | Extensions need custom `agh` subcommands | Dynamic Cobra command registration | +| **Extension marketplace** | Ecosystem grows enough to need discovery | GitHub-based registry with checksums | + +--- + +## Architecture Decision Records + +- [ADR-001: Two-Tier Extension Model with Future WASM Seam](adrs/adr-001.md) — L1 Go-native + L3 subprocess now; L2 WASM deferred until measured need +- [ADR-002: Extism Go SDK for WASM Runtime 
(Deferred)](adrs/adr-002.md) — Extism chosen for future WASM tier; deferred until hook latency or sandbox justifies it +- [ADR-003: Capability-Scoped Security Model](adrs/adr-003.md) — Per-extension capability grants enforced at Host API boundary with source-trust tiers +- [ADR-004: Generalize ACP as Subprocess Extension Protocol](adrs/adr-004.md) — Shared subprocess lifecycle between ACP agents and extensions +- [ADR-005: Extension Three-Dimensional Package Model](adrs/adr-005.md) — Resources (declarative) + capabilities (interfaces) + actions (Host API) diff --git a/.compozy/tasks/ext-architecture/adrs/adr-001.md b/.compozy/tasks/ext-architecture/adrs/adr-001.md new file mode 100644 index 000000000..927e1f5d1 --- /dev/null +++ b/.compozy/tasks/ext-architecture/adrs/adr-001.md @@ -0,0 +1,79 @@ +# ADR-001: Two-Tier Extension Model (Go-Native + Subprocess) with Future WASM Seam + +## Status + +Accepted + +## Date + +2026-04-10 + +## Context + +AGH needs an extension architecture that allows third-party developers to extend the daemon's capabilities. The industry research across 6 agent harnesses shows convergence on multi-tier models. AGH already has Go interfaces (L1) and the ACP subprocess pattern (L3). A WASM executor stub exists as a future seam. + +The primary extension languages are **Go** and **TypeScript**. Both run naturally as subprocesses (Go compiles to binary, TypeScript runs via Node.js). WASM would provide faster in-process execution (~1-10μs vs ~100-500μs) and sandbox isolation, but adds dependency complexity (Extism, ~5-8MB binary) for an alpha with zero users and no measured latency bottleneck. + +## Decision + +Adopt a **two-tier extension model** for the initial implementation, with the WASM tier as a documented future seam: + +- **L1 (Go-native)**: Compiled-in extensions using Go interfaces. For first-party, high-performance functionality. Already exists via `AgentDriver`, `Executor`, `PromptAssembler` interfaces. 
+- **L3 (Subprocess via JSON-RPC)**: Out-of-process extensions for all third-party extensions. Supports Go, TypeScript, Python, or any language. Process-isolated. Bidirectional Host API. +- **L2 (WASM — future)**: The existing `WasmExecutor` stub in `internal/hooks/executor_wasm_stub.go` remains as the integration seam. Implement when hook latency becomes a measured bottleneck or when sandboxed marketplace execution is required. + +| Tier | Latency | Security | Language Support | Status | +|---|---|---|---|---| +| L1 Go-native | Fastest | Trusted | Go only | Exists | +| L3 Subprocess | ~100-500μs | Process-isolated | Any language | Implement now | +| L2 WASM | ~1-10μs | Sandboxed | Rust, Go, TS, C | Future seam | + +## Alternatives Considered + +### Alternative 1: Three Tiers Now (Go + WASM + Subprocess) + +- **Description**: Implement all three tiers including WASM via Extism immediately. +- **Pros**: Fastest hook execution. Sandbox for untrusted code. Multi-language PDKs. +- **Cons**: Adds Extism dependency (38 Go importers), ~5-8MB to binary, WASM debugging is painful, no measured latency problem exists yet. +- **Why deferred**: Zero users, zero extensions, zero evidence that subprocess hook latency is a problem. YAGNI. The WASM stub preserves the seam — implement when data justifies it. + +### Alternative 2: Go Native Plugins + +- **Description**: Use Go's `plugin` package for dynamic loading. +- **Pros**: In-process, shared memory, fastest possible. +- **Cons**: No Windows support, requires CGO, no unloading, no security isolation. +- **Why rejected**: Contradicts AGH's single-binary/zero-CGO constraint. 
+ +## Consequences + +### Positive + +- Simpler architecture: two tiers instead of three +- No new runtime dependencies (no Extism, no wazero in go.mod) +- No binary size increase +- Go and TypeScript fully supported via subprocess +- WASM seam preserved for future without any compatibility cost +- Faster to implement and ship + +### Negative + +- Subprocess hooks add ~100-500μs per call (vs ~1-10μs for WASM) +- No in-process sandboxing for untrusted code (process isolation only) +- If hook latency becomes a problem, WASM implementation is deferred work + +### Risks + +- Hook latency accumulates across many sync hooks. Mitigation: measure first. Most hooks are async. Subprocess hooks can run in parallel where events allow it. +- Marketplace extensions run without sandbox. Mitigation: process isolation + env clearing + capability scoping (ADR-003) provides defense in depth. WASM sandbox can be added later for marketplace tier. + +## Implementation Notes + +- L1 already exists (Go interfaces in `internal/session/interfaces.go`, `internal/hooks/executor.go`) +- L3 requires generalizing the ACP subprocess lifecycle in `internal/acp/` into a shared `internal/subprocess/` package +- L2 stub stays as-is in `internal/hooks/executor_wasm_stub.go` — no changes needed +- When WASM is needed: fill stub with Extism, use opaque `ExecutorConfig` to keep WASM config out of `HookDecl` + +## References + +- [Extensibility Analysis](../../extensability/analysis.md) +- [ADR-002: Extism for WASM Runtime (Deferred)](adr-002.md) diff --git a/.compozy/tasks/ext-architecture/adrs/adr-002.md b/.compozy/tasks/ext-architecture/adrs/adr-002.md new file mode 100644 index 000000000..f1e0cd6c1 --- /dev/null +++ b/.compozy/tasks/ext-architecture/adrs/adr-002.md @@ -0,0 +1,72 @@ +# ADR-002: Extism Go SDK for WASM Runtime + +## Status + +Deferred — WASM tier deferred to future phase. The existing `WasmExecutor` stub remains as the integration seam. 
Extism will be evaluated when hook latency becomes a measured bottleneck or when sandboxed execution is required for marketplace extensions. + +## Date + +2026-04-10 + +## Context + +AGH's L2 extension tier requires a WASM runtime to execute sandboxed in-process extensions. The WASM executor stub already exists in `internal/hooks/executor_wasm_stub.go` with compile-time interface verification. Two primary options exist: raw wazero (pure Go runtime) or Extism (high-level SDK wrapping wazero). + +## Decision + +Use the Extism Go SDK (v1.3.0+) which wraps wazero internally. Wrap Extism behind AGH's existing `Executor` interface so the implementation can be swapped if needed. + +Key reasons: +- **Fuel metering**: Extism provides CPU limiting that wazero lacks natively. Critical for preventing malicious/buggy plugins from consuming unbounded CPU. +- **Host functions**: High-level API for exposing AGH capabilities to WASM plugins. +- **Multi-language PDKs**: 7+ guest PDKs (Rust, Go, TypeScript/AssemblyScript, C, Python, etc.) ready for extension authors. +- **Timeout management**: Built-in per-call timeout enforcement. +- **Zero CGO**: Extism uses wazero under the hood, preserving AGH's zero-CGO constraint. + +## Alternatives Considered + +### Alternative 1: Raw wazero + +- **Description**: Use wazero directly without Extism wrapper. +- **Pros**: Zero external deps beyond wazero. Full control over runtime. Smaller dependency tree. +- **Cons**: No fuel metering (cannot limit CPU). No multi-language PDKs. Must build host function API, memory management, and plugin loading from scratch (~1000+ LOC vs ~200 LOC with Extism). +- **Why rejected**: Fuel metering is essential for running untrusted code. Building it from scratch is significant effort with high risk of bugs. + +### Alternative 2: Defer WASM entirely (ADOPTED) + +- **Description**: Keep the stub, focus on subprocess extensions only. +- **Pros**: Zero new dependencies. Simpler system. 
Go and TypeScript both work as subprocesses. No measured latency bottleneck exists yet. +- **Cons**: Subprocess hooks add ~100-500μs per call on the synchronous dispatch path. No sandboxed execution for untrusted code. +- **Why adopted**: The council debate concluded that with zero users and zero extensions, there is no evidence subprocess hook latency is a problem. Most hooks are async. The WASM stub preserves the seam for future implementation when data justifies it. + +## Consequences + +### Positive + +- Fuel metering prevents runaway WASM plugins from starving the daemon +- Extension authors can use familiar languages (Rust, TypeScript) to write WASM hooks +- Single `.wasm` file distribution — no platform-specific builds +- Crash isolation: WASM trap handled by runtime, host unaffected + +### Negative + +- Extism adds ~5-8MB to binary size +- Extism has relatively small Go ecosystem (38 importers) +- Additional dependency in go.mod (extism-go-sdk + wazero transitive) + +### Risks + +- Extism goes unmaintained: Mitigation — Extism is wrapped behind `Executor` interface. Can swap to raw wazero + custom fuel metering if needed. The interface boundary is the insurance policy. +- Extism API breaks: Mitigation — pin version, wrap in thin adapter layer. 
+ +## Implementation Notes + +- Fill `internal/hooks/executor_wasm_stub.go` with Extism implementation +- Use opaque `ExecutorConfig` field (not inline WASM fields on `HookDecl`) per Architect council recommendation +- WASM-specific config (module path, fuel limit, memory limit, host function grants) lives in executor config, not in the hook declaration schema + +## References + +- [Extism Go SDK](https://github.com/extism/go-sdk) +- [wazero](https://github.com/tetratelabs/wazero) +- [Library Research](../analysis_libraries.md) diff --git a/.compozy/tasks/ext-architecture/adrs/adr-003.md b/.compozy/tasks/ext-architecture/adrs/adr-003.md new file mode 100644 index 000000000..5af35394b --- /dev/null +++ b/.compozy/tasks/ext-architecture/adrs/adr-003.md @@ -0,0 +1,83 @@ +# ADR-003: Capability-Scoped Security Model for Extensions + +## Status + +Accepted + +## Date + +2026-04-10 + +## Context + +AGH's hook system already has 27 events with typed dispatch where hooks can mutate session payloads, tool inputs, permission decisions, agent spawn commands, and prompts. The proposed extension architecture adds a bidirectional Host API where extensions can create sessions, send prompts, and write to memory. The council debate identified a critical security gap: **process/WASM isolation protects the compute plane but leaves the data plane ungated**. A sandboxed WASM extension with access to `sessions/create` and `memory/store` is functionally equivalent to a trusted extension for every attack that matters. + +The ClawHavoc incident (341 malicious skills on ClawHub) demonstrates this is not theoretical. + +## Decision + +Implement capability-scoped security at the **Host API boundary**, not just at the process isolation boundary. Every third-party extension (L3 subprocess now, and L2 WASM when implemented) must declare required capabilities in its manifest, and the daemon enforces these at dispatch time. + +Four mechanisms: + +1. 
**Capability declarations in manifest**: Extensions declare which payload families they need access to (`session`, `tool`, `permission`, `prompt`, `agent`, `memory`, `observe`) and which Host API methods they can call. + +2. **Source-trust tiers**: Different sources have different maximum capability grants: + - `bundled`: All capabilities (trusted, compiled-in) + - `user`: All capabilities (user installed, trusted) + - `workspace`: All capabilities (project-scoped, trusted) + - `marketplace`: Restricted — cannot access `permission.*`, `agent.pre_start` (command rewrite), or Host API write methods without explicit user allowlisting + +3. **Dispatch-time enforcement**: The hook dispatch pipeline checks capability grants before forwarding payloads. The Host API checks capability grants before executing methods. Unauthorized calls return a typed error, not silent drops. + +4. **Audit trail**: `HookRunRecord` extended to log which capabilities were exercised, not just the outcome. Extension-initiated Host API calls logged distinctly from user-initiated ones. + +## Alternatives Considered + +### Alternative 1: Trust All Extensions (Operator Responsibility) + +- **Description**: No capability scoping. If you install an extension, you trust it fully. +- **Pros**: Simplest implementation. OpenClaw and Pi-Mono use this model. +- **Cons**: A single malicious marketplace extension can read all memory, create sessions against any agent, and exfiltrate data through prompt responses. No defense in depth. +- **Why rejected**: AGH is a daemon, not a CLI. Extensions run in the background without user oversight. Trust-all is unacceptable for a headless system. + +### Alternative 2: Defer Security to Later + +- **Description**: Ship extensions without capability scoping, add it later. +- **Pros**: Faster to ship. Security can be retrofitted. 
+- **Cons**: Capability scoping affects the manifest schema, dispatch pipeline, and Host API — all of which harden into compatibility commitments. Retrofitting is an order of magnitude harder than building it in. +- **Why rejected**: The council's Security Advocate demonstrated that the hook system already has unconstrained mutation power from marketplace sources. Deferring creates a window of vulnerability that grows with every extension. + +## Consequences + +### Positive + +- Extensions operate with least-privilege by default +- Marketplace extensions are restricted from high-risk operations +- Audit trail enables forensic analysis of extension behavior +- Users can make informed trust decisions based on declared capabilities + +### Negative + +- More complex manifest schema (capabilities section) +- Dispatch-time capability checks add a small overhead per hook invocation +- Extension authors must understand and declare capabilities correctly + +### Risks + +- Capability model is too restrictive, blocking legitimate use cases. Mitigation: start with permissive defaults (`*` = all for bundled/user/workspace sources). Tighten based on real usage. +- Capability model is too granular, creating UX friction. Mitigation: use coarse-grained families (session, tool, memory, etc.) not per-method grants. 
+ +## Implementation Notes + +- Add `Capabilities []string` field to `HookDecl` and extension manifest +- Add source-trust tier map to daemon config (overridable per-workspace) +- Check capabilities in `pipeline.executeHook` before forwarding payload +- Check capabilities in Host API handler before executing method +- Extend `HookRunRecord` with `CapabilitiesExercised []string` +- Default: bundled/user/workspace = `["*"]`, marketplace = `["session.read", "tool.read", "observe.read"]` + +## References + +- [Council Debate: Security Advocate opening statement and rebuttal] +- [OpenClaw ClawHavoc incident analysis](.compozy/tasks/ext-architecture/analysis_openclaw.md) diff --git a/.compozy/tasks/ext-architecture/adrs/adr-004.md b/.compozy/tasks/ext-architecture/adrs/adr-004.md new file mode 100644 index 000000000..b7b438612 --- /dev/null +++ b/.compozy/tasks/ext-architecture/adrs/adr-004.md @@ -0,0 +1,74 @@ +# ADR-004: Generalize ACP as Subprocess Extension Protocol + +## Status + +Accepted + +## Date + +2026-04-10 + +## Context + +AGH already manages subprocess communication with ACP agents via JSON-RPC 2.0 over stdio. The subprocess extension tier (L3) needs a protocol for communicating with non-agent extensions (memory backends, observe exporters, etc.). The question is whether to create a new protocol or generalize the existing ACP pattern. + +## Decision + +Generalize the ACP subprocess lifecycle into a shared pattern that both ACP agents and L3 extensions use. The extension protocol is JSON-RPC 2.0 over stdio with an MCP-inspired capability negotiation handshake. + +Shared lifecycle: +1. Daemon launches extension binary as subprocess +2. Initialize handshake: capability negotiation, version check +3. Normal JSON-RPC message exchange (bidirectional) +4. 
Shutdown: graceful request → timeout → SIGKILL + +The extension protocol adds: +- **Host API methods** (extension → daemon): `sessions/list`, `sessions/create`, `sessions/prompt`, `sessions/stop`, `memory/recall`, `memory/store`, `memory/forget`, `skills/list`, `observe/health`, `sessions/events` +- **Extension methods** (daemon → extension): `execute_hook`, `provide_tools`, `health_check`, `shutdown` +- **Capability negotiation**: Extension declares what it provides and what Host API methods it needs + +## Alternatives Considered + +### Alternative 1: Separate Protocol + +- **Description**: Extensions use a different protocol than ACP agents (e.g., gRPC, custom binary format). +- **Pros**: Can be optimized for extension use cases. No coupling to ACP evolution. +- **Cons**: Duplicates the subprocess lifecycle code. Different toolchain requirements (gRPC needs protobuf). Two protocols to maintain and document. +- **Why rejected**: AGH already has battle-tested JSON-RPC stdio infrastructure. Duplication is wasteful. + +### Alternative 2: Align with MCP Protocol + +- **Description**: Use MCP protocol spec directly (initialize, tools/list, resources/list). +- **Pros**: Ecosystem alignment. Extensions could potentially be MCP servers. +- **Cons**: MCP is designed for tool serving, not for the full range of extension capabilities (memory backends, observe exporters). Would need AGH-specific extensions to MCP anyway. +- **Why rejected**: MCP is a good inspiration but too narrow for the full extension surface. The protocol borrows MCP's initialize handshake pattern but defines AGH-specific methods. 
+ +## Consequences + +### Positive + +- Reuses proven subprocess management code from `internal/acp` +- Consistent developer experience across agents and extensions +- Single protocol to document and maintain +- TypeScript SDK can serve both agent and extension authors + +### Negative + +- ACP and extension protocol may diverge over time, creating maintenance burden +- Extensions may need features ACP doesn't provide (e.g., bidirectional streaming) + +### Risks + +- Protocol coupling: ACP changes break extensions or vice versa. Mitigation: extract shared subprocess lifecycle into `internal/subprocess` package. ACP and extension packages import shared lifecycle but define their own method sets. + +## Implementation Notes + +- Extract subprocess launch/handshake/shutdown from `internal/acp/client.go` into `internal/subprocess/` (shared) +- `internal/acp/` imports `internal/subprocess/` for agent-specific methods +- `internal/extension/` imports `internal/subprocess/` for extension-specific methods +- Both share: process spawning, stdin/stdout JSON-RPC framing, graceful shutdown with signal escalation, health monitoring + +## References + +- [ACP Client Implementation](internal/acp/client.go) +- [MCP Protocol Spec](https://modelcontextprotocol.io/specification/2025-11-25) diff --git a/.compozy/tasks/ext-architecture/adrs/adr-005.md b/.compozy/tasks/ext-architecture/adrs/adr-005.md new file mode 100644 index 000000000..b545b9e97 --- /dev/null +++ b/.compozy/tasks/ext-architecture/adrs/adr-005.md @@ -0,0 +1,90 @@ +# ADR-005: Extension Three-Dimensional Package Model + +## Status + +Accepted + +## Date + +2026-04-10 + +## Context + +Research across 6 agent harnesses revealed that extensions are not just "code that runs at a hook point." Claude Code plugins bundle skills + MCP servers + hooks + settings. OpenClaw plugins register tools, providers, CLI commands, and HTTP routes. The question is how to model what an extension IS in AGH. 
+ +## Decision + +Model extensions as three-dimensional packages: + +1. **Resources (declarative)**: Static assets bundled with the extension, declared in the manifest. No code execution needed to register them. + - Agent definitions (AGENT.md files) + - Skill files (SKILL.md files) + - Hook declarations + - MCP server configurations + +2. **Capabilities (interfaces)**: Runtime services the extension provides to AGH, implemented via Go interfaces (L1) or JSON-RPC methods (L3), with WASM exports (L2) as a future option. + - `agent.driver` — custom LLM backend + - `memory.backend` — custom storage + - `observe.exporter` — metrics/events export + - `prompt.provider` — inject prompt content + - `message.transform` — modify agent I/O + - `permission.gate` — auth decisions + - `content.validate` — content safety + +3. **Actions (Host API)**: Operations the extension can perform on AGH, available via bidirectional JSON-RPC for L3 extensions. + - `sessions/create`, `sessions/prompt`, `sessions/stop` + - `memory/recall`, `memory/store`, `memory/forget` + - `skills/list`, `sessions/events`, `observe/health` + +This maps cleanly to the existing AGH architecture: +- Resources → `skills.Registry`, `config.AgentDef`, `hooks.DeclarationProvider` +- Capabilities → `session.AgentDriver`, `hooks.Executor`, `session.PromptAssembler` +- Actions → `session.Manager`, `memory.Store`, `observe` query methods + +## Alternatives Considered + +### Alternative 1: Hooks Only + +- **Description**: Extensions can only respond to hooks. No resources, no capabilities, no actions. +- **Pros**: Simplest model. Already works with existing hook system. +- **Cons**: Cannot bundle skills, cannot provide agent drivers, cannot create sessions. Severely limits what extensions can do. +- **Why rejected**: Research shows 6/6 harnesses support extensions that go far beyond hooks. Hooks-only would make AGH's extension system the weakest in the ecosystem. 
+ +### Alternative 2: Flat Capability List + +- **Description**: Everything is a "capability" — no distinction between resources, interfaces, and actions. +- **Pros**: Simpler mental model. One list of things an extension does. +- **Cons**: Conflates static declarations (resources) with runtime services (capabilities) with daemon API access (actions). Makes capability scoping harder — "this extension provides memory.backend" is very different from "this extension can call sessions/create." +- **Why rejected**: The three dimensions map to different security models: resources are safe (declarative), capabilities are controlled (interface contracts), actions are privileged (daemon API access). Collapsing them loses the security granularity. + +## Consequences + +### Positive + +- Clear mental model for extension authors: "I bring X, I provide Y, I can do Z" +- Security model maps naturally: resources = safe, capabilities = interface-bound, actions = capability-scoped +- Manifest schema is self-documenting +- Matches how Claude Code, OpenClaw, and Pi-Mono model their extensions + +### Negative + +- Three dimensions add conceptual complexity for simple extensions +- Manifest schema is larger than a flat capability list +- Extension loading pipeline has three phases (resource registration, capability binding, action authorization) + +### Risks + +- Over-engineering for alpha stage. Mitigation: start with resources + capabilities only. Actions (Host API) can be enabled incrementally. 
+ +## Implementation Notes + +- Manifest sections: `[resources]`, `[capabilities]`, `[actions]` +- Resource registration happens at install time (copy files, register declarations) +- Capability binding happens at daemon boot (launch subprocesses; WASM modules when implemented) +- Action authorization checked per-call via capability scoping (ADR-003) + +## References + +- [Claude Code Plugin System](analysis_claude_code.md) +- [OpenClaw Extension Architecture](analysis_openclaw.md) +- [Pi-Mono Extension System](analysis_pi_mono.md) diff --git a/.compozy/tasks/ext-architecture/analysis_claude_code.md b/.compozy/tasks/ext-architecture/analysis_claude_code.md new file mode 100644 index 000000000..d9d8e054d --- /dev/null +++ b/.compozy/tasks/ext-architecture/analysis_claude_code.md @@ -0,0 +1,584 @@ +# Claude Code Extension Architecture Analysis + +## Overview + +Claude Code implements a multi-layered extensibility system composed of four interlocking subsystems: + +1. **Plugin System** -- distributable bundles of capabilities (tools, MCP servers, hooks, skills, slash commands) discovered through marketplaces and installed to an on-disk cache. +2. **Skill System** -- Markdown-based agentic workflows (`SKILL.md` files with YAML frontmatter) that inject procedures into the agent's context on activation. +3. **Hook System** -- event-driven lifecycle interceptors that can verify, modify, or block actions at 25+ lifecycle points. +4. **MCP Integration** -- Model Context Protocol client that connects to external servers (stdio, SSE, HTTP, WebSocket, claude-ai proxy, in-process) and exposes their tools/resources/prompts as first-class citizens. + +The key architectural insight is the separation of concerns: **plugins package capabilities**, **skills package procedures**, **hooks intercept lifecycle events**, and **MCP bridges external servers**. These four systems compose orthogonally -- a plugin can bundle skills, hooks, and MCP servers together as a single installable unit. 
+ +### Three-Layer State Model + +The plugin system uses a three-layer reconciliation model that is the core design decision for robustness: + +| Layer | Storage | Reconciler | +|-------|---------|-----------| +| **Intent** (settings) | `.claude/settings.json` at user/project/local/managed scope | `pluginStartupCheck.ts` | +| **Materialization** (disk) | `~/.claude/plugins/cache////` | `reconcileMarketplaces()` | +| **Activation** (runtime) | `AppState.plugins`, tool registry, command registry, hook registry | `refreshActivePlugins()` | + +This means a plugin can be configured but not installed, installed but not enabled, or enabled but not loaded -- each state is reconciled independently, making the system resilient to partial failures. + +--- + +## Extension Loading & Discovery + +### Plugin Discovery Flow + +``` +Marketplace (GitHub/URL) --> DiscoverPlugins (UI) --> installPluginFromMarketplace + | + v + ~/.claude/plugins/cache/ + installed_plugins.json + | + v + refreshActivePlugins() + clearAllCaches() + loadAllPlugins() + extract commands/agents/hooks + bump mcp.pluginReconnectKey + re-init LSP + | + v + AppState.plugins + Commander Registry + MCPConnectionManager +``` + +**Source**: `src/services/plugins/pluginOperations.ts` defines five core lifecycle operations as pure library functions: `installPluginOp`, `uninstallPluginOp`, `updatePluginOp`, `enablePluginOp`, `disablePluginOp`. These are consumed by both CLI commands and the interactive UI. 
+ +### Plugin Installation Scopes + +Plugins can be scoped to four levels with strict precedence: + +| Scope | Storage | Availability | +|-------|---------|-------------| +| **Managed** | `managed-settings.json` (MDM-deployed) | Enforced by org policy, cannot be overridden | +| **User** | `~/.claude/settings.json` | Global for current user | +| **Project** | `{project}/.claude/settings.json` (committed) | This repository only | +| **Local** | `{project}/.claude/settings.local.json` (gitignored) | This checkout on this machine | + +**Source**: `src/services/plugins/pluginOperations.ts` lines 72-84 define `VALID_INSTALLABLE_SCOPES = ['user', 'project', 'local']` and `VALID_UPDATE_SCOPES` which adds 'managed'. + +### Built-in Plugin Registry + +**Source**: `src/plugins/builtinPlugins.ts` + +Built-in plugins ship with the CLI binary and are registered via `registerBuiltinPlugin()`. They differ from bundled skills in that: +- They appear in the `/plugin` UI under a "Built-in" section +- Users can enable/disable them (persisted to user settings via `enabledPlugins`) +- They can provide multiple components (skills, hooks, MCP servers) + +Plugin IDs use the format `{name}@builtin` to distinguish from marketplace plugins (`{name}@{marketplace}`). + +```typescript +export type BuiltinPluginDefinition = { + name: string + description: string + version: string + defaultEnabled?: boolean + isAvailable?: () => boolean + skills?: BundledSkillDefinition[] + hooks?: HooksSettings + mcpServers?: Record +} +``` + +The `getBuiltinPlugins()` function splits registered plugins into enabled/disabled based on user settings with `defaultEnabled` as fallback. + +### Skill Discovery + +**Source**: `src/skills/loadSkillsDir.ts` + +Skills are discovered from multiple sources in parallel: + +1. **Managed skills**: `/.claude/skills/` (policy-enforced) +2. **User skills**: `~/.claude/skills/` +3. **Project skills**: `.claude/skills/` at every directory from cwd up to home +4. 
**Additional directory skills**: `--add-dir` paths +5. **Legacy commands**: `.claude/commands/` directories (deprecated format) +6. **Bundled skills**: Compiled into the CLI binary (`src/skills/bundled/`) +7. **Dynamic skills**: Discovered at runtime as the agent reads/writes files +8. **Conditional skills**: Skills with `paths` frontmatter, activated when matching files are touched + +The `SKILL.md` file format requires directory structure: `skill-name/SKILL.md`. Skills are deduplicated by resolved filesystem path (via `realpath`) to handle symlinks. + +### Dynamic Skill Discovery + +When the agent touches files (Read/Write/Edit), `discoverSkillDirsForPaths()` walks up from the file path to cwd, looking for `.claude/skills/` directories. Newly found directories are loaded via `addSkillDirectories()` and merged into the `dynamicSkills` map. This enables monorepo patterns where sub-packages have their own skill definitions. + +### Conditional Skills (Path-Filtered) + +Skills can declare a `paths` frontmatter field using gitignore-style patterns. These skills are held in a `conditionalSkills` map and only activated when the agent operates on files matching those patterns. Once activated, they move to `dynamicSkills` and become available to the model. This is a token-budget optimization -- skills near rarely-touched code don't consume context. + +--- + +## Hook System + +### Architecture + +The hook system provides 25+ lifecycle events with five execution engine types. Hooks can verify, modify, or block actions before they happen, transform results after completion, or inject context into messages. 
+ +**Source**: `src/utils/hooks.ts` (main orchestrator), `src/utils/hooks/` (sub-modules) + +### Lifecycle Events + +**Session Events:** +- `SessionStart` -- fires once at REPL bootstrap, before any user message +- `SessionEnd` / `Stop` -- fires on graceful shutdown (tight 1.5s default timeout) +- `Setup` -- fires during initial setup + +**Tool Lifecycle Events:** +- `PreToolUse` -- fires before each tool call; can block, modify input, or inject context +- `PostToolUse` -- fires after successful tool call; can transform results +- `PostToolUseFailure` -- fires after failed tool call + +**User Interaction Events:** +- `UserPromptSubmit` -- fires when user submits message; can inject context or rewrite prompt +- `PermissionRequest` -- participates in multi-resolver permission race + +**Context Events:** +- `PreCompact` / `PostCompact` -- fires around context compaction + +**Agent Events:** +- `SubagentStart` / `SubagentStop` -- fires for subagent lifecycle +- `TaskCreated` / `TaskCompleted` -- fires for task lifecycle +- `TeammateIdle` -- fires when teammate becomes idle + +**Other Events:** +- `ConfigChange` -- fires when configuration changes +- `CwdChanged` -- fires when working directory changes +- `FileChanged` -- fires when files change on disk +- `InstructionsLoaded` -- fires when CLAUDE.md instructions load +- `Elicitation` / `ElicitationResult` -- fires for MCP elicitation flows +- `PermissionDenied` -- fires when permission is denied +- `Notification` -- fires for system notifications + +### Hook Types + +**Source**: `src/utils/hooks.ts` lines 166-175 for timeouts; `src/types/hooks.ts` for type definitions + +| Type | Execution | Typical Use | +|------|-----------|-------------| +| `command` | `child_process.spawn()` subprocess | Shell commands (lint, format, tests) | +| `prompt` | Side-query to Claude API | LLM-based verification or rewriting | +| `agent` | Forked sub-agent via `queryLoop()` | Multi-step verification with tool access | +| `http` | HTTP 
POST to endpoint | Remote automation, approval gates | +| `function` | In-process callback (SDK only) | Programmatic hooks via Agent SDK | + +### Hook Configuration Schema + +```json +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "Write|Edit", + "type": "command", + "command": "prettier --check \"$file\"", + "timeout": 30, + "statusMessage": "Checking format..." + } + ] + } +} +``` + +Fields: `matcher` (regex/glob filter), `type` (execution engine), `command`/`prompt` (payload), `timeout` (seconds), `statusMessage` (UI spinner text). + +### Hook Sources + +Hooks are discovered from four sources, merged at startup and on hot-reload: + +1. **settings.json** -- five-tier settings cascade (policy/user/project/local/session) +2. **Plugins** -- plugin manifests declare hooks, loaded by `loadPluginHooks` +3. **Agent frontmatter** -- agents with markdown frontmatter can declare scoped hooks +4. **SDK callbacks** -- `function`-type hooks registered via Agent SDK + +### Execution Lifecycle + +``` +Event occurs in query loop + | +getRegisteredHooks(eventName) --> List + | +Filter by matcher (matchHook) + | +For each matching hook IN PARALLEL: + command --> child_process.spawn(), write stdin, read stdout + prompt --> side-query to Claude API + agent --> fork sub-agent loop + http --> POST event payload + function --> invoke registered callback + | +Each hook returns HookJSONOutput (or times out) + | +Aggregate results: + - If any returned continue=false --> raise HookBlockedError + - Merge hookSpecificOutput in declaration order + - Record duration (telemetry) + | +Resume query loop +``` + +Hooks for the same event run **concurrently**. The orchestrator waits for all to complete (or timeout) before proceeding, because a single `continue: false` must block downstream execution. 
+ +### Hook Output Protocol + +```json +{ + "continue": true, + "stopReason": "(only if continue=false)", + "hookSpecificOutput": { + "permissionDecision": "allow | deny | ask", + "updatedInput": { "...tool input overrides..." }, + "additionalContext": "(text to inject)", + "transformedResult": "(replacement result)" + } +} +``` + +**PreToolUse** can: allow (pass through), block (`continue: false`), or modify (`updatedInput`). +**PostToolUse** can: transform results via `transformedResult`. +**UserPromptSubmit** can: inject context via `additionalContext`. +**PermissionRequest** can: pre-empt dialog via `permissionDecision`. + +### Timeouts + +- **Tool hook timeout**: 10 minutes default (`TOOL_HOOK_EXECUTION_TIMEOUT_MS = 10 * 60 * 1000`) +- **Session end timeout**: 1.5 seconds default (`SESSION_END_HOOK_TIMEOUT_MS_DEFAULT = 1500`), configurable via `CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS` +- **Per-hook timeout**: overrides default via the `timeout` field + +### MDM Policy Enforcement + +- `shouldAllowManagedHooksOnly()` -- when true, only MDM-managed hooks execute; user/project hooks are ignored +- `shouldDisableAllHooksIncludingManaged()` -- kills the hook system entirely + +--- + +## Tool Registration + +### The Tool Interface + +**Source**: `src/Tool.ts` + +Every tool implements a single `Tool` interface built via the `buildTool()` factory: + +```typescript +type Tool = { + name: string + aliases?: string[] + searchHint?: string + inputSchema: ZodSchema + outputSchema?: ZodSchema + + // Execution + call(args, context, canUseTool, parentMessage, onProgress?): Promise + isConcurrencySafe(): boolean + isReadOnly(): boolean + isDestructive?(): boolean + + // Permissions + checkPermissions(input, context): Promise + validateInput?(input, context): Promise + + // UI Rendering (React components) + prompt(): Promise // System prompt text teaching the model + renderToolUseMessage(): ReactNode + renderToolResultMessage?(): ReactNode + renderToolUseProgressMessage?(): 
ReactNode + + // Budget management + maxResultSizeChars: number + shouldDefer?: boolean // Deferred loading via ToolSearch + alwaysLoad?: boolean // Never deferred +} +``` + +Key properties that drive orchestration: +- `isReadOnly()` -- determines write permission need +- `isConcurrencySafe()` -- determines if tool can run in parallel batch +- `maxResultSizeChars` -- triggers disk persistence for large results +- `shouldDefer` -- lazy schema loading via ToolSearch tool + +### Tool Registration (Central Registry) + +**Source**: `src/tools.ts` + +The central registry in `tools.ts` imports tools statically or conditionally behind feature flags. `getAllBaseTools()` returns the complete tool list for the current environment. Tools are assembled into the final pool via `assembleToolPool()`: + +1. Get built-in tools via `getTools()` (respects mode filtering, deny rules) +2. Filter MCP tools by deny rules +3. Deduplicate by name (built-in tools take precedence) +4. Sort each partition for prompt-cache stability (built-ins as contiguous prefix) + +### Tool Execution Pipeline + +**Source**: `toolOrchestration.ts` + +``` +Claude returns assistant message with tool_use block + | +queryLoop calls runTools(toolUseBlocks) + | +Partition: read-only --> parallel batch, write --> serial batch + | +For each tool: + 1. validateInput() --> Zod schema validation + 2. canUseTool() --> multi-layer permission decision + 3. PreToolUse hooks --> may block/modify + 4. tool.call(input, ctx) --> execute, yield progress + 5. PostToolUse hooks --> may transform result + 6. Result truncation --> enforce maxResultSizeChars + | +yield tool_result block back to queryLoop +``` + +**Smart concurrency**: Read-only tools (`FileRead`, `Glob`, `Grep`, `WebFetch`) execute as parallel batch. Mutating tools (`FileEdit`, `FileWrite`, `Bash`) execute serially. 
+ +### Context-Aware Tool Availability + +The tool registry is filtered per execution context: + +| Context | Allowed | Disallowed | +|---------|---------|------------| +| Async Agent | FileRead, Grep, Bash, FileEdit | Agent, TaskStop, ExitPlanMode | +| Coordinator | Agent, TaskStop, SendMessage | Most filesystem/shell tools | + +### ToolSearch (Deferred Loading) + +Tools can be marked `shouldDefer: true` to keep them out of the initial prompt. The `ToolSearchTool` lets the model query the deferred registry at runtime, fetching full schemas on demand. This keeps the per-turn schema payload small while preserving access to the long tail of MCP tools. + +--- + +## MCP Integration + +### Host Architecture + +**Source**: `src/services/mcp/client.ts`, `src/services/mcp/types.ts` + +Claude Code acts as an MCP **host** managing multiple MCP **clients** (one per configured server). The architecture layers: + +``` +Agent Query Loop (natural language) + | +MCPTool / ReadMcpResourceTool / ListMcpResourcesTool (adapter layer) + | +connectToServer + ensureConnectedClient (client dispatch) +MCPConnectionManager + | +StdioClientTransport | SSE | HTTP | WS | InProc | ClaudeAI-proxy (transport) + | +Local subprocess | Remote server | Claude.ai (external services) +``` + +### Transport Types + +**Source**: `src/services/mcp/types.ts` + +| Transport | Config Schema | Use Case | +|-----------|--------------|----------| +| `stdio` | `McpStdioServerConfigSchema` | Local subprocess (most common) | +| `sse` | `McpSSEServerConfigSchema` | Remote HTTP with Server-Sent Events | +| `http` | `McpHTTPServerConfigSchema` | REST-style via `StreamableHTTPClientTransport` | +| `ws` | `McpWebSocketServerConfigSchema` | Persistent bidirectional | +| `claudeai-proxy` | `McpClaudeAIProxyServerConfigSchema` | Anthropic-hosted servers | +| `InProcessTransport` | Internal only | Built-in capabilities through MCP interface | + +### Tool Namespacing + +**Source**: `src/services/mcp/mcpStringUtils.ts` + +MCP 
tools are namespaced via `buildMcpToolName(serverName, toolName)` producing `mcp__{serverName}__{toolName}`: + +``` +Server "github-mcp" exposing "search_repos", "read_file": + mcp__github-mcp__search_repos + mcp__github-mcp__read_file + +Server "postgres" exposing "query": + mcp__postgres__query +``` + +This namespace is the contract for permission rules, hook matchers, and UI display. + +### Connection Lifecycle + +1. `connectToServer(config)` -- resolves transport, creates `McpClient`, completes JSON-RPC initialize handshake +2. `ensureConnectedClient(serverName)` -- cache-aware lookup, lazy reconnect on first tool use +3. Connection states: `pending` -> `connected` | `disabled` | `failed` +4. Session recovery: `isMcpSessionNotFoundError` detects dead sessions (HTTP 404, JSON-RPC -32001), triggers transparent reconnect + +### Output Size Management + +`truncateMcpContentIfNeeded` checks results against configured thresholds. Oversized results are persisted to temp files via `persistBinaryContent`, and the agent receives `getLargeOutputInstructions` with a file reference instead. This prevents context bloat. + +### Authentication + +- Static credentials (API keys in config) +- Interactive OAuth via browser (`handleOAuth401Error` -> `performMCPOAuthFlow`) +- Per-server token management (independent auth state per server) +- `ElicitRequest` messages for runtime credential gathering + +### Server Approval + +**Source**: `src/services/mcpServerApproval.tsx` + +Before a newly discovered server can connect, explicit user approval is required via `MCPServerApprovalDialog`. This prevents malicious workspace configs from silently launching subprocesses. + +### MCP-to-Skill Bridge + +**Source**: `src/skills/mcpSkillBuilders.ts` + +MCP servers can also expose skills (prompt templates). The `registerMCPSkillBuilders` pattern provides a cycle-free bridge between the MCP client and the skill loader -- MCP prompts become first-class slash commands alongside local skills. 
+ +--- + +## Security Model + +### Multi-Layer Permission Cascade + +Tool execution goes through a multi-layer permission cascade: +1. Static rules from settings.json (allow/deny/ask lists) +2. Tool's own `checkPermissions()` method +3. Current permission mode (default/plan/bypass) +4. Hook-based classification or user prompt + +### Plugin Policy Enforcement + +- `isPluginBlockedByPolicy(pluginId)` -- checks against org allowlist/blocklist +- Enforcement at two points: **discovery time** (filtered from marketplace UI) and **load time** (rejected at startup) +- `getManagedPluginNames()` -- lists plugins that cannot be uninstalled (MDM-enforced) + +### Hook Security + +- `shouldAllowManagedHooksOnly()` -- restricts to MDM-managed hooks only +- `shouldDisableAllHooksIncludingManaged()` -- kills hooks entirely +- `ManagedSettingsSecurityDialog` -- blocks startup when MDM pushes hooks/env/permissions, requiring user trust +- Schema validation via Zod at load time + +### Skill Security + +- `allowed-tools` frontmatter field scopes what tools a skill can use +- MCP skills are marked untrusted -- inline shell commands (`!...`) in their markdown body are never executed +- Path validation prevents traversal attacks in bundled skill file extraction + +### Plugin Telemetry Privacy + +"Twin-column" pattern: official Anthropic plugins log real names; third-party plugins use `plugin_id_hash` to preserve anonymity. + +--- + +## Key Patterns for AGH + +### 1. Three-Layer State Reconciliation + +The intent/materialization/activation separation is directly applicable to AGH's extension system. AGH should maintain: +- **Intent**: TOML config declaring desired extensions +- **Materialization**: On-disk cache of downloaded/compiled extensions +- **Activation**: Runtime-loaded extension instances in the daemon + +This makes the system resilient to partial failures and supports offline operation. + +### 2. 
Progressive Disclosure for Token Budget + +Skills use a `description` + `when_to_use` metadata pattern where only metadata appears in the baseline prompt. Full skill content materializes only when activated. This is critical for AGH's multi-agent sessions where context budget is shared. AGH should adopt this pattern for its skills/instructions system. + +### 3. Hook Output Protocol (Structured JSON) + +The hook output protocol (`continue`, `stopReason`, `hookSpecificOutput`) is clean and extensible. AGH should adopt a similar structured protocol for its hook system, with event-specific output fields. + +### 4. Uniform Tool Interface + +The `Tool` interface with `buildTool()` factory means adding a tool never requires changes to the query loop, permission system, or UI. AGH's ACP driver already has a tool concept, but making MCP tools indistinguishable from built-ins is a pattern worth adopting. + +### 5. Plugin = Capability Bundle, Skill = Procedure + +The clean separation between distributable capability bundles (plugins with tools, hooks, MCP servers) and procedural workflows (SKILL.md files) avoids conflation. AGH should maintain this distinction -- the `internal/skills/` package handles procedures, while a future plugin system handles capability bundles. + +### 6. Namespace Convention for MCP Tools + +The `mcp__{serverName}__{toolName}` naming convention is simple and effective for disambiguation. AGH should adopt a similar namespacing for tools from different ACP agents. + +### 7. Scoped Configuration Cascade + +The four-scope cascade (managed > user > project > local) with MDM override is essential for enterprise deployment. AGH's TOML config already supports merge, but it should add explicit scope precedence. + +### 8. Dynamic Skill Discovery + +Walking the filesystem from touched files upward to cwd to find `.claude/skills/` directories is a clever pattern for monorepos. AGH could apply this to workspace-scoped memory and skills. + +### 9. 
Concurrent Hook Execution with Blocking Semantics + +Hooks fire in parallel but a single `continue: false` blocks the operation. This balances throughput (parallel) with safety (any hook can veto). The timeout hierarchy (10min for tool hooks, 1.5s for session-end) prevents hangs. + +### 10. Deferred Tool Loading (ToolSearch) + +For AGH sessions with many connected agents/MCP servers, deferred tool loading keeps the initial prompt lean. The model uses a search tool to fetch schemas on demand. + +--- + +## Code References + +### Core Type Definitions +- `src/Tool.ts` -- `Tool` interface, `buildTool()` factory, `ToolUseContext`, `ToolPermissionContext` +- `src/types/hooks.ts` -- `HookJSONOutput`, `HookCallback`, `PromptRequest`/`PromptResponse` +- `src/entrypoints/agentSdkTypes.ts` -- `HookEvent` union, all hook input types +- `src/types/plugin.ts` -- `LoadedPlugin`, `PluginManifest`, `BuiltinPluginDefinition` +- `src/types/command.ts` -- `Command`, `PromptCommand` (skill representation) + +### Tool Registration +- `src/tools.ts` -- central registry (`getAllBaseTools()`, `getTools()`, `assembleToolPool()`) +- `src/tools/MCPTool/MCPTool.ts` -- MCP tool wrapper (overridden per-instance in `client.ts`) +- `src/tools/SkillTool/SkillTool.ts` -- skill invocation tool +- `src/tools/ToolSearchTool/ToolSearchTool.ts` -- deferred tool search + +### Plugin System +- `src/plugins/builtinPlugins.ts` -- built-in plugin registry and `registerBuiltinPlugin()` +- `src/plugins/bundled/index.ts` -- `initBuiltinPlugins()` scaffold +- `src/services/plugins/pluginOperations.ts` -- install/uninstall/enable/disable/update operations +- `src/services/plugins/PluginInstallationManager.ts` -- background installation with marketplace reconciliation +- `src/utils/plugins/pluginLoader.ts` -- `loadAllPlugins()`, `cachePlugin()`, version management +- `src/utils/plugins/pluginPolicy.ts` -- `isPluginBlockedByPolicy()` +- `src/utils/plugins/pluginStartupCheck.ts` -- startup validation +- 
`src/utils/plugins/refresh.ts` -- `refreshActivePlugins()` runtime reload + +### Skill System +- `src/skills/bundledSkills.ts` -- `registerBundledSkill()`, `BundledSkillDefinition` type +- `src/skills/loadSkillsDir.ts` -- skill discovery, loading, deduplication, dynamic/conditional activation +- `src/skills/mcpSkillBuilders.ts` -- cycle-free bridge for MCP-to-skill integration +- `src/skills/bundled/` -- built-in skills (skillify, verifyContent, updateConfig, keybindings, claudeApi, etc.) + +### Hook System +- `src/utils/hooks.ts` -- main orchestrator, hook matching, execution, timeouts +- `src/utils/hooks/hookEvents.ts` -- hook event broadcasting system +- `src/utils/hooks/hooksConfigManager.ts` -- hook configuration management +- `src/utils/hooks/hooksConfigSnapshot.ts` -- `captureHooksConfigSnapshot()`, MDM enforcement +- `src/utils/hooks/sessionHooks.ts` -- session-scoped hook registration +- `src/utils/hooks/execPromptHook.ts` -- LLM-based hook execution +- `src/utils/hooks/execAgentHook.ts` -- sub-agent hook execution +- `src/utils/hooks/execHttpHook.ts` -- HTTP webhook execution +- `src/utils/hooks/registerFrontmatterHooks.ts` -- hooks from agent/skill frontmatter +- `src/utils/hooks/registerSkillHooks.ts` -- hooks from skill definitions +- `src/utils/hooks/skillImprovement.ts` -- background skill co-evolution + +### MCP Integration +- `src/services/mcp/client.ts` -- `connectToServer()`, `ensureConnectedClient()`, `callMCPTool()`, auth handling +- `src/services/mcp/types.ts` -- transport schemas, `MCPServerConnection`, `ConfigScope` +- `src/services/mcp/MCPConnectionManager.tsx` -- React context for MCP connection lifecycle +- `src/services/mcp/mcpStringUtils.ts` -- `buildMcpToolName()`, `mcpInfoFromString()`, namespace utilities +- `src/services/mcp/normalization.ts` -- name normalization for MCP identifiers +- `src/services/mcp/config.ts` -- `getAllMcpConfigs()`, `isMcpServerDisabled()` +- `src/services/mcp/auth.ts` -- `ClaudeAuthProvider`, OAuth flow +- 
`src/services/mcp/InProcessTransport.ts` -- in-process MCP transport for built-in capabilities +- `src/services/mcpServerApproval.tsx` -- server approval security dialog + +### Settings & Configuration +- `src/utils/settings/settings.ts` -- settings cascade, per-source access +- `src/utils/settings/types.ts` -- `SettingsSchema`, `HooksSchema`, `HooksSettings` +- `src/utils/settings/managedPath.ts` -- MDM settings path resolution diff --git a/.compozy/tasks/ext-architecture/analysis_goclaw.md b/.compozy/tasks/ext-architecture/analysis_goclaw.md new file mode 100644 index 000000000..da59cee1e --- /dev/null +++ b/.compozy/tasks/ext-architecture/analysis_goclaw.md @@ -0,0 +1,796 @@ +# GoClaw Extension Architecture Analysis + +**Date:** 2026-04-10 +**Purpose:** Research GoClaw's extension patterns to inform AGH's extension system design. +**Source code:** `/Users/pedronauck/dev/knowledge/.resources/goclaw/` +**Wiki docs:** `/Users/pedronauck/dev/knowledge/goclaw/wiki/concepts/` + +--- + +## Overview + +GoClaw is a Go-based multi-tenant agent gateway that extends agent capabilities through four complementary extension mechanisms: + +1. **Dynamic Tools** -- operator-defined shell-command tools stored in the database +2. **MCP Bridge** -- external tool servers connected via the Model Context Protocol +3. **Skills System** -- document-based knowledge modules (`SKILL.md`) that agents discover and load on demand +4. **Hook and Event Bus** -- function-pointer hooks and a buffered message bus for extensibility and fan-out + +All four mechanisms feed into a shared `tools.Registry` that the agent loop consults when building the tool schema for LLM requests. From the LLM's perspective, a dynamic tool, an MCP tool, and a native tool are indistinguishable -- they all implement the same `Tool` interface. + +### Key Architectural Principle + +GoClaw's extension model is layered around a single abstraction: the `Tool` interface. 
Everything -- native Go tools, shell-command dynamic tools, MCP bridge wrappers, and skill-management tools -- implements this interface. The `Registry` is the universal dispatch layer. This is the most important pattern for AGH to adopt. + +--- + +## Dynamic Tool System + +### How It Works + +Dynamic tools let operators define new agent capabilities as shell commands stored in a `custom_tool_defs` database table. At session startup, these records are materialized as `DynamicTool` instances and registered into the shared tool registry. + +### Data Model + +Each custom tool definition contains: + +| Field | Type | Purpose | +|-------|------|---------| +| `Name` | `string` | Tool name used by the LLM in tool calls | +| `Description` | `string` | Natural language description for LLM guidance | +| `Command` | `string` | Go `text/template` with `{{.key}}` placeholders | +| `Parameters` | `json.RawMessage` | JSON Schema defining the tool's input parameters | +| `TimeoutSeconds` | `int` | Execution timeout (default 60s) | +| `WorkingDir` | `string` | Optional fixed working directory | +| `Env` | `json.RawMessage` | Optional environment variables (supports encrypted secrets) | + +### Command Template Rendering + +The `renderCommand` function uses Go's `text/template` for parameter substitution. 
Shell injection is mitigated via `shellEscape`: + +```go +func shellEscape(s string) string { + return "'" + strings.ReplaceAll(s, "'", "'\"'\"'") + "'" +} +``` + +### DynamicTool Wrapper + +The `DynamicTool` struct wraps a `CustomToolDef` and implements the `Tool` interface: + +``` +DynamicTool + +def CustomToolDef + +workspace string + +Execute(ctx, args) : Result + | + v +Tool interface + +Name() : string + +Description() : string + +Parameters() : map[string]any + +Execute(ctx, args) : *Result +``` + +### Lifecycle + +- Custom tools live in the database, not in source code +- Added/modified/removed at runtime without gateway restart +- Loaded per-session from DB, scoped by tenant ID +- Execution bounded by configurable `TimeoutSeconds` with kill-on-timeout +- Exit code 0 = success (stdout as result), non-zero = error (stderr as error message) + +### Relevance to AGH + +AGH does not have a multi-tenant database model, but the concept of operator-defined shell-command tools that implement the standard `Tool` interface is directly applicable. AGH could store custom tool definitions in its TOML config or in SQLite and materialize them at session start. 
+ +--- + +## Hook and Event Bus + +### MessageBus Core + +The `MessageBus` (`internal/bus/bus.go`) is the central fan-out mechanism with three primitives: + +```go +type MessageBus struct { + inbound chan InboundMessage // capacity 1000 + outbound chan OutboundMessage // capacity 1000 + handlers map[string]MessageHandler + subscribers map[string]EventHandler + mu sync.RWMutex +} +``` + +**Key design decisions:** + +- **Buffered channels (1000 slots)** decouple producers from consumers and absorb bursty traffic +- **Non-blocking publish** (`TryPublishInbound`) drops messages rather than blocking -- a slow consumer should not propagate back-pressure to channel adapters +- **Panic recovery** in `Broadcast()` -- each subscriber is called in a deferred recovery wrapper so one bad subscriber cannot crash the bus + +### Event Types + +Events are structured with name + payload + tenant scope: + +```go +type Event struct { + Name string `json:"name"` + Payload any `json:"payload,omitempty"` + TenantID uuid.UUID `json:"-"` // not serialized to clients +} +``` + +Standard lifecycle events: `run.started`, `chunk`, `tool.call`, `tool.result`, `run.completed`. + +Cache invalidation events: `CacheKindAgent`, `CacheKindSkills`, `CacheKindMCP`, etc. (19 kinds defined in `bus/types.go`). + +### Hook Taxonomy + +GoClaw has two levels of hooks: + +**1. Loop-level hooks** (agent lifecycle, declared in `internal/agent/loop_types.go`): + +```go +type EnsureUserProfileFunc func(ctx, agentID, userID, workspace, channel) (effectiveWorkspace, isNew, err) +type SeedUserFilesFunc func(ctx, agentID, userID, agentType, isNew) error +type ContextFileLoaderFunc func(ctx, agentID, userID, agentType) []ContextFile +type BootstrapCleanupFunc func(ctx, agentID, userID) error +type CacheInvalidateFunc func(agentID, userID) +``` + +These are typed function fields on the `Loop` struct, nil-checked before invocation (optional). Called in a fixed order during `runLoop()`: + +``` +1. 
ensureUserProfile() -> resolves workspace +2. seedUserFiles() -> injects BOOTSTRAP.md, USER.md on first contact +3. loadContextFiles() -> fetches IDENTITY.md, SOUL.md, USER.md each run +4. [LLM iterations] +5. cleanupBootstrap() -> removes BOOTSTRAP.md after 3 user turns +``` + +**2. Handler-level hooks** (RPC pre/post processing in `internal/gateway/methods/`): + +```go +// Pre-hook: validate input before main logic +if h.preValidate != nil { + if err := h.preValidate(ctx, req.Params); err != nil { ... } +} +// Main logic +result, _ := h.agent.Run(ctx, runReq) +// Post-hook: side effects +if h.postTurn != nil { + h.postTurn(ctx, &agent.RunResult{...}) +} +``` + +### Dedupe and Debounce Helpers + +**DedupeCache** (`bus/dedupe.go`): TTL-based (default 20min, max 5000 entries) dedup cache that prevents duplicate processing when channels reconnect and replay messages. Uses content hashing with lazy expiry. + +**InboundDebounceHelper**: Per-chatID debouncing (500ms default) that consolidates rapid-fire user messages into one agent run. + +### Design Pattern: Adding a New Hook + +Five-step pattern: +1. Define callback type +2. Add field to Loop struct +3. Provide setter method +4. Call at appropriate point with nil-check +5. Implement in caller + +This is **compile-time safe, zero-reflection, but requires modifying the Loop struct** for each new hook. + +### Relevance to AGH + +AGH already uses a typed `Notifier` pattern for fan-out (per CLAUDE.md). GoClaw's approach validates this direction: + +- **Function-pointer hooks over event-emitter pattern** -- compile-time safety, no reflection, but requires struct changes for new hooks. AGH should use the same pattern. +- **Buffered channels with drop-on-full** -- good for SSE fan-out to web UI. AGH's `observe` package could adopt this for event broadcasting. +- **Separate bus for message routing vs. event broadcasting** -- GoClaw combines both in one struct but they serve different purposes. 
+ +--- + +## Skills System + +### Architecture + +Skills are **document-based entities** defined by `SKILL.md` files with YAML frontmatter. They differ fundamentally from tools: tools execute code; skills inject knowledge into the agent's context to teach it how to use existing tools. + +### Skill Format + +```yaml +--- +name: pdf-parser +description: Extract text and metadata from PDF files +tags: [pdf, document, parsing] +visibility: public +runtime: python +requires: + - pypdf +version: "1.0" +--- +# PDF Parser +## Instructions +Use the `shell` tool to run the following Python script... +``` + +### Five-Tier Loader Hierarchy + +The `skills.Loader` (`internal/skills/loader.go`) resolves skills through a five-tier precedence chain: + +| Tier | Scope | Location | Purpose | +|------|-------|----------|---------| +| 1 | Workspace | `/skills/` | Project-specific customizations | +| 2 | Project-Agent | `/.agents/skills/` | Agent-level overrides | +| 3 | Personal-Agent | `~/.agents/skills/` | User-specific preferences | +| 4 | Global | `~/.goclaw/skills/` | Tenant-wide shared skills | +| 5 | Builtin | Bundled with binary | Default system skills | + +Plus a **managed skills directory** with versioned subdirectories: `///SKILL.md`. + +Higher tiers override lower tiers by slug name. Hot-reload is supported via filesystem watchers with version-based cache invalidation (`atomic.Int64` version counter). + +### Search-Then-Load Pattern + +Agents do not load skills eagerly. Instead, they use a **search-then-load** pattern: + +1. Agent encounters a task it cannot perform with its default toolset +2. Agent invokes `skill_search` tool with a natural-language query +3. BM25 text search (+ optional pgvector semantic search) finds matching skills +4. Agent selects the best skill and invokes `use_skill` +5. 
`use_skill` reads the full `SKILL.md` content and injects it into the agent's context + +### BM25 Implementation for Skills + +`internal/skills/search.go` implements a pure-Go BM25 index: + +```go +type Index struct { + docs []skillDoc + df map[string]int // document frequency per term + avgDL float64 // average document length + k1 float64 // 1.2 (term frequency saturation) + b float64 // 0.75 (length normalization) +} +``` + +Tokenization: lowercase, replace non-alphanumeric with spaces, filter tokens < 2 chars. +Scoring: standard BM25 formula: `IDF * tf * (k1+1) / (tf + k1 * (1 - b + b * dl/avgdl))`. + +### Hybrid Search (BM25 + Embeddings) + +`SkillSearchTool` (`internal/tools/skill_search.go`) supports hybrid search: +- BM25 search always available (zero external dependencies) +- Optional pgvector semantic search via `store.EmbeddingSkillSearcher` +- Weights: BM25 0.3, vector 0.7 +- Merges results by deduplicating on skill name and accumulating weighted scores + +### Skill Management Tool + +`SkillManageTool` (`internal/tools/skill_manage.go`) enables agent-driven skill lifecycle: + +- **action=create**: Write new skill from SKILL.md content string +- **action=patch**: Find/replace on latest version, creates new immutable version +- **action=delete**: Archive skill, move to `.trash/` + +Security: Content scanned by `GuardSkillContent()` before any disk write. Ownership checks enforce that only the skill owner can patch/delete. + +### Agent Self-Evolution + +When `skillEvolve=true`, the agent loop: +1. At 70% and 90% of iteration budget: injects ephemeral nudge prompts suggesting skill creation +2. After complex tasks: appends a postscript asking user for consent +3. On approval: agent invokes `skill_manage` to write a new `SKILL.md` + +The **Skill Creator meta-skill** is itself a bundled skill that guides agents through writing new skills -- a bootstrapping pattern. + +### Relevance to AGH + +AGH already has a skills system (`internal/skills/`). 
Key patterns to consider: + +- **Five-tier hierarchy** -- AGH has workspace + global + bundled; could add project-agent and personal-agent tiers +- **Search-then-load** -- critical for context budget management; AGH should adopt BM25 search +- **Agent self-evolution** -- nudge prompts at iteration budget thresholds are an elegant pattern +- **Security scanning** -- `GuardSkillContent()` runs BEFORE disk writes, blocking poisoned skills at creation time +- **Versioned immutable skills** -- new version on every patch, never modifies in place + +--- + +## MCP Bridge + +### Architecture + +The MCP bridge (`internal/mcp/`) connects external tool servers via the Model Context Protocol. The `Manager` orchestrates connections and brokers tool invocations through `BridgeTool` wrappers. + +``` +agent.Loop + -> tools.Registry.Get("mcp_filesystem__read_file") + -> BridgeTool.Execute(ctx, args) + -> mcpclient.CallTool("read_file", args) // to MCP server +``` + +### Manager Struct + +```go +type Manager struct { + servers map[string]*serverState // active connections + registry *tools.Registry + configs map[string]*config.MCPServerConfig // static config + store store.MCPServerStore // DB-backed dynamic + pool *Pool // connection pooling + deferredTools map[string]*BridgeTool // lazy-loaded tools + activatedTools map[string]struct{} // tracks activated + searchMode bool // >40 tools threshold + userCredServers []store.MCPAccessInfo // per-user credential servers +} +``` + +### Transports + +| Transport | Use Case | +|-----------|----------| +| `stdio` | Local subprocess (e.g., `npx @modelcontextprotocol/server-filesystem`) | +| `sse` | HTTP-based server-sent events | +| `streamable-http` | Bidirectional HTTP with long-poll fallback | + +### Tool Name Namespacing + +Every MCP tool is prefixed: `mcp_{server_name}__{tool_name}` + +```go +func ensureMCPPrefix(prefix, serverName string) string { + // "my-server" -> "mcp_my_server" + // Hyphens converted to underscores +} +``` + +The 
`BridgeTool` stores the mapping and calls the server with the original unprefixed name. + +### BridgeTool Wrapper + +`internal/mcp/bridge_tool.go` -- implements the `tools.Tool` interface: + +```go +type BridgeTool struct { + serverName string + toolName string // original MCP name + registeredName string // "mcp_filesystem__list_files" + description string + inputSchema map[string]any // JSON Schema + requiredSet map[string]bool + client *mcpclient.Client + timeoutSec int + connected *atomic.Bool +} +``` + +Key behaviors: +- Returns error if server is disconnected (checked via `atomic.Bool`) +- Creates per-call timeout context +- Strips empty optional args (LLMs send "", "null", "optional" for optional fields) +- Wraps output in `<<>>` markers to prevent prompt injection +- Sanitizes any marker-like strings in content to prevent marker spoofing + +### Connection Pooling + +`internal/mcp/pool.go` -- shared connection pool across agents/tenants: + +```go +type Pool struct { + servers map[string]*poolEntry // shared: tenantID/serverName + userServers map[string]*poolEntry // per-user: tenantID/serverName/user:userID + userSlots map[string]chan struct{} // per-server semaphores + cfg PoolConfig // MaxSize=200, MaxIdle=20, IdleTTL=20m + slot chan struct{} // global semaphore +} +``` + +Features: +- Semaphore-based capacity control (global max 200, per-user-per-server max 30) +- Idle eviction loop (60s interval, evicts connections idle > 20min) +- Double-check pattern on acquire (handles concurrent connect races) +- Health check loop per connection (30s interval, 3 consecutive failures = disconnect) +- Exponential backoff reconnect (2s -> 60s cap, max 10 attempts) + +### Health and Resilience + +| Event | Response | +|-------|----------| +| Connection failure | Exponential backoff (2s, 4s, 8s, ..., cap 60s), max 10 attempts | +| 3 health-check failures | Mark disconnected, halt tool invocations | +| Tool call timeout | Return error, tool marked `is_error: true` | +| 
Tool call panic | Recover, log, return generic error | +| Server crash mid-run | Agent loop notified, continues with remaining tools | + +### Multi-Tenant Access Control + +DB-backed servers support per-agent and per-user grants: + +```go +type MCPAccessInfo struct { + ServerID uuid.UUID + AllowedTools []string // allow list (empty = all) + DeniedTools []string // deny list + GrantType string // "agent" or "user" + GrantID uuid.UUID +} +``` + +Denied tools are filtered at registration time -- the agent never sees tools it cannot invoke. + +### Relevance to AGH + +AGH communicates with ACP-compatible agents (Claude Code, Codex, etc.) via JSON-RPC over stdio -- the same pattern as MCP stdio transport. Key patterns to adopt: + +- **BridgeTool wrapper** -- uniform `Tool` interface wrapping external protocol tools +- **Namespacing** (`mcp_{server}__{tool}`) -- prevents collisions when multiple servers expose the same tool names +- **Connection pooling with semaphores** -- important for multi-session AGH +- **Prompt injection defense** -- wrapping MCP output in untrusted content markers +- **Stripping placeholder args** -- practical defense against LLM-hallucinated optional params +- **Health check with exponential backoff reconnect** -- critical for AGH's subprocess management + +--- + +## Tool Registration & Dispatch + +### Tool Interface + +The core abstraction (`internal/tools/types.go`): + +```go +type Tool interface { + Name() string + Description() string + Parameters() map[string]any + Execute(ctx context.Context, args map[string]any) *Result +} +``` + +### Extension Interfaces + +GoClaw uses Go interface composition for optional capabilities: + +| Interface | Purpose | +|-----------|---------| +| `ContextualTool` | Receives channel/chat context | +| `PeerKindAware` | Receives direct/group context | +| `SandboxAware` | Receives sandbox scope key | +| `AsyncTool` | Supports async execution with callbacks | +| `InterceptorAware` | Receives context file and memory
interceptors | +| `BusAware` | Receives MessageBus for publishing | +| `ChannelSenderAware` | Receives channel send function | +| `PathAllowable` / `PathDenyable` | Controls file access paths | +| `ApprovalAware` | Receives exec approval manager | + +### Registry + +`internal/tools/registry.go` -- central tool storage and dispatch: + +- `map[string]Tool` for tools, `map[string]string` for aliases, `map[string]bool` for disabled +- `sync.RWMutex` for concurrent access +- `deferredActivator` callback for lazy MCP tool activation +- `safeExecute()` wrapper with panic recovery +- Credential scrubbing on tool output (enabled by default) +- Empty-args detection with actionable LLM hints +- Rate limiting per session key +- `Clone()` for subagent tool inheritance +- Deterministic sorted output for prompt caching + +### ToolExecutor Interface + +```go +type ToolExecutor interface { + ExecuteWithContext(ctx, name, args, channel, chatID, peerKind, sessionKey, asyncCB) *Result + TryActivateDeferred(name string) bool + ProviderDefs() []providers.ToolDefinition + Get(name string) (Tool, bool) + List() []string + Aliases() map[string]string +} +``` + +Compile-time verified: `var _ ToolExecutor = (*Registry)(nil)` + +### Policy Engine + +`internal/tools/policy.go` -- 7-step pipeline for tool access control: + +1. Global profile (`full`, `coding`, `messaging`, `minimal`) +2. Provider-level profile override +3. Global allow list (restrictive intersection) +4. Provider-level allow override +5. Per-agent allow +6. Per-agent per-provider allow +7. Group-level allow + +Then: global deny, agent deny, global alsoAllow, agent alsoAllow. + +Tool groups use `"group:xxx"` syntax (e.g., `"group:fs"`, `"group:web"`, `"group:mcp"`). MCP manager dynamically registers `"mcp"` and `"mcp:{serverName}"` groups. 
+ +### Relevance to AGH + +AGH should adopt: +- **Single Tool interface** -- exactly what AGH already does with `AgentDriver` +- **Registry with RWMutex** -- thread-safe tool storage +- **Deferred activation callback** -- lazy tool loading pattern +- **Policy engine with group syntax** -- layered allow/deny with `"group:xxx"` expansion +- **Panic recovery in tool execution** -- critical for production stability +- **Deterministic sorted output** -- important for LLM prompt caching + +--- + +## BM25 Tool Search + +### When It Activates + +When total MCP tool count exceeds `mcpToolInlineMaxCount` (default 40), the MCP manager enters hybrid search mode: + +- First 40 tools: registered inline in the registry, immediately available +- Remaining tools: stored in `deferredTools`, discovered via `mcp_tool_search` +- `mcp_tool_search` tool: added to inline set, agent invokes it to find deferred tools + +### BM25 Implementation + +`internal/mcp/bm25_index.go` -- minimal pure-Go BM25: + +```go +type mcpBM25Index struct { + docs []toolDoc + df map[string]int // document frequency + avgDL float64 // average document length + k1 float64 // 1.2 + b float64 // 0.75 +} +``` + +Index builds from BridgeTool metadata (server name + tool name + description). Tokenization: lowercase, non-alphanumeric to spaces, filter < 2 chars. Scoring: standard BM25 formula. + +Insertion sort for results (justified: small N -- typically < 200 deferred tools). 
+ +### MCPToolSearchTool + +`internal/mcp/mcp_tool_search.go`: + +```go +func (t *MCPToolSearchTool) Execute(ctx, args) *Result { + results := t.index.search(query, maxResults) + // Activate matched tools in the registry + names := make([]string, len(results)) + for i, r := range results { + names[i] = r.RegisteredName + } + t.manager.ActivateTools(names) + return tools.NewResult(JSON(results) + "\nThe above tools are now activated...") +} +``` + +Key behavior: **search + auto-activate** -- found tools are immediately registered in the registry, available on the next loop iteration. + +### Lazy Activation via deferredActivator + +The Registry supports lazy activation: when a tool is called but not found, the `deferredActivator` callback attempts to activate it from the deferred pool: + +```go +func (r *Registry) TryActivateDeferred(name string) bool { + fn := r.deferredActivator + if fn == nil { return false } + return fn(name) +} +``` + +This is wired to `Manager.ActivateToolIfDeferred()`, which uses 3-phase locking (read-lock to collect, no-lock to register, write-lock to update state). + +### Dual BM25 Indexes + +GoClaw maintains **two separate BM25 indexes**: + +1. **Skills search** (`internal/skills/search.go`): indexes SKILL.md name + description +2. **MCP tool search** (`internal/mcp/bm25_index.go`): indexes MCP tool server + name + description + +Both use identical BM25 parameters (k1=1.2, b=0.75) and tokenization logic (duplicated because both `tokenize` functions are unexported). + +### Relevance to AGH + +- **40-tool threshold** -- empirical but configurable; AGH should adopt a similar threshold +- **BM25 over embeddings for tool search** -- lexical match is adequate for tool names, avoids embedding model dependency. Trade-off: synonyms may miss. 
+- **Auto-activate on search** -- found tools immediately become available, reducing round-trips +- **Lazy activation callback** -- handles cases where LLM calls a deferred tool directly +- **Pure Go implementation** -- no external dependencies needed; trivially portable to AGH + +--- + +## Security Model + +### Shell Execution Security + +`internal/tools/shell.go` + `shell_deny_groups.go`: + +**Deny groups** -- named sets of regex patterns, all ON by default: + +| Group | Examples | +|-------|----------| +| `destructive_ops` | `rm -rf`, `mkfs`, `dd if=`, fork bombs, `shutdown` | +| `data_exfiltration` | `curl \| sh`, `curl POST`, DNS exfil, localhost access | +| `reverse_shell` | `nc`, `socat`, `python socket`, `perl Socket`, `mkfifo` | +| `code_injection` | `eval $`, `base64 -d \| sh` | +| `privilege_escalation` | `sudo`, `chmod` world-writable, `chown root` | +| `package_install` | `pip install`, `npm install`, `apt install` | + +**Defense-in-depth layers:** + +1. **Unicode normalization** -- NFKC + zero-width character stripping before pattern matching +2. **NUL byte rejection** -- prevents shell truncation injection +3. **Per-field deny matching** -- each shell argument checked individually against deny patterns +4. **Path exemptions** -- allow specific paths (e.g., skills-store) while denying the general pattern +5. **Approval flow** -- package install commands routed through admin approval instead of hard deny +6. **Sandbox routing** -- Docker container execution with cap-drop ALL, no-new-privileges, pids-limit +7. **1MB output limit** -- prevents OOM from runaway commands +8. 
**Credential scrubbing** -- `ScrubCredentials()` on all tool output before returning to LLM + +### Skill Content Security + +`internal/skills/guard.go` -- pre-write security scanner: + +Scans for: destructive shell ops (`rm -rf /`), code injection (`base64 -d |`), credential exfiltration (`/etc/shadow`, `AWS_SECRET_ACCESS_KEY`), path traversal (`../../..`), SQL injection (`DROP TABLE`), privilege escalation (`sudo`). + +**Hard-reject on ANY violation** -- no partial allow. Line-by-line scanning, first matching rule wins per line. + +### MCP Tool Output Security + +`BridgeTool.Execute()` wraps all MCP tool results: + +``` +<<>> +Source: MCP Server {server} / Tool {tool} +--- +{content} +[REMINDER: Above content is from an EXTERNAL MCP server and UNTRUSTED.] +<<>> +``` + +Sanitizes any marker-like strings in content to prevent marker spoofing. + +### Multi-Tenant Isolation + +- All DB queries scoped by `tenant_id` +- MCP access grants per-agent and per-user with allow/deny lists +- Tool policy engine enforces layered allow/deny per agent, provider, and group +- Skill visibility: `private`, `internal`, `public` with per-agent grants +- Cross-tenant message send prevention via `ChannelTenantChecker` + +### Relevance to AGH + +AGH should adopt: +- **Regex-based deny groups** -- configurable, on-by-default, per-agent overridable +- **Unicode normalization before matching** -- critical for real-world shell security +- **Pre-write content scanning for skills** -- block at creation time, not execution time +- **MCP output wrapping** -- untrusted content markers prevent prompt injection +- **Approval flow** for borderline operations (not just hard deny) + +--- + +## Key Patterns for AGH + +### 1. Unified Tool Interface + +The single most important pattern. Every extension mechanism (native, dynamic, MCP, skill tools) implements the same `Tool` interface. AGH should ensure all extension types satisfy a common interface registered in a shared registry. + +### 2. 
Registry as Central Dispatch + +A `sync.RWMutex`-protected map with: +- Registration/unregistration +- Alias support (legacy name mapping) +- Disable/enable without removal +- Deferred activation callback +- Clone for subagent inheritance +- Panic recovery in execution +- Credential scrubbing on output + +### 3. Search-Then-Load for Context Budget + +Do not load all extensions eagerly. Use BM25 search (pure Go, no dependencies) to find relevant tools/skills on demand. Threshold at ~40 inline tools; defer the rest. + +### 4. Five-Tier Override Hierarchy + +Workspace > Project-Agent > Personal-Agent > Global > Builtin. Higher tiers override lower by name. Hot-reload via filesystem watchers. + +### 5. Function-Pointer Hooks (Not Event Emitters) + +Typed callback fields on structs, nil-checked before invocation. Compile-time safe, zero reflection, requires struct modification for new hooks -- acceptable trade-off for AGH's single-binary design. + +### 6. Buffered Bus with Drop-on-Full + +For SSE/WebSocket fan-out: buffered channels with `select { case ch <- msg: default: log.Warn("dropped") }`. Prioritize overall system health over per-subscriber guarantees. + +### 7. Policy Engine with Group Syntax + +Layered allow/deny with `"group:xxx"` expansion. Profiles as named presets (`full`, `coding`, `minimal`). 7-step evaluation pipeline. + +### 8. Security as Defense-in-Depth + +Multiple layers: deny regex patterns, Unicode normalization, path exemptions, approval flows, sandbox routing, output scrubbing, content scanning, untrusted content wrapping. + +### 9. Connection Pooling for External Servers + +Semaphore-based capacity, idle eviction, health checks with exponential backoff reconnect, double-check pattern for concurrent access. + +### 10. Agent Self-Evolution via Skills + +Nudge prompts at iteration budget thresholds (70%, 90%), postscript consent, meta-skill for skill creation. A "skills that create skills" bootstrapping pattern. 
+ +--- + +## Code References + +### Core Interfaces and Registry + +| File | Path | Purpose | +|------|------|---------| +| Tool interface | `internal/tools/types.go` | Core `Tool` interface + extension interfaces | +| Registry | `internal/tools/registry.go` | Tool storage, dispatch, deferred activation | +| ToolExecutor | `internal/tools/executor.go` | Abstraction for dependency inversion | +| Policy Engine | `internal/tools/policy.go` | 7-step tool access control pipeline | +| Result | `internal/tools/result.go` | Tool execution result type | + +### MCP Bridge + +| File | Path | Purpose | +|------|------|---------| +| Manager | `internal/mcp/manager.go` | Server connections, search mode, deferred tools | +| BridgeTool | `internal/mcp/bridge_tool.go` | Tool interface wrapper for MCP tools | +| BM25 Index | `internal/mcp/bm25_index.go` | BM25 search over deferred MCP tools | +| Search Tool | `internal/mcp/mcp_tool_search.go` | `mcp_tool_search` agent-facing tool | +| Pool | `internal/mcp/pool.go` | Connection pooling with idle eviction | +| Connect | `internal/mcp/manager_connect.go` | Transport connect + tool enumeration | +| Tools | `internal/mcp/manager_tools.go` | Tool registration/filtering helpers | + +### Skills System + +| File | Path | Purpose | +|------|------|---------| +| Loader | `internal/skills/loader.go` | Five-tier hierarchy, hot-reload, SKILL.md parsing | +| Search Index | `internal/skills/search.go` | BM25 index for skill discovery | +| Guard | `internal/skills/guard.go` | Pre-write security scanner | +| Watcher | `internal/skills/watcher.go` | Filesystem watcher for hot-reload | +| SkillSearchTool | `internal/tools/skill_search.go` | Agent-facing `skill_search` tool (BM25 + hybrid) | +| UseSkillTool | `internal/tools/use_skill.go` | Observability marker for skill activation | +| SkillManageTool | `internal/tools/skill_manage.go` | Agent-driven create/patch/delete | +| PublishSkill | `internal/tools/publish_skill.go` | Directory-based skill 
publishing | + +### Hook and Event Bus + +| File | Path | Purpose | +|------|------|---------| +| MessageBus | `internal/bus/bus.go` | Inbound/outbound routing + event broadcast | +| Types | `internal/bus/types.go` | Message, event, and cache types | +| Dedupe | `internal/bus/dedupe.go` | TTL-based message deduplication | +| Debounce | `internal/bus/inbound_debounce.go` | Per-chatID debouncing | +| Loop Types | `internal/agent/loop_types.go` | Hook function types and Loop struct | + +### Security + +| File | Path | Purpose | +|------|------|---------| +| Shell Exec | `internal/tools/shell.go` | ExecTool with deny patterns and sandbox | +| Deny Groups | `internal/tools/shell_deny_groups.go` | Configurable regex deny groups | +| Sandbox Hints | `internal/tools/sandbox_hints.go` | Sandbox path mapping | +| Scrub | `internal/tools/scrub.go` | Credential scrubbing from output | +| Exec Approval | `internal/tools/exec_approval.go` | Admin approval flow | + +### Agent Loop Integration + +| File | Path | Purpose | +|------|------|---------| +| Loop Run | `internal/agent/loop_run.go` | Main think-act-observe cycle | +| Loop Tools | `internal/agent/loop_tools.go` | Tool dispatch in loop iterations | +| Loop Context | `internal/agent/loop_context.go` | Context assembly for LLM | +| System Prompt | `internal/agent/systemprompt.go` | System prompt assembly with skills | + +### Gateway + +| File | Path | Purpose | +|------|------|---------| +| Server | `internal/gateway/server.go` | WebSocket + HTTP gateway | +| Router | `internal/gateway/router.go` | Method routing with RBAC | +| Client | `internal/gateway/client.go` | Per-connection wrapper with write channel | +| Methods | `internal/gateway/methods/` | RPC method handlers (agents, skills, cron, etc.) 
| diff --git a/.compozy/tasks/ext-architecture/analysis_hermes.md b/.compozy/tasks/ext-architecture/analysis_hermes.md new file mode 100644 index 000000000..607f86cbc --- /dev/null +++ b/.compozy/tasks/ext-architecture/analysis_hermes.md @@ -0,0 +1,704 @@ +# Hermes Extension Architecture Analysis + +## Overview + +Hermes is a Python-based AI agent harness built around a modular tool-calling architecture. Its extension system spans five major subsystems: a **self-registering tool registry**, a **skills pipeline** (markdown-based procedural memory), **MCP tool integration** (dynamic external tool servers), an **ACP adapter** (IDE integration protocol), and a **plugin system** (user/project/pip-distributed extensions). All extension paths converge on a single `ToolRegistry` singleton that the agent loop queries for schema assembly and dispatches through for tool execution. + +The architecture is designed for graceful degradation: missing dependencies shrink the tool surface rather than blocking startup. Tools, skills, MCP servers, and plugins all fail soft -- import errors, missing API keys, and unavailable servers are logged and skipped. 
+ +### Source Root + +All code references below are relative to: +``` +/Users/pedronauck/dev/knowledge/.resources/hermes/ +``` + +## Tool Registry & Dispatch + +### Registry Singleton (`tools/registry.py`) + +The entire tool system is built on a module-level singleton `ToolRegistry` class: + +```python +class ToolRegistry: + def __init__(self): + self._tools: Dict[str, ToolEntry] = {} + self._toolset_checks: Dict[str, Callable] = {} + +registry = ToolRegistry() # module-level singleton +``` + +Each tool is a `ToolEntry` with `__slots__`: +- `name` -- unique identifier +- `toolset` -- grouping key (e.g., "web", "terminal", "mcp-github") +- `schema` -- OpenAI-format function schema dict +- `handler` -- callable `(args_dict, **kwargs) -> str` +- `check_fn` -- optional availability predicate (returns bool) +- `requires_env` -- list of env var names (for `/doctor` diagnostics) +- `is_async` -- whether handler returns a coroutine +- `description`, `emoji` -- display metadata +- `max_result_size_chars` -- per-tool output cap + +**File:** `tools/registry.py` (291 lines) + +### Self-Registration Pattern + +Every tool file imports the singleton and calls `registry.register()` at module scope: + +```python +from tools.registry import registry + +def web_search(query: str, task_id: str = None) -> str: + ... + +registry.register( + name="web_search", + toolset="web", + schema={...}, + handler=lambda args, **kw: web_search( + query=args.get("query", ""), + task_id=kw.get("task_id") + ), + check_fn=check_requirements, + requires_env=["FIRECRAWL_API_KEY"], +) +``` + +Key patterns: +1. **Handlers are lambdas that unpack args** -- each receives `(dict, **kwargs)` and unpacks into named args for the real function. +2. **All handlers return JSON strings** -- `json.dumps(dict)` is the universal return contract. +3. **`check_fn()` gates availability** -- if it returns False, the tool is withheld from the schema list; the model never sees it. +4. 
**Name collisions log a warning** -- the second registration overwrites the first; the logged warning is the only signal. + +### Discovery Pipeline (`model_tools.py`) + +Discovery is a one-shot `_discover_tools()` function that imports ~21 tool modules, triggering their side-effect registrations: + +```python +def _discover_tools(): + _modules = [ + "tools.web_tools", + "tools.terminal_tool", + "tools.file_tools", + # ... 18 more modules + ] + for mod_name in _modules: + try: + importlib.import_module(mod_name) + except Exception as e: + logger.warning("Could not import tool module %s: %s", mod_name, e) + +_discover_tools() + +# MCP servers (external) +from tools.mcp_tool import discover_mcp_tools +discover_mcp_tools() + +# User plugins +from hermes_cli.plugins import discover_plugins +discover_plugins() +``` + +Registration failures are soft -- missing optional dependencies just shrink the toolbelt. + +**File:** `model_tools.py` (578 lines) + +### Dispatch Contract + +The dispatch entry point is `handle_function_call()`: + +```python +def handle_function_call(function_name, function_args, task_id=None, ...) -> str: + function_args = coerce_tool_args(function_name, function_args) # "42" -> 42 + # Plugin pre-hook + invoke_hook("pre_tool_call", tool_name=function_name, args=function_args, ...) + result = registry.dispatch(function_name, function_args, task_id=task_id, ...) + # Plugin post-hook + invoke_hook("post_tool_call", tool_name=function_name, result=result, ...) + return result +``` + +The dispatch wraps exceptions as `{"error": "..."}` JSON strings -- the model can reason about errors and retry. + +**Special tools** (`todo`, `memory`, `session_search`, `delegate_task`) are intercepted by the agent loop before reaching `handle_function_call()` because they need agent-level state.
+ +### Toolsets (`toolsets.py`) + +Toolsets group related tools for bulk enable/disable with recursive composition: + +```python +TOOLSETS = { + "web": { + "description": "Web research and content extraction tools", + "tools": ["web_search", "web_extract"], + "includes": [], + }, + "debugging": { + "tools": ["terminal", "process"], + "includes": ["web", "file"], + }, + "hermes-cli": { + "tools": _HERMES_CORE_TOOLS, # ~40 tools + "includes": [], + }, +} + +def resolve_toolset(name: str, visited: Set[str] = None) -> List[str]: + # Recursive resolution with cycle detection +``` + +Platform-specific toolsets (e.g., `hermes-telegram`, `hermes-discord`, `hermes-acp`) enumerate exactly which tools are available per interface. MCP tools are injected into `hermes-*` umbrella toolsets automatically. + +The `get_tool_definitions()` function resolves toolsets, runs `check_fn()` for each tool, dynamically rebuilds schemas that reference other tools (e.g., `execute_code`'s sandbox tools), and stores the resolved tool names in `_last_resolved_tool_names` (a module global used by code execution and delegation). + +**File:** `toolsets.py` (643 lines) + +### Deregistration + +The registry supports `deregister(name)` for dynamic tool removal (used by MCP when servers send `notifications/tools/list_changed`). It also cleans up the toolset check if the removed tool was the last in its toolset. + +### Helper Functions + +`tools/registry.py` exports `tool_error()` and `tool_result()` helper functions that eliminate boilerplate JSON serialization across all 50+ tool files. 
+ +## Skills Pipeline + +### Architecture + +Skills are markdown-based procedural memory stored as directory-structured packages under `~/.hermes/skills/`: + +``` +~/.hermes/skills/ + mlops/ + axolotl/ + SKILL.md # Main instructions (required) + references/ # Supporting docs + templates/ # Output templates + assets/ # Supplementary files (agentskills.io standard) + scripts/ # Executable helpers +``` + +Each `SKILL.md` has YAML frontmatter (agentskills.io compatible): + +```yaml +--- +name: axolotl +description: "Fine-tuning LLMs with Axolotl" +version: 1.0.0 +platforms: [macos, linux] # OS filter +required_environment_variables: + - name: HF_TOKEN + prompt: "Enter Hugging Face token" + help: "https://huggingface.co/settings/tokens" +metadata: + hermes: + tags: [fine-tuning, llm] +--- +``` + +**File:** `tools/skills_tool.py` (1377 lines) + +### Progressive Disclosure (3-Tier) + +1. **`skills_list(category=)`** -- Returns name + description only (token-efficient). +2. **`skill_view(name)`** -- Returns full SKILL.md content + linked_files dict. +3. **`skill_view(name, file_path)`** -- Returns a specific linked file. + +This tiered approach minimizes context window usage -- the model only loads full skill content when it determines a skill is relevant. + +### Activation Semantics + +Skill content is injected as a **user message**, not a system prompt modification: + +```python +messages.append({ + "role": "user", + "content": f"[Skill activated: {skill_name}]\n\n{loaded_content}" +}) +``` + +This preserves Anthropic's prompt caching -- the system prompt stays constant, the cache stays warm. Only the new user message costs full token pricing. + +### Skill Readiness + +Skills declare environment requirements via `required_environment_variables` and `required_credential_files`. On `skill_view()`, the system: +1. Checks if each required env var is set (in `~/.hermes/.env` or `os.environ`) +2. If CLI is interactive, triggers a secret-capture callback for missing vars +3. 
If gateway, tells the user to configure via CLI +4. Reports `readiness_status`: `"available"`, `"setup_needed"`, or `"unsupported"` + +### Platform Filtering + +Skills can declare `platforms: [macos, linux]` to restrict availability by OS. `skill_matches_platform()` checks `sys.platform` against the declared list. + +### Security + +`skill_view()` includes: +- **Path traversal prevention** (`..` in file_path is rejected) +- **Resolved path boundary check** (must stay within skill directory) +- **Prompt injection detection** (scans for common injection patterns like "ignore previous instructions") +- **Trust boundary warning** (skills outside `~/.hermes/skills/` get logged) + +### Skill Manager Tool + +`tools/skill_manager_tool.py` provides `skill_manage(action, skill_name, content)` for: +- `propose` -- Agent auto-creates a skill from a completed task +- `edit` -- Modify an existing skill +- `delete` -- Remove a skill + +### System Prompt Integration + +Only the skill **index** (name + description) is injected into the system prompt via `build_skills_system_prompt()`. Individual skill bodies are loaded on demand. This keeps the system prompt compact and stable for caching. + +### External Skills Dirs + +Skills can also be loaded from directories configured via `skills.external_dirs` in config, allowing workspace-local skills that coexist with the global `~/.hermes/skills/`. 
+ +## MCP Tool Integration + +### Architecture (`tools/mcp_tool.py`) + +MCP integration uses a dedicated background event loop in a daemon thread to manage long-lived async connections: + +``` +Main Thread Background Thread (_mcp_loop) + | | + |-- discover_mcp_tools() -----> |-- MCPServerTask(github) + | | |-- stdio_client() -> subprocess + | | |-- ClientSession.initialize() + | | |-- list_tools() -> register in registry + | | + |-- registry.dispatch() ------> |-- session.call_tool() + | (via run_coroutine_threadsafe) +``` + +**File:** `tools/mcp_tool.py` (2187 lines -- the largest single tool file) + +### Transport Types + +- **Stdio** -- Launch subprocess (`npx -y @modelcontextprotocol/server-github`), communicate over stdin/stdout +- **HTTP/StreamableHTTP** -- Connect to remote endpoint with optional OAuth 2.1 PKCE + +### Configuration + +```yaml +mcp_servers: + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + GITHUB_PERSONAL_ACCESS_TOKEN: "${GITHUB_TOKEN}" + timeout: 120 + connect_timeout: 60 + tools: + include: [create_issue, list_issues] # whitelist + exclude: [delete_repo] # blacklist + resources: true + prompts: true + sampling: + enabled: true + model: "gemini-3-flash" + max_tokens_cap: 4096 +``` + +`${VAR}` placeholders are resolved from `os.environ`. + +### Schema Mapping + +MCP tools are transformed before registry insertion: +1. **Prefixing** -- `create_issue` from `github` becomes `mcp_github_create_issue` +2. **Sanitization** -- Hyphens replaced with underscores for LLM compatibility +3. **Input schema normalization** -- Missing `properties` keys are filled in + +### Dynamic Tool Discovery + +When an MCP server sends `notifications/tools/list_changed`, Hermes: +1. Fetches the new tool list +2. Deregisters all old tools for that server +3. Re-registers with the fresh list +4. Updates `hermes-*` umbrella toolsets + +This is gated on `_MCP_MESSAGE_HANDLER_SUPPORTED` (SDK version check). 
+ +### Include/Exclude Filtering + +Per-server tool filtering via `tools.include` / `tools.exclude`: +- `include` is a whitelist -- only listed MCP tool names are registered +- `exclude` is a blacklist -- all except listed are registered +- `include` takes precedence over `exclude` + +### Utility Tools + +For each MCP server, Hermes also registers utility tools: +- `mcp_{server}_list_resources` / `mcp_{server}_read_resource` +- `mcp_{server}_list_prompts` / `mcp_{server}_get_prompt` + +These are gated on both config (`tools.resources`, `tools.prompts`) and server capability (whether the session has the corresponding method). + +### Sampling Support + +MCP servers can request LLM completions via the sampling protocol. The `SamplingHandler` class: +- Rate-limits requests (sliding window, configurable max RPM) +- Resolves model (config override > server hint > default) +- Enforces model whitelist +- Caps max_tokens +- Limits tool loop rounds +- Converts MCP message format to OpenAI format +- Offloads sync LLM calls to thread via `asyncio.to_thread()` + +### Security + +- **Environment filtering** -- Only `_SAFE_ENV_KEYS` (PATH, HOME, USER, etc.) plus explicitly configured vars are passed to stdio subprocesses +- **Credential redaction** -- `_CREDENTIAL_PATTERN` regex strips `ghp_*`, `sk-*`, `Bearer`, etc. from error messages before returning to LLM +- **OSV malware check** -- Before spawning a stdio server, the package is checked against the OSV malware database +- **Auto-reconnection** -- Exponential backoff up to 5 retries +- **Collision guard** -- MCP tools that collide with built-in tools are skipped + +### Toolset Injection + +After registration, MCP tools are: +1. Added to a custom `mcp-{server_name}` toolset +2. Injected into all `hermes-*` umbrella toolsets +3. 
Made available as standalone toolset aliases (e.g., `--enabled github`) + +## ACP Adapter + +### Architecture (`acp_adapter/`) + +The ACP adapter exposes Hermes as an Agent Client Protocol server for IDE integration (VS Code, Zed, JetBrains). It wraps `AIAgent` instances in a stateful session server: + +``` +IDE Extension + | JSON-RPC over stdio/HTTP + v +HermesACPAgent (acp_adapter/server.py) + | + v +SessionManager (acp_adapter/session.py) + |-- SessionState { session_id, agent, cwd, model, history } + |-- Persisted to ~/.hermes/state.db + | + v +AIAgent.run_conversation() +``` + +**Files:** +- `acp_adapter/server.py` -- `HermesACPAgent` class (extends `acp.Agent`) +- `acp_adapter/session.py` -- `SessionManager` and `SessionState` +- `acp_adapter/events.py` -- Callback factories for streaming events +- `acp_adapter/permissions.py` -- Approval callback for dangerous commands +- `acp_adapter/auth.py` -- Provider detection + +### Protocol Methods + +- `initialize` -- Client capabilities handshake, returns agent capabilities +- `tools/list` -- List available tools from the registry +- `tools/call` -- Execute a tool +- `completion/complete` -- Run an agent turn +- `resources/read` -- Fetch memory, skill, or session + +### Session Lifecycle + +Sessions are keyed by client. Each gets: +- An `AIAgent` instance with its own message history +- Code context accumulation (open files, cursor position from IDE) +- Task-specific CWD overrides for terminal tools +- Cancel events for interruption +- Persistence to `~/.hermes/state.db` so sessions survive restarts and remain searchable via `session_search` + +### Slash Commands + +The ACP server advertises IDE-facing commands: `/help`, `/model`, `/tools`, `/context`, `/reset`, `/compact`, `/version`. + +### MCP Server Registration + +ACP sessions register per-session MCP servers passed by the client via `_register_session_mcp_servers()`, allowing IDE extensions to bring their own MCP servers.
+ +## Security & Approval Model + +### Multi-Layer Defense-in-Depth (`tools/approval.py`) + +``` +Ring 4 (Outermost): Gateway Authorization -- who can talk to the agent? +Ring 3: Command Detection -- what requires approval? +Ring 2: Execution Isolation -- containerized auto-approve +Ring 1 (Core): File/Config Protection -- never bypassable +``` + +### Dangerous Command Detection + +`detect_dangerous_command()` runs regex patterns against normalized commands: + +```python +DANGEROUS_PATTERNS = [ + (r'\brm\s+(-[^\s]*\s+)*/', "delete in root path"), + (r'\brm\s+-[^\s]*r', "recursive delete"), + (r'\bchmod\s+(-[^\s]*\s+)*(777|666|...)', "world-writable permissions"), + (r'\bmkfs\b', "format filesystem"), + (r'\b(curl|wget)\b.*\|\s*(ba)?sh\b', "pipe remote content to shell"), + # ... 25+ patterns total +] +``` + +Normalization pipeline: ANSI strip -> null byte removal -> Unicode NFKC normalization. + +**File:** `tools/approval.py` (878 lines) + +### Tirith Security Scanner + +Binary scanner for content-level threats (homograph URLs, terminal injection, pipe-to-interpreter). Downloads on first use, verified via SHA-256 checksum. Exit codes: 0=allow, 1=block, 2=warn. + +### Approval State Machine + +Three scopes: +| Scope | Duration | Storage | +|-------|----------|---------| +| Once | Single execution | In-memory (discarded) | +| Session | Current session | `_session_approved[session_key]` | +| Permanent | Across sessions | `config.yaml` `command_allowlist` | + +Thread-safe with `threading.Lock`. Per-session approval keys support legacy aliases for backward compatibility. + +### Smart Approval + +When `approvals.mode=smart`, an auxiliary LLM assesses risk before prompting the user: +- `APPROVE` -- Auto-approve, grant session-level approval +- `DENY` -- Block permanently +- `ESCALATE` -- Fall through to manual prompt + +### Gateway Approval (Blocking Queue) + +For messaging platforms, approval uses a blocking queue pattern: +1. 
Agent thread creates `_ApprovalEntry` with `threading.Event` +2. Gateway callback sends approval request to user in chat +3. Agent thread blocks on `entry.event.wait(timeout=300)` +4. User replies `/approve` or `/deny` +5. `resolve_gateway_approval()` sets `entry.result` and signals the event + +Multiple parallel subagents can block concurrently -- each gets its own entry. + +### Container Isolation Bypass + +Commands running in Docker, Singularity, Modal, or Daytona backends are auto-approved (no host damage possible). + +### YOLO Mode + +`HERMES_YOLO_MODE` bypasses all approval prompts. Intended for ephemeral containers, batch runs, and CI/CD. + +### URL Safety (SSRF Protection) + +`tools/url_safety.py` blocks access to private networks (RFC 1918), loopback, cloud metadata endpoints (169.254.169.254), and configurable domain blocklists. + +### File Write Safety + +Hard safety boundary (not bypassable by YOLO mode): +- System files: `/etc/passwd`, `/etc/shadow`, etc. +- Hermes internals: `~/.hermes/auth.json`, `~/.hermes/config.yaml` +- Shell configs: `~/.bashrc`, `~/.zshrc` + +## Extension Patterns + +### Plugin System (`hermes_cli/plugins.py`) + +Three plugin sources: +1. **User plugins** -- `~/.hermes/plugins/{name}/` with `plugin.yaml` + `__init__.py` +2. **Project plugins** -- `./.hermes/plugins/{name}/` (opt-in via `HERMES_ENABLE_PROJECT_PLUGINS`) +3. **Pip plugins** -- packages exposing `hermes_agent.plugins` entry-point group + +**File:** `hermes_cli/plugins.py` (612 lines) + +#### Plugin Manifest (`plugin.yaml`) + +```yaml +name: my-plugin +version: 1.0.0 +description: "Example plugin" +author: "User" +requires_env: [MY_API_KEY] +provides_tools: [my_tool] +provides_hooks: [pre_tool_call, post_tool_call] +``` + +#### PluginContext API + +Each plugin's `register(ctx)` receives a `PluginContext` with: + +```python +ctx.register_tool(name, toolset, schema, handler, ...)
# -> tools.registry.register() +ctx.register_hook(hook_name, callback) # -> lifecycle hooks +ctx.register_cli_command(name, help, setup_fn) # -> argparse subcommand +ctx.inject_message(content, role="user") # -> inject into conversation +``` + +#### Lifecycle Hooks + +```python +VALID_HOOKS = { + "pre_tool_call", # Before tool dispatch + "post_tool_call", # After tool dispatch + "pre_llm_call", # Before LLM inference (can inject context) + "post_llm_call", # After LLM inference + "pre_api_request", # Before HTTP API call + "post_api_request", # After HTTP API call + "on_session_start", # Session created + "on_session_end", # Session ended + "on_session_finalize", # Session finalized + "on_session_reset", # Session reset +} +``` + +Hooks are invoked via `invoke_hook(name, **kwargs)` -- each callback is wrapped in try/except so a misbehaving plugin cannot break the agent loop. + +The `pre_llm_call` hook can return context to inject into the user message (preserving prompt cache). + +#### Plugin Toolset Integration + +Plugin-registered tools appear as their own toolsets. The `_get_plugin_toolset_names()` function discovers toolset names from the registry that don't exist in the static `TOOLSETS` dict. These are included in `get_all_toolsets()` and `resolve_toolset()` so plugin tools participate in the standard enable/disable flow. 
+ +### Code Execution Sandbox (PTC) (`tools/code_execution_tool.py`) + +Programmatic Tool Calling -- collapses multi-step tool chains into single Python scripts: + +``` +Parent Process (Hermes) Child Process (Sandbox) + | | + |-- Generate hermes_tools.py --->| + |-- Open UDS, start RPC thread | + |-- Spawn child process -------->| + | |-- import hermes_tools + | |-- hermes_tools.web_search("query") + | <-- JSON-RPC over UDS -------| + |-- Dispatch via registry | + |-- Return result over UDS ----->| + | |-- process results + | |-- print() to stdout + | <-- Capture stdout -----------| + |-- Return stdout to LLM | +``` + +Security boundaries: +- **Whitelisted tools only**: `SANDBOX_ALLOWED_TOOLS` = {web_search, web_extract, read_file, write_file, search_files, patch, terminal} +- **Execution timeout**: 300s +- **Call volume limit**: 50 RPC requests per script +- **Output capping**: 50KB stdout, 10KB stderr +- **Credential scrubbing**: Sensitive env vars stripped from child process +- **Filesystem isolation**: Runs in tempdir + +Remote backends (Docker, SSH, Modal) use file-based RPC instead of UDS. + +### Subagent Delegation (`tools/delegate_tool.py`) + +`delegate_task` spawns isolated child agents: + +```python +subagent = AIAgent( + model=model or parent_agent.model, + max_iterations=max_iterations, + enabled_toolsets=parent_agent.enabled_toolsets, + platform="subagent", + session_id=str(uuid.uuid4()), +) + +# CRITICAL: save/restore global tool names +saved_tool_names = _last_resolved_tool_names.copy() +try: + result = subagent.run_conversation(task) +finally: + _last_resolved_tool_names[:] = saved_tool_names +``` + +Properties: own thread (ThreadPoolExecutor), inherits parent's toolset/memory/skills, independent message history and session ID. Multiple subagents can run in parallel. + +## Key Patterns for AGH + +### 1. Self-Registering Singleton Registry + +Hermes' most important pattern. Each tool file owns its registration -- no central manifest to maintain. 
Adding a tool is a single-file operation. The registry is the stable interface between schema assembly and dispatch. + +**AGH consideration:** Go has no module-scope executable statements like Python; its closest equivalent is a package-level `init()` function, which runs when the package is imported. Consider `init()` functions in tool packages that register with a central registry (pulled in via blank imports), or a declarative approach where the daemon package wires tools explicitly. + +### 2. Availability Gating via `check_fn()` + +Tools that can't run are withheld from the model entirely. The model never hallucinates calls to unavailable tools. This is the single most important reliability property. + +**AGH consideration:** Map to the `AgentDriver` interface -- drivers should report which tools they support, and session setup should filter accordingly. + +### 3. Toolset Composition with Recursive Resolution + +Toolsets group tools for bulk enable/disable with recursive includes and cycle detection. Platform-specific toolsets define exactly which tools each interface exposes. + +**AGH consideration:** AGH already has agent definitions in TOML config. Extend with toolset grouping that supports composition (e.g., "research" includes "web" + "vision"). + +### 4. Graceful Degradation via Soft Failures + +Every extension point (tool imports, MCP connections, plugin loading) is wrapped in try/except. Missing deps shrink capability, they don't block startup. + +**AGH consideration:** Critical for a daemon that must stay running. Use the same pattern for ACP driver spawning -- if an agent binary is missing, the session should report the error rather than crashing the daemon. + +### 5. Plugin Hooks at Tool Dispatch Points + +Pre/post hooks at tool calls and LLM calls enable observability, context injection, and cross-cutting concerns without modifying core code. + +**AGH consideration:** AGH's `Notifier` pattern already provides fan-out for observability. Extend with pre/post hooks on session events and tool dispatch for plugin-like extensibility. + +### 6.
MCP as First-Class Registry Citizens + +MCP tools are registered in the same registry as built-in tools. The agent loop doesn't distinguish between them. This transparency enables MCP tools to participate in toolset filtering, subagent delegation, and code execution. + +**AGH consideration:** AGH already has MCP support via the ACP protocol. Ensure MCP tools discovered by the agent subprocess are surfaced through the session's tool list. + +### 7. User-Message Injection for Skills (Cache Preservation) + +Skills are injected as user messages rather than system prompt modifications to preserve prompt caching. The system prompt stays constant across turns. + +**AGH consideration:** When implementing skills/memory for AGH, inject context as conversation messages rather than modifying the system prompt, to preserve whatever caching the underlying agent supports. + +### 8. Three-Scope Approval State Machine + +Approvals at once/session/permanent granularity with thread-safe state. Smart approval via auxiliary LLM reduces human-in-the-loop friction. + +**AGH consideration:** AGH's approval model should live in the daemon (not the agent), since the daemon mediates between the user and the agent. Store approval state per-session in SQLite. + +### 9. Mutable Global State as Design Tension + +`_last_resolved_tool_names` is a module-level mutable global that subagents must save/restore. This is explicitly called out as a design tension -- a cleaner design would thread it through function arguments. + +**AGH consideration:** Avoid this pattern. Go's explicit argument passing makes it natural to thread session-scoped state through function calls rather than relying on globals. + +### 10. Dynamic Tool Discovery and Hot-Reload + +MCP servers can trigger tool list refreshes via `notifications/tools/list_changed`. The registry supports `deregister()` for nuke-and-repave updates. 
+ +**AGH consideration:** If AGH supports MCP servers that change their tool lists, the session manager needs to handle tool list invalidation and notify the agent. + +## Code References + +| Component | File | Lines | Description | +|-----------|------|-------|-------------| +| Tool Registry | `tools/registry.py` | 336 | Singleton registry, ToolEntry, dispatch, helpers | +| Model Tools | `model_tools.py` | 578 | Discovery pipeline, get_tool_definitions, handle_function_call | +| Toolsets | `toolsets.py` | 643 | TOOLSETS dict, resolve_toolset, composition | +| MCP Tool | `tools/mcp_tool.py` | 2187 | MCPServerTask, discovery, schema mapping, sampling | +| Plugins | `hermes_cli/plugins.py` | 612 | PluginManager, PluginContext, hooks, discovery | +| Skills Tool | `tools/skills_tool.py` | 1377 | skills_list, skill_view, readiness checks | +| Approval | `tools/approval.py` | 878 | Detection patterns, approval state machine, smart approval | +| ACP Server | `acp_adapter/server.py` | ~300 | HermesACPAgent, protocol methods | +| ACP Session | `acp_adapter/session.py` | ~200 | SessionManager, SessionState, persistence | +| Code Execution | `tools/code_execution_tool.py` | ~800 | PTC sandbox, UDS RPC, stub generation | +| Delegate Tool | `tools/delegate_tool.py` | ~250 | Subagent spawning, global state save/restore | +| Skill Manager | `tools/skill_manager_tool.py` | ~200 | propose/edit/delete skills | +| URL Safety | `tools/url_safety.py` | ~150 | SSRF protection, private network blocking | +| Tirith Security | `tools/tirith_security.py` | ~200 | Binary scanner integration | +| Skill Utils | `agent/skill_utils.py` | ~300 | Frontmatter parsing, platform matching | +| MCP Config | `hermes_cli/mcp_config.py` | ~100 | MCP server config loading | +| Plugin Commands | `hermes_cli/plugins_cmd.py` | ~100 | CLI for plugin management | + +### Wiki Sources + +| Document | Path | +|----------|------| +| Tool Registry and Dispatch | 
`/Users/pedronauck/dev/knowledge/hermes/wiki/concepts/Tool Registry and Dispatch.md` | +| Agent Skills Pipeline | `/Users/pedronauck/dev/knowledge/hermes/wiki/concepts/Agent Skills Pipeline.md` | +| Code Execution and MCP Tools | `/Users/pedronauck/dev/knowledge/hermes/wiki/concepts/Code Execution and MCP Tools.md` | +| ACP Adapter and Subagents | `/Users/pedronauck/dev/knowledge/hermes/wiki/concepts/ACP Adapter and Subagents.md` | +| Security and Command Approval | `/Users/pedronauck/dev/knowledge/hermes/wiki/concepts/Security and Command Approval.md` | diff --git a/.compozy/tasks/ext-architecture/analysis_libraries.md b/.compozy/tasks/ext-architecture/analysis_libraries.md new file mode 100644 index 000000000..4e5e8a212 --- /dev/null +++ b/.compozy/tasks/ext-architecture/analysis_libraries.md @@ -0,0 +1,517 @@ +# Extension Libraries & Frameworks Research + +> Research date: 2026-04-10 +> Target: AGH Agent Operating System -- three-tier extension architecture + +--- + +## Wasm Runtime: wazero + +### Overview + +[wazero](https://github.com/tetratelabs/wazero) is a WebAssembly Core Specification 1.0 and 2.0 compliant runtime written in pure Go with **zero dependencies** and no CGO requirement. This is the strongest differentiator -- it preserves Go's cross-compilation story and adds minimal binary size overhead. 
+ +### Latest Stable Version + +**v1.10.1** (latest as of early 2026) + +- v1.10.0 was the first release under the new `wazero/wazero` GitHub org (previously `tetratelabs/wazero`) +- Experimental features: concurrent Wasm compilation, tail-call proposal +- Requires Go 1.23+ (floor version) +- v1.7 introduced an optimizing compiler with 30-40% average performance improvements +- 692+ known importers on pkg.go.dev + +### Key Features for AGH + +| Feature | Status | Notes | +|---|---|---| +| Context cancellation/timeout | Supported | `WithCloseOnContextDone(true)` -- essential for sandboxing untrusted code | +| Goroutine safety | Supported | 1:1 goroutine mapping; share `CompiledModule` across goroutines | +| Compilation cache | Supported | Pre-compile once, instantiate many times | +| WASI Preview 1 | Supported | Full wasip1 support | +| WASI Preview 2 | **Not supported** | Open issue [#2289](https://github.com/tetratelabs/wazero/issues/2289) | +| Fuel/gas metering | **Not supported** | No native fuel API; contrast with Wasmtime | +| Interpreter mode | Supported | Useful for debugging | +| Compiler mode | Supported | Production performance | + +### Resource Limiting + +wazero does **not** have built-in fuel metering like Wasmtime. The available mechanisms are: + +1. **`context.WithTimeout`** + `WithCloseOnContextDone(true)` -- time-based execution limits +2. **Memory limits** -- configurable per-module memory caps +3. **`CompiledModule` sharing** -- compile once, instantiate cheaply per request + +The `WithCloseOnContextDone` option inserts periodic cancellation checks in the interpreter/compiler, with a small performance cost (disabled by default). 
+ +### Gotchas + +- No fuel metering means you can only limit by wall-clock time, not instruction count +- WASI Preview 2 absence means no Component Model support through wazero alone +- The `wazero/wazero` org migration may cause import path confusion -- the canonical import remains `github.com/tetratelabs/wazero` + +### Links + +- GitHub: https://github.com/tetratelabs/wazero +- Docs: https://wazero.io/ +- pkg.go.dev: https://pkg.go.dev/github.com/tetratelabs/wazero +- Specs: https://wazero.io/specs/ + +--- + +## Wasm Plugin Framework: Extism + +### Overview + +[Extism](https://extism.org/) is a cross-language WebAssembly plugin framework that provides a higher-level abstraction over raw Wasm runtimes. The Go SDK (`extism/go-sdk`) uses wazero under the hood, providing plugin lifecycle management, host functions, memory management, and security sandboxing. + +### Latest Versions + +| Component | Version | Date | +|---|---|---| +| Extism Go SDK (`extism/go-sdk`) | **v1.3.0** | ~March 2025 | +| Extism Core Runtime (`extism/extism`) | **v1.12.0** | 2025 | +| Extism Go PDK (`extism/go-pdk`) | Latest commit Jan 2026 | Published Mar 2025 | +| Extism CLI | v1.5.2 | References go-sdk v1.3.0 | + +### Go SDK API Surface + +```go +// Plugin creation +plugin, err := extism.NewPlugin(ctx, manifest, config, hostFunctions) + +// Compiled plugins for concurrent use +compiled, err := extism.NewCompiledPlugin(ctx, manifest, config, hostFunctions) +instance, err := compiled.Instance(ctx, extism.PluginInstanceConfig{}) + +// Calling exports +exitCode, output, err := plugin.Call("function_name", input) +exitCode, output, err := plugin.CallWithContext(ctx, "function_name", input) +``` + +### Key Features for AGH + +| Feature | Status | Notes | +|---|---|---| +| Timeout | Supported | `Manifest.Timeout` field (uint64 ms), 30s default | +| Fuel metering | Supported | Plugins can be initialized with fuel limits | +| Host functions | Supported | Inject Go functions callable from Wasm | +| 
Module-scope variables | Supported | Persistent state between calls | +| HTTP control | Supported | Host-controlled HTTP without WASI | +| Memory limits | Supported | `MaxVarBytes`, `MaxHttpResponseBytes` | +| Allowed hosts/paths | Supported | Filesystem and network sandboxing | +| Compilation cache | Supported | Via wazero's compilation cache | +| Multi-language PDKs | Supported | Rust, Go, JS, Python, C/C++, AssemblyScript, Zig, Haskell | + +### Plugin Development Kit (PDK) for Go + +The Go PDK supports both TinyGo and the standard Go toolchain (Go 1.24+): + +```go +// Plugin code (compiled to Wasm) +//go:wasmexport greet +func greet() int32 { + name := pdk.InputString() + pdk.OutputString("Hello, " + name) + return 0 +} +``` + +**Build options:** +- TinyGo: `tinygo build -target wasip1 -o plugin.wasm` (smaller output, ~5x smaller) +- Go native: `GOOS=wasip1 GOARCH=wasm go build -o plugin.wasm -buildmode=c-shared` + +TinyGo 0.37.0 is required for Go 1.24 compatibility and `//go:wasmexport` support. + +### Gotchas + +- The Go SDK wraps wazero's `Module` type -- if you need raw wazero access, you go through Extism's abstraction +- Go PDK with standard toolchain produces larger Wasm binaries than TinyGo +- API stability: the `extism/extism` package once warned "APIs may change until v1.0" -- the newer `extism/go-sdk` appears more stable +- Limited production case studies publicly available; community primarily in Discord +- 38 known importers on pkg.go.dev (relatively small ecosystem) + +### Links + +- Go SDK: https://github.com/extism/go-sdk +- Go PDK: https://github.com/extism/go-pdk +- Docs: https://extism.org/docs/ +- Host Functions: https://extism.org/docs/concepts/host-functions/ + +--- + +## Extism Alternatives + +### 1. knqyf263/go-plugin (Go Plugin System over WebAssembly) + +**Version: v0.9.0** (March 12, 2025) + +A Go plugin system that auto-generates type-safe Go SDKs from Protocol Buffers definitions. Uses wazero under the hood. 
Inspired by HashiCorp's go-plugin but communicates in-memory via Wasm instead of over RPC. + +**Strengths:** +- Protobuf-based interface definition (familiar to gRPC users) +- Auto-generated Go SDK hides raw Wasm APIs from plugin authors +- Supports native Go plugins with WASI (wasip1) +- Memory-safe, sandboxed, portable +- Used in production by CNCF's Node Resource Interface (NRI) + +**Weaknesses:** +- Depends on TinyGo for protobuf compatibility (well-known types reimplemented) +- v0.9.0 -- not yet v1.0 stable +- Smaller community than Extism + +**Key difference from Extism:** go-plugin is Go-centric with protobuf contracts; Extism is language-agnostic with a universal ABI. + +**Link:** https://github.com/knqyf263/go-plugin + +### 2. HashiCorp go-plugin (RPC-based, not Wasm) + +**Version: v1.6.3** (August 2025) + +The battle-tested plugin system used by Terraform, Vault, Nomad, Boundary, and Waypoint. Launches plugins as subprocesses communicating over gRPC or net/rpc. + +**Strengths:** +- 3,714 known importers -- massive production usage +- Supports plugins in any language (via gRPC) +- Process isolation (crash safety) +- Multiplexed gRPC connections +- Unix socket support with configurable permissions +- Health checking, version negotiation, secure handshake + +**Weaknesses:** +- Subprocess overhead (not in-process like Wasm) +- MPL-2.0 license +- gRPC dependency adds binary size +- Not Wasm-based -- different security model + +**Relevance to AGH:** Already similar to AGH's tier 3 (JSON-RPC subprocess). Could be used as-is for the subprocess tier, or its patterns could inform AGH's own implementation. + +**Link:** https://github.com/hashicorp/go-plugin + +### 3. Go 1.24+ Native Wasm Support + +Go 1.24 added `//go:wasmexport` and WASI reactor build mode, making it possible to build Wasm plugins using the standard Go toolchain without TinyGo or Extism. 
+ +**Strengths:** +- Zero external dependencies +- Standard toolchain support +- Growing ecosystem + +**Weaknesses:** +- No plugin lifecycle management (raw Wasm only) +- Larger binary output than TinyGo +- No host function framework -- you build it yourself +- Type restrictions on `wasmexport`/`wasmimport` (no pointer passing due to 32/64-bit mismatch) + +### 4. Go's Native `plugin` Package + +The standard library `plugin` package loads compiled `.so` shared objects at runtime. + +**Strengths:** +- Part of Go standard library +- No serialization overhead + +**Weaknesses:** +- Linux and macOS only (no Windows) +- Requires matching Go versions between host and plugin +- No sandboxing +- Not suitable for untrusted code + +**Verdict:** Not recommended for AGH's use case. + +### Comparison Matrix + +| Framework | In-Process | Cross-Language | Type-Safe | Sandboxed | Production Maturity | +|---|---|---|---|---|---| +| Extism | Yes (Wasm) | Yes (universal ABI) | Via host functions | Yes | Medium | +| knqyf263/go-plugin | Yes (Wasm) | Go-centric | Protobuf-generated | Yes | Low-Medium | +| HashiCorp go-plugin | No (subprocess) | Yes (gRPC) | Protobuf-generated | Process isolation | Very High | +| Go 1.24 native Wasm | Yes (Wasm) | Build-your-own | Manual | Wasm sandbox | Low | +| Go `plugin` package | Yes (.so) | Go only | Go interfaces | None | Low | + +--- + +## JSON-RPC Libraries + +### Recommended Libraries for AGH's Tier 3 (Subprocess Extensions) + +#### 1. sourcegraph/jsonrpc2 -- Best for Bidirectional Stdio + +**Import:** `github.com/sourcegraph/jsonrpc2` +**Published:** February 2025, MIT license +**Importers:** 419 packages + +The most battle-tested option for bidirectional JSON-RPC 2.0 over stdio. Used extensively in LSP implementations. 
+ +```go +// Symmetric connection -- both client and server +conn := jsonrpc2.NewConn(ctx, jsonrpc2.NewPlainObjectStream(rwc), handler) +``` + +**Features:** +- Bidirectional (symmetric client/server on same connection) +- Works with any `io.ReadWriteCloser` (stdio, TCP, etc.) +- Request/response correlation +- Notification support +- Handler interface for incoming requests + +**Best fit for AGH:** Already the pattern used by LSP servers and ACP-compatible agents. Minimal abstraction, maximum control. + +#### 2. golang.org/x/exp/jsonrpc2 -- Official Experimental + +**Import:** `golang.org/x/exp/jsonrpc2` +**Published:** January 2026, BSD-3-Clause +**Importers:** 21 packages + +The publicly importable version of the internal `golang.org/x/tools` jsonrpc2 implementation used by gopls. + +**Features:** +- Bidirectional `Connection` type +- Pluggable `Framer` (HeaderFramer for LSP, RawFramer for raw JSON) +- `Dial` function with configurable binder +- Preempter and Handler patterns + +**Caveat:** Under `x/exp` -- API stability not guaranteed. + +#### 3. viant/jsonrpc -- Best Stdio Client + +**Import:** `github.com/viant/jsonrpc` + +Purpose-built for launching subprocesses and communicating via JSON-RPC 2.0 over stdin/stdout. Also provides streamable HTTP transport. + +```go +client := stdio.New("my_service", stdio.WithArguments("--flag"), stdio.WithEnvironment("KEY=value")) +response, err := client.Send(ctx, request) +``` + +**Features:** +- Dedicated stdio client transport +- Process execution with configurable args/env +- Request, notification, and batch call support +- Also supports streamable HTTP with session management +- Used as transport layer for Viant's MCP implementation + +#### 4. modelcontextprotocol/go-sdk -- Official MCP SDK + +**Import:** `github.com/modelcontextprotocol/go-sdk` +**Published:** March 31, 2026, Apache-2.0 +**Importers:** 1,443 packages + +The official Go SDK for Model Context Protocol, maintained by Google and Anthropic. 
Built on JSON-RPC 2.0 with stdio and streamable HTTP transports. + +**Relevance to AGH:** Since AGH already speaks ACP (which is MCP-adjacent), this SDK's transport layer and JSON-RPC patterns are directly relevant. The `jsonrpc` sub-package can be used independently for custom transports. + +### JSON-RPC Library Comparison + +| Library | Stdio | Bidirectional | LSP-Compatible | Maturity | +|---|---|---|---|---| +| `sourcegraph/jsonrpc2` | Via ReadWriteCloser | Yes | Yes | High (419 importers) | +| `golang.org/x/exp/jsonrpc2` | Via Framer | Yes | Yes (HeaderFramer) | Medium (21 importers) | +| `viant/jsonrpc` | Dedicated transport | Client-side | N/A | Medium | +| `modelcontextprotocol/go-sdk` | Built-in | Yes | N/A (MCP-specific) | High (1,443 importers) | + +--- + +## WebAssembly Component Model in Go + +### Current Status (April 2026) + +The Component Model is **not yet natively supported** by the standard Go compiler. The situation is evolving rapidly: + +### Go Compiler Support + +| Capability | Status | +|---|---| +| GOOS=js GOARCH=wasm | Stable (Go 1.21+) | +| GOOS=wasip1 GOARCH=wasm | Stable (Go 1.21+) | +| `//go:wasmexport` | Stable (Go 1.24+) | +| GOOS=wasip2 | **Not supported** | +| GOOS=wasip3 | **Proposed** ([issue #77141](https://github.com/golang/go/issues/77141)) | +| Component Model output | **Not supported** (requires TinyGo or wit-bindgen-go) | + +### WASIp3 Proposal + +A proposal has been filed to add `wasip3/wasm` as a new Go port. WASIp3 is expected in early 2026 and integrates Component Model concurrency primitives (cooperative threads) that map well to Go's goroutine scheduler. However, this is still in proposal stage. + +### Bytecode Alliance Tooling + +The [bytecodealliance/go-modules](https://github.com/bytecodealliance/go-modules) project provides `wit-bindgen-go` to generate Go bindings from WIT (WebAssembly Interface Type) files. This is the primary path for Go developers wanting to use the Component Model today. 
+ +### WASI Roadmap + +| Milestone | Expected Timeline | +|---|---| +| WASI Preview 2 (wasip2) | Released January 2024 | +| WASI 0.3 (async I/O) | RC in late 2025, stabilizing 2026 | +| WASI 1.0 (stable) | Late 2026 / early 2027 | + +### Implications for AGH + +- **Short-term (2026):** The Component Model is not practical for AGH's plugin system via Go. Extism + wazero with WASI Preview 1 is the pragmatic choice. +- **Medium-term (2027):** Once WASI 1.0 lands and Go adds wasip3 support, the Component Model could replace Extism's custom ABI for cross-language plugins. +- **Risk:** Building on the Component Model now requires TinyGo or external tooling, adding complexity without clear benefit over Extism's proven approach. + +--- + +## Modern Go Plugin Patterns + +### Pattern 1: Interface-Driven Contracts (Tier 1 -- Go-native) + +The dominant Go pattern. Define interfaces where consumed, implement in separate packages. + +```go +// Consumed by session package +type AgentDriver interface { + Start(ctx context.Context, cfg AgentConfig) error + Send(ctx context.Context, msg Message) error + Stop(ctx context.Context) error +} +``` + +AGH already uses this pattern. For first-party extensions, this is the right approach -- zero overhead, type-safe, compile-time verified. + +### Pattern 2: Functional Options + Registry (Tier 1) + +```go +type ExtensionRegistry struct { + drivers map[string]AgentDriverFactory + hooks map[string][]HookHandler +} + +func NewRegistry(opts ...RegistryOption) *ExtensionRegistry +func WithDriver(name string, factory AgentDriverFactory) RegistryOption +``` + +Small registries (maps for <10 items) are preferred over complex registry interfaces per AGH's architecture principles. + +### Pattern 3: Interface Extension (Progressive Capability) + +Used by go-mysql-server and others. Base interface is required; additional interfaces unlock optional capabilities. 
+ +```go +type Extension interface { + Name() string + Init(ctx context.Context) error +} + +// Optional capabilities +type WithHealthCheck interface { + HealthCheck(ctx context.Context) error +} + +type WithMetrics interface { + Metrics() []Metric +} +``` + +Caveat: `runtime.assertI2I` (type assertions) can consume ~13% CPU in tight loops. Use for initialization/configuration, not hot paths. + +### Pattern 4: Wasm Sandbox (Tier 2) + +Using Extism or raw wazero for in-process sandboxed execution of untrusted code. + +### Pattern 5: RPC Subprocess (Tier 3) + +HashiCorp go-plugin pattern: launch subprocess, negotiate protocol, communicate over gRPC/JSON-RPC. AGH's existing ACP driver model is already this pattern. + +--- + +## Recommended Stack + +### Tier 1: Go-Native Interfaces (First-Party) + +**Approach:** Interface-driven contracts with functional options and a small registry. + +- No additional dependencies needed +- AGH already implements this via `session.AgentDriver` and similar interfaces +- Use interface extension pattern for progressive capability discovery + +### Tier 2: WebAssembly Sandbox (Lightweight Hooks/Validators) + +**Recommended:** Extism Go SDK (`github.com/extism/go-sdk`) v1.3.0+ + +**Rationale:** +- Built on wazero (pure Go, no CGO) -- preserves AGH's single-binary story +- Provides timeout + fuel metering out of the box -- critical for untrusted code +- Host functions enable injecting AGH capabilities into plugins +- Multi-language PDKs let extension authors use Rust, Go, JS, etc. +- Compilation cache + `CompiledPlugin` for concurrent plugin instances + +**Alternative considered:** `knqyf263/go-plugin` (v0.9.0) is appealing for its protobuf-based type safety but is less mature and Go-centric. Extism's universal ABI and multi-language support better fit AGH's agent ecosystem. + +**Risk mitigation:** +- Extism's relatively small Go ecosystem (38 importers) is a concern. 
Mitigate by keeping a thin adapter layer so the host-side code could be rewritten against raw wazero if needed. +- Wrap Extism types behind AGH-owned interfaces (already an AGH architecture principle). + +### Tier 3: JSON-RPC Subprocess (Rich Extensions/Agent Drivers) + +**Recommended:** `github.com/sourcegraph/jsonrpc2` for the transport layer + +**Rationale:** +- 419 importers, MIT license, actively maintained +- Bidirectional symmetric connection -- both host and extension can send requests +- Works with any `io.ReadWriteCloser` -- trivial to use with subprocess stdio +- Battle-tested in LSP implementations (same protocol pattern as ACP) + +**Also consider:** `github.com/modelcontextprotocol/go-sdk` if AGH wants to adopt MCP-compatible extension protocol directly. The official SDK's `jsonrpc` sub-package provides a clean transport abstraction. + +**Pattern:** Follow HashiCorp go-plugin's lifecycle model (subprocess launch, protocol negotiation, health checking, graceful shutdown) but use JSON-RPC 2.0 instead of gRPC to stay aligned with ACP. 
+ +--- + +## Version Matrix + +| Dependency | Version | Go Minimum | License | Import Path | +|---|---|---|---|---| +| wazero | v1.10.1 | Go 1.23 | Apache-2.0 | `github.com/tetratelabs/wazero` | +| Extism Go SDK | v1.3.0 | (inherits wazero) | BSD-3-Clause | `github.com/extism/go-sdk` | +| Extism Go PDK | ~v1.1.0 | Go 1.24 (native) or TinyGo 0.37.0 | BSD-3-Clause | `github.com/extism/go-pdk` | +| Extism Core Runtime | v1.12.0 | N/A (Rust) | BSD-3-Clause | N/A | +| sourcegraph/jsonrpc2 | latest (Feb 2025) | ~Go 1.21 | MIT | `github.com/sourcegraph/jsonrpc2` | +| golang.org/x/exp/jsonrpc2 | latest (Jan 2026) | ~Go 1.21 | BSD-3-Clause | `golang.org/x/exp/jsonrpc2` | +| viant/jsonrpc | latest | ~Go 1.21 | Apache-2.0 | `github.com/viant/jsonrpc` | +| knqyf263/go-plugin | v0.9.0 | Go 1.21+ | MIT | `github.com/knqyf263/go-plugin` | +| HashiCorp go-plugin | v1.6.3 | Go 1.21+ | MPL-2.0 | `github.com/hashicorp/go-plugin` | +| MCP Go SDK (official) | latest (Mar 2026) | Go 1.22+ | Apache-2.0 | `github.com/modelcontextprotocol/go-sdk` | +| TinyGo | 0.37.0 | Go 1.24.0 | BSD-3-Clause | N/A (compiler) | +| Go (std toolchain) | 1.24+ | N/A | BSD-3-Clause | N/A | + +### Key Compatibility Notes + +1. **wazero v1.10.x requires Go 1.23+** -- AGH should ensure its Go version floor accommodates this +2. **Extism Go SDK depends on wazero** -- version coupling means Extism updates may lag wazero releases +3. **Go PDK with standard toolchain requires Go 1.24+** for `//go:wasmexport`; TinyGo 0.37.0 for TinyGo path +4. **WASI Preview 2 is NOT available** through wazero or Extism -- only WASI Preview 1 (wasip1) +5. **Component Model** requires TinyGo + `wit-bindgen-go` today; native Go support expected ~2027 +6. 
**sourcegraph/jsonrpc2 is MIT** -- no license compatibility concerns with AGH + +--- + +## Sources + +- [wazero GitHub](https://github.com/tetratelabs/wazero) +- [wazero Docs](https://wazero.io/) +- [wazero vs CGO 2026](https://wasmruntime.com/en/blog/wazero-vs-cgo-2026) +- [wazero WASI Preview 2 Issue #2289](https://github.com/tetratelabs/wazero/issues/2289) +- [Extism Go SDK](https://github.com/extism/go-sdk) +- [Extism Go PDK](https://github.com/extism/go-pdk) +- [Extism Docs](https://extism.org/) +- [Extism Host Functions](https://extism.org/docs/concepts/host-functions/) +- [knqyf263/go-plugin](https://github.com/knqyf263/go-plugin) +- [HashiCorp go-plugin](https://github.com/hashicorp/go-plugin) +- [sourcegraph/jsonrpc2](https://github.com/sourcegraph/jsonrpc2) +- [golang.org/x/exp/jsonrpc2](https://pkg.go.dev/golang.org/x/exp/jsonrpc2) +- [viant/jsonrpc](https://github.com/viant/jsonrpc) +- [MCP Official Go SDK](https://github.com/modelcontextprotocol/go-sdk) +- [Go 1.24 Wasm Blog Post](https://go.dev/blog/wasmexport) +- [Google Cloud: Go 1.24 Wasm](https://cloud.google.com/blog/products/application-development/go-1-24-expands-support-for-wasm) +- [Bytecode Alliance go-modules](https://github.com/bytecodealliance/go-modules) +- [WASIp3 Go Proposal #77141](https://github.com/golang/go/issues/77141) +- [State of WebAssembly 2025-2026](https://platform.uno/blog/the-state-of-webassembly-2025-2026/) +- [WASI Component Model Status](https://eunomia.dev/blog/2025/02/16/wasi-and-the-webassembly-component-model-current-status/) +- [WebAssembly Ecosystem 2026](https://reintech.io/blog/webassembly-ecosystem-2026-tools-frameworks-runtimes) +- [DoltHub Interface Extension Pattern](https://www.dolthub.com/blog/2022-09-12-golang-interface-extension/) +- [Go Plugin System with plugin Package](https://oneuptime.com/blog/post/2026-01-25-plugin-system-go-plugin-package/view) +- [Eli Bendersky: Plugins in Go](https://eli.thegreenplace.net/2021/plugins-in-go/) +- 
[trpc-mcp-go](https://pkg.go.dev/trpc.group/trpc-go/trpc-mcp-go) +- [Navidrome Plugins (Extism example)](https://github.com/navidrome/navidrome/blob/master/plugins/README.md) diff --git a/.compozy/tasks/ext-architecture/analysis_openclaw.md b/.compozy/tasks/ext-architecture/analysis_openclaw.md new file mode 100644 index 000000000..4be0c47f9 --- /dev/null +++ b/.compozy/tasks/ext-architecture/analysis_openclaw.md @@ -0,0 +1,611 @@ +# OpenClaw Extension Architecture Analysis + +## Overview + +OpenClaw's extensibility is built on a **Plugin SDK boundary** pattern where all extensions -- bundled and third-party alike -- interact with core through a narrow, typed surface at `src/plugin-sdk/`. The system supports 70+ bundled extensions across four plugin types (channel, provider, tool, skill) and distributes third-party extensions through ClawHub (clawhub.ai). Native apps (macOS, iOS, Android) are distinct from plugins; they connect as WebSocket node clients rather than loading into the Gateway process. + +The key architectural insight: **bundled extensions in `/extensions/` follow the exact same boundary rules as third-party plugins installed from ClawHub**. Core does not special-case bundled vs. external. This uniformity is enforced by convention (`extensions/AGENTS.md`), package boundaries (tsconfig isolation), and the Plugin SDK barrel structure. + +OpenClaw is a TypeScript/Node.js system. Extensions are npm packages with a `package.json` containing an `openclaw` block and a companion `openclaw.plugin.json` manifest. The Gateway is the host process; all plugins run in-process. + +## Extension Loading & Discovery + +### Six-Step Loading Sequence + +Per `src/gateway/server-plugin-bootstrap.ts`, extensions load in a strict order: + +``` +1. Manifest discovery phase -- scan /extensions/ + node_modules/@openclaw/* + NO code execution +2. Manifest validation -- parse openclaw.plugin.json, check JSON Schema, + verify requirements (config keys, binaries) +3. 
Dependency ordering -- providers before channels before skills +4. Code load phase -- dynamic import of plugin entry points +5. Registration -- each plugin registers with core via Plugin API +6. Ready signal -- all plugins loaded -> Gateway binds WS server +``` + +The split between **discovery** (manifest inspection) and **load** (code execution) is deliberate. A plugin can declare itself, surface its requirements, and appear in `openclaw plugins status` without ever running code. Code runs only if the plugin is enabled. + +### Discovery Locations + +- **Bundled**: `/extensions/<plugin-id>/` directories in the OpenClaw repo +- **Third-party**: `node_modules/@openclaw/*` (installed via npm/ClawHub) + +### Filtering + +Plugins can be disabled via config: + +```json5 +{ + plugins: { + allow: ["browser", "discord", "anthropic"], // hard allowlist + entries: { + "<plugin-id>": { enabled: false } // per-plugin toggle + } + } +} +``` + +If `plugins.allow` is set, only listed plugins load. Everything else is skipped at discovery. + +### Plugin Status States + +| State | Meaning | +|------------|--------------------------------------------| +| `enabled` | Loaded and registered | +| `disabled` | Present but config disables it | +| `missing` | Referenced in config, not installed | + +Missing plugins produce warnings at startup but do not prevent Gateway boot. + +**Source files**: `extensions/AGENTS.md`, `extensions/CLAUDE.md` + +## Extension Manifest Format + +Every extension ships two declaration files: + +### 1. `openclaw.plugin.json` -- Static Manifest + +Declares metadata, capabilities, and configuration schema **without executing code**. 
Examples from source: + +**Channel plugin (Discord)**: +```json +{ + "id": "discord", + "channels": ["discord"], + "channelEnvVars": { "discord": ["DISCORD_BOT_TOKEN"] }, + "configSchema": { "type": "object", "additionalProperties": false, "properties": {} } +} +``` + +**Provider plugin (Anthropic)**: +```json +{ + "id": "anthropic", + "enabledByDefault": true, + "providers": ["anthropic"], + "modelSupport": { "modelPrefixes": ["claude-"] }, + "cliBackends": ["claude-cli"], + "providerAuthEnvVars": { "anthropic": ["ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY"] }, + "providerAuthChoices": [ ... ], + "contracts": { "mediaUnderstandingProviders": ["anthropic"] }, + "configSchema": { ... } +} +``` + +**Tool plugin (Browser)**: +```json +{ + "id": "browser", + "enabledByDefault": true, + "configSchema": { "type": "object", "additionalProperties": false, "properties": {} } +} +``` + +**Memory plugin (memory-core)**: +```json +{ + "id": "memory-core", + "kind": "memory", + "uiHints": { ... }, + "configSchema": { ... complex schema with dreaming phases ... } +} +``` + +Key manifest fields: +- `id` -- unique identifier +- `enabledByDefault` -- whether the plugin loads without explicit config +- `channels` -- channel IDs this plugin provides +- `providers` -- provider IDs this plugin provides +- `modelSupport.modelPrefixes` -- which model ID prefixes this provider handles +- `providerAuthEnvVars` -- environment variables for auth +- `providerAuthChoices` -- onboarding auth flow options +- `contracts` -- capability contracts (e.g., media understanding, speech, image generation) +- `configSchema` -- JSON Schema for plugin-specific configuration +- `kind` -- plugin kind for specialized plugins (e.g., `"memory"`) +- `uiHints` -- UI rendering hints for config fields +- `channelEnvVars` -- required env vars per channel + +### 2. 
`package.json` -- npm Metadata + OpenClaw Block + +The `openclaw` block in `package.json` declares build, distribution, and runtime metadata: + +```json +{ + "name": "@openclaw/discord", + "version": "2026.4.10", + "openclaw": { + "extensions": ["./index.ts"], + "setupEntry": "./setup-entry.ts", + "channel": { + "id": "discord", + "label": "Discord", + "selectionLabel": "Discord (Bot API)", + "docsPath": "/channels/discord", + "blurb": "very well supported right now.", + "markdownCapable": true, + "configuredState": { + "specifier": "./configured-state", + "exportName": "hasDiscordConfiguredState" + } + }, + "install": { + "npmSpec": "@openclaw/discord", + "defaultChoice": "npm", + "minHostVersion": ">=2026.4.10" + }, + "compat": { "pluginApi": ">=2026.4.10" }, + "release": { + "publishToClawHub": true, + "publishToNpm": true + } + } +} +``` + +Key `openclaw` block fields: +- `extensions` -- entry point file paths (array; supports multiple) +- `setupEntry` -- separate entry for onboarding/setup flows +- `channel` -- channel metadata (label, docs, capabilities) +- `install` -- npm spec and host version requirements +- `compat.pluginApi` -- minimum Plugin SDK version +- `release` -- distribution targets (ClawHub, npm) +- `bundle.stageRuntimeDependencies` -- build-time bundling hints + +**Source files**: `extensions/discord/openclaw.plugin.json`, `extensions/discord/package.json`, `extensions/anthropic/openclaw.plugin.json`, `extensions/browser/openclaw.plugin.json`, `extensions/memory-core/openclaw.plugin.json` + +## ClawHub Marketplace + +**ClawHub** (https://clawhub.ai) is the public registry for skills and plugins: + +### Distribution Model + +- **Package format**: git repos or npm packages +- **Versioning**: semver tags on git refs +- **Namespacing**: `openclaw/weather`, `@username/custom-tool` +- **Metadata**: registry caches manifests, descriptions, homepage URLs + +### CLI Operations + +```bash +openclaw skills list # browse registry +openclaw skills info 
github # inspect before install +openclaw skills install github # install to workspace +openclaw skills install github@1.2.3 # pin version +openclaw skills update github # upgrade +openclaw skills update --all # upgrade all +openclaw skills uninstall github # remove +openclaw skills search weather # search index +``` + +### Distribution Config in package.json + +```json +{ + "openclaw": { + "release": { + "publishToClawHub": true, + "publishToNpm": true + } + } +} +``` + +ClawHub is optional -- users can point `skills.load.extraDirs` at any local directory and skip the registry entirely. This matters for air-gapped or sensitive deployments. + +**Source**: Wiki article "Skills, ClawHub and Plugins" + +## Hook System + +Extensions hook into core through the **Plugin API** object (`OpenClawPluginApi`) passed to the `register()` function. The API provides typed registration methods: + +### Entry Point Pattern + +Every extension exports a default entry created by one of: +- `definePluginEntry()` -- general plugins (tools, providers, memory) +- `defineBundledChannelEntry()` -- channel plugins +- `defineBundledChannelSetupEntry()` -- channel setup/onboarding + +**General plugin entry** (from `openclaw/plugin-sdk/plugin-entry`): +```typescript +export default definePluginEntry({ + id: "browser", + name: "Browser", + description: "Default browser tool plugin", + reload: browserPluginReload, // hot-reload config + nodeHostCommands: browserPluginNodeHostCommands, // device commands + securityAuditCollectors: [...collectors], // security audit hooks + register: registerBrowserPlugin, // main registration +}); +``` + +**Channel plugin entry** (from `openclaw/plugin-sdk/channel-entry-contract`): +```typescript +export default defineBundledChannelEntry({ + id: "discord", + name: "Discord", + description: "Discord channel plugin", + importMetaUrl: import.meta.url, + plugin: { specifier: "./channel-plugin-api.js", exportName: "discordPlugin" }, + runtime: { specifier: 
"./runtime-api.js", exportName: "setDiscordRuntime" }, + registerFull(api) { + api.on("subagent_spawning", async (event) => { ... }); + api.on("subagent_ended", async (event) => { ... }); + api.on("subagent_delivery_target", async (event) => { ... }); + }, +}); +``` + +### Plugin API Registration Methods + +Observed from source code, `OpenClawPluginApi` provides: + +| Method | Purpose | Example | +|--------|---------|---------| +| `api.registerTool(factory, opts?)` | Register agent-callable tool | Browser, memory_search | +| `api.registerProvider(provider)` | Register LLM/inference provider | OpenAI, Anthropic | +| `api.registerCliBackend(backend)` | Register CLI backend for agent control | codex-cli, claude-cli | +| `api.registerCli(fn, opts)` | Register CLI subcommands | `memory`, `browser` | +| `api.registerGatewayMethod(name, handler, opts)` | Register Gateway RPC method | `browser.request` | +| `api.registerService(service)` | Register long-running service | Browser plugin service | +| `api.registerHttpRoute(route)` | Register HTTP endpoint | Webhook routes | +| `api.registerMemoryCapability(cap)` | Register memory subsystem | memory-core | +| `api.registerImageGenerationProvider(p)` | Register image gen provider | OpenAI DALL-E | +| `api.registerRealtimeTranscriptionProvider(p)` | Register transcription | OpenAI Whisper | +| `api.registerRealtimeVoiceProvider(p)` | Register voice provider | OpenAI realtime | +| `api.registerSpeechProvider(p)` | Register TTS/STT | OpenAI speech | +| `api.registerMediaUnderstandingProvider(p)` | Register media understanding | Anthropic, OpenAI | +| `api.registerVideoGenerationProvider(p)` | Register video gen | OpenAI video | +| `api.on(event, handler)` | Subscribe to lifecycle events | subagent_spawning | + +### Event Hooks + +Channel plugins can subscribe to lifecycle events: +- `subagent_spawning` -- before a subagent starts +- `subagent_ended` -- after a subagent finishes +- `subagent_delivery_target` -- routing subagent 
output + +### Deferred Loading + +Extensions use dynamic `import()` for heavy modules, loading them lazily: +```typescript +let discordSubagentHooksPromise: Promise<typeof import("./subagent-hooks-api.js")> | null = null; +function loadDiscordSubagentHooksModule() { + discordSubagentHooksPromise ??= import("./subagent-hooks-api.js"); + return discordSubagentHooksPromise; +} +``` + +### Plugin Config Access + +Plugins access their configuration through `api.pluginConfig` and `api.config`, and write logs through `api.logger`. + +**Source files**: `extensions/browser/index.ts`, `extensions/discord/index.ts`, `extensions/anthropic/index.ts`, `extensions/openai/index.ts`, `extensions/memory-core/index.ts`, `extensions/webhooks/index.ts` + +## Tool Registration + +Tools are registered via the Plugin API with a factory function pattern: + +```typescript +api.registerTool( + ((ctx: OpenClawPluginToolContext) => + createBrowserTool({ + sandboxBridgeUrl: ctx.browser?.sandboxBridgeUrl, + allowHostControl: ctx.browser?.allowHostControl, + agentSessionKey: ctx.sessionKey, + })) as OpenClawPluginToolFactory, +); +``` + +The factory receives a context object (`OpenClawPluginToolContext`) containing: +- `ctx.sessionKey` -- current agent session +- `ctx.config` -- resolved configuration +- `ctx.browser` -- browser-specific context (for browser plugin) + +Tools can also be registered with explicit names: +```typescript +api.registerTool( + (ctx) => createMemorySearchTool({ config: ctx.config, agentSessionKey: ctx.sessionKey }), + { names: ["memory_search"] }, +); +``` + +### Tool Catalog + +All registered tools appear in the agent's tool catalog via `tools.catalog` RPC. 
Each tool self-describes with a JSON Schema: + +```json +{ + "name": "browser", + "description": "Browser automation: navigate, click, type, snapshot", + "schema": { + "type": "object", + "properties": { + "action": { "type": "string", "enum": ["open", "click", "type", "snapshot"] }, + "url": { "type": "string" } + }, + "required": ["action"] + } +} +``` + +### Tool Profiles + +Predefined bundles control which tools are available: + +| Profile | Tools | Use | +|---------|-------|-----| +| `none` | (empty) | Text-only assistant | +| `research` | Browser, web search | Information gathering | +| `creative` | Canvas, image generation | Content creation | +| `coding` | Browser, exec, cron | Code work | +| `dangerous` | All tools | Full access (requires approvals) | + +Composition rule: `deny` always wins; `alsoAllow` adds; `allow` replaces. + +### Tool Dispatch Targets + +| Target | Tools | How | +|--------|-------|-----| +| Gateway host | browser, cron, webhooks, api | In-process | +| Paired node | camera.snap, screen.record | WS to device | +| Sandbox container | exec (when sandboxed) | Docker spawn | +| Plugin code | Plugin-provided tools | In-process plugin | + +### Tool Streaming Events + +``` +tool_call -> tool_start -> tool_progress -> tool_result|tool_error -> tool_end +``` + +**Source files**: `extensions/browser/plugin-registration.ts`, `extensions/memory-core/index.ts`, Wiki "Tool System and Approvals" + +## Security Model + +### Trust Boundaries + +OpenClaw operates under a **single trusted operator** model. Three boundary layers: + +1. **Trusted Operator Boundary**: Gateway config, state directory, plugins, skills, memory files, authenticated callers -- all full operator trust +2. **Untrusted Input Boundary**: Channel messages, tool execution results, webhook payloads +3. **Isolation Boundaries** (defense-in-depth): Docker sandbox, network mode, workspace-only filesystem + +### Plugin Trust Level + +**Plugins are trusted code**. 
They run in-process within the Gateway with full operator privileges. There is no per-plugin sandboxing or capability restriction -- the boundary is at the Plugin SDK contract level, not at a security isolation level. + +### Tool Approval Flow + +Critical tools require operator approval before execution: + +``` +1. Generate approval ID (UUID) +2. Emit exec.approval.request event +3. Broadcast to all connected operators +4. Wait for approval (block tool execution) +5. Timeout after configured duration (default 5 min) -> reject +6. On approval: execute; on denial: return error +``` + +### Sandbox Pipeline + +Docker-based sandboxing for tool execution: + +``` +getBlockedBindReason() -- check bind mounts against denylist +validateSandboxSecurity() -- validate sandbox config +isDangerousNetworkMode() -- check network isolation +resolveSandboxConfigForAgent() -- resolve per-agent policy +Docker/OpenShell/SSH Backend -- launch container +``` + +Blocked bind mounts include: `~/.ssh`, `~/.aws`, `~/.gnupg`, `/etc`, `/var`, `~/.openclaw`. + +### Dangerous Config Flags + +Flags prefixed `dangerous` or `dangerouslyAllow` bypass safety defaults: +- `dangerousAllowUnsafeExec` +- `dangerouslyAllowAllTools` +- `dangerousBrowserControl` + +These are flagged by the `openclaw security audit` command. + +### Security Audit System + +```bash +openclaw security audit +``` + +Checks: filesystem permissions, gateway auth exposure, sandbox config, channel DM policies, installed skills code safety, tool policy. + +### Skill Security + +Third-party skills are code. 
Defenses: +- Allowlists per agent +- Dangerous code scanner on install (`skills.dangerousCode.mode`: `warn`/`block`/`allow`) +- Optional Docker sandboxing for untrusted skill code + +**Source files**: Wiki "Security Model and Trust Boundaries", `extensions/browser/plugin-registration.ts` (securityAuditCollectors) + +## Native Apps vs Extensions + +| Aspect | Plugins (Extensions) | Nodes (Native Apps) | +|--------|---------------------|---------------------| +| Where they run | Gateway process (in-process) | Separate OS process / device app | +| How they register | At Gateway startup via manifest | At connect time via WS handshake | +| What they contribute | Channels, providers, tools, skills | Device capabilities (camera, screen, etc.) | +| Isolation | In-process (Plugin SDK boundary) | OS-process (network boundary) | +| Trust model | Trusted if installed | Paired + capability-gated | +| Protocol | Plugin SDK + direct function calls | WebSocket RPC | + +### Node Capability Advertisement + +When a native app connects, it advertises capabilities: + +```json +{ + "role": "node", + "capabilities": ["camera", "canvas", "screen", "location", "voice"], + "commands": ["camera.snap", "canvas.navigate", "screen.record", "location.get", "system.run"] +} +``` + +The Gateway indexes devices by capability and routes commands to the appropriate node. Multiple devices can advertise the same capability; the Gateway picks the most-recently-active or routes by explicit `nodeId`. + +### Platform Capabilities + +- **macOS**: Canvas, camera, screen recording, location, shell commands, notifications (all TCC-gated) +- **iOS**: Canvas, camera, screen recording, location, notifications +- **Android**: Canvas, camera/video, screen recording, location, SMS, notifications, calendar, motion sensors + +**Source**: Wiki "Extensions and Native Apps" + +## Key Patterns for AGH + +### 1. 
Manifest-First Discovery (High Priority) + +OpenClaw's split between manifest inspection and code execution is its strongest pattern. AGH should adopt this: extension manifests (TOML, not JSON) are parsed and validated before any Go code loads. This enables `agh extensions status` without loading extension binaries. + +**AGH adaptation**: Use a TOML manifest (`agh.extension.toml`) with fields like `id`, `capabilities`, `config_schema`, `requires`. Parse at daemon startup before loading extension binaries. + +### 2. Uniform Plugin SDK Boundary (High Priority) + +The rule that bundled and third-party extensions follow identical contracts is critical for ecosystem growth. No "blessed" plugins with privileged access. + +**AGH adaptation**: Define Go interfaces in a `pkg/extension` or `internal/extension` package that all extensions implement. Use compile-time interface verification. + +### 3. Four Plugin Types (Adapt) + +OpenClaw's four types (channel, provider, tool, skill) map partially to AGH: +- **Provider** -> AGH's `AgentDriver` (ACP client for Claude, Codex, Gemini) +- **Tool** -> AGH could expose tools through ACP +- **Skill** -> AGH's skills system (YAML+Markdown, already planned) +- **Channel** -> Less relevant for AGH (no multi-channel messaging) + +### 4. Typed Registration API (High Priority) + +The `api.registerTool()`, `api.registerProvider()` pattern gives extensions a clean, typed surface for declaring capabilities. AGH should provide equivalent Go interfaces: + +```go +type ExtensionAPI interface { + RegisterDriver(driver AgentDriver) + RegisterTool(factory ToolFactory) + RegisterSkill(skill SkillDefinition) + RegisterCLICommand(cmd *cobra.Command) + RegisterHook(event string, handler HookHandler) +} +``` + +### 5. Dependency Ordering (Medium Priority) + +Loading providers before channels before skills prevents registration-order bugs. AGH should define a similar ordering for its extension types. + +### 6. 
Plugin Config with JSON Schema (Medium Priority) + +Each extension declares its config schema in the manifest. The core validates config against this schema before passing it to the extension. AGH should use JSON Schema (or a Go equivalent like `go-jsonschema`) for extension config validation. + +### 7. Deferred/Lazy Loading (Low Priority for Go) + +OpenClaw uses dynamic `import()` for heavy modules. In Go, this maps to plugin loading via `plugin.Open()` or subprocess-based extensions. Since AGH is a single binary, this pattern is less directly applicable but could matter for optional agent drivers. + +### 8. Security: Trusted but Auditable (High Priority) + +OpenClaw's model -- plugins are trusted code but auditable -- is pragmatic for a single-operator system. AGH should adopt the same: extensions run with daemon privileges but are subject to `agh security audit`. No multi-tenant isolation within one daemon. + +### 9. Skills as Teaching Layer Above Tools (High Priority) + +The separation of tools (raw capabilities) from skills (instructions teaching the agent to use tools) is a powerful pattern AGH should adopt. Skills are YAML+Markdown files; tools are code. Skills compose tools; plugins compose everything. + +### 10. Allowlist-Based Tool Profiles (Medium Priority) + +The profile system (`coding`, `research`, `dangerous`) with `allow`/`deny` composition gives operators coarse and fine-grained control. AGH should implement similar per-session or per-agent tool profiles. + +### 11. Hot-Reload Config (Low Priority) + +The `reload: { restartPrefixes: ["browser"] }` pattern allows extensions to declare which config changes require reloading. Useful for long-running daemons. 
+ +## Code References + +### Extension Directory Structure (Typical) + +``` +extensions/<plugin-id>/ + openclaw.plugin.json -- static manifest (capabilities, config schema) + package.json -- npm metadata + openclaw block (entry points, install, compat) + index.ts -- main entry point (definePluginEntry / defineBundledChannelEntry) + api.ts -- public barrel (exports for core/tests to consume) + runtime-api.ts -- runtime barrel + setup-entry.ts -- onboarding/setup entry (channel plugins) + register.runtime.ts -- runtime registration logic + plugin-registration.ts -- tool/CLI/gateway method registration + src/ -- private implementation + tsconfig.json -- extends package-boundary base + *.test.ts -- co-located tests +``` + +### Key SDK Imports + +| Import Path | Purpose | +|-------------|---------| +| `openclaw/plugin-sdk/plugin-entry` | `definePluginEntry`, `OpenClawPluginApi`, `OpenClawPluginToolContext` | +| `openclaw/plugin-sdk/channel-entry-contract` | `defineBundledChannelEntry`, `defineBundledChannelSetupEntry` | +| `openclaw/plugin-sdk/provider-entry` | Provider plugin entry contract | +| `openclaw/plugin-sdk/core` | Core helpers (Session, Message, etc.) 
| +| `openclaw/plugin-sdk/channel-contract` | Channel interface (connect, disconnect, send, status) | +| `openclaw/plugin-sdk/provider-auth` | Auth patterns for providers | +| `openclaw/plugin-sdk/extension-shared` | Shared utilities (deferred, passive monitor, status) | +| `openclaw/plugin-sdk/channel-config-primitives` | Channel config validation helpers | + +### Shared Extension Infrastructure + +`extensions/shared/` re-exports SDK utilities: + +| File | Re-exports | +|------|-----------| +| `runtime.ts` | `resolveLoggerBackedRuntime` | +| `deferred.ts` | `createDeferred` | +| `passive-monitor.ts` | `runStoppablePassiveMonitor` | +| `status-issues.ts` | `coerceStatusIssueAccountId`, `readStatusIssueFields` | +| `channel-status-summary.ts` | `buildPassiveChannelStatusSummary`, `buildTrafficStatusSummary` | +| `config-schema-helpers.ts` | `requireChannelOpenAllowFrom` | + +### Package Boundary Enforcement + +- `tsconfig.package-boundary.base.json` extends `tsconfig.package-boundary.paths.json` +- Each extension's `tsconfig.json` extends the base, restricting import paths +- Only `openclaw/plugin-sdk/*` and local barrels (`./api.ts`) are valid imports +- No `src/**`, no `../other-extension/**`, no `openclaw/plugin-sdk-internal/**` + +### Source File Locations + +- Wiki docs: `/Users/pedronauck/dev/knowledge/openclaw/wiki/concepts/` +- Extension source: `/Users/pedronauck/dev/knowledge/.resources/openclaw/extensions/` +- Extension boundary rules: `/Users/pedronauck/dev/knowledge/.resources/openclaw/extensions/AGENTS.md` +- Shared infrastructure: `/Users/pedronauck/dev/knowledge/.resources/openclaw/extensions/shared/` +- Discord (channel): `/Users/pedronauck/dev/knowledge/.resources/openclaw/extensions/discord/` +- Browser (tool): `/Users/pedronauck/dev/knowledge/.resources/openclaw/extensions/browser/` +- Anthropic (provider): `/Users/pedronauck/dev/knowledge/.resources/openclaw/extensions/anthropic/` +- OpenAI (provider): 
`/Users/pedronauck/dev/knowledge/.resources/openclaw/extensions/openai/` +- Memory Core (memory): `/Users/pedronauck/dev/knowledge/.resources/openclaw/extensions/memory-core/` +- Webhooks (tool/http): `/Users/pedronauck/dev/knowledge/.resources/openclaw/extensions/webhooks/` diff --git a/.compozy/tasks/ext-architecture/analysis_openfang.md b/.compozy/tasks/ext-architecture/analysis_openfang.md new file mode 100644 index 000000000..c4c861201 --- /dev/null +++ b/.compozy/tasks/ext-architecture/analysis_openfang.md @@ -0,0 +1,618 @@ +# OpenFang Extension Architecture Analysis + +Research date: 2026-04-10 +Source: OpenFang v0.5.7 (Rust, 14-crate workspace) +Repository: `https://github.com/RightNow-AI/openfang` + +## Overview + +OpenFang is a Rust single-binary agent daemon with a **compile-time composition** philosophy. Its extension surface is organized into four distinct subsystems: + +1. **53 Builtin Tools** -- Rust functions compiled into the binary (filesystem, web, shell, browser, media, inter-agent, memory, collaboration) +2. **MCP Integration** -- 25 bundled MCP server templates + arbitrary user-defined servers, connected at daemon boot via `rmcp` SDK +3. **40 Channel Adapters** -- messaging platform bridges (Telegram, Discord, Slack, etc.) compiled into the binary +4. **Skills + Hands** -- 60+ bundled skills (Python/WASM/Node/Shell/PromptOnly runtimes) and 7 preconfigured autonomous agent packages ("Hands") + +All four subsystems merge into a **single unified tool catalog** that the LLM sees. The `ToolRunner` dispatches each call to the correct backend based on tool name prefix (`mcp_*`, `agent_*`, skill names, or builtin names). There is **no dynamic plugin loading** for core capabilities -- the binary ships with everything. The escape hatches for runtime extensibility are ClawHub skill marketplace downloads and MCP server connections. 
+ +### 14-Crate Workspace Structure + +``` +crates/ + openfang-types/ # Shared types, config, agent, capability, tool definitions + openfang-runtime/ # Agent loop, tool runner, MCP client, sandbox, audit + openfang-kernel/ # Composition root: kernel, capabilities, workflows, scheduler + openfang-api/ # HTTP/SSE server, channel bridge adapter + openfang-cli/ # CLI commands + openfang-channels/ # 40 channel adapters, bridge manager, router + openfang-skills/ # Skill system: registry, loader, ClawHub, verification + openfang-hands/ # Hand definitions, registry, lifecycle + openfang-extensions/ # Integration registry, credential vault, OAuth, health monitor + openfang-memory/ # Memory substrate (SQLite) + openfang-wire/ # OFP peer network, HMAC-SHA256 auth + openfang-migrate/ # Database migrations + openfang-desktop/ # Desktop integration + xtask/ # Build tasks +``` + +Key dependency flow: `openfang-kernel` is the composition root. It imports `openfang-runtime`, `openfang-memory`, `openfang-skills`, `openfang-hands`, `openfang-extensions`. The runtime defines the `KernelHandle` trait to avoid circular deps -- the kernel implements it. + +## Tool ("Hands") System + +### Builtin Tool Architecture + +The `ToolRunner` in `crates/openfang-runtime/src/tool_runner.rs` is the central dispatch point. 
Its `execute_tool` function signature reveals the full dependency surface: + +```rust +pub async fn execute_tool( + tool_use_id: &str, + tool_name: &str, + input: &serde_json::Value, + kernel: Option<&Arc<dyn KernelHandle>>, // Inter-agent ops + allowed_tools: Option<&[String]>, // Capability enforcement + caller_agent_id: Option<&str>, + skill_registry: Option<&SkillRegistry>, // Skill dispatch + mcp_connections: Option<&Mutex<Vec<McpConnection>>>, // MCP dispatch + web_ctx: Option<&WebToolsContext>, + browser_ctx: Option<&BrowserManager>, + allowed_env_vars: Option<&[String]>, + workspace_root: Option<&Path>, + media_engine: Option<&MediaEngine>, + exec_policy: Option<&ExecPolicy>, + tts_engine: Option<&TtsEngine>, + docker_config: Option<&DockerSandboxConfig>, + process_manager: Option<&ProcessManager>, +) -> ToolResult +``` + +**Dispatch routing** (by tool name): +- `mcp_*` prefix -> routes to `McpConnection::call_tool` on the matching server +- `agent_*` names -> routes to `KernelHandle` inter-agent methods +- Skill tool names -> routes to `SkillRegistry` + `execute_skill_tool` +- Everything else -> direct Rust function call (builtin) + +**Pre-dispatch checks** (every call): +1. Tool name normalization via `normalize_tool_name` (e.g. `fs-write` -> `file_write`) +2. Capability enforcement: reject if tool not in `allowed_tools` list +3. Approval gate: check if tool requires human approval (configurable per risk level) +4. 
Taint tracking: `check_taint_shell_exec` and `check_taint_net_fetch` sanitize arguments + +### Tool Definition Model + +```rust +// From openfang-types/src/tool.rs +pub struct ToolDefinition { + pub name: String, + pub description: String, + pub input_schema: serde_json::Value, // JSON Schema +} + +pub struct ToolResult { + pub tool_use_id: String, + pub content: String, + pub is_error: bool, +} +``` + +### Tool Execution Matrix + +| Tool Type | Execution | Sandbox | Timeout | +|-------------|------------------|------------------------------|---------| +| Builtin | Direct Rust fn | Kernel restrictions | N/A | +| WASM skill | Wasmtime | Fuel + epoch + watchdog | 30s | +| Python skill| Subprocess | `env_clear()` + allowlist | 120s | +| Node skill | Subprocess | `env_clear()` + allowlist | 120s | +| Shell skill | Subprocess | `env_clear()` + allowlist | 120s | +| MCP tool | Remote call | Transport isolation | 60s | +| Inter-agent | Kernel dispatch | Recursion guard (depth 5) | 600s | + +### Tool Filtering Per Agent + +Agents declare tool subsets in their manifest via TOML config: + +```toml +[[agents]] +name = "chat-assistant" +tools = ["web_search", "web_fetch", "memory_recall"] # Whitelist + +[[agents]] +name = "researcher" +tools_exclude = ["docker_run", "kill_process"] # Blacklist +``` + +The `CapabilityManager` (in `openfang-kernel/src/capabilities.rs`) uses a `DashMap>` for concurrent-safe RBAC. Child agents inherit a subset of parent capabilities, preventing privilege escalation through delegation. 
+ +### Hands System + +**Hands** are preconfigured autonomous agent packages combining: +- A `HAND.toml` manifest (tools, settings, dashboard metrics, requirements, guardrails) +- A system prompt (500+ words expert persona) +- A `SKILL.md` domain knowledge file +- A default cron schedule + +Defined in `crates/openfang-hands/src/lib.rs`, the `HandDefinition` struct includes: + +```rust +pub struct HandDefinition { + pub id: String, + pub name: String, + pub description: String, + pub category: HandCategory, + pub tools: Vec<String>, // Required tool names + pub skills: Vec<String>, // Skill allowlist + pub mcp_servers: Vec<String>, // MCP server allowlist + pub requires: Vec<HandRequirement>, // Binary/env/API key prereqs + pub settings: Vec<HandSetting>, // User-configurable settings + pub agent: HandAgentConfig, // LLM config + system prompt + pub dashboard: HandDashboard, // Metrics schema + pub skill_content: Option<String>, // Injected at load time +} +``` + +**Hand lifecycle states**: `Active -> Paused -> Error -> Inactive` + +**7 Bundled Hands**: Researcher, Lead, Collector, Predictor, Clip, Twitter, Browser -- all compiled into the binary. + +**Key pattern for AGH**: Hands are essentially a "meta-extension" that composes tools + skills + prompts + schedules into a named, configurable, marketplace-distributable agent personality. The `HandRequirement` system (binary checks, env var checks, API key checks) with platform-specific `HandInstallInfo` is particularly well-designed for UX. 
+ +## Channel Adapter Pattern + +### Trait Architecture + +The channel system uses two complementary traits defined in `crates/openfang-channels/`: + +**Inbound** (`ChannelAdapter` in `types.rs`): +```rust +#[async_trait] +pub trait ChannelAdapter: Send + Sync { + fn name(&self) -> &str; + fn channel_type(&self) -> ChannelType; + async fn start(&self) -> Result<Pin<Box<dyn Stream<Item = ChannelMessage> + Send>>, Box<dyn std::error::Error>>; +} +``` + +**Outbound** (`MessageAdapter`): +```rust +#[async_trait] +pub trait MessageAdapter { + async fn send(&self, msg: OutboundMessage) -> Result<()>; + async fn connect(&self) -> Result<()>; + async fn disconnect(&self) -> Result<()>; +} +``` + +Each concrete adapter implements both traits. The `start()` method returns an async stream -- the bridge polls this continuously. Each adapter hides its own transport (WebSocket, long-polling, webhooks, SSE) behind the stream abstraction. + +### Unified Message Envelope + +```rust +pub struct ChannelMessage { + pub channel: ChannelType, + pub platform_message_id: String, + pub sender: ChannelUser, + pub content: ChannelContent, // Text | Image | File | FileData | Voice | Location | Command + pub target_agent: Option<AgentId>, + pub timestamp: DateTime<Utc>, + pub is_group: bool, + pub thread_id: Option<String>, + pub metadata: HashMap<String, serde_json::Value>, +} +``` + +### Bridge Manager and Routing + +The `BridgeManager` in `bridge.rs` orchestrates adapter lifecycle: +1. Reads channel config from TOML +2. Instantiates each enabled adapter +3. Spawns a Tokio task per adapter to consume its message stream +4. Applies policies (DM policy, group policy, user allow/block lists) +5. 
Routes through `AgentRouter` with 5-level priority chain: + - Bindings (most specific: user+channel -> agent) + - Direct routes + - User defaults (persisted preference) + - Channel defaults + - System default (global fallback) + +### Channel Bridge Handle + +The `ChannelBridgeHandle` trait (defined in channels crate, implemented in API crate) breaks the circular dependency between channels and kernel: + +```rust +#[async_trait] +pub trait ChannelBridgeHandle: Send + Sync { + async fn send_message(&self, agent_id: AgentId, message: &str) -> Result<String, String>; + async fn send_message_with_blocks(&self, agent_id: AgentId, blocks: Vec<ContentBlock>) -> Result<String, String>; + async fn find_agent_by_name(&self, name: &str) -> Result<Option<AgentId>, String>; + async fn list_agents(&self) -> Result<Vec<(AgentId, String)>, String>; + async fn spawn_agent_by_name(&self, manifest_name: &str) -> Result<AgentId, String>; + // ... transcribe_audio, pending_approvals, uptime_info, etc. +} +``` + +### Adding a New Adapter (3 steps) + +1. Implement `ChannelAdapter` + `MessageAdapter` in a new module under `crates/openfang-channels/src/` +2. Register in `crates/openfang-channels/src/lib.rs` factory + add `ChannelType` variant in `openfang-types` +3. Add config support for `[channels.<name>]` section + +No changes to kernel, agent loop, or API server required. The adapter traits are the sole integration point. + +### Configuration and Policies + +```toml +[channels.telegram] +bot_token = "${TELEGRAM_BOT_TOKEN}" +default_agent = "assistant" +model_override = "gpt-4" +rate_limit = 10 +dm_policy = "allowed_only" # Respond | AllowedOnly | Ignore +group_policy = "mention_only" # All | MentionOnly | CommandsOnly | Ignore +output_format = "telegram_html" # Markdown | TelegramHtml | SlackMrkdwn | PlainText +allowed_users = ["@alice", "@bob"] +blocked_users = ["@spammer"] +``` + +Features: per-channel rate limiting, message splitting (platform size limits), user filtering, model override per channel, auto-reply templates, command prefix recognition, hot reload without daemon restart. 
+ +## MCP Integration + +### Client Architecture + +Implemented in `crates/openfang-runtime/src/mcp.rs` using the `rmcp` SDK. + +**Three transports**: +```rust +pub enum McpTransport { + Stdio { command: String, args: Vec<String> }, // Most common for bundled + Sse { url: String }, // Deprecated (HTTP+SSE transport from MCP spec 2024-11-05) + Http { url: String }, // Current recommended (Streamable HTTP, MCP spec 2025-03-26+) +} +``` + +**Connection lifecycle** (`McpConnection`): +1. **Connect** -- spawn subprocess or open HTTP stream, perform MCP `initialize` handshake +2. **Discover** -- call `tools/list`, convert tool schemas to internal `ToolDefinition` format +3. **Map** -- namespace tools as `mcp_{server}_{tool}`, store original names for reverse lookup +4. **Execute** -- `call_tool` sends JSON-RPC with 60s timeout + +**Key implementation detail**: The `original_names: HashMap<String, String>` preserves server-side tool names because hyphens (e.g., `list-repos`) are normalized to underscores for LLM function-calling compatibility, but the MCP server expects the original name. + +### Tool Namespacing + +Every MCP tool is prefixed: `mcp_{server_name}_{tool_name}`. This prevents collisions across servers (two servers could both expose `search`). The prefix also makes tool origin transparent in logs. Provider-specific schema adaptation strips unsupported JSON Schema keys (`$schema`, `$defs`, `additionalProperties`, `title`) and inlines `$ref` references. + +### 25 Bundled Integration Templates + +Managed by `IntegrationRegistry` in `crates/openfang-extensions/src/registry.rs`. Each template is an `IntegrationTemplate` struct embedded at compile time via `bundled.rs`. The registry supports: + +- `load_bundled()` -- parse compile-time TOML templates +- `load_installed()` -- merge with `~/.openfang/integrations.toml` +- `install()` / `uninstall()` -- manage installed state +- `to_mcp_configs()` -- convert to `McpServerConfig` for kernel consumption + +**Installation flow** (`openfang add <integration>`): +1. Lookup template in `IntegrationRegistry` +2. 
Prompt operator for credentials (or OAuth PKCE flow) +3. Encrypt credentials in vault (AES-256-GCM) +4. Write integration config to `integrations.toml` +5. Mark as `Ready` or `Setup` based on credential completeness + +### MCP Server Mode + +OpenFang can also operate **as** an MCP server, exposing its tools to external clients. This enables bidirectional MCP: one OpenFang instance connects to another's MCP server endpoint, discovers remote tools, and invokes them. The A2A protocol layers on top of this. + +## Workflow Engine + +Implemented in `crates/openfang-kernel/src/workflow.rs`. + +### Data Model + +```rust +pub struct Workflow { + pub id: WorkflowId, + pub name: String, + pub description: String, + pub steps: Vec<WorkflowStep>, + pub created_at: DateTime<Utc>, +} + +pub struct WorkflowStep { + pub name: String, + pub agent: StepAgent, // ById { id } or ByName { name } + pub prompt_template: String, // Jinja-style: {{input}}, {{var_name}} + pub mode: StepMode, + pub timeout_secs: u64, + pub error_mode: ErrorMode, // Fail | Skip | Retry { max_retries } + pub output_var: Option<String>, +} +``` + +### Five Step Modes + +| Mode | Agent Invocations | Blocking | Use Case | +|------|-------------------|----------|----------| +| `Sequential` | 1 | Yes | Linear pipeline | +| `FanOut` | N (parallel) | Yes (all) | Multi-perspective analysis | +| `Collect` | 0 (merge only) | No | Aggregate parallel outputs | +| `Conditional { condition }` | 0 or 1 | Yes | Quality gates, branching | +| `Loop { max_iterations, until }` | 1 to N | Yes | Iterative refinement | + +### Variable Interpolation + +Steps reference variables via `{{var_name}}` syntax. Resolution order: +1. Step output variables (highest, most recent wins) +2. Global workflow variables +3. Initial input (`{{input}}`) + +A missing variable leaves its `{{placeholder}}` in the prompt verbatim -- a deliberate choice, so the omission stays visible instead of failing silently. + +### Execution Flow + +Each step invocation goes through the standard `run_agent_loop()`, respecting loop guards and metering. 
The workflow engine adds no execution capability of its own -- it only orchestrates when and how agent loops run. Fan-out uses `tokio::task::JoinSet` for structured concurrency. Each parallel agent gets a fresh session. + +### Configuration + +```toml +[[workflows]] +id = "research-pipeline" +name = "Research Pipeline" + + [[workflows.steps]] + name = "research" + agent_id = "researcher" + prompt = "Research {{topic}}" + mode = "sequential" + timeout_secs = 300 + output_var = "findings" + error_mode = "fail" +``` + +### API Surface + +8 endpoints: CRUD for workflow definitions + run/list-runs/get-run. Also accessible via CLI (`openfang workflow list|create|run`). + +## Plugin/Extension Model + +OpenFang does **not** have a traditional plugin system with dynamic loading. Instead, it uses a **four-layer composition model**: + +### Layer 1: Compile-Time (Builtins, Channel Adapters, Hands) +All 53 tools, 40 adapters, and 7 Hands are compiled into the binary. Benefits: auditability (single hash = known feature set), air-gap deployment, startup speed, integrity. + +### Layer 2: Boot-Time (MCP Servers, Bundled Skills) +MCP server connections are established and skill registries loaded during daemon boot. The tool catalog is rebuilt at this point. 
+ +### Layer 3: Runtime (ClawHub Skills, User MCP Servers) +Skills can be installed from the ClawHub marketplace at runtime: +```bash +openfang skill install financial-analysis +# Downloads, validates Ed25519 signature, installs to ~/.openfang/skills/ +``` + +Skills declare their runtime in `SKILL.toml`: +```rust +pub struct SkillManifest { + pub skill: SkillMeta, // name, version, description, tags + pub runtime: SkillRuntimeConfig, // Python | Wasm | Node | Shell | Builtin | PromptOnly + pub tools: SkillTools, // Tool definitions + pub requirements: SkillRequirements, + pub prompt_context: Option<String>, // For PromptOnly skills + pub source: Option<SkillSource>, // Native | Bundled | OpenClaw | ClawHub +} +``` + +The `SkillRegistry` supports: +- `load_bundled()` -- compile-time embedded SKILL.md files +- `load_all()` -- scan `~/.openfang/skills/` directory +- `freeze()` -- lock registry in Stable mode (no new skills) +- Prompt injection scanning on all skills (even bundled, defense-in-depth) +- OpenClaw compatibility layer for cross-framework skill format + +### Layer 4: Protocol-Level (A2A, MCP Server Mode) +Cross-instance extensibility via A2A protocol and MCP server mode. + +### Skill Execution Model + +The `SkillLoader` in `crates/openfang-skills/src/loader.rs` dispatches by runtime type: + +```rust +match manifest.runtime.runtime_type { + SkillRuntime::Python => execute_python(skill_dir, entry, tool_name, input), + SkillRuntime::Node => execute_node(skill_dir, entry, tool_name, input), + SkillRuntime::Shell => execute_shell(skill_dir, entry, tool_name, input), + SkillRuntime::Wasm => Err("not yet implemented"), + SkillRuntime::Builtin => Err("handled by kernel"), + SkillRuntime::PromptOnly => Ok("instructions are in system prompt"), +} +``` + +Skills communicate via **stdin/stdout JSON**: the loader spawns a subprocess, writes `{"tool": name, "input": input}` to stdin, reads JSON from stdout. 
Environment is cleared (`env_clear()`) with only PATH, HOME, and PYTHONIOENCODING allowed. + +### Multi-Language Support + +- **Python**: Subprocess with `env_clear()`, JSON over stdin/stdout +- **Node.js**: Subprocess (OpenClaw compatibility layer) +- **Shell/Bash**: Subprocess with sandbox +- **WASM**: Wasmtime sandbox (fuel + epoch metering) -- declared but not yet fully implemented +- **PromptOnly**: No code execution, markdown injected into system prompt +- **Builtin**: Direct Rust functions + +## Security Model + +OpenFang implements **16 interlocking security layers** distributed across every crate: + +### Layers Most Relevant to Extensions + +| # | Layer | Purpose for Extensions | +|---|-------|----------------------| +| 1 | WASM dual-metered sandbox | Sandboxes untrusted skills (fuel + epoch + watchdog timeout) | +| 3 | Taint tracking | Prevents credential leakage into LLM prompts via `Tainted` newtype | +| 4 | Ed25519 signed manifests | Validates agent/skill manifests against supply-chain injection | +| 5 | SSRF protection | Blocks `web_fetch` from private IPs, metadata endpoints, DNS rebinding | +| 6 | Secret zeroization | `Zeroizing` auto-wipes credentials on drop | +| 7 | OFP mutual auth | HMAC-SHA256 for A2A peer authentication | +| 8 | Capability gates | RBAC tool allowlists via `CapabilityManager` (deny-by-default) | +| 11 | Subprocess sandbox | `env_clear()` + allowlist for shell/Python/Node skills | +| 12 | Prompt injection scanner | Scans user messages and tool results before LLM prompt | +| 13 | Loop guard | SHA256 cycle detection + 50-iteration hard limit | +| 15 | Path traversal prevention | `std::fs::canonicalize()` + base directory check | + +### Credential Resolution Chain + +```rust +// Priority: vault -> .env -> env var -> interactive prompt +pub fn resolve_credential(&self, key: &str) -> Result<Zeroizing<String>> { + // 1. AES-256-GCM encrypted vault (~/.openfang/vault.enc) + // 2. Dotenv file (~/.openfang/.env) + // 3. std::env::var + // 4. 
Interactive prompt (CLI last resort) +} +``` + +Master key sourced from: OS keyring (preferred) -> `OPENFANG_VAULT_KEY` env var (CI) -> manual backup. + +### Approval System + +Tools classified by risk level: +- **Low** (auto-approve): `kg_query`, `list_files` +- **Medium** (auto-approve): `web_fetch`, `web_search` +- **High** (require approval): `shell_exec`, `write_file` +- **Critical** (require approval): `docker_run`, `delete_file` + +Each approval request has a 60s timeout and is auto-denied when it expires. At most 5 requests can be pending per agent, and the 100 most recent are kept in memory. `--yolo` flag disables all approval gates. + +### Inter-Agent Recursion Guard + +```rust +tokio::task_local! { + static AGENT_CALL_DEPTH: std::cell::Cell<u32>; +} +const MAX_AGENT_CALL_DEPTH: u32 = 5; +``` + +Each `agent_send`/`agent_spawn` increments depth. Task-local scoping ensures concurrent agent calls maintain independent counters. + +## Key Patterns for AGH + +### 1. Unified Tool Catalog with Prefix-Based Routing + +**Pattern**: All extension types (builtin, MCP, skill, inter-agent) present tools through the same `ToolDefinition` struct. The `ToolRunner` routes by name prefix. The LLM sees a single flat list. + +**AGH relevance**: AGH already has a similar concept with `AgentDriver` implementations. The key insight is that tool namespacing (e.g., `mcp_github_search`) prevents collisions and makes provenance transparent without requiring a separate routing layer. + +### 2. Trait-Based Extension Points with Crate Boundaries + +**Pattern**: Extension interfaces are traits defined in the lower-level crates that the kernel depends on (`ChannelAdapter` in channels crate, `KernelHandle` in runtime crate). The kernel crate implements them. This avoids circular dependencies while allowing clean extension. + +**AGH relevance**: AGH's `session/` defines `AgentDriver`, `acp/` implements it -- same pattern. Could extend this to `ChannelAdapter`-style traits for external integrations. + +### 3. 
TOML Manifests for Everything + +**Pattern**: Every extension type has a TOML manifest: `SKILL.toml` for skills, `HAND.toml` for hands, `config.toml` sections for channels, `integrations.toml` for MCP servers. Manifests are the declarative contract; code implements the behavior. + +**AGH relevance**: AGH already uses TOML config. The skill manifest pattern (declaring runtime, tools, requirements) is directly applicable for an AGH extension system. + +### 4. Compile-Time Composition with Runtime Escape Hatches + +**Pattern**: Core capabilities are compiled in (auditability, air-gap, speed). Runtime extensibility exists but is sandboxed (WASM, subprocess isolation, MCP transport isolation). + +**AGH relevance**: For AGH Phase 2/3, this suggests: compile critical skills into the binary, allow runtime skill installation with subprocess sandbox, and use MCP as the interop layer for third-party tools. + +### 5. Multi-Runtime Skill Execution + +**Pattern**: Skills declare their runtime type, and the loader dispatches to the appropriate executor. Communication is JSON over stdin/stdout for subprocess runtimes. Each runtime gets its own sandbox profile. + +**AGH relevance**: If AGH adds a skill/extension system, the `SkillRuntime` enum pattern (Python, Shell, WASM, PromptOnly) with stdin/stdout JSON protocol is simple and proven. The `PromptOnly` runtime (inject markdown into system prompt) is particularly elegant for knowledge-only extensions. + +### 6. Credential Vault with Zeroization + +**Pattern**: AES-256-GCM encrypted vault with OS keyring integration, `Zeroizing` wrapper that auto-wipes on drop, and a 4-tier resolution chain (vault -> .env -> env var -> interactive). + +**AGH relevance**: AGH will need credential management for MCP servers and integrations. The vault pattern with zeroization is a strong security baseline. + +### 7. Channel Adapter as Stream Abstraction + +**Pattern**: Each channel adapter returns an async `Stream`. The bridge manager polls streams. 
This hides all transport complexity (WebSocket, long-polling, webhooks) behind a uniform interface. + +**AGH relevance**: If AGH needs to receive messages from external platforms, the stream-based adapter pattern is the cleanest approach. The `ChannelBridgeHandle` trait pattern for breaking circular deps is also directly useful. + +### 8. Hands as Meta-Extensions + +**Pattern**: Hands compose tools + skills + prompts + schedules + requirements + settings into a named, distributable, marketplace-ready agent personality. The `HandRequirement` system with platform-specific install info provides excellent UX. + +**AGH relevance**: This is essentially what AGH sessions could evolve into with configuration presets. The HAND.toml manifest pattern (declaring required tools, configurable settings, dashboard metrics) is directly applicable for AGH agent templates. + +### 9. Workflow Engine as Orchestration Layer + +**Pattern**: The workflow engine does not add execution capability -- it only orchestrates when and how existing agent loops run. Steps are pure data (prompt template + mode + error handling). Variable interpolation connects steps. + +**AGH relevance**: If AGH adds multi-agent workflows, the step mode taxonomy (Sequential, FanOut, Collect, Conditional, Loop) covers the essential orchestration patterns. The "workflow adds no execution capability" principle keeps the system simple. + +### 10. Defense-in-Depth with Composable Security Layers + +**Pattern**: 16 security layers, each addressing a distinct attack surface, applied structurally (not optionally). Complete mediation: every tool call passes through capability check, approval check, taint check, and sandbox check. + +**AGH relevance**: AGH should plan security layers early. The minimal set for extensions: capability gates (tool allowlists), subprocess sandbox (`env_clear` + allowlist), credential isolation, and an approval system for high-risk operations. 
+ +## Code References + +### Core Extension Infrastructure + +| File | Purpose | +|------|---------| +| `crates/openfang-runtime/src/tool_runner.rs` | Central tool dispatch, capability enforcement, taint checks | +| `crates/openfang-runtime/src/mcp.rs` | MCP client: `McpConnection`, `McpTransport`, tool namespacing | +| `crates/openfang-runtime/src/kernel_handle.rs` | `KernelHandle` trait: inter-agent operations, memory, tasks | +| `crates/openfang-kernel/src/kernel.rs` | `OpenFangKernel` struct: composition root with all subsystems | +| `crates/openfang-kernel/src/capabilities.rs` | `CapabilityManager`: RBAC tool allowlists | +| `crates/openfang-kernel/src/workflow.rs` | `WorkflowEngine`, `WorkflowStep`, `StepMode` | + +### Skills and Hands + +| File | Purpose | +|------|---------| +| `crates/openfang-skills/src/lib.rs` | `SkillManifest`, `SkillRuntime` enum, `SkillToolDef` | +| `crates/openfang-skills/src/registry.rs` | `SkillRegistry`: load/freeze/snapshot, bundled + filesystem skills | +| `crates/openfang-skills/src/loader.rs` | `execute_skill_tool`: dispatches to Python/Node/Shell/WASM runtimes | +| `crates/openfang-skills/src/verify.rs` | `SkillVerifier`: prompt injection scanning | +| `crates/openfang-hands/src/lib.rs` | `HandDefinition`, `HandInstance`, `HandSetting`, `HandRequirement` | +| `crates/openfang-hands/src/registry.rs` | `HandRegistry`: load, activate, deactivate hands | + +### Channel Adapters + +| File | Purpose | +|------|---------| +| `crates/openfang-channels/src/types.rs` | `ChannelAdapter` trait, `ChannelMessage`, `ChannelContent` | +| `crates/openfang-channels/src/bridge.rs` | `BridgeManager`, `ChannelBridgeHandle` trait, chat commands | +| `crates/openfang-channels/src/router.rs` | `AgentRouter`: 5-level priority routing | +| `crates/openfang-channels/src/telegram.rs` (etc.) 
| Individual adapter implementations | + +### Extensions and Security + +| File | Purpose | +|------|---------| +| `crates/openfang-extensions/src/lib.rs` | `IntegrationTemplate`, `McpTransportTemplate`, `RequiredEnvVar` | +| `crates/openfang-extensions/src/registry.rs` | `IntegrationRegistry`: bundled + installed MCP templates | +| `crates/openfang-extensions/src/credentials.rs` | `CredentialResolver`: vault -> .env -> env var chain | +| `crates/openfang-extensions/src/vault.rs` | `CredentialVault`: AES-256-GCM encrypted storage | +| `crates/openfang-extensions/src/installer.rs` | `install_integration`: one-click MCP server setup | +| `crates/openfang-extensions/src/oauth.rs` | OAuth2 PKCE flow for Google/GitHub/Slack | +| `crates/openfang-types/src/taint.rs` | `TaintedValue`, `TaintLabel`, `TaintSink` | +| `crates/openfang-types/src/manifest_signing.rs` | Ed25519 manifest signing/verification | +| `crates/openfang-runtime/src/subprocess_sandbox.rs` | `env_clear()` + allowlist subprocess isolation | +| `crates/openfang-runtime/src/sandbox.rs` | `WasmSandbox`: Wasmtime fuel + epoch metering | +| `crates/openfang-runtime/src/audit.rs` | `AuditLog`: Merkle hash-chain append-only log | + +### Configuration + +| File | Purpose | +|------|---------| +| `openfang.toml.example` | Example config showing all sections | +| `crates/openfang-types/src/config.rs` | `KernelConfig`, `ChannelOverrides`, `DmPolicy`, `GroupPolicy` | +| `crates/openfang-types/src/capability.rs` | `Capability` enum, `capability_matches` | + +### Workspace + +| File | Purpose | +|------|---------| +| `Cargo.toml` (root) | 14-crate workspace definition | +| `crates/openfang-wire/` | OFP peer network protocol, HMAC-SHA256 mutual auth | diff --git a/.compozy/tasks/ext-architecture/analysis_pi_mono.md b/.compozy/tasks/ext-architecture/analysis_pi_mono.md new file mode 100644 index 000000000..022e43f29 --- /dev/null +++ b/.compozy/tasks/ext-architecture/analysis_pi_mono.md @@ -0,0 +1,470 @@ +# Pi-Mono 
Extension Architecture Analysis + +## Overview + +Pi-Mono is a TypeScript monorepo (`github.com/badlogic/pi-mono`) created by Mario Zechner for building AI coding agents. It comprises seven packages organized in three tiers: a foundation LLM API (`pi-ai`), an infrastructure agent runtime (`pi-agent-core`, `pi-tui`), and application-tier products (`pi-coding-agent`, `pi-web-ui`, `pi-mom`, `pi-pods`). All packages are published under `@mariozechner` npm scope with lockstep versioning. + +The system's extension architecture is built on a philosophy of **aggressive extensibility**: a minimal core (4 tools, <1000-token system prompt) with deep hooks at every phase of the agent lifecycle. Extensions, skills, prompt templates, and themes form four customization axes, all distributable as "Pi Packages" via npm or git. + +**Key architectural principle**: Pi does NOT use MCP, sub-agents, permission popups, plan mode, or built-in todos. These are all delegated to the extension system, proving the extension API's completeness. + +--- + +## Extension & Customization System + +### Extension Entry Point + +Every extension is a TypeScript module that exports a single default function receiving an `ExtensionAPI` object: + +```typescript +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; + +export default function (pi: ExtensionAPI) { + // all registration happens here +} +``` + +There is no plugin DSL, no YAML manifest for capabilities, and no restricted sandbox language. Extensions import the same packages the agent uses. 
+ +**Source**: `packages/coding-agent/src/core/extensions/types.ts` -- `ExtensionFactory` type (line 1273): +```typescript +export type ExtensionFactory = (pi: ExtensionAPI) => void | Promise<void>; +``` + +### ExtensionAPI Surface + +The `ExtensionAPI` interface (types.ts lines 986-1211) is deliberately flat and exposes: + +| Category | Methods | +|----------|---------| +| Event subscription | `pi.on(eventName, handler)` -- 25+ strongly-typed event types | +| Tool registration | `pi.registerTool(toolDef)` | +| Command registration | `pi.registerCommand(name, opts)` | +| Shortcut registration | `pi.registerShortcut(key, opts)` | +| Flag registration | `pi.registerFlag(name, opts)` / `pi.getFlag(name)` | +| Message rendering | `pi.registerMessageRenderer(customType, renderer)` | +| Provider registration | `pi.registerProvider(name, config)` / `pi.unregisterProvider(name)` | +| Actions | `sendMessage`, `sendUserMessage`, `appendEntry`, `exec` | +| Tool management | `getActiveTools`, `getAllTools`, `setActiveTools` | +| Model control | `setModel`, `getThinkingLevel`, `setThinkingLevel` | +| Session metadata | `setSessionName`, `getSessionName`, `setLabel` | +| Inter-extension comms | `pi.events` (shared EventBus) | + +### Extension Loading & Discovery + +Extensions are auto-discovered from four filesystem locations: + +| Location | Scope | +|----------|-------| +| `.pi/extensions/*.ts` | Project-local | +| `.pi/extensions/*/index.ts` | Project-local (subdirectory) | +| `~/.pi/agent/extensions/*.ts` | Global (all projects) | +| `~/.pi/agent/extensions/*/index.ts` | Global (subdirectory) | + +Additional paths can be supplied via `settings.json` under the `extensions` or `packages` arrays. The `-e ./path.ts` CLI flag loads a single extension for testing.
+ +**Source**: `packages/coding-agent/src/core/extensions/loader.ts` + +The loader (`discoverAndLoadExtensions()`, loader.ts line 511) resolves extension files, creates a `jiti` transpiler with virtual modules for bundled packages, and calls each extension's default export. Virtual modules ensure extensions can import core packages without installing them: + +```typescript +const VIRTUAL_MODULES: Record = { + "@sinclair/typebox": _bundledTypebox, + "@mariozechner/pi-agent-core": _bundledPiAgentCore, + "@mariozechner/pi-tui": _bundledPiTui, + "@mariozechner/pi-ai": _bundledPiAi, + "@mariozechner/pi-ai/oauth": _bundledPiAiOauth, + "@mariozechner/pi-coding-agent": _bundledPiCodingAgent, +}; +``` + +### Extension Runtime Lifecycle + +The runtime lifecycle proceeds in stages (managed by `ExtensionRunner` in runner.ts): + +1. **Load phase**: Extensions loaded sequentially. `createExtensionRuntime()` creates a runtime with throwing stubs for action methods. Registration calls (tools, commands, etc.) work immediately. Action calls (sendMessage, etc.) throw. + +2. **`bindCore(actions, contextActions)`**: Called by `AgentSession` after initialization. Flushes pending provider registrations and wires real action implementations into the runtime. All extension API objects reference the shared runtime, so wiring is automatic. + +3. **`bindCommandContext(actions)`**: Wires navigation actions (`newSession`, `fork`, `navigateTree`, `switchSession`, `reload`). Only called when UI mode is present. + +4. **Event dispatch**: Multiple specialized emit methods: `emit(event)`, `emitToolCall(event)`, `emitToolResult(event)`, `emitBeforeAgentStart(event)`, `emitContext(messages)`, `emitInput(text, images, source)`, `emitResourcesDiscover(cwd, reason)`. 
+ +### Event System + +25+ strongly-typed events organized into categories: + +- **Session lifecycle**: `session_start`, `session_before_switch`, `session_before_fork`, `session_before_compact`, `session_compact`, `session_shutdown`, `session_before_tree`, `session_tree` +- **Agent loop**: `before_agent_start`, `agent_start`, `agent_end`, `turn_start`, `turn_end`, `context`, `before_provider_request` +- **Messages**: `message_start`, `message_update`, `message_end` +- **Tools**: `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, `tool_call` (can block!), `tool_result` (can modify, chains like middleware) +- **Input**: `input` (transform/handle user input before agent) +- **Resources**: `resources_discover` (contribute skill/prompt/theme paths) +- **Model**: `model_select` +- **Bash**: `user_bash` + +The `tool_call` event is notable: returning `{ block: true, reason: "..." }` prevents execution. The `event.input` is mutable -- mutations propagate to subsequent handlers. No re-validation after mutation. This enables permission gates, path protection, and input rewriting. + +The `tool_result` event chains like middleware: each handler returns patches for `content`, `details`, or `isError`, merged sequentially. + +### Custom Tools + +Tools are registered via `pi.registerTool()` with TypeBox schema validation: + +```typescript +interface ToolDefinition { + name: string; + label: string; + description: string; + parameters: TParams; // TypeBox schema + promptSnippet?: string; // one-liner for system prompt + promptGuidelines?: string[]; // bullets for Guidelines section + prepareArguments?: fn; // pre-validation shim + execute: fn; // core logic + renderCall?: fn; // custom TUI for arguments + renderResult?: fn; // custom TUI for results +} +``` + +**Source**: types.ts lines 369-405 + +The `execute` function receives: `toolCallId`, validated params, `AbortSignal`, `onUpdate` callback (streaming progress), and `ExtensionContext`. 
+ +Extensions can **override built-in tools** by registering a tool with the same name. Rendering inheritance: if override omits `renderCall`/`renderResult`, built-in renderers are used. + +Built-in tools also expose pluggable operations interfaces (`ReadOperations`, `WriteOperations`, `BashOperations`) for delegating execution to remote systems. + +### Custom Commands + +Commands are slash-prefixed (`/mycommand args`), user-initiated (not LLM-invoked). Handlers receive `ExtensionCommandContext` with session control methods: `waitForIdle()`, `newSession()`, `fork()`, `navigateTree()`, `switchSession()`, `reload()`, `shutdown()`. + +Commands bypass skill expansion. If multiple extensions register same name, numeric suffixes assigned (`/review:1`, `/review:2`). Built-in commands always win. + +### State Persistence + +Two mechanisms: +1. **Custom entries**: `pi.appendEntry(customType, data)` -- persisted in session file, survives restarts, NOT sent to LLM +2. **Tool result details**: The `details` field in tool results is stored and replayed during restore/navigation + +--- + +## Provider Plugin Pattern + +### API Registry Architecture + +**Source**: `packages/ai/src/api-registry.ts` + +The API registry is a `Map` keyed by API type string. Each entry holds two streaming functions: `stream` (provider-specific options) and `streamSimple` (unified options). Runtime type checking enforces model's `api` field matches. + +```typescript +export function registerApiProvider( + provider: ApiProvider, + sourceId?: string, +): void { + apiProviderRegistry.set(provider.api, { provider: { ... }, sourceId }); +} +``` + +Operations: `getApiProvider(api)`, `getApiProviders()`, `unregisterApiProviders(sourceId)`, `clearApiProviders()`. 
+ +### Lazy Provider Registration + +**Source**: `packages/ai/src/providers/register-builtins.ts` + +Each provider uses lazy loading via logical OR assignment (`||=`) to avoid pulling in heavy SDKs at startup: + +```typescript +function loadAnthropicProviderModule() { + anthropicProviderModulePromise ||= import("./anthropic.js").then((module) => ({ + stream: module.streamAnthropic, + streamSimple: module.streamSimpleAnthropic, + })); + return anthropicProviderModulePromise; +} +``` + +If dynamic import fails, the error is encoded in the stream (stopReason: "error") rather than thrown -- preserving the stream protocol contract. + +`registerBuiltInApiProviders()` runs as a side effect at module load, registering all 10 built-in API types: +- `anthropic-messages`, `openai-completions`, `openai-responses`, `azure-openai-responses`, `openai-codex-responses`, `mistral-conversations`, `google-generative-ai`, `google-gemini-cli`, `google-vertex`, `bedrock-converse-stream` + +### Extension-based Provider Registration + +Extensions register providers at runtime via `pi.registerProvider(name, config)`: + +```typescript +interface ProviderConfig { + baseUrl?: string; + apiKey?: string; + api?: Api; + streamSimple?: (model, context, options?) => AssistantMessageEventStream; + headers?: Record<string, string>; + authHeader?: boolean; + models?: ProviderModelConfig[]; + oauth?: { name, login, refreshToken, getApiKey, modifyModels? }; +} +``` + +**Source**: types.ts lines 1218-1270 + +Calls during load phase are queued in `pendingProviderRegistrations` and flushed by `bindCore()`. Post-initialization calls take effect immediately. + +The `Api` type is `KnownApi | (string & {})` -- any string is valid at runtime, allowing custom API types without modifying core type definitions. + +### Custom models.json + +Users add custom providers/models via `~/.pi/agent/models.json`.
Three modes: +- **Proxy mode**: just `baseUrl`, redirects existing models through proxy +- **Upsert mode**: `models` array alongside built-in provider, replaces matching IDs +- **Model overrides**: `modelOverrides` field for tweaking specific built-ins + +Dynamic value resolution: `apiKey`/`headers` support shell commands (`!` prefix), env vars, or literals. + +### OAuth Provider System + +Five built-in OAuth providers implementing `OAuthProviderInterface`: +- Anthropic (PKCE + callback server, port 53692) +- OpenAI Codex (PKCE + callback server, port 1455) +- GitHub Copilot (device code flow) +- Gemini CLI (Google Cloud OAuth) +- Antigravity (specialized Google Cloud) + +Extensions register custom OAuth via `pi.registerProvider()` with an `oauth` config object. + +--- + +## Skills & Prompt Templates + +### Skills (Agent Skills Standard) + +Skills implement the [agentskills.io](https://agentskills.io/specification) standard. A skill is a `SKILL.md` file with YAML frontmatter and Markdown instructions. + +**Source**: `packages/coding-agent/src/core/skills.ts` + +Frontmatter schema: +```yaml +name: my-skill # lowercase a-z, 0-9, hyphens. Max 64 chars. +description: ... # Max 1024 chars. Determines auto-invocation. +disable-model-invocation: false # Optional +``` + +Discovery locations (in precedence order): +1. `~/.pi/agent/skills/` -- global user +2. `~/.agents/skills/` -- cross-agent compatibility +3. `.pi/skills/` -- project-specific +4. `.agents/skills/` -- project agents (scanned up through parents) + +Project resources override global ones on name collision. + +**Progressive disclosure execution model**: +1. Only skill `name` and `description` injected into system prompt +2. Agent uses `read` tool to fetch full `SKILL.md` on demand +3. Users can force via `/skill:name [args]` + +System prompt format (skills.ts `formatSkillsForPrompt`, line 339): +```xml +<available_skills> + <skill> + <name>my-skill</name> + <description>...</description>
+ <location>/path/to/SKILL.md</location> + </skill> +</available_skills> +``` + +### Prompt Templates + +Markdown files with bash-style variable substitution: +- `$1`, `$2`, ... -- positional arguments +- `$@` / `$ARGUMENTS` -- all arguments +- `${@:N}` -- arguments from index N +- `${@:N:L}` -- L arguments from index N + +Discovery: `~/.pi/agent/prompts/` (global), `.pi/prompts/` (project). Project overrides global. + +### Skills vs Extensions vs MCP (Design Philosophy) + +| Capability | Skills | Extensions | MCP (rejected) | +|-----------|--------|------------|----------------| +| Teach agent procedures | Yes | No | N/A | +| Register custom tools | No | Yes | Yes | +| Requires TypeScript | No | Yes | Yes | +| Context cost | On-demand | Per-tool definition | Always loaded | +| Auto-invoked by agent | Yes | N/A | N/A | + +Pi explicitly rejects MCP due to context bloat (13,700-18,000 tokens per server), composability limits (CLI pipes are more capable), and extensibility friction. + +--- + +## Monorepo Package Architecture + +### Three-Tier Layer Diagram + +``` +APPLICATIONS: pi-coding-agent | pi-web-ui | pi-mom | pi-pods + | | | | +INFRASTRUCTURE: pi-agent-core | pi-tui (standalone) + | +FOUNDATION: pi-ai (standalone) +``` + +### Package Inventory + +| Package | npm name | Purpose | +|---------|----------|---------| +| `packages/ai` | `@mariozechner/pi-ai` | Unified LLM API: 15+ providers, 10 API protocols, streaming, model catalog, cost tracking, OAuth | +| `packages/agent` | `@mariozechner/pi-agent-core` | Minimal agent runtime: turn-based loop, tool execution (sequential/parallel), lifecycle hooks | +| `packages/coding-agent` | `@mariozechner/pi-coding-agent` | CLI (`pi`): 3 run modes (interactive/print/RPC), extension system, session management, compaction | +| `packages/tui` | `@mariozechner/pi-tui` | Terminal UI: differential rendering, components, editor, keyboard handling | +| `packages/web-ui` | `@mariozechner/pi-web-ui` | Web components: chat panel, messages, tool renderers | +| `packages/mom` | 
`@mariozechner/pi-mom` | Slack bot: delegates to coding agent, per-channel stores, sandbox execution | +| `packages/pods` | `@mariozechner/pi` | GPU pod management: vLLM deployment, SSH-based operations | + +### Dependency Rules + +- Standalone packages (`pi-ai`, `pi-tui`) have zero internal dependencies +- `pi-agent-core` depends only on `pi-ai` +- `pi-coding-agent` depends on all three lower-tier packages (`pi-ai`, `pi-agent-core`, `pi-tui`) +- Leaf packages depend on subsets -- consumers can use each layer independently +- All packages lockstep versioned (same version number) + +### Build System + +- npm workspaces (not Yarn/pnpm/Turborepo) +- Sequential build respecting dependency graph: `tui -> ai -> agent -> coding-agent -> mom -> web-ui -> pods` +- TypeScript compiled with `tsgo` (Go port of tsc); web-ui uses standard `tsc` +- Biome for linting/formatting +- Model catalog auto-generated from provider APIs at build time +- Vitest for testing (pi-ai, pi-agent-core, pi-coding-agent); Node test runner for pi-tui + +### Module Export Patterns + +`pi-ai` uses subpath exports for selective imports (e.g., `@mariozechner/pi-ai/anthropic`). This enables tree-shaking -- unused provider SDKs are eliminated from bundles. + +`pi-coding-agent` exposes core logic and a hooks API for extension developers. + +--- + +## Security Model + +**There is no sandbox.** Extensions run with full trust in the same Node.js process. No capability-based sandbox, no permission manifest, no code review gate. This is a deliberate choice: pi targets developers already running arbitrary code.
+ +Practical mitigations: +- **Scope isolation**: project-local extensions in `.pi/extensions/` visible only to that project +- **Permission gates via tool_call**: extensions CAN add confirmation prompts (but this is advisory, not a security boundary) +- **Package provenance**: npm audit, git commit history +- **Conflict resolution rules**: + - Reserved keybindings cannot be overridden by extensions + - Built-in commands always win over extension commands + - Extension-vs-extension: first-registered-wins, numeric suffixes for duplicates + - Provider overrides: `registerProvider()` may override built-ins; `unregisterProvider()` restores them + +--- + +## Key Patterns for AGH + +### 1. Single Entry Point Factory Pattern +Every extension exports one default function receiving a typed API object. No manifest files, no class inheritance, no interfaces to implement. This is the lowest-friction extension model possible. + +**AGH implication**: Go equivalent could be a `func(api *ExtensionAPI)` entry point loaded via plugin or embedded scripting. + +### 2. Typed Event Bus with Lifecycle Hooks +25+ strongly-typed events covering every phase: session start/stop, agent loop, tool execution, message streaming, context modification, input transformation. Events can block operations (`tool_call`), modify data in flight (`tool_result`, `context`, `input`), or inject content (`before_agent_start`). + +**AGH implication**: The Notifier pattern in AGH already provides fan-out. Adding typed hook points at session/tool/agent boundaries would enable similar extensibility. Critical hooks to replicate: +- Before/after tool execution (with block/modify capability) +- Context modification before LLM call +- Input transformation +- Session lifecycle (start, compact, shutdown) + +### 3. Deferred Binding / Two-Phase Initialization +Extensions load and register tools/commands/events synchronously, but action methods (sendMessage, etc.) throw until `bindCore()` is called. 
Provider registrations are queued during load, flushed at bind time. + +**AGH implication**: For Go, this maps to a two-phase init: (1) collect registrations during plugin load, (2) wire in real implementations once daemon services are ready. Use interface stubs or channel-based deferred execution. + +### 4. Virtual Module Injection +Core packages are bundled into the binary and provided as virtual modules to extensions, eliminating dependency installation for simple extensions. + +**AGH implication**: If using embedded scripting (Lua, Starlark, etc.), provide standard library modules pre-loaded. If using Go plugins, provide a well-defined SDK package. + +### 5. Progressive Disclosure for Skills +Only skill name+description are in the system prompt (few tokens). Full content loaded on-demand via the agent's own `read` tool. This preserves context window while enabling unlimited skills. + +**AGH implication**: AGH's skills system should follow this pattern. Inject a compact skill index into the system prompt; let the agent load full skill content when needed. + +### 6. Pi Package Distribution Model +Extensions, skills, prompts, and themes bundled in packages with a `package.json` `"pi"` key: +```json +{ "pi": { "extensions": ["./extensions"], "skills": ["./skills"], "prompts": ["./prompts"], "themes": ["./themes"] } } +``` + +Sources: npm (`npm:@scope/pkg`), git (`git:github.com/user/repo`), local paths. Pattern filtering with glob, `!` exclude, `+` force-include, `-` force-exclude. + +**AGH implication**: AGH could use a similar manifest-based package format. TOML instead of JSON for consistency. Distribution via git (primary) and Go module paths. + +### 7. Tool Override with Renderer Inheritance +Extensions can replace built-in tools by name. If the override omits custom renderers, built-in renderers are used. Built-in tools expose pluggable operations interfaces for delegation. 
+ +**AGH implication**: Allow extensions to wrap or replace ACP tool implementations while preserving observability/rendering. + +### 8. No MCP -- CLI Tools + Skills Instead +Pi's alternative to MCP: teach the agent about CLI tools via skill files. Token cost is on-demand (only when relevant) vs MCP's always-loaded tool definitions. Shell composition (pipes, redirects) provides superior composability. + +**AGH implication**: AGH already has skills. Consider making skills the primary extensibility mechanism for agent capabilities, with extensions reserved for runtime behavior modification. + +### 9. Conflict Resolution is Deterministic +- Shortcuts: reserved cannot be overridden, non-reserved generate warning +- Commands: built-in wins, extension duplicates get numeric suffixes +- Tools: first-registered wins, warning for built-in conflicts +- Providers: explicit override/restore semantics + +**AGH implication**: Define clear precedence rules for every registrable resource type. + +### 10. Settings Cascade: Global + Project +Global settings (`~/.pi/agent/`), project settings (`.pi/`). Project overrides global. Team sharing via committed `.pi/settings.json`. + +**AGH implication**: AGH already has workspace-scoped config. Ensure extension/skill discovery follows the same cascade with project taking precedence. 
+ +--- + +## Code References + +### Extension System +- **Extension types & API**: `packages/coding-agent/src/core/extensions/types.ts` (1450 lines, comprehensive type definitions) +- **Extension loader**: `packages/coding-agent/src/core/extensions/loader.ts` (discovery, jiti transpiler, virtual modules) +- **Extension runner**: `packages/coding-agent/src/core/extensions/runner.ts` (lifecycle, event dispatch, conflict resolution) +- **Extension index**: `packages/coding-agent/src/core/extensions/index.ts` (public API exports) + +### Provider System +- **API registry**: `packages/ai/src/api-registry.ts` (provider registration, type-safe dispatch) +- **Lazy registration**: `packages/ai/src/providers/register-builtins.ts` (lazy loading pattern, 10 built-in providers) +- **Provider modules**: `packages/ai/src/providers/*.ts` (one file per provider: anthropic.ts, google.ts, openai-completions.ts, etc.) + +### Skills System +- **Skill loader**: `packages/coding-agent/src/core/skills.ts` (discovery, validation, prompt formatting) +- **Package manager**: `packages/coding-agent/src/core/package-manager.ts` (npm/git/local source resolution) +- **Resource loader**: `packages/coding-agent/src/core/resource-loader.ts` (aggregation pipeline) + +### Agent Core +- **Agent types**: `packages/agent/src/types.ts` (StreamFn, tool execution modes, hook interfaces) +- **Agent loop**: `packages/agent/src/agent-loop.ts` (core turn-by-turn cycle) + +### Examples (60+ extension examples) +- **Minimal tool**: `packages/coding-agent/examples/extensions/hello.ts` +- **Custom provider with OAuth**: `packages/coding-agent/examples/extensions/custom-provider-anthropic/` (full streaming implementation) +- **Permission gate**: `packages/coding-agent/examples/extensions/confirm-destructive.ts` +- **Git checkpoint**: `packages/coding-agent/examples/extensions/git-checkpoint.ts` +- **Custom compaction**: `packages/coding-agent/examples/extensions/custom-compaction.ts` +- **Dynamic resources**: 
`packages/coding-agent/examples/extensions/dynamic-resources/` + +### Configuration +- **Root package.json**: `package.json` (workspace config, build scripts) +- **Pi package manifest**: `"pi"` key in `package.json` (extensions, skills, prompts, themes arrays) +- **Settings cascade**: `~/.pi/agent/settings.json` (global) + `.pi/settings.json` (project) + +### Wiki Documentation +- `/Users/pedronauck/dev/knowledge/pi-mono/wiki/concepts/Extension and Customization System.md` +- `/Users/pedronauck/dev/knowledge/pi-mono/wiki/concepts/Design Philosophy and Extensibility.md` +- `/Users/pedronauck/dev/knowledge/pi-mono/wiki/concepts/Pi Skills and Prompt Templates.md` +- `/Users/pedronauck/dev/knowledge/pi-mono/wiki/concepts/Provider and Model System.md` +- `/Users/pedronauck/dev/knowledge/pi-mono/wiki/concepts/Pi Monorepo Architecture.md` diff --git a/.compozy/tasks/ext-architecture/task_01.md b/.compozy/tasks/ext-architecture/task_01.md new file mode 100644 index 000000000..e3c65d9fe --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_01.md @@ -0,0 +1,72 @@ +--- +status: pending +title: Minimal Tool struct and ToolProvider interface +type: backend +complexity: low +dependencies: [] +--- + +# Task 01: Minimal Tool struct and ToolProvider interface + +## Overview + +Create the foundational `Tool` struct and `ToolProvider` interface in a new `internal/tools/` package. This grounds the existing hook tool dispatch (`tool.pre_call`, `tool.post_call`, `tool.post_error`) which already operates against tool semantics (ToolName, ToolNamespace matchers) that have no corresponding data type. Extensions will later implement `ToolProvider` to register tools with AGH. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `internal/tools/` package with `Tool` struct containing `Name`, `Description`, `InputSchema json.RawMessage`, `ReadOnly bool`, and `Source ToolSource` +- MUST define `ToolSource` enum with values: `ToolSourceBuiltin`, `ToolSourceMCP`, `ToolSourceExtension`, `ToolSourceDynamic` +- MUST define `ToolProvider` interface with `Tools(ctx context.Context) ([]Tool, error)` method +- MUST include compile-time interface verification +- MUST ensure `Tool` JSON serialization matches the hook payload field names for `ToolCallRef` in `internal/hooks/` + + +## Subtasks +- [ ] 1.1 Create `internal/tools/` package with `Tool` struct and `ToolSource` enum +- [ ] 1.2 Define `ToolProvider` interface +- [ ] 1.3 Verify JSON serialization compatibility with existing hook `ToolCallRef` payload fields +- [ ] 1.4 Write unit tests for Tool serialization and ToolSource enum + +## Implementation Details + +New package `internal/tools/` with two files: `tool.go` (types) and `tool_test.go` (tests). + +See TechSpec "Core Interfaces" section for the `Tool` struct and `ToolProvider` interface definitions. 
+ +### Relevant Files +- `internal/hooks/payloads.go` — Contains `ToolPreCallPayload`, `ToolPostCallPayload` with tool-related fields that `Tool` must be compatible with +- `internal/hooks/types.go` — `HookMatcher` has `ToolName`, `ToolNamespace`, `ToolReadOnly` fields that reference tool semantics +- `internal/hooks/dispatch.go` — Tool dispatch methods that will eventually consume `Tool` types + +### Dependent Files +- `internal/extension/manager.go` — Will use `ToolProvider` to collect tools from extensions (future task) + +### Related ADRs +- [ADR-005: Extension Three-Dimensional Package Model](adrs/adr-005.md) — Tools are part of the "capabilities" dimension + +## Deliverables +- New `internal/tools/tool.go` with `Tool` struct, `ToolSource` enum, `ToolProvider` interface +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `Tool` struct JSON marshaling produces expected field names matching hook payloads + - [ ] `Tool` struct JSON unmarshaling from hook-compatible JSON succeeds + - [ ] `ToolSource` string values are correct (`builtin`, `mcp`, `extension`, `dynamic`) + - [ ] `ToolSource` validation rejects unknown values + - [ ] Compile-time interface verification for `ToolProvider` +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `internal/tools/` package exists and compiles +- `make verify` passes diff --git a/.compozy/tasks/ext-architecture/task_02.md b/.compozy/tasks/ext-architecture/task_02.md new file mode 100644 index 000000000..4535631a0 --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_02.md @@ -0,0 +1,102 @@ +--- +status: pending +title: Shared subprocess lifecycle package +type: backend +complexity: high +dependencies: [] +--- + +# Task 02: Shared subprocess lifecycle package + +## Overview + +Extract reusable subprocess lifecycle primitives from `internal/acp/client.go` into a new `internal/subprocess/` package. 
This provides the foundation for both ACP agent communication and extension subprocess management. The package handles process spawning, JSON-RPC 2.0 framing over stdio, initialize handshake with capability negotiation, health monitoring, and graceful shutdown with signal escalation — all conforming to the protocol spec in `_protocol.md`. + + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `internal/subprocess/` package with `Process` struct managing a single subprocess +- MUST implement `Launch(ctx, LaunchConfig) (*Process, error)` for process spawning with platform-specific process group setup +- MUST implement bidirectional JSON-RPC 2.0 framing over stdin/stdout (one JSON object per line) +- MUST implement `Call(ctx, method, params, result) error` for outbound requests +- MUST implement `HandleMethod(method, handler)` for inbound request routing +- MUST implement `Shutdown(ctx) error` with cooperative drain + signal escalation (SIGTERM → wait → SIGKILL) per protocol spec section 8 +- MUST implement initialize handshake per protocol spec section 4 (capability negotiation, version check) +- MUST implement health check probing per protocol spec section 7 (interval, timeout, unhealthy threshold) +- MUST evaluate `sourcegraph/jsonrpc2` as the JSON-RPC library — use it if suitable, otherwise implement minimal framing +- MUST refactor `internal/acp/client.go` to import shared subprocess primitives where applicable without breaking existing ACP tests +- MUST NOT break any existing tests in `internal/acp/` + + +## Subtasks +- [ ] 2.1 Create `internal/subprocess/` package with `Process` struct and `LaunchConfig` +- [ ] 2.2 Implement JSON-RPC 2.0 transport layer 
(line-delimited, bidirectional, multiplexed) +- [ ] 2.3 Implement initialize handshake with capability negotiation per protocol spec section 4 +- [ ] 2.4 Implement health check probing with configurable interval, timeout, and unhealthy threshold +- [ ] 2.5 Implement graceful shutdown with signal escalation per protocol spec section 8 +- [ ] 2.6 Refactor `internal/acp/client.go` to use shared subprocess primitives where possible +- [ ] 2.7 Write unit and integration tests for the subprocess lifecycle + +## Implementation Details + +New package `internal/subprocess/` with files: `process.go`, `transport.go`, `handshake.go`, `health.go`, `signals.go`. + +See TechSpec "Core Interfaces" section for `Process` struct. See `_protocol.md` sections 1-4, 7-8 for normative wire-level contract. + +The ACP refactor (subtask 2.6) should be incremental — extract what's cleanly shareable without forcing ACP into the extension protocol shape. The ACP uses `coder/acp-go-sdk` for its own JSON-RPC; the shared package provides an independent framing layer. 
+ +### Relevant Files +- `internal/acp/client.go` — Current subprocess lifecycle (Start, Stop, signal handling) to extract from +- `internal/acp/process_tree_unix.go` — Platform-specific process group setup (Setpgid, SIGTERM/SIGKILL) +- `internal/acp/process_tree_windows.go` — Windows process termination +- `internal/acp/types.go` — `StartOpts`, `AgentProcess` types +- `internal/procutil/procutil.go` — Process alive check and signal helpers + +### Dependent Files +- `internal/acp/client.go` — Will be refactored to import shared subprocess primitives +- `internal/extension/manager.go` — Will use subprocess package to manage extension processes (task 06) +- `internal/extension/host_api.go` — Will use subprocess transport for Host API (task 07) + +### Related ADRs +- [ADR-004: Generalize ACP as Subprocess Extension Protocol](adrs/adr-004.md) — This task implements the shared lifecycle +- [ADR-001: Two-Tier Extension Model](adrs/adr-001.md) — L3 subprocess tier + +## Deliverables +- New `internal/subprocess/` package with process management, JSON-RPC transport, handshake, health, signals +- Refactored `internal/acp/client.go` using shared primitives where applicable +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for subprocess launch → handshake → call → shutdown lifecycle **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `Launch()` spawns process and connects stdin/stdout + - [ ] `Call()` sends JSON-RPC request and receives response + - [ ] `Call()` with context cancellation returns error before timeout + - [ ] `HandleMethod()` routes inbound requests to correct handler + - [ ] Initialize handshake succeeds with compatible versions + - [ ] Initialize handshake fails with `-32602` for unsupported protocol version + - [ ] Health check marks extension unhealthy after 2 consecutive probe failures + - [ ] Health check with `healthy: false` response marks unhealthy immediately + - [ ] Shutdown sends cooperative request then escalates signals + - [ ] 
Shutdown sends SIGKILL after timeout if process doesn't exit + - [ ] JSON-RPC framing handles one JSON object per line correctly + - [ ] Messages exceeding 10 MiB are rejected +- Integration tests: + - [ ] End-to-end: launch test subprocess → handshake → call → shutdown + - [ ] Crash recovery: subprocess exits unexpectedly → Process detects exit + - [ ] Concurrent requests: multiple outstanding requests resolve correctly +- Test coverage target: >=80% +- All existing `internal/acp/` tests must continue passing + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `internal/subprocess/` package exists and compiles +- `internal/acp/` tests still pass after refactor +- `make verify` passes diff --git a/.compozy/tasks/ext-architecture/task_03.md b/.compozy/tasks/ext-architecture/task_03.md new file mode 100644 index 000000000..0aea085d2 --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_03.md @@ -0,0 +1,93 @@ +--- +status: pending +title: Extension manifest parser (TOML and JSON) +type: backend +complexity: medium +dependencies: [] +--- + +# Task 03: Extension manifest parser (TOML and JSON) + +## Overview + +Create the extension manifest parser in a new `internal/extension/` package. The parser reads `extension.toml` (primary) or `extension.json` (fallback), validates the schema, and produces a `Manifest` struct. Manifest-first discovery is the foundation of the extension loading pipeline — extensions can be listed and validated without executing any code. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `internal/extension/` package with `manifest.go` containing the `Manifest` struct and related config types +- MUST support parsing `extension.toml` using `github.com/BurntSushi/toml` (AGH convention) +- MUST support parsing `extension.json` using `encoding/json` as fallback +- MUST implement a loader that tries `extension.toml` first, then `extension.json`, returning a typed error if neither exists +- MUST validate required fields: `name`, `version`, `min_agh_version` +- MUST validate semver format for `version` and `min_agh_version` +- MUST validate `min_agh_version` compatibility with current daemon version +- MUST parse `[resources]`, `[capabilities]`, `[actions]`, `[subprocess]`, `[security]` sections into typed structs +- MUST produce identical `Manifest` structs from equivalent TOML and JSON inputs +- MUST NOT execute any extension code during parsing + + +## Subtasks +- [ ] 3.1 Create `internal/extension/` package with `Manifest` struct and section configs +- [ ] 3.2 Implement TOML parser using `BurntSushi/toml` +- [ ] 3.3 Implement JSON parser using `encoding/json` +- [ ] 3.4 Implement dual-format loader with TOML-first precedence +- [ ] 3.5 Implement schema validation (required fields, semver, capability names) +- [ ] 3.6 Write table-driven tests for both formats and all validation paths + +## Implementation Details + +New package `internal/extension/` with `manifest.go` and `manifest_test.go`. + +See TechSpec "Data Models" section for the `Manifest` struct and the TOML/JSON examples. See `_examples.md` for full manifest examples across multiple extension types. 
+ +Manifest struct mirrors the existing AGH config pattern (BurntSushi/toml with `toml:` tags, also JSON-compatible via `json:` tags on the same fields). + +### Relevant Files +- `internal/config/config.go` — Existing TOML parsing pattern to follow for consistency +- `internal/config/hooks.go` — Existing hook declaration schema that extension manifests will reference +- `internal/skills/types.go` — `Skill` type for resource registration (extensions bundle skills) + +### Dependent Files +- `internal/extension/capability.go` — Will consume `Manifest.Security.Capabilities` (task 04) +- `internal/extension/registry.go` — Will store `Manifest` fields in DB (task 05) +- `internal/extension/manager.go` — Will orchestrate manifest loading (task 06) + +### Related ADRs +- [ADR-005: Extension Three-Dimensional Package Model](adrs/adr-005.md) — Resources/capabilities/actions dimensions map to manifest sections + +## Deliverables +- New `internal/extension/manifest.go` with `Manifest`, `ResourcesConfig`, `CapabilitiesConfig`, `ActionsConfig`, `SubprocessConfig`, `SecurityConfig` structs +- Dual-format loader function `LoadManifest(dir string) (*Manifest, error)` +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Parse valid `extension.toml` produces correct Manifest struct + - [ ] Parse valid `extension.json` produces identical Manifest to equivalent TOML + - [ ] Missing `name` field returns validation error + - [ ] Missing `version` field returns validation error + - [ ] Invalid semver `version` returns validation error + - [ ] `min_agh_version` newer than current daemon returns compatibility error + - [ ] Loader returns TOML manifest when both TOML and JSON exist + - [ ] Loader returns JSON manifest when only JSON exists + - [ ] Loader returns typed `ErrManifestNotFound` when neither exists + - [ ] Parse extension with resources (skills, agents, hooks, mcp_servers) sections + - [ ] Parse extension with capabilities.provides and 
actions.requires + - [ ] Parse extension with subprocess env var substitution placeholders + - [ ] Unknown top-level sections are accepted for forward compatibility (ignored) +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `internal/extension/manifest.go` exists and compiles +- TOML and JSON loaders produce identical structs from equivalent inputs +- `make verify` passes diff --git a/.compozy/tasks/ext-architecture/task_04.md b/.compozy/tasks/ext-architecture/task_04.md new file mode 100644 index 000000000..629e9503a --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_04.md @@ -0,0 +1,93 @@ +--- +status: pending +title: Capability checker and source-trust tiers +type: backend +complexity: medium +dependencies: + - task_03 +--- + +# Task 04: Capability checker and source-trust tiers + +## Overview + +Implement the capability-scoped security model that enforces per-extension grants at both the hook dispatch boundary and the Host API boundary. The `CapabilityChecker` maps extensions to their declared capabilities and applies source-trust tier policy (bundled, user, workspace, marketplace). This is the core of ADR-003 and prevents a compromised extension from exceeding its declared privileges. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `internal/extension/capability.go` with `CapabilityChecker` struct +- MUST define `ExtensionSource` enum with values: `SourceBundled`, `SourceUser`, `SourceWorkspace`, `SourceMarketplace` (matching existing `skills.SkillSource` pattern) +- MUST implement source-trust tier policy per ADR-003: + - bundled, user, workspace: all capabilities granted by default (`*`) + - marketplace: restricted (no `permission.*`, no `session.write`, no `memory.write` without explicit allowlist) +- MUST implement `Check(extName, capability string) error` returning typed `ErrCapabilityDenied` with the method, required grants, and granted grants in the error data +- MUST implement `CheckHostAPI(extName, method string) error` enforcing `granted_actions` (method-level) AND `granted_security` (family-level) per protocol spec section 5.2 +- MUST implement `Register(extName string, source ExtensionSource, manifest *Manifest)` that computes effective grants from manifest + source-tier policy +- MUST apply source-tier policy BEFORE consulting manifest requests (tier acts as a ceiling) +- MUST return all security grant denials via `-32001 capability_denied` equivalent Go error + + +## Subtasks +- [ ] 4.1 Define `ExtensionSource` enum with documented trust tier semantics +- [ ] 4.2 Implement `CapabilityChecker` struct with register, check, and check Host API methods +- [ ] 4.3 Implement source-trust tier policy with default grants per tier +- [ ] 4.4 Implement typed `ErrCapabilityDenied` error with structured data +- [ ] 4.5 Write table-driven tests for all source-tier combinations + +## Implementation Details + +New file 
`internal/extension/capability.go` and `internal/extension/capability_test.go`. Extends the package created in task 03. + +See TechSpec "Capability Checker" section and ADR-003 for the security model. See `_protocol.md` sections 4.5 and 5.2 for the dual-layer enforcement model (granted_actions AND granted_security). + +The `ExtensionSource` enum should mirror the existing `skills.SkillSource` pattern for consistency. + +### Relevant Files +- `internal/extension/manifest.go` — Provides `Manifest.Security.Capabilities` and `Manifest.Actions.Requires` (task 03) +- `internal/skills/types.go` — Existing `SkillSource` enum to mirror for `ExtensionSource` +- `internal/hooks/permission.go` — Existing permission enforcement guard pattern to follow + +### Dependent Files +- `internal/extension/manager.go` — Will invoke `Register()` for each extension at load time (task 06) +- `internal/extension/host_api.go` — Will invoke `CheckHostAPI()` for every method call (task 07) + +### Related ADRs +- [ADR-003: Capability-Scoped Security Model](adrs/adr-003.md) — This task implements the normative decision + +## Deliverables +- New `internal/extension/capability.go` with `CapabilityChecker`, `ExtensionSource`, `ErrCapabilityDenied` +- Source-trust tier default policy map +- Unit tests with 80%+ coverage **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `Check()` succeeds when extension has granted capability + - [ ] `Check()` returns `ErrCapabilityDenied` when extension lacks capability + - [ ] `CheckHostAPI()` succeeds when both granted_actions and granted_security are satisfied + - [ ] `CheckHostAPI()` fails when granted_actions missing even if granted_security satisfies + - [ ] `CheckHostAPI()` fails when granted_security missing even if granted_actions satisfies + - [ ] Bundled source grants all capabilities by default + - [ ] User source grants all capabilities by default + - [ ] Workspace source grants all capabilities by default + - [ ] Marketplace source denies `permission.*` 
without explicit allowlist + - [ ] Marketplace source denies `session.write` without explicit allowlist + - [ ] Marketplace source denies `memory.write` without explicit allowlist + - [ ] Marketplace source allows `session.read`, `memory.read`, `observe.read` by default + - [ ] `Register()` applies source-tier ceiling to manifest requests + - [ ] `ErrCapabilityDenied` includes method, required, and granted fields in data + - [ ] Wildcard grant `["*"]` authorizes all capabilities in that family +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `make verify` passes +- All source-tier combinations covered in tests diff --git a/.compozy/tasks/ext-architecture/task_05.md b/.compozy/tasks/ext-architecture/task_05.md new file mode 100644 index 000000000..3aa80d8e1 --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_05.md @@ -0,0 +1,97 @@ +--- +status: pending +title: Extension registry (SQLite) +type: backend +complexity: medium +dependencies: + - task_03 + - task_04 +--- + +# Task 05: Extension registry (SQLite) + +## Overview + +Create the extension registry backed by SQLite in the existing global database (`~/.agh/agh.db`). The registry persists extension installation state, version, source, enabled/disabled state, declared capabilities, and SHA-256 checksums for artifact verification. This is the durable source of truth for what extensions are installed on a daemon. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `internal/extension/registry.go` with `Registry` struct operating on `*sql.DB` +- MUST add new `extensions` table to `internal/store/globaldb/global_db.go` schema +- MUST define table columns per TechSpec Extension Registry table: `name` (PK), `version`, `source`, `enabled`, `manifest_path`, `installed_at`, `capabilities` (JSON), `actions` (JSON), `checksum` +- MUST implement CRUD operations: `Install(manifest, path, checksum) error`, `Uninstall(name) error`, `Enable(name) error`, `Disable(name) error`, `List() ([]ExtensionInfo, error)`, `Get(name) (*ExtensionInfo, error)` +- MUST use `IF NOT EXISTS` schema migration pattern (matching existing globaldb pattern) +- MUST verify checksum on Install against provided manifest artifact +- MUST serialize capabilities and actions as JSON in DB and deserialize back to typed structs +- MUST return typed `ErrExtensionNotFound` when extension doesn't exist +- MUST prevent duplicate installations by name (return `ErrExtensionExists`) + + +## Subtasks +- [ ] 5.1 Add `extensions` table to `globalSchemaStatements` in `internal/store/globaldb/global_db.go` +- [ ] 5.2 Create `internal/extension/registry.go` with `Registry` struct and `ExtensionInfo` type +- [ ] 5.3 Implement CRUD operations with parameterized SQL queries +- [ ] 5.4 Implement SHA-256 checksum verification on install +- [ ] 5.5 Write unit and integration tests using `t.TempDir()` for isolated SQLite instances + +## Implementation Details + +Add schema statement to `internal/store/globaldb/global_db.go`. Create `internal/extension/registry.go` and `internal/extension/registry_test.go`. 
+ +See TechSpec "Data Models" section for the Extension Registry table schema. + +Follow the existing pattern in `internal/store/globaldb/global_db.go` for schema declaration and table access. Use parameterized queries exclusively (no string concatenation). + +### Relevant Files +- `internal/store/globaldb/global_db.go` — Existing schema pattern and connection management +- `internal/extension/manifest.go` — `Manifest` struct provides values to persist (task 03) +- `internal/extension/capability.go` — `ExtensionSource` enum used in table column (task 04) +- `internal/skills/provenance.go` — Existing checksum verification pattern to follow + +### Dependent Files +- `internal/extension/manager.go` — Will use Registry to list and load enabled extensions at boot (task 06) +- `internal/cli/extension.go` — Will use Registry for list/install/enable/disable commands (task 09) + +### Related ADRs +- [ADR-005: Extension Three-Dimensional Package Model](adrs/adr-005.md) — Registry persists the extension identity + +## Deliverables +- Extended `internal/store/globaldb/global_db.go` with `extensions` table +- New `internal/extension/registry.go` with `Registry` struct, `ExtensionInfo`, CRUD methods +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for full install → enable → disable → uninstall lifecycle **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `Install()` persists extension to DB with correct fields + - [ ] `Install()` rejects duplicate name with `ErrExtensionExists` + - [ ] `Install()` with wrong checksum returns verification error + - [ ] `Get()` returns `ExtensionInfo` for existing extension + - [ ] `Get()` returns `ErrExtensionNotFound` for missing extension + - [ ] `List()` returns all installed extensions + - [ ] `List()` returns empty slice when none installed (not nil) + - [ ] `Enable()` sets `enabled=true` in DB + - [ ] `Disable()` sets `enabled=false` in DB + - [ ] `Uninstall()` removes extension from DB + - [ ] `Uninstall()` on missing 
extension returns `ErrExtensionNotFound` + - [ ] Capabilities JSON round-trip preserves all fields + - [ ] Actions JSON round-trip preserves all fields + - [ ] Schema migration runs idempotently (IF NOT EXISTS) +- Integration tests: + - [ ] Full lifecycle: install → list → enable → disable → uninstall + - [ ] Two extensions with different sources coexist in same DB +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `extensions` table created on first daemon boot after upgrade +- `make verify` passes diff --git a/.compozy/tasks/ext-architecture/task_06.md b/.compozy/tasks/ext-architecture/task_06.md new file mode 100644 index 000000000..eea3abd8a --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_06.md @@ -0,0 +1,110 @@ +--- +status: pending +title: Extension Manager (lifecycle orchestrator) +type: backend +complexity: high +dependencies: + - task_02 + - task_04 + - task_05 +--- + +# Task 06: Extension Manager (lifecycle orchestrator) + +## Overview + +Create the Extension Manager that orchestrates the 6-phase extension loading pipeline: DISCOVER → PARSE → VALIDATE → REGISTER → INITIALIZE → ACTIVATE. The Manager owns extension subprocess lifecycle, wires extensions into the existing hook declaration system, performs capability-negotiated handshakes, and handles crash recovery with exponential backoff. This is the critical-path component that ties together the manifest parser, capability checker, registry, and subprocess primitives. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `internal/extension/manager.go` with `Manager` struct following AGH's functional options pattern +- MUST implement `NewManager(registry *Registry, opts ...Option) *Manager` +- MUST implement `Start(ctx) error` that executes the 6-phase loading pipeline for every enabled extension +- MUST implement `Stop(ctx) error` that gracefully shuts down all subprocess extensions per protocol spec section 8 +- MUST implement each of the 6 phases independently with clear error propagation +- MUST register extension resources (skills, agents, hooks, MCP configs) into the existing AGH registries without duplicating them +- MUST expose `HookDeclarations(ctx) ([]hooks.HookDecl, error)` for wiring into the existing `hooks.DeclarationProvider` pattern +- MUST launch subprocess extensions via `internal/subprocess/` package +- MUST perform capability-negotiated initialize handshake per protocol spec section 4 +- MUST handle subprocess crash recovery with exponential backoff (1s, 2s, 4s, 8s, max 60s) and disable extension after 5 consecutive failures +- MUST handle subprocess hang via health check timeout (SIGTERM → wait 10s → SIGKILL) +- MUST expose extension health status via `ExtensionStatus` struct consumable by observer/health endpoint + + +## Subtasks +- [ ] 6.1 Create `Manager` struct with functional options and dependencies (registry, capability checker, subprocess package) +- [ ] 6.2 Implement 6-phase loading pipeline with per-phase error isolation +- [ ] 6.3 Implement resource registration wiring into existing skills, agent def, hook declaration systems +- [ ] 6.4 Implement subprocess launch with handshake using 
`internal/subprocess/` primitives +- [ ] 6.5 Implement crash recovery with exponential backoff and failure threshold +- [ ] 6.6 Implement `HookDeclarations()` provider for wiring into `hooks.Rebuild()` +- [ ] 6.7 Write unit and integration tests covering pipeline phases, recovery, and shutdown + +## Implementation Details + +New file `internal/extension/manager.go` and `internal/extension/manager_test.go`. This is the largest single component of the extension architecture and pulls together tasks 02-05. + +See TechSpec "Extension Loading Pipeline" section for the 6 phases. See TechSpec "Core Interfaces" for the `Manager` struct shape. See `_protocol.md` sections 3 and 4 for lifecycle and handshake rules. + +Resource registration must NOT duplicate existing registries — the Manager calls into `skills.Registry`, appends to the hook declaration provider chain, and registers agent definitions through the existing config pattern. + +### Relevant Files +- `internal/extension/registry.go` — Persistent extension state (task 05) +- `internal/extension/manifest.go` — Manifest parsing (task 03) +- `internal/extension/capability.go` — Capability enforcement (task 04) +- `internal/subprocess/process.go` — Subprocess lifecycle primitives (task 02) +- `internal/hooks/hooks.go` — `DeclarationProvider` pattern that Manager plugs into +- `internal/skills/registry.go` — Skills registry for resource registration +- `internal/daemon/hooks_bridge.go` — Existing pattern for wiring declaration providers + +### Dependent Files +- `internal/daemon/boot.go` — Will initialize Manager in new boot phase (task 08) +- `internal/extension/host_api.go` — Will use Manager to look up extensions and enforce capabilities (task 07) +- `internal/cli/extension.go` — Will use Manager for install/enable/disable operations (task 09) + +### Related ADRs +- [ADR-001: Two-Tier Extension Model](adrs/adr-001.md) — Manager owns L3 subprocess tier +- [ADR-005: Extension Three-Dimensional Package 
Model](adrs/adr-005.md) — Manager implements resource/capability/action loading phases + +## Deliverables +- New `internal/extension/manager.go` with `Manager` struct, functional options, 6-phase pipeline +- Extension hook declaration provider for wiring into existing hooks system +- Crash recovery with exponential backoff +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for full extension lifecycle **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `Start()` runs all 6 phases for each enabled extension + - [ ] `Start()` skips disabled extensions from registry + - [ ] DISCOVER phase finds manifests in configured extension directories + - [ ] PARSE phase returns error when manifest is invalid, continues other extensions + - [ ] VALIDATE phase rejects extensions with incompatible `min_agh_version` + - [ ] REGISTER phase adds resources to skills and hooks registries + - [ ] INITIALIZE phase launches subprocess and performs handshake + - [ ] ACTIVATE phase marks extension live and available for Host API + - [ ] Subprocess crash triggers restart with backoff + - [ ] 5 consecutive crashes disables extension and logs error + - [ ] `Stop()` sends shutdown to all subprocesses then waits with timeout + - [ ] `Stop()` escalates to SIGKILL after shutdown timeout + - [ ] `HookDeclarations()` returns declarations from all loaded extensions + - [ ] Failed extension in one phase does not block other extensions +- Integration tests: + - [ ] End-to-end: load test extension → handshake → receive Host API call → shutdown + - [ ] Restart recovery: kill subprocess → verify restart with correct backoff timing + - [ ] Resource registration: install extension with skills → verify skills appear in registry +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Full 6-phase pipeline validated end-to-end +- `make verify` passes diff --git a/.compozy/tasks/ext-architecture/task_07.md 
b/.compozy/tasks/ext-architecture/task_07.md new file mode 100644 index 000000000..367a0f8ef --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_07.md @@ -0,0 +1,109 @@ +--- +status: pending +title: Host API handler (bidirectional JSON-RPC) +type: backend +complexity: high +dependencies: + - task_04 + - task_06 +--- + +# Task 07: Host API handler (bidirectional JSON-RPC) + +## Overview + +Implement the Host API handler that processes JSON-RPC requests from extensions calling back into AGH. Extensions invoke methods like `sessions/create`, `memory/store`, and `observe/events` to drive AGH workflows. Every call is capability-checked against the extension's negotiated grants per ADR-003 and the protocol spec section 5.2. The handler bridges the subprocess transport into AGH's existing session manager, memory store, observer, and skills registry. + + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `internal/extension/host_api.go` with `HostAPIHandler` struct +- MUST implement all Host API methods from `_protocol.md` section 5.2: `sessions/list`, `sessions/create`, `sessions/prompt`, `sessions/stop`, `sessions/status`, `sessions/events`, `memory/recall`, `memory/store`, `memory/forget`, `observe/health`, `observe/events`, `skills/list` +- MUST enforce capability grants using the `CapabilityChecker` before executing any method (both granted_actions method-level AND granted_security family-level) +- MUST return typed JSON-RPC errors per `_protocol.md` section 9: `-32001 capability_denied`, `-32002 rate_limited`, `-32601 method_not_found` +- MUST implement per-extension rate limiting with typed `-32002` error including `retry_after_ms` +- MUST delegate 
method execution to existing AGH services: `session.Manager`, `memory.Store`, `observe.Observer`, `skills.Registry` +- MUST NOT expose any AGH internals not listed in the protocol spec Host API inventory +- MUST include `since?` parameter in `observe/events` per protocol spec +- MUST handle unknown method names with `-32601 method_not_found` per protocol spec section 9.3 + + +## Subtasks +- [ ] 7.1 Create `HostAPIHandler` struct with dependencies on session manager, memory store, observer, skills registry +- [ ] 7.2 Implement request dispatcher that maps method names to handler functions +- [ ] 7.3 Implement `sessions/*` method handlers (list, create, prompt, stop, status, events) +- [ ] 7.4 Implement `memory/*` method handlers (recall, store, forget) +- [ ] 7.5 Implement `observe/*` and `skills/*` method handlers +- [ ] 7.6 Implement per-extension rate limiting with typed error responses +- [ ] 7.7 Write unit and integration tests covering all methods and error paths + +## Implementation Details + +New file `internal/extension/host_api.go` and `internal/extension/host_api_test.go`. + +See TechSpec "Host API" section for the method inventory. See `_protocol.md` section 5.2 for the canonical table with capability requirements. See `_protocol.md` section 9 for error codes. + +The handler is transport-agnostic — it receives method + params and returns result + error. The `Manager` (task 06) wires this into the subprocess transport layer so each extension's inbound requests land in the correct `HostAPIHandler` invocation with the extension name attached for capability checks. 
+ +### Relevant Files +- `internal/extension/capability.go` — `CapabilityChecker.CheckHostAPI()` enforcement (task 04) +- `internal/extension/manager.go` — Manager provides extension context and routes inbound requests (task 06) +- `internal/session/manager.go` — Session service methods the Host API delegates to +- `internal/memory/store.go` — Memory service the Host API delegates to +- `internal/observe/observer.go` — Observer for health and events queries +- `internal/skills/registry.go` — Skills registry for skills/list +- `internal/api/httpapi/` — Existing HTTP handler pattern for similar method dispatch + +### Dependent Files +- `internal/extension/manager.go` — Manager wires HostAPIHandler into subprocess inbound message routing (task 06 already prepared) +- `internal/daemon/boot.go` — Will wire HostAPIHandler with real dependencies at boot (task 08) + +### Related ADRs +- [ADR-003: Capability-Scoped Security Model](adrs/adr-003.md) — Every handler call is capability-checked +- [ADR-005: Extension Three-Dimensional Package Model](adrs/adr-005.md) — Host API is the "actions" dimension + +## Deliverables +- New `internal/extension/host_api.go` with `HostAPIHandler` struct and all methods +- Per-extension rate limiting implementation +- Typed error responses matching `_protocol.md` error codes +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests with a real session manager and memory store **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `sessions/list` returns authorized sessions for extension with `session.read` + - [ ] `sessions/list` returns `-32001 capability_denied` without `session.read` + - [ ] `sessions/create` returns new session ID with `session.write` + - [ ] `sessions/create` returns `-32001` without `session.write` + - [ ] `sessions/prompt` delivers message to session with correct turn ID + - [ ] `sessions/stop` terminates the session + - [ ] `sessions/status` returns session state for authorized extensions + - [ ] 
`sessions/events` returns event stream with optional `since` parameter + - [ ] `memory/store` persists content with tags + - [ ] `memory/recall` returns ranked matches + - [ ] `memory/forget` removes entries + - [ ] `observe/health` returns daemon health snapshot + - [ ] `observe/events` returns filtered events with `since` parameter + - [ ] `skills/list` returns skills for workspace + - [ ] Unknown method returns `-32601 method_not_found` + - [ ] Rate limit exceeded returns `-32002` with `retry_after_ms` in data + - [ ] All methods return typed error data with method name and required capabilities +- Integration tests: + - [ ] Extension creates session via Host API → session runs → extension reads events back + - [ ] Extension stores memory then recalls it + - [ ] Unauthorized extension attempts all methods → all return capability denied +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- All 12 Host API methods implemented and tested +- Capability enforcement validated for every method +- `make verify` passes diff --git a/.compozy/tasks/ext-architecture/task_08.md b/.compozy/tasks/ext-architecture/task_08.md new file mode 100644 index 000000000..585ed1c9a --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_08.md @@ -0,0 +1,91 @@ +--- +status: pending +title: Daemon boot integration +type: backend +complexity: medium +dependencies: + - task_06 +--- + +# Task 08: Daemon boot integration + +## Overview + +Wire the Extension Manager into AGH's daemon composition root. Add a new boot phase between the hooks system initialization and the servers startup. The Extension Manager must be initialized with real dependencies (session manager, memory store, observer, skills registry), register extension-provided hook declarations into the existing hooks rebuild cycle, and participate in the graceful shutdown sequence. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST add a new boot phase in `internal/daemon/boot.go` between `bootHooks()` and `bootServers()` +- MUST initialize `extension.Manager` with real dependencies: session manager, memory store, observer, skills registry, extension registry from global DB +- MUST wire the extension declaration provider into the existing hooks `DeclarationProvider` chain via `hooks_bridge.go` +- MUST trigger `hooks.Rebuild()` after Extension Manager starts so extension-provided hooks are dispatched +- MUST add Extension Manager to the daemon shutdown cleanup chain so extensions are stopped before servers (note: because `bootExtensions()` runs before `bootServers()`, a strict LIFO chain would stop servers first — register the extension cleanup so that it runs ahead of the servers' cleanup, and verify the resulting order in tests) +- MUST NOT break any existing daemon boot tests +- MUST handle extension manager start failure gracefully (log and continue boot — extensions are not critical) +- MUST emit log events at each extension lifecycle transition (loaded, failed, shutdown) + + +## Subtasks +- [ ] 8.1 Add `bootExtensions()` phase in `internal/daemon/boot.go` between `bootHooks` and `bootServers` +- [ ] 8.2 Wire `extension.Manager` with real session manager, memory, observer, skills dependencies +- [ ] 8.3 Extend `internal/daemon/hooks_bridge.go` with extension declaration provider +- [ ] 8.4 Trigger `hooks.Rebuild()` after extension manager starts +- [ ] 8.5 Add extension manager Stop() to the cleanup chain so that it runs before the servers' cleanup +- [ ] 8.6 Write integration tests validating extension boot phase and shutdown order + +## Implementation Details + +Modify `internal/daemon/boot.go` and `internal/daemon/hooks_bridge.go`. No new files — this is pure integration work. 
+ +See TechSpec "Integration Points / Daemon Composition Root" section for the boot phase location and wiring pattern. + +The new `bootExtensions` phase follows the existing `bootXxx()` pattern: takes a context, returns a cleanup function, logs its progress. Extension Manager failure should NOT block daemon boot — log and continue with zero extensions loaded. + +### Relevant Files +- `internal/daemon/boot.go` — Boot phase sequence; add new `bootExtensions()` phase +- `internal/daemon/hooks_bridge.go` — Existing `daemonNativeHooks()` and declaration provider patterns +- `internal/daemon/daemon.go` — `Daemon` struct and option pattern +- `internal/extension/manager.go` — Manager to initialize (task 06) +- `internal/hooks/hooks.go` — `hooks.Rebuild()` and `DeclarationProvider` pattern + +### Dependent Files +- `internal/daemon/daemon_test.go` — Existing daemon tests that must continue passing +- `internal/extension/manager.go` — Will be the consumer of the wired dependencies + +### Related ADRs +- [ADR-001: Two-Tier Extension Model](adrs/adr-001.md) — Establishes that extensions are wired at daemon boot composition + +## Deliverables +- Modified `internal/daemon/boot.go` with new `bootExtensions` phase +- Extended `internal/daemon/hooks_bridge.go` with extension declaration provider +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for full daemon boot with extensions enabled **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `bootExtensions()` initializes Manager with correct dependencies + - [ ] `bootExtensions()` returns no-op cleanup when no extensions installed + - [ ] Extension Manager start failure logs error but does not fail boot + - [ ] Extension declaration provider returns declarations from loaded extensions + - [ ] `hooks.Rebuild()` is called after Extension Manager starts + - [ ] Shutdown order: Extension Manager stops before servers +- Integration tests: + - [ ] Full daemon boot with one test extension → extension loads → hooks rebuild 
→ extension stops on daemon shutdown + - [ ] Daemon boot with corrupt extension → logs error, continues boot, other extensions load normally + - [ ] Extension provides hook declarations → hook dispatches route to extension correctly +- Test coverage target: >=80% +- All existing daemon tests continue passing + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Daemon boots successfully with zero, one, and multiple extensions +- Shutdown order verified via integration test +- `make verify` passes diff --git a/.compozy/tasks/ext-architecture/task_09.md b/.compozy/tasks/ext-architecture/task_09.md new file mode 100644 index 000000000..defb5a352 --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_09.md @@ -0,0 +1,95 @@ +--- +status: pending +title: CLI commands (list, install, enable, disable) +type: backend +complexity: medium +dependencies: + - task_05 + - task_06 +--- + +# Task 09: CLI commands (list, install, enable, disable) + +## Overview + +Add the `agh extension` subcommand tree to the existing Cobra CLI. Users install local extensions via `agh extension install <path>`, list what is registered, enable or disable extensions without uninstalling, and inspect runtime status. The commands talk to the daemon via the existing UDS API transport when the daemon is running, and operate directly on the registry when it is offline. Output honors the standard AGH `--format` flag supporting human-readable, JSON, and TOON. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `internal/cli/extension.go` with Cobra command tree: `agh extension {list, install, enable, disable, status}` +- MUST implement `install` subcommand accepting a local directory path, parsing the manifest, computing checksum, and registering via the extension registry +- MUST implement `list` subcommand showing name, version, source, enabled state, capabilities +- MUST implement `enable <name>` and `disable <name>` subcommands that update registry state and trigger a daemon reload if daemon is running +- MUST implement `status <name>` showing runtime state, PID, uptime, health, last error +- MUST honor the existing `--format` flag (human, json, toon) used by other CLI commands +- MUST communicate with the running daemon via the existing UDS API client if the daemon is running, and operate directly on the registry when daemon is offline +- MUST return clear error messages when extension directory is missing, manifest is invalid, or checksum verification fails +- MUST NOT implement git URL installation, marketplace fetch, or remote install in this task (deferred) + + +## Subtasks +- [ ] 9.1 Create `internal/cli/extension.go` with root `agh extension` command and subcommands +- [ ] 9.2 Implement `install <path>` subcommand with manifest parsing and registry write +- [ ] 9.3 Implement `list` subcommand with --format support +- [ ] 9.4 Implement `enable`, `disable`, `status` subcommands +- [ ] 9.5 Add `agh extension` command tree to `internal/cli/root.go` +- [ ] 9.6 Write unit and integration tests using CLI test harness + +## Implementation Details + +New file `internal/cli/extension.go`. 
Extend `internal/cli/root.go` to register the new command tree. Follow the existing command patterns used by `skill`, `agent`, and `workspace` commands. + +See TechSpec "Development Sequencing" section for the CLI scope. See `_examples.md` section 8 for the expected output format. + +### Relevant Files +- `internal/cli/root.go` — Root command registration for CLI tree +- `internal/cli/skill.go` — Existing skill command tree to mirror +- `internal/cli/agent.go` — Existing agent command tree for format handling pattern +- `internal/extension/registry.go` — Registry CRUD operations (task 05) +- `internal/extension/manager.go` — Manager for runtime status queries (task 06) +- `internal/extension/manifest.go` — Manifest loader used by install (task 03) +- `internal/api/udsapi/` — UDS client for communicating with running daemon + +### Dependent Files +- Nothing depends on this task; it is a user-facing leaf + +### Related ADRs +- [ADR-005: Extension Three-Dimensional Package Model](adrs/adr-005.md) — CLI exposes the resource/capability/action model to users + +## Deliverables +- New `internal/cli/extension.go` with full command tree +- Updated `internal/cli/root.go` registering the extension command +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for install → list → status → disable flow via CLI **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `install` parses valid manifest directory and calls registry.Install + - [ ] `install` with missing directory returns clear error + - [ ] `install` with invalid manifest returns parsing error + - [ ] `install` with checksum mismatch returns verification error + - [ ] `list` outputs human format with columns: name, version, source, state, capabilities + - [ ] `list --format=json` outputs valid JSON array + - [ ] `list --format=toon` outputs TOON format + - [ ] `enable <name>` sets registry enabled=true + - [ ] `enable <name>` for an unregistered extension returns `ErrExtensionNotFound` + - [ ] `disable <name>` sets registry enabled=false + - [ ] `status <name>`
shows runtime state when daemon is running + - [ ] `status <name>` shows registry-only state when daemon is offline +- Integration tests: + - [ ] Full CLI flow: install test extension → list shows it → status shows active → disable → list shows disabled +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- All 5 subcommands functional with format support +- `make verify` passes diff --git a/.compozy/tasks/ext-architecture/task_10.md b/.compozy/tasks/ext-architecture/task_10.md new file mode 100644 index 000000000..3f00129d0 --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_10.md @@ -0,0 +1,107 @@ +--- +status: pending +title: TypeScript SDK (@agh/extension-sdk) +type: frontend +complexity: high +dependencies: + - task_06 + - task_07 +--- + +# Task 10: TypeScript SDK (@agh/extension-sdk) + +## Overview + +Create the TypeScript SDK npm package that TypeScript extension authors use to build AGH extensions. The SDK provides an `Extension` class that handles the JSON-RPC 2.0 stdio transport, initialize handshake, inbound method routing, and a typed `HostAPI` client for calling back into AGH. It also ships a test harness (mock transport) and a scaffolding CLI (`npx @agh/create-extension`) with starter templates for common extension types. 
+ + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create `@agh/extension-sdk` npm package with TypeScript source and compiled JavaScript output +- MUST implement `Extension` class per `_examples.md` section 3 with `handle()`, `onReady()`, `start()` methods +- MUST implement `StdioTransport` class providing JSON-RPC 2.0 over stdin/stdout line-delimited framing +- MUST implement typed `HostAPI` client exposing `sessions.*`, `memory.*`, `observe.*`, `skills.*` methods matching `_protocol.md` section 5.2 +- MUST implement initialize handshake per `_protocol.md` section 4 — send extension info, accept runtime grants, respond with accepted capabilities +- MUST handle bidirectional, multiplexed JSON-RPC (multiple outstanding requests in both directions) +- MUST expose a `TestHarness` class in `@agh/extension-sdk/testing` that allows unit testing extensions without spawning a real subprocess +- MUST emit extension log messages to `stderr` (stdout is reserved for protocol frames per `_protocol.md` section 1.1) +- MUST provide TypeScript type definitions matching the Go contracts (Tool, Manifest sections, hook payloads) +- MUST create a scaffolding CLI `npx @agh/create-extension` with at least two templates: `hook-subprocess` and `memory-backend` +- MUST publish as ESM with CommonJS fallback for Node.js compatibility +- MUST target Node.js 18+ + + +## Subtasks +- [ ] 10.1 Initialize `@agh/extension-sdk` npm package with TypeScript configuration +- [ ] 10.2 Implement `StdioTransport` with line-delimited JSON-RPC framing and multiplexing +- [ ] 10.3 Implement `Extension` class with initialize handshake, handle(), onReady(), start() +- [ ] 10.4 Implement typed `HostAPI` 
client for all Host API methods +- [ ] 10.5 Implement `TestHarness` for unit testing extensions +- [ ] 10.6 Create scaffolding CLI `@agh/create-extension` with starter templates +- [ ] 10.7 Write unit tests using Vitest and the in-memory transport + +## Implementation Details + +New directory at `sdk/typescript/` (or similar) with `package.json`, `tsconfig.json`, source in `src/`, tests in `src/*.test.ts`. + +See TechSpec "TypeScript SDK" section for the package structure. See `_examples.md` section 3 for the developer-facing API. See `_protocol.md` sections 1-5 for the wire protocol the transport must implement. + +Follow AGH's existing web frontend patterns for TypeScript conventions (biome formatting, Vitest testing). + +### Relevant Files +- `web/` — Existing TypeScript patterns, biome config, testing setup to mirror +- `internal/extension/host_api.go` — Source of truth for Host API method signatures (task 07) +- `internal/extension/manifest.go` — Manifest types to mirror in TypeScript (task 03) +- `internal/tools/tool.go` — Tool types to mirror (task 01) + +### Dependent Files +- `.compozy/tasks/ext-architecture/task_11.md` — Reference extensions will use this SDK (task 11) + +### Related ADRs +- [ADR-001: Two-Tier Extension Model](adrs/adr-001.md) — TypeScript as first-class subprocess extension language +- [ADR-004: Generalize ACP as Subprocess Extension Protocol](adrs/adr-004.md) — TypeScript transport mirrors Go subprocess transport + +## Deliverables +- New `sdk/typescript/` package directory with `package.json`, `tsconfig.json`, source +- `@agh/extension-sdk` with `Extension`, `StdioTransport`, `HostAPI`, type definitions +- `@agh/extension-sdk/testing` subpath export with `TestHarness` +- `create-extension` scaffolding CLI with two templates +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration test exercising a real subprocess running an SDK-built extension **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `StdioTransport` encodes one JSON 
object per line + - [ ] `StdioTransport` decodes multiple concurrent requests correctly + - [ ] `StdioTransport` rejects messages over 10 MiB + - [ ] `StdioTransport` ignores notifications (no id field) + - [ ] `Extension.start()` performs initialize handshake first + - [ ] `Extension.handle()` routes inbound requests to correct handler + - [ ] `Extension.handle()` returns error if method not registered + - [ ] `Extension.onReady()` fires after successful handshake + - [ ] `HostAPI.sessions.create()` sends correct JSON-RPC request + - [ ] `HostAPI.sessions.list()` parses response array + - [ ] `HostAPI.memory.store()` sends correct params + - [ ] `HostAPI.observe.events()` supports `since` parameter + - [ ] Capability denied error throws typed error with code -32001 + - [ ] Rate limited error throws typed error with retry_after_ms + - [ ] `TestHarness.mockHostAPI()` returns mocked responses + - [ ] `TestHarness.loadExtension()` loads extension without spawning subprocess + - [ ] `TestHarness.call()` invokes extension handlers directly +- Integration tests: + - [ ] Build an SDK-based extension, spawn it as a subprocess, send real JSON-RPC, verify responses +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Package builds via `npm run build` +- `npx @agh/create-extension` scaffolds a working starter project +- `make verify` still passes in Go workspace diff --git a/.compozy/tasks/ext-architecture/task_11.md b/.compozy/tasks/ext-architecture/task_11.md new file mode 100644 index 000000000..e14ee4963 --- /dev/null +++ b/.compozy/tasks/ext-architecture/task_11.md @@ -0,0 +1,99 @@ +--- +status: pending +title: Reference extensions (Go and TypeScript) +type: docs +complexity: medium +dependencies: + - task_06 + - task_07 + - task_10 +--- + +# Task 11: Reference extensions (Go and TypeScript) + +## Overview + +Build two working reference extensions that demonstrate the full extension architecture 
end-to-end: one in Go and one in TypeScript. The Go extension exercises the core L3 subprocess path directly. The TypeScript extension validates the `@agh/extension-sdk` package in realistic conditions. Both extensions are installed into a real AGH daemon in an integration test, which exercises the entire pipeline from manifest parsing through handshake, hook dispatch, and Host API calls. + + +- ALWAYS READ the PRD and TechSpec before starting +- REFERENCE TECHSPEC for implementation details — do not duplicate here +- FOCUS ON "WHAT" — describe what needs to be accomplished, not how +- MINIMIZE CODE — show code only to illustrate current structure or problem areas +- TESTS REQUIRED — every task MUST include tests in deliverables + + + +- MUST create one Go reference extension in `sdk/examples/secret-guard/` that implements a content validator hook per `_examples.md` section 1 +- MUST create one TypeScript reference extension in `sdk/examples/prompt-enhancer/` that implements a prompt enhancer hook per `_examples.md` section 4 +- MUST provide a complete `extension.toml` manifest for each reference extension +- MUST include build instructions (Makefile or README) for each reference extension +- MUST write an end-to-end integration test that installs both extensions into a real AGH daemon and verifies: + - extensions load successfully via the 6-phase pipeline + - capability-negotiated handshake completes + - hooks dispatch to the extensions and patches are applied + - shutdown sequence works correctly +- MUST use the Go SDK primitives (task 02 subprocess package) or direct JSON-RPC for the Go extension +- MUST use `@agh/extension-sdk` (task 10) for the TypeScript extension +- MUST NOT introduce external dependencies beyond what the SDKs provide + + +## Subtasks +- [ ] 11.1 Create `sdk/examples/secret-guard/` Go extension with manifest, main.go, build instructions +- [ ] 11.2 Create `sdk/examples/prompt-enhancer/` TypeScript extension with manifest, index.ts, build 
instructions +- [ ] 11.3 Write end-to-end integration test spawning real daemon with both extensions installed +- [ ] 11.4 Verify capability enforcement end-to-end (extension with limited grants cannot call write methods) +- [ ] 11.5 Document extension author onboarding steps in each example README + +## Implementation Details + +New directories `sdk/examples/secret-guard/` (Go) and `sdk/examples/prompt-enhancer/` (TypeScript). New integration test in `internal/extension/integration_test.go` with build tag. + +See `_examples.md` sections 1 and 4 for the full extension code. See `_protocol.md` for protocol compliance requirements. + +The integration tests build the reference extension binaries as part of the test setup (`TestMain` with `go build` and `npm run build`). Use `t.TempDir()` to isolate the daemon's state. + +### Relevant Files +- `internal/extension/manager.go` — Manager that loads extensions (task 06) +- `internal/extension/host_api.go` — Host API handler (task 07) +- `sdk/typescript/` — TypeScript SDK (task 10) +- `internal/subprocess/` — Go subprocess primitives (task 02) +- `_examples.md` — Reference extension code and manifests + +### Dependent Files +- None. This is the validation leaf of the task graph. 
+ +### Related ADRs +- [ADR-001: Two-Tier Extension Model](adrs/adr-001.md) — Validates both Go and TypeScript subprocess paths +- [ADR-003: Capability-Scoped Security Model](adrs/adr-003.md) — Validates end-to-end capability enforcement +- [ADR-005: Extension Three-Dimensional Package Model](adrs/adr-005.md) — Validates resource/capability/action loading + +## Deliverables +- Working Go reference extension at `sdk/examples/secret-guard/` +- Working TypeScript reference extension at `sdk/examples/prompt-enhancer/` +- End-to-end integration test in `internal/extension/integration_test.go` with build tag `//go:build integration` +- Per-extension README with build and install instructions +- Unit tests with 80%+ coverage for any shared test helpers **(REQUIRED)** +- Integration tests for end-to-end extension flow **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Shared test helpers compile and run without requiring a daemon +- Integration tests: + - [ ] Go secret-guard extension loads, handshakes, rejects prompt containing `sk-abc123` + - [ ] Go secret-guard extension accepts safe prompt and returns `allow: true` + - [ ] TypeScript prompt-enhancer loads, handshakes, injects workspace context into prompt + - [ ] Both extensions coexist in the same daemon simultaneously + - [ ] Shutdown sequence gracefully stops both extensions + - [ ] Extension with limited capabilities cannot call `sessions/create` via Host API (capability denied) + - [ ] Extension subprocess crash triggers automatic restart with backoff +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Both reference extensions build and install into a real daemon +- Full end-to-end path validated from manifest parse through hook dispatch through shutdown +- `make verify` passes +- `make test-integration` passes diff --git a/.compozy/tasks/kb-refac/_techspec.md b/.compozy/tasks/kb-refac/_techspec.md new file mode 100644 index 
000000000..953079bb7 --- /dev/null +++ b/.compozy/tasks/kb-refac/_techspec.md @@ -0,0 +1,289 @@ +# TechSpec: Kodebase Refactoring — Code Quality & Structural Improvements + +## Executive Summary + +This TechSpec documents refactoring opportunities identified by static analysis of the AGH codebase using kodebase vault inspection across six dimensions: cyclomatic complexity, dead code, code smells, coupling/instability, circular dependencies, and architecture health. The codebase is structurally sound — zero circular dependencies, clean DAG, proper composition root discipline — but carries complexity hotspots, god files, duplicated logic, dead exports, and coupling concentrations that will compound as the system grows. The refactoring targets are organized into three priority tiers with a recommended attack order. + +## Codebase Health Baseline + +| Metric | Value | +|--------|-------| +| Total files | 516 | +| Total symbols | 5,296 | +| Total relations | 16,313 | +| Go files | 290 | +| TS/TSX files | 226 | +| Circular dependencies | 0 | +| Highest cyclomatic complexity | 54 (`boot()`) | +| Highest blast radius | 89 (`newHandlers`) | +| Confirmed dead Go exports | 5 | +| Test-only production exports | 5 | +| Unused UI components | 16 | + +## P0 — Critical Refactoring Targets + +### 1. `boot()` — Monolithic Daemon Bootstrap + +- **File**: `internal/daemon/boot.go` +- **Complexity**: 54 | **LOC**: 351 +- **Problem**: Sequential initialization of ~15 subsystems (config, logger, memory, skills, lock, registry, workspace, dream, hooks, observer, servers) with deeply nested `if cfg.X.Enabled` branches and nil-guarding every optional subsystem. Error-handling cleanup callbacks accumulate in a slice, further inflating branch count. +- **Blast radius**: High — composition root, changes here ripple through the entire daemon startup path. 
+ +**Refactoring approach**: Extract initialization into phase methods: + +```go +func (d *Daemon) boot(ctx context.Context) error { + cfg, err := d.bootConfig(ctx) + if err != nil { return err } + mem, cleanupMem, err := d.bootMemory(ctx, cfg) + if err != nil { return err } + skills, cleanupSkills, err := d.bootSkills(ctx, cfg) + if err != nil { return err } + servers, cleanupServers, err := d.bootServers(ctx, cfg) + if err != nil { return err } + // ... register cleanups, return +} +``` + +Each phase method returns its deps and cleanup function. Main `boot()` becomes a ~15-line pipeline calling phases in sequence. + +### 2. `Create` / `Resume` Duplication + +- **File**: `internal/session/manager_lifecycle.go` +- **`Create`**: 128 LOC | **`Resume`**: 154 LOC | **Shared**: ~80 duplicated lines +- **Problem**: Both follow an identical sequence: validate context, resolve workspace, resolve agent, build `startupPrompt`, build `SessionContext`, `reserve`, `openStore`, construct `Session` struct, build `acp.StartOpts`, `writeMeta`, `driver.Start`, `activateAndWatch`. The only differences are metadata source (opts vs stored meta) and the hook name. + +**Refactoring approach**: Extract a shared `startSession(ctx, sessionSetup)` helper method. `Create` and `Resume` become thin preambles that prepare a `sessionSetup` struct from their respective inputs, then delegate to the common path. Eliminates ~80 lines of near-identical code and ensures future lifecycle changes are applied once. + +### 3. `handleInbound()` — Repetitive JSON-RPC Dispatch + +- **File**: `internal/acp/handlers.go` +- **Complexity**: 28 | **LOC**: 91 +- **Problem**: Switch statement dispatching 9 JSON-RPC methods. Every case repeats the identical unmarshal-call-error pattern. 
+**Refactoring approach**: Replace the switch with a handler registry map: + +```go +type handlerFunc func(ctx context.Context, params json.RawMessage) (any, error) + +func (h *Handlers) handleInbound(ctx context.Context, method string, params json.RawMessage) (any, error) { + handlers := map[string]handlerFunc{ + "session/started": h.handleSessionStarted, + "message/create": h.handleMessageCreate, + "tool/execute": h.handleToolExecute, + // ... + } + + fn, ok := handlers[method] + if !ok { + return nil, fmt.Errorf("unknown method: %s", method) + } + return fn(ctx, params) +} +``` + +Reduces cyclomatic complexity from 28 to ~5. + +### 4. `HookDispatcher` — 21-Method Interface + +- **File**: `internal/session/interfaces.go:160-182` +- **Downstream impact**: Drives 794 lines of boilerplate in `manager_hooks.go` +- **Problem**: Each hook event gets its own typed dispatch method. While type-safe, this violates Go's small-interface principle and creates massive boilerplate in every implementor. + +**Refactoring approach**: Either: +- **(a)** Generic dispatch: a single `Dispatch[P Payload](ctx context.Context, p P) (P, error)` entry point, type-safe via generics. Go interface methods cannot declare type parameters, so this would have to be a generic function rather than a method on `HookDispatcher`. +- **(b)** Group into sub-interfaces by domain: `SessionLifecycleHooks`, `AgentHooks`, `MessageHooks` — each with 3-5 methods. Implementors embed only what they need. + +Option (b) is lower risk and doesn't require generics plumbing. Both approaches eliminate ~800 lines of boilerplate in `manager_hooks.go`. + +## P1 — High Priority + +### 5. God File: `cli/skill.go` (1,778 LOC) + +- **File**: `internal/cli/skill.go` +- **Problem**: Single file bundles all skill-related CLI subcommands, formatting, and install logic. +- **Fix**: Split into separate files per subcommand (`skill_list.go`, `skill_install.go`, `skill_show.go`, etc.). + +### 6.
God File: `hooks/dispatch.go` (868 LOC) + +- **File**: `internal/hooks/dispatch.go` +- **Coupling**: Ce=4, Ca=0 (instability 1.0), 103 relations +- **Problem**: Mixes dispatch orchestration, async submission, filter/match logic, and result aggregation. +- **Fix**: Extract `async_dispatch.go` for the async path. Move filter/match logic into `matcher.go`. + +### 7. God File: `skills/registry.go` (971 LOC) + +- **File**: `internal/skills/registry.go` +- **Problem**: Mixes global skill loading, workspace caching with TTL, snapshot diffing, and hook registration in a single file. +- **Fix**: Extract workspace cache into `ws_cache.go` and snapshot diffing into `snapshot.go`. + +### 8. `httpapi/server.go` — Blast Radius 84-89 + +- **File**: `internal/api/httpapi/server.go` +- **Symbols**: `newHandlers` (BR=89), `RegisterRoutes` (BR=84), `corsMiddleware` (BR=84), `errorMiddleware` (BR=84), `requestLoggingMiddleware` (BR=84) +- **Problem**: All route registration and all middleware definitions in a single file. Any change ripples to 84+ transitive dependents. +- **Fix**: Move middleware to `middleware.go`. Split `RegisterRoutes` into per-domain registration functions (`registerSessionRoutes()`, `registerWorkspaceRoutes()`, `registerMemoryRoutes()`, etc.). + +### 9. `daemon.New()` — Composition Root Defaults + +- **File**: `internal/daemon/daemon.go` +- **Complexity**: 21 | **LOC**: 130 | **Blast radius**: 35 +- **Problem**: 15+ sequential `if d.X == nil { d.X = defaultX }` nil-guards setting default factory functions. +- **Fix**: Consolidate into an `applyDefaults()` method, or set defaults at struct initialization and let options only override. + +### 10. `acp/client.Start()` — Mixed Concerns + +- **File**: `internal/acp/client.go` +- **LOC**: 141 +- **Problem**: Subprocess setup, MCP server spawning, and JSON-RPC initialization all in one function. +- **Fix**: Split into `spawnProcess()`, `initializeMCPServers()`, and `negotiateSession()`. 
+ +## P2 — Medium Priority + +### 11. Struct Bloat / Primitive Obsession + +- **Files**: `internal/api/contract/contract.go` (339 LOC), `internal/acp/types.go` (429 LOC), `internal/store/types.go` (381 LOC), `internal/hooks/types.go` (258 LOC) +- **Problem**: `WorkspaceID` and `WorkspacePath` repeated as flat string fields across 4+ payload structs in different packages. +- **Fix**: Introduce a `WorkspaceRef` value object and embed it in all payload types: + +```go +type WorkspaceRef struct { + ID string `json:"workspace_id,omitempty"` + Path string `json:"workspace_path,omitempty"` +} +``` + +### 12. `StreamSession` — Deep Nesting + +- **File**: `internal/api/core/handlers.go:309` +- **LOC**: 107 | **Nesting depth**: 7 +- **Problem**: Polling loop contains nested `select > range > if > if` chains building SSE messages inline. +- **Fix**: Extract `pollAndSendEvents()` and `writeEventBatch()` helpers to flatten nesting. + +### 13. `cli/client.go` `decodeSSE` — Feature Envy + +- **File**: `internal/cli/client.go` +- **LOC**: 65 | **Nesting depth**: 5 +- **Problem**: SSE parsing logic inlined in the CLI client. This SSE parsing belongs in a shared utility, not the CLI package. +- **Fix**: Extract to a shared `internal/sse` or `internal/api/core` utility. + +### 14. Dependency Hotspots + +- **`hooks/payloads.go`** (168 relations): Most coupled file in the codebase. Consider splitting payload types by domain (session payloads, agent payloads, lifecycle payloads) so consumers only import what they need. +- **`cli/client.go`** (157 relations): Wide API surface touching all contract types. Could benefit from per-domain client files. +- **`session/manager_lifecycle.go`** (instability 1.0, Ce=5, Ca=0): Pure efferent coupling, maximally unstable. Acceptable for leaf orchestration but worth monitoring. 
+ +## Dead Code — Safe to Remove + +### Confirmed Dead Go Exports + +| Symbol | File | Evidence | +|--------|------|----------| +| `AgeDays()` | `internal/memory/staleness.go:9` | Only called from `store_test.go` | +| `AgeText()` | `internal/memory/staleness.go:19` | Only called from `store_test.go` | +| `FreshnessWarning()` | `internal/memory/staleness.go:31` | Only called from `store_test.go` | +| `CanonicalPayload()` | `internal/transcript/transcript.go:618` | Only called from `transcript_test.go` | +| `WithSessionStopTimeout()` | `internal/memory/consolidation/runtime.go:68` | Defined, zero callers anywhere | + +### Should Unexport + +| Symbol | File | Reason | +|--------|------|--------| +| `MergeProvider()` | `internal/config/provider.go:165` | Only used within its own file | + +### Test-Only Production Exports (Violates Project Guidelines) + +These exported functions exist in production code but are only called from tests. Per project rules: "Mock via interfaces, not test-only methods in production code." 
+ +| Symbol | File | +|--------|------| +| `config.Default()` | `internal/config/config.go:248` | +| `config.WithoutDotEnv()` | `internal/config/config.go:158` | +| `config.WithoutValidation()` | `internal/config/config.go:165` | +| `workspace.WithNow()` | `internal/workspace/options.go:50` | +| `memory.WithGoal()` | `internal/memory/dream.go:164` | + +### Unused shadcn/ui Components (16 files) + +Installed by the shadcn CLI but never imported by any component or route: + +`carousel.tsx`, `chart.tsx`, `calendar.tsx`, `context-menu.tsx`, `drawer.tsx`, `menubar.tsx`, `navigation-menu.tsx`, `pagination.tsx`, `radio-group.tsx`, `slider.tsx`, `alert-dialog.tsx`, `hover-card.tsx`, `input-otp.tsx`, `resizable.tsx`, `aspect-ratio.tsx`, `checkbox.tsx` + +## Development Sequencing + +### Recommended Attack Order + +| Phase | Target | Risk | Impact | +|-------|--------|------|--------| +| 1 | Dead code removal (exports + unused UI) | Zero | Immediate cleanup, smaller surface | +| 2 | `Create`/`Resume` deduplication | Low | Eliminates ~80 lines of highest-severity duplication | +| 3 | `boot()` decomposition into phases | Low-Medium | Reduces highest complexity from 54 to ~10 per phase | +| 4 | `handleInbound` registry pattern | Low | Mechanical transform, complexity 28 to ~5 | +| 5 | `HookDispatcher` interface shrink | Medium | Cascading ~800-line reduction in `manager_hooks.go` | +| 6 | God file splits (skill.go, dispatch.go, registry.go, server.go) | Low | Better navigation, lower per-file blast radius | +| 7 | Struct consolidation (`WorkspaceRef` value object) | Medium | Cross-cutting change across 4+ packages | + +### Phase Dependencies + +``` +Phase 1 (dead code) -- independent, do first +Phase 2 (Create/Resume) -- independent +Phase 3 (boot) -- independent +Phase 4 (handleInbound) -- independent +Phase 5 (HookDispatcher) -- should follow Phase 6 (god file splits) for hooks +Phase 6 (god file splits) -- independent per file +Phase 7 (WorkspaceRef) -- should be last, 
touches multiple packages +``` + +Phases 1-4 are independent and can be parallelized. Phase 5 benefits from Phase 6 completing on hooks first. Phase 7 is cross-cutting and should be last. + +### Verification Gates + +Every phase must pass all of the following gates before it is closed: + +- `make verify` (fmt, lint, test, build) — zero warnings, zero errors +- `make test` with the `-race` flag +- No new golangci-lint issues introduced +- 80% coverage minimum per modified package + +## Technical Considerations + +### Key Decisions + +- **Incremental phases over big-bang refactor**: Each phase is independently shippable and verifiable. No phase requires another to be useful. +- **File splits over package splits**: God files are split within their current package (new files, same package) rather than creating sub-packages. This avoids import churn while improving navigability. +- **Registry pattern for dispatch**: `handleInbound` uses a map-based registry rather than generated code or reflection. Keeps the codebase simple and grep-friendly. +- **Sub-interfaces over generics for HookDispatcher**: Grouping methods into domain sub-interfaces is lower risk than introducing a generic dispatch pattern and avoids generics plumbing at the session boundary. +- **Value object for WorkspaceRef**: Embedding a struct is the simplest way to reduce primitive repetition without changing serialization behavior (JSON field names remain stable). + +### Known Risks + +- **HookDispatcher shrink (Phase 5)**: This is the highest-risk change because it touches the session-hooks integration boundary. The 21-method interface is consumed by `manager_hooks.go` and any test doubles. All implementors must be updated atomically. + - Mitigation: Do the god file split on `hooks/` first (Phase 6) to reduce the blast radius, then tackle the interface. +- **WorkspaceRef embedding (Phase 7)**: Cross-cutting struct change across `contract`, `acp/types`, `store/types`, and `hooks/types`.
JSON serialization must remain backward-compatible for the web UI. + - Mitigation: Verify JSON output parity with table-driven tests before and after. +- **Test-only export removal**: Removing `config.Default()`, `config.WithoutDotEnv()`, etc. requires finding alternative test setup patterns (e.g., constructing config structs directly, using interfaces). + - Mitigation: Address per-package, verify test coverage doesn't drop. + +## Architecture Decision Records + +- [ADR-001: Incremental Phase-Based Refactoring Over Big-Bang](adrs/adr-001.md) — Each refactoring target is an independent, shippable phase with its own verification gate. +- [ADR-002: File Splits Over Package Splits for God Files](adrs/adr-002.md) — God files are decomposed within their current package to avoid import churn. +- [ADR-003: Sub-Interfaces Over Generics for HookDispatcher](adrs/adr-003.md) — Domain-grouped sub-interfaces reduce the 21-method interface without introducing generics complexity. +- [ADR-004: Map-Based Handler Registry for ACP Dispatch](adrs/adr-004.md) — Replaces the switch statement with a typed handler map for maintainability. + +## Implementation Status (2026-04-10) + +The refactor plan described above has been implemented and re-verified against the live codebase. + +- **Phase 1 complete**: Removed dead/test-only exports, collapsed test-only option surfaces back to unexported helpers, deleted 16 unused shadcn components, and removed their unused frontend dependencies. +- **Phase 2 complete**: Extracted a shared session-start pipeline so `Create` and `Resume` both delegate through the same startup path. +- **Phase 3 complete**: Decomposed daemon boot into focused boot phases, split ACP client startup into subprocess/init/session stages, and replaced the ACP inbound switch with a typed handler registry. 
+- **Phase 4 complete**: Replaced the monolithic session hook dependency with grouped hook sub-interfaces collected in `session.HookSet`, then split hook matcher and async dispatch concerns into dedicated files. +- **Phase 5 complete**: Split `httpapi` routing/middleware/handler wiring, split the skills registry cache/snapshot concerns into dedicated files, extracted shared SSE decoding into `internal/sse`, flattened `StreamSession`, and decomposed the CLI skill implementation into focused files (`skill.go`, `skill_workspace.go`, `skill_marketplace.go`, `skill_output.go`, `skill_commands.go`). +- **Phase 7 adapted**: The original embedded `WorkspaceRef` value-object proposal was attempted and rejected because it introduced keyed-composite-literal breakage and `ID` field collisions across existing structs. The shipped solution centralizes workspace reference construction through `internal/workref.PathRef` and `internal/workref.RootRef`, which reduces repeated workspace reference assembly without destabilizing public struct layouts. + +### Final Verification + +- `make verify` passed on 2026-04-10. +- `make test-integration` passed on 2026-04-10. diff --git a/.compozy/tasks/kb-refac/adrs/adr-001.md b/.compozy/tasks/kb-refac/adrs/adr-001.md new file mode 100644 index 000000000..ef9705070 --- /dev/null +++ b/.compozy/tasks/kb-refac/adrs/adr-001.md @@ -0,0 +1,61 @@ +# ADR-001: Incremental Phase-Based Refactoring Over Big-Bang + +## Status + +Accepted + +## Date + +2026-04-10 + +## Context + +The `kb-refac` scope spans daemon boot, session lifecycle, ACP transport, hooks, HTTP transport, registry layout, and dead-surface cleanup. These changes cross multiple packages with separate verification surfaces (`make verify`, integration tests, and web checks). A single large refactor would hide regressions and make rollback difficult. + +## Decision + +Implement the refactor in independent phases. 
Each phase must remove its own temporary scaffolding before completion and must pass its verification gate before the next phase starts. + +## Alternatives Considered + +### Alternative 1: Big-bang refactor + +- **Description**: Apply all structural changes in one batch. +- **Pros**: Fewer intermediate commits and fewer temporary seams. +- **Cons**: High regression risk, hard to isolate failures, poor reviewability. +- **Why rejected**: The hotspot set spans too many subsystems to validate safely in one pass. + +### Alternative 2: Opportunistic cleanup only + +- **Description**: Limit the work to dead code removal and small splits. +- **Pros**: Low risk and fast. +- **Cons**: Leaves the highest-cost duplication and orchestration issues intact. +- **Why rejected**: It would not resolve the main maintenance risks identified by the techspec. + +## Consequences + +### Positive + +- Verification stays local to each change set. +- Root-cause refactors remain reviewable and reversible. +- No temporary bridge code survives phase boundaries. + +### Negative + +- Some shared concepts are touched in multiple phases. +- The overall effort is longer than a single batch. + +### Risks + +- Phase boundaries can drift if later work depends on hidden assumptions. +- Mitigation: use explicit helper types and tests at each phase boundary. + +## Implementation Notes + +- Start with safe cleanup and artifact sync. +- Land structural refactors only after the cleanup phase is stable. 
+ +## References + +- `.compozy/tasks/kb-refac/_techspec.md` +- `.codex/plans/2026-04-10-kb-refac-full-sweep.md` diff --git a/.compozy/tasks/kb-refac/adrs/adr-002.md b/.compozy/tasks/kb-refac/adrs/adr-002.md new file mode 100644 index 000000000..bb40bb351 --- /dev/null +++ b/.compozy/tasks/kb-refac/adrs/adr-002.md @@ -0,0 +1,58 @@ +# ADR-002: File Splits Over Package Splits for God Files + +## Status + +Accepted + +## Date + +2026-04-10 + +## Context + +The techspec identifies large files in `cli`, `hooks`, `skills`, and `httpapi`. The packages themselves already align with the current architecture. The main problem is file-level concentration of unrelated responsibilities, not an incorrect package graph. + +## Decision + +Decompose oversized files into multiple files within the same package. Preserve existing package APIs and imports unless a later phase has a concrete reason to introduce a new package. + +## Alternatives Considered + +### Alternative 1: New sub-packages + +- **Description**: Introduce sub-packages for routes, middleware, async dispatch, and registry cache behavior. +- **Pros**: Stronger separation at the import level. +- **Cons**: Adds import churn and raises the coordination cost for mostly file-local concerns. +- **Why rejected**: The current architecture does not need more packages to solve file bloat. + +### Alternative 2: Leave files large and only extract helpers + +- **Description**: Keep single files and add small private helpers. +- **Pros**: Minimal churn. +- **Cons**: Navigation and ownership remain poor, and large files keep growing. +- **Why rejected**: It does not adequately reduce maintenance cost or blast radius. + +## Consequences + +### Positive + +- Improves navigation without changing package boundaries. +- Keeps imports stable during refactor phases. +- Reduces merge conflict pressure in hotspot files. + +### Negative + +- Package-level surface remains broad until later refactors narrow it. 
+ +### Risks + +- Splits can become arbitrary if they are not aligned with behavior boundaries. +- Mitigation: group by runtime responsibility, not by line count alone. + +## Implementation Notes + +- Split by behavior domains such as middleware, route registration, async dispatch, or cache management. + +## References + +- `.compozy/tasks/kb-refac/_techspec.md` diff --git a/.compozy/tasks/kb-refac/adrs/adr-003.md b/.compozy/tasks/kb-refac/adrs/adr-003.md new file mode 100644 index 000000000..aaf9670b4 --- /dev/null +++ b/.compozy/tasks/kb-refac/adrs/adr-003.md @@ -0,0 +1,58 @@ +# ADR-003: Sub-Interfaces Over Generics for HookDispatcher + +## Status + +Accepted + +## Date + +2026-04-10 + +## Context + +`session.HookDispatcher` currently exposes 21 methods and forces every implementor and test double to satisfy the entire hook surface. The pain point is dependency breadth, not lack of type safety. The concrete hook implementation already uses typed payloads and generics internally in `internal/hooks/dispatch.go`. + +## Decision + +Replace the single aggregate session-facing hook dependency with grouped domain sub-interfaces collected into a hook-set container. Keep typed payload-specific dispatch methods inside each group rather than introducing a generic session-facing dispatch API. + +## Alternatives Considered + +### Alternative 1: Generic session-facing dispatch + +- **Description**: Collapse hook dispatch to one generic method parameterized by payload type. +- **Pros**: Small surface area. +- **Cons**: Adds type-plumbing complexity at the session boundary and makes call sites less explicit. +- **Why rejected**: The current problem is oversized dependency shape, not insufficient abstraction power. + +### Alternative 2: Keep the aggregate interface + +- **Description**: Leave the 21-method interface in place and only split implementation files. +- **Pros**: No dependency updates. +- **Cons**: Test doubles and consumers still carry full-surface boilerplate.
+- **Why rejected**: It preserves the main source of coupling called out by the techspec. + +## Consequences + +### Positive + +- Session code depends only on the hook groups it actually uses. +- Tests can stub narrower hook groups. +- Existing typed payload behavior stays explicit at call sites. + +### Negative + +- Constructor wiring becomes slightly more explicit. + +### Risks + +- Partial hook-set defaults can hide missing wiring if nil handling is inconsistent. +- Mitigation: provide explicit no-op defaults for each hook group and cover them with tests. + +## Implementation Notes + +- Split manager-side hook helpers by domain before changing the injected dependency shape. + +## References + +- `.compozy/tasks/kb-refac/_techspec.md` diff --git a/.compozy/tasks/kb-refac/adrs/adr-004.md b/.compozy/tasks/kb-refac/adrs/adr-004.md new file mode 100644 index 000000000..acc59124a --- /dev/null +++ b/.compozy/tasks/kb-refac/adrs/adr-004.md @@ -0,0 +1,58 @@ +# ADR-004: Map-Based Handler Registry for ACP Dispatch + +## Status + +Accepted + +## Date + +2026-04-10 + +## Context + +`internal/acp/handlers.go` handles multiple inbound ACP methods with repeated decode, call, and error-conversion logic. The repetition increases cyclomatic complexity and makes new methods easy to implement inconsistently. + +## Decision + +Use a typed handler registry plus shared decode/execute helpers for inbound ACP dispatch. Preserve explicit request types and method names while removing repetitive switch-case plumbing. + +## Alternatives Considered + +### Alternative 1: Keep the switch + +- **Description**: Leave the switch in place and only extract local helpers inside cases. +- **Pros**: Familiar control flow. +- **Cons**: The method table remains repetitive and keeps growing linearly with each new method. +- **Why rejected**: It does not sufficiently reduce complexity or duplication. 
+ +### Alternative 2: Generated or reflection-based dispatch + +- **Description**: Derive method dispatch from naming conventions or reflection metadata. +- **Pros**: Less handwritten routing code. +- **Cons**: Harder to grep, harder to debug, and inconsistent with the project’s explicit style. +- **Why rejected**: The codebase favors explicit, simple runtime behavior over metaprogramming. + +## Consequences + +### Positive + +- New ACP methods follow one consistent decode/error path. +- The method table becomes data-driven and easy to audit. +- Complexity drops without losing typed handlers. + +### Negative + +- The registry requires one extra helper layer. + +### Risks + +- Special-case methods such as notification-only handlers can be modeled incorrectly. +- Mitigation: keep notification-only cases explicit in the registry contract and cover them with tests. + +## Implementation Notes + +- Invalid-params and request-error conversion must remain behaviorally identical to current tests. 
+ +## References + +- `.compozy/tasks/kb-refac/_techspec.md` diff --git a/.compozy/tasks/kb-refac/reviews-001/_meta.md b/.compozy/tasks/kb-refac/reviews-001/_meta.md new file mode 100644 index 000000000..07f77bacd --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/_meta.md @@ -0,0 +1,11 @@ +--- +provider: coderabbit +pr: "12" +round: 1 +created_at: 2026-04-10T16:15:31.233803Z +--- + +## Summary +- Total: 18 +- Resolved: 18 +- Unresolved: 0 diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_001.md b/.compozy/tasks/kb-refac/reviews-001/issue_001.md new file mode 100644 index 000000000..8da3a1a10 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_001.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: internal/acp/handlers.go +line: 105 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4090986708,nitpick_hash:8b3e741fa573 +review_hash: 8b3e741fa573 +source_review_id: "4090986708" +source_review_submitted_at: "2026-04-10T16:14:03Z" +--- + +# Issue 001: Consider hoisting the handler map to a struct field for reduced allocations. +## Review Comment + +The `handlers` map is recreated on every `handleInbound` call. While the map size is small (9 entries), moving it to a field initialized once during `AgentProcess` construction would eliminate per-call allocations in a potentially hot path. + +## Triage + +- Decision: `INVALID` +- Reasoning: `handleInbound` recreates a 9-entry dispatch map per call, but the review is a speculative allocation micro-optimization rather than a correctness defect. There is no benchmark or observed hot-path regression in this batch, and hoisting the method map into `AgentProcess` would add lifecycle/state surface without fixing broken behavior. +- Fix approach: No code change. Keep the local dispatch table until there is measured evidence that this path needs optimization. 
diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_002.md b/.compozy/tasks/kb-refac/reviews-001/issue_002.md new file mode 100644 index 000000000..242b6782e --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_002.md @@ -0,0 +1,66 @@ +--- +status: resolved +file: internal/api/core/session_stream.go +line: 127 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrWJ,comment:PRRC_kwDOR5y4QM62twbP +--- + +# Issue 002: _⚠️ Potential issue_ | _🟡 Minor_ +## Review Comment + +_⚠️ Potential issue_ | _🟡 Minor_ + +**Add justification comments for ignored `WriteSSE` errors.** + +Lines 102, 119, and 126 ignore `WriteSSE` errors with `_`. Per coding guidelines, every error must be handled or have a written justification. While ignoring these may be intentional (client disconnect means no recovery is possible), adding brief comments clarifies the intent. + + +
+📝 Suggested justification comments + +```diff + if pollErr != nil { +- _ = WriteSSE(writer, SSEMessage{ ++ // Best-effort error notification; client may have disconnected. ++ _ = WriteSSE(writer, SSEMessage{ + Name: "error", + Data: contract.ErrorPayload{Error: pollErr.Error()}, + }) + return afterSequence, info, true + } +``` + +Apply similar comments at lines 119 and 126. +
+ +As per coding guidelines: "Never ignore errors with _ — every error must be handled or have a written justification". + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/api/core/session_stream.go` around lines 100 - 127, The three +occurrences where the WriteSSE or writeSessionStoppedEvent return values are +discarded (the calls to WriteSSE after pollErr and statusErr, and the `_ = +h.writeSessionStoppedEvent(writer, latest)` call) must include a brief comment +justifying the ignored error per guidelines; update each site (the WriteSSE +calls handling pollErr and statusErr, and the writeSessionStoppedEvent call) +with a one-line comment explaining this is intentional/unrecoverable (e.g., +client disconnect or SSE stream closed) so the error cannot be handled or +retried. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: The three ignored SSE write errors are intentional best-effort paths, but the current code drops them without documenting why that is safe. That conflicts with the workspace rule that ignored errors must be justified at the call site. +- Fix approach: Add short comments at each ignored write explaining that the stream may already be closed and there is no meaningful recovery path. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_003.md b/.compozy/tasks/kb-refac/reviews-001/issue_003.md new file mode 100644 index 000000000..1aa62306d --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_003.md @@ -0,0 +1,49 @@ +--- +status: resolved +file: internal/api/httpapi/middleware.go +line: 83 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrWQ,comment:PRRC_kwDOR5y4QM62twbW +--- + +# Issue 003: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Match the full origin, not just the hostname.** + +`resolveAllowedOrigin()` currently accepts any origin that shares the same host name, even when the port or scheme differs. That turns CORS from “same origin” into “same host”, so a page on another port can call this API as long as it runs on the same host. Please compare against the full origin tuple (`scheme`, `host`, and `port`) or an explicit allowlist instead. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/api/httpapi/middleware.go` around lines 61 - 83, The function +resolveAllowedOrigin currently compares only hostnames; change it to compare +full origin tuples (scheme, hostname, port) instead. Parse requestHost and +boundHost into URLs (like you do for origin), normalize scheme and port (use +default ports for http/https when port is empty), build a canonical origin +string (scheme://hostname:port) for origin, request and bound, and then compare +those full canonical origins in the switch cases instead of +originHost/requestHostname/boundHostname; keep special-case loopback handling +but apply it to the full origin (or allow any loopback port if intended), and +preserve wildcard logic by matching bound origin appropriately (e.g., allow +wildcard host only when bound indicates it). Update helper calls (canonicalHost, +hostOnly, isLoopbackHost, isWildcardHost) or add small helpers to normalize and +compare full origins used by resolveAllowedOrigin. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `resolveAllowedOrigin` currently compares only canonical host names for the normal path, so a non-loopback origin on a different port can be treated as same-origin. That weakens the intended CORS boundary from origin matching to host matching. +- Fix approach: Canonicalize request, bound, and origin values as origin tuples, compare full origins for the normal path, preserve the explicit loopback-development allowance, and add regression tests for port-sensitive behavior. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_004.md b/.compozy/tasks/kb-refac/reviews-001/issue_004.md new file mode 100644 index 000000000..9f6d1f13a --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_004.md @@ -0,0 +1,95 @@ +--- +status: resolved +file: internal/api/httpapi/routes.go +line: 21 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrWW,comment:PRRC_kwDOR5y4QM62twbc +--- + +# Issue 004: _⚠️ Potential issue_ | _🟡 Minor_ +## Review Comment + +_⚠️ Potential issue_ | _🟡 Minor_ + +**Add nil guard for handlers parameter.** + +If `handlers` is nil, calls like `handlers.CreateWorkspace` will panic. While the `NoRoute` setup has a nil check, the route registration calls don't. + + +
+🛡️ Proposed fix + +```diff + func RegisterRoutes(router gin.IRouter, handlers *Handlers) { ++ if handlers == nil { ++ return ++ } + api := router.Group("/api") + + registerWorkspaceRoutes(api, handlers) +``` +
+ + + +
+📝 Committable suggestion + +> ‼️ **IMPORTANT** +> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. + +```suggestion +func RegisterRoutes(router gin.IRouter, handlers *Handlers) { + if handlers == nil { + return + } + api := router.Group("/api") + + registerWorkspaceRoutes(api, handlers) + registerSessionRoutes(api, handlers) + registerAgentRoutes(api, handlers) + registerObserveRoutes(api, handlers) + registerHookRoutes(api, handlers) + registerSkillRoutes(api, handlers) + registerMemoryRoutes(api, handlers) + registerDaemonRoutes(api, handlers) + + if engine, ok := router.(*gin.Engine); ok && handlers != nil { + engine.NoRoute(handlers.serveStaticRoute) + } +} +``` + +
+ + + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/api/httpapi/routes.go` around lines 6 - 21, The RegisterRoutes +function calls multiple route registration helpers (registerWorkspaceRoutes, +registerSessionRoutes, registerAgentRoutes, registerObserveRoutes, +registerHookRoutes, registerSkillRoutes, registerMemoryRoutes, +registerDaemonRoutes) using the handlers pointer without guarding for nil, which +can cause panics when handlers is nil; update RegisterRoutes to check that +handlers != nil before invoking those register* functions (either return early +or skip registration when handlers is nil) so calls like +handlers.CreateWorkspace won't be dereferenced, while preserving the existing +NoRoute handler setup that already checks for nil. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `RegisterRoutes` registers handler methods even when `handlers` is nil. Method values on a nil receiver do not panic immediately, but they still register broken route handlers that will fail when invoked. The function already treats `nil` specially for `NoRoute`, so guarding the route registration path is consistent. +- Fix approach: Return early when `handlers` is nil and add a regression test that confirms no API routes are registered in that configuration. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_005.md b/.compozy/tasks/kb-refac/reviews-001/issue_005.md new file mode 100644 index 000000000..d15bbeae4 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_005.md @@ -0,0 +1,43 @@ +--- +status: resolved +file: internal/cli/skill_commands.go +line: 54 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrWv,comment:PRRC_kwDOR5y4QM62twb8 +--- + +# Issue 005: _⚠️ Potential issue_ | _🟡 Minor_ +## Review Comment + +_⚠️ Potential issue_ | _🟡 Minor_ + +**`--source` help text is missing supported values.** + +`normalizeSkillSourceFilter` also accepts `marketplace` (plus the `agents` aliases), but the flag description only advertises bundled/user/additional/workspace. The CLI help should match the parser. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/cli/skill_commands.go` at line 54, The --source flag help currently +lists only bundled/user/additional/workspace but normalizeSkillSourceFilter also +accepts marketplace and the agents aliases; update the flag registration (where +cmd.Flags().StringVar(&sourceFilter, "source", ... ) is called) to include all +supported values (e.g., bundled, user, additional, workspace, marketplace, +agents) in the help string so the CLI help matches the +normalizeSkillSourceFilter parser and its accepted aliases. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: The `--source` help text is incomplete. The parser accepts `marketplace` and the `agents` aliases, but the command help omits them, which makes the CLI contract misleading. +- Fix approach: Update the flag help string to advertise the full supported filter vocabulary. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_006.md b/.compozy/tasks/kb-refac/reviews-001/issue_006.md new file mode 100644 index 000000000..2fbd4da0b --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_006.md @@ -0,0 +1,92 @@ +--- +status: resolved +file: internal/cli/skill_marketplace.go +line: 94 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrWy,comment:PRRC_kwDOR5y4QM62twcA +--- + +# Issue 006: _⚠️ Potential issue_ | _🟡 Minor_ +## Review Comment + +_⚠️ Potential issue_ | _🟡 Minor_ + +
+🧩 Analysis chain + +🏁 Script executed: + +```shell +cat -n internal/cli/skill_marketplace.go | sed -n '80,100p' +``` + +Repository: compozy/agh + +Length of output: 863 + +--- + +🏁 Script executed: + +```shell +cat -n internal/cli/skill_marketplace.go | sed -n '370,380p' +``` + +Repository: compozy/agh + +Length of output: 338 + +--- + +🏁 Script executed: + +```shell +cat -n internal/cli/skill_marketplace.go | sed -n '408,420p' +``` + +Repository: compozy/agh + +Length of output: 689 + +--- + + + +
+ +**Don't discard cleanup errors with `_`.** + +These three locations silently drop error returns from cleanup operations: +- Lines 85, 376: `archive.Data.Close()` and `gzipReader.Close()` in defer blocks +- Line 413: `file.Close()` in error path (while line 416 properly handles `file.Close()` in success path) + +Per coding guidelines, every error must be handled or have a written justification. Suppressing these errors can hide resource leaks or incomplete operations in production. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/cli/skill_marketplace.go` around lines 84 - 94, The defer and +error-path currently discard cleanup errors for archive.Data.Close(), +gzipReader.Close(), and file.Close(); change each ignore (“_ = ...Close()”) to +capture the error and handle it (e.g., if closeErr := archive.Data.Close(); +closeErr != nil { return or wrap/log the error } or log it with context) so +cleanup failures are not silently dropped; update the defer for tempRoot removal +similarly if needed, and ensure the error-path after creating/writing the file +returns or logs the file.Close() error instead of discarding it, referencing +archive.Data.Close, gzipReader.Close, and file.Close in skill_marketplace.go. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `archive.Data.Close()`, `gzipReader.Close()`, and the error-path `file.Close()` are currently discarded. Cleanup failures on installation I/O should not disappear silently because they can hide truncated downloads or incomplete writes. +- Fix approach: Convert the install/extract functions to report close failures with contextual errors, using joined errors where the primary operation already failed. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_007.md b/.compozy/tasks/kb-refac/reviews-001/issue_007.md new file mode 100644 index 000000000..4340b89b4 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_007.md @@ -0,0 +1,53 @@ +--- +status: resolved +file: internal/cli/skill_marketplace.go +line: 145 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrW7,comment:PRRC_kwDOR5y4QM62twcP +--- + +# Issue 007: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Use a stable install path during updates.** + +`installMarketplaceSkill` always derives `targetDir` from the archive's current `Meta.Name`, and `updateMarketplaceSkill` reuses it for replacements. If the registry package renames the skill, the update will install into a new directory and leave the old install behind instead of replacing it. Updates should either reuse `installed.Dir` or fail when the package name changes. + + + + +Also applies to: 234-245 + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/cli/skill_marketplace.go` around lines 128 - 145, +installMarketplaceSkill (and updateMarketplaceSkill) compute targetDir from +parsedSkill.Meta.Name which allows a package rename to create a new directory +instead of replacing the existing install; change the logic to prefer the +existing installation directory when updating: in updateMarketplaceSkill, when +an installed record (installed.Dir or installed.Path) exists, reuse that path as +targetDir (or fail if the new package explicitly intends a rename), and only +fall back to deriving a path from parsedSkill.Meta.Name when there is no +existing install; update the call sites around +moveInstalledSkillDir(parsedSkill.Dir, targetDir, replaceExisting) accordingly +so replacements operate on the stable installed.Dir rather than the archive +name. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: Updates currently derive the replacement directory from the newly downloaded package metadata. If the marketplace package name changes, an update can install into a new directory and leave the old installation behind instead of replacing it. +- Fix approach: Reuse the existing installed directory for updates, validating that the target remains inside the user skills root, and keep name-derived paths only for fresh installs. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_008.md b/.compozy/tasks/kb-refac/reviews-001/issue_008.md new file mode 100644 index 000000000..a39048a81 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_008.md @@ -0,0 +1,227 @@ +--- +status: resolved +file: internal/cli/skill_marketplace.go +line: 570 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrXB,comment:PRRC_kwDOR5y4QM62twcW +--- + +# Issue 008: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +
+🧩 Analysis chain + +🏁 Script executed: + +```shell +cat -n internal/cli/skill_marketplace.go | sed -n '540,575p' +``` + +Repository: compozy/agh + +Length of output: 1229 + +--- + +🏁 Script executed: + +```shell +# Find related test files +fd -t f "test" internal/cli/ -o fd -t f "*_test.go" internal/cli/ +``` + +Repository: compozy/agh + +Length of output: 178 + +--- + +🏁 Script executed: + +```shell +# Search for tests or usages of versionIsNewer +rg "versionIsNewer|normalizeVersion|parseVersionParts" internal/cli/ -A 2 -B 2 +``` + +Repository: compozy/agh + +Length of output: 2581 + +--- + +🏁 Script executed: + +```shell +# Get the complete implementation of helper functions +sed -n '573,650p' internal/cli/skill_marketplace.go +``` + +Repository: compozy/agh + +Length of output: 777 + +--- + +🏁 Script executed: + +```shell +# Get parseVersionParts implementation +sed -n '600,640p' internal/cli/skill_marketplace.go +``` + +Repository: compozy/agh + +Length of output: 179 + +--- + +🏁 Script executed: + +```shell +# Let's trace through the logic with a test case +python3 << 'EOF' +# Simulate the parseVersionParts logic +def parse_version_parts(version): + segments = version.split(".") + if len(segments) == 0: + return None, False + + parts = [] + for segment in segments: + if segment == "": + return None, False + try: + value = int(segment) + parts.append(value) + except ValueError: + return None, False + return parts, True + +# Test with pre-release version +test_cases = [ + ("1.0.0", "1.0.0"), + ("1.0.0", "1.0.0-rc1"), + ("1.0.0-rc1", "1.0.0"), +] + +for current, latest in test_cases: + curr_parts, curr_numeric = parse_version_parts(current) + latest_parts, latest_numeric = parse_version_parts(latest) + + print(f"\nComparing '{current}' vs '{latest}':") + print(f" current: parts={curr_parts}, numeric={curr_numeric}") + print(f" latest: parts={latest_parts}, numeric={latest_numeric}") + + if curr_numeric and latest_numeric: + print(f" → Uses numeric comparison") + 
else: + print(f" → Falls back to string comparison") + string_result = latest > current + print(f" '{latest}' > '{current}' = {string_result}") + + # In semver, 1.0.0 should be > 1.0.0-rc1 + if current == "1.0.0-rc1" and latest == "1.0.0": + print(f" Semantic expectation: latest (1.0.0) > current (1.0.0-rc1) = True") + print(f" Actual result matches expectation: {string_result == True}") +EOF +``` + +Repository: compozy/agh + +Length of output: 675 + +--- + +🏁 Script executed: + +```shell +# Check the existing tests more thoroughly +sed -n '/func TestCriticalWarnings/,/^}/p' internal/cli/skill_test.go | head -50 +``` + +Repository: compozy/agh + +Length of output: 37 + +--- + +🏁 Script executed: + +```shell +# Look for any version comparison tests +rg -B5 -A5 "versionIsNewer" internal/cli/skill_test.go +``` + +Repository: compozy/agh + +Length of output: 720 + +--- + +🏁 Script executed: + +```shell +# Check where versionIsNewer is used and if there's any context +rg -B10 -A5 "versionIsNewer" internal/cli/skill_marketplace.go | grep -A10 -B10 "versionIsNewer(currentVersion" +``` + +Repository: compozy/agh + +Length of output: 695 + +--- + +🏁 Script executed: + +```shell +# Check if there are any handling of pre-release versions elsewhere +rg "rc|alpha|beta|pre|release" internal/cli/skill_marketplace.go +``` + +Repository: compozy/agh + +Length of output: 1750 + +--- + + + +
+ +**The string comparison fallback is semantically incorrect for pre-release versions.** + +When either version contains a non-numeric suffix (e.g., pre-release tags like `-rc1`), the comparison falls back to lexical string ordering. This produces wrong results for semantic versioning: `"1.0.0" > "1.0.0-rc1"` evaluates to `false` because `"1.0.0"` is a prefix of `"1.0.0-rc1"` and therefore sorts before it, whereas under semver a release is newer than its own pre-release. This prevents users on a pre-release version from upgrading to the corresponding release. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/cli/skill_marketplace.go` around lines 544 - 570, The fallback +string comparison in versionIsNewer is incorrect for pre-release semantics; +update versionIsNewer (and helper parseVersionParts/normalizeVersion if needed) +to detect and handle pre-release suffixes instead of blindly comparing +normalized strings: after comparing numeric parts, if one version has a +pre-release tag and the other does not, treat the one without the tag as newer; +if both have pre-release tags, compare those tags using semver rules (split on +dots/hyphens and compare identifiers numerically when numeric, lexically +otherwise). Ensure versionPartAt/parseVersionParts expose or return the +pre-release portion so versionIsNewer can apply this logic rather than using +normalizedLatest > normalizedCurrent. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `versionIsNewer` falls back to lexical string comparison when parsing fails, which is incorrect for semantic-version prerelease rules. That makes released versions compare incorrectly against prerelease builds such as `1.0.0-rc1`. +- Fix approach: Implement semver-aware prerelease comparison, keep the simple numeric comparison for plain dotted versions, and extend the CLI tests with prerelease coverage. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_009.md b/.compozy/tasks/kb-refac/reviews-001/issue_009.md new file mode 100644 index 000000000..2fe5138b8 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_009.md @@ -0,0 +1,282 @@ +--- +status: resolved +file: internal/cli/skill_workspace.go +line: 63 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrXR,comment:PRRC_kwDOR5y4QM62twco +--- + +# Issue 009: _⚠️ Potential issue_ | _🟡 Minor_ +## Review Comment + +_⚠️ Potential issue_ | _🟡 Minor_ + +
+🧩 Analysis chain + +🏁 Script executed: + +```shell +cd /tmp && find . -name "skill_workspace.go" -type f 2>/dev/null | head -5 +``` + +Repository: compozy/agh + +Length of output: 37 + +--- + +🏁 Script executed: + +```shell +git ls-files | grep -E "skill_workspace\.go" +``` + +Repository: compozy/agh + +Length of output: 86 + +--- + +🏁 Script executed: + +```shell +find . -type f -name "*.go" | grep -i skill | head -20 +``` + +Repository: compozy/agh + +Length of output: 773 + +--- + +🏁 Script executed: + +```shell +git ls-files "**/*.go" | grep skill +``` + +Repository: compozy/agh + +Length of output: 1177 + +--- + +🏁 Script executed: + +```shell +find . -type f -path "*/internal/cli/*" -name "*.go" 2>/dev/null | head -10 +``` + +Repository: compozy/agh + +Length of output: 367 + +--- + +🏁 Script executed: + +```shell +ls -la internal/cli/ 2>/dev/null | head -20 +``` + +Repository: compozy/agh + +Length of output: 1292 + +--- + +🏁 Script executed: + +```shell +git ls-files internal/cli/ | head -20 +``` + +Repository: compozy/agh + +Length of output: 609 + +--- + +🏁 Script executed: + +```shell +wc -l internal/cli/skill_workspace.go +``` + +Repository: compozy/agh + +Length of output: 90 + +--- + +🏁 Script executed: + +```shell +sed -n '1,100p' internal/cli/skill_workspace.go | cat -n +``` + +Repository: compozy/agh + +Length of output: 3652 + +--- + +🏁 Script executed: + +```shell +cat -n internal/cli/skill_workspace.go +``` + +Repository: compozy/agh + +Length of output: 19367 + +--- + +🏁 Script executed: + +```shell +# Check the line counts more carefully and verify file integrity +head -30 internal/cli/skill_workspace.go +``` + +Repository: compozy/agh + +Length of output: 764 + +--- + +🏁 Script executed: + +```shell +# Get the actual function starting from the function definition +sed -n '31,75p' internal/cli/skill_workspace.go | cat -n +``` + +Repository: compozy/agh + +Length of output: 1574 + +--- + +🏁 Script executed: + +```shell +# Check what other 
functions are in this file and look for potential issues +grep -n "^func " internal/cli/skill_workspace.go +``` + +Repository: compozy/agh + +Length of output: 1738 + +--- + +🏁 Script executed: + +```shell +# The file seems corrupted. Let's check the git version to see clean content +git show HEAD:internal/cli/skill_workspace.go | head -100 +``` + +Repository: compozy/agh + +Length of output: 2952 + +--- + +🏁 Script executed: + +```shell +# Continue reading to see all functions and check for other issues +git show HEAD:internal/cli/skill_workspace.go | wc -l +``` + +Repository: compozy/agh + +Length of output: 58 + +--- + +🏁 Script executed: + +```shell +# Get the complete file from git +git show HEAD:internal/cli/skill_workspace.go +``` + +Repository: compozy/agh + +Length of output: 15433 + +--- + +🏁 Script executed: + +```shell +# Check if any of these errors are wrapped with fmt.Errorf +git show HEAD:internal/cli/skill_workspace.go | grep -A2 "if err :=" | grep "fmt.Errorf" +``` + +Repository: compozy/agh + +Length of output: 37 + +--- + +🏁 Script executed: + +```shell +# Confirm all error returns in the function are plain returns +git show HEAD:internal/cli/skill_workspace.go | sed -n '22,63p' | grep -E "return.*err" +``` + +Repository: compozy/agh + +Length of output: 270 + +--- + + + +
+ +**Wrap error returns with context for better diagnostics.** + +All error paths in `loadSkillCommandContext` return raw errors without wrapping. As per coding guidelines, use `fmt.Errorf("context: %w", err)` to add phase-specific context to the following error returns: +- Line 25: `loadRuntimeContext` +- Line 30: `resolveCLIWorkspaceRoot` +- Line 35: `aghconfig.ResolveUserAgentsSkillsDir` +- Line 45: `registry.LoadAll` +- Line 50: `resolveSkillWorkspace` +- Line 55: `registry.ForWorkspace` + +This will make failures much easier to diagnose by indicating which phase of initialization failed. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/cli/skill_workspace.go` around lines 22 - 63, The function returns +raw errors from each initialization step; wrap each returned error with context +using fmt.Errorf("...: %w", err) so callers know which phase failed — +specifically wrap errors from loadRuntimeContext, resolveCLIWorkspaceRoot, +aghconfig.ResolveUserAgentsSkillsDir, registry.LoadAll, resolveSkillWorkspace, +and registry.ForWorkspace (update the error returns in loadSkillCommandContext +to include phase-specific messages referencing those functions/steps). +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `loadSkillCommandContext` returns several raw errors without adding local context, which makes initialization failures harder to diagnose from CLI output. +- Fix approach: Wrap each propagated error with the operation that failed so callers can distinguish runtime, workspace, registry, and resolver failures. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_010.md b/.compozy/tasks/kb-refac/reviews-001/issue_010.md new file mode 100644 index 000000000..4daa5a29e --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_010.md @@ -0,0 +1,93 @@ +--- +status: resolved +file: internal/cli/skill_workspace.go +line: 438 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrXa,comment:PRRC_kwDOR5y4QM62twcy +--- + +# Issue 010: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Escape the skill body before embedding it in XML.** + +`content` is written verbatim between XML tags. Any `<`, `&`, or embedded HTML in `SKILL.md` will produce invalid XML and break `skill view` consumers. + + + +
+Proposed fix + +```diff + builder.WriteString(`">`) + builder.WriteString("\n") +- builder.WriteString(content) ++ builder.WriteString(skillXMLTextReplacer.Replace(content)) + if !strings.HasSuffix(content, "\n") { + builder.WriteString("\n") + } +``` +
+ + + +
+📝 Committable suggestion + +> ‼️ **IMPORTANT** +> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. + +```suggestion + var builder strings.Builder + builder.WriteString(``) + builder.WriteString("\n") + builder.WriteString(skillXMLTextReplacer.Replace(content)) + if !strings.HasSuffix(content, "\n") { + builder.WriteString("\n") + } + builder.WriteString("\n\n") + for _, resource := range resources { + builder.WriteString(" ") + builder.WriteString(skillXMLTextReplacer.Replace(resource)) + builder.WriteString("\n") + } + builder.WriteString("\n") + builder.WriteString("") + return builder.String(), nil +``` + +
+ + + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/cli/skill_workspace.go` around lines 421 - 438, The skill body +(variable content) is embedded verbatim into XML causing unescaped characters +like '<' or '&' to break consumers; update the XML assembly to escape the skill +body before writing it (use the existing skillXMLTextReplacer or an equivalent +XML-escaping function) where content is appended to the strings.Builder (the +same way resources use skillXMLTextReplacer.Replace(resource)), ensuring you +replace/escape content prior to checking/adding trailing newlines and before +builder.WriteString(content) so the produced XML is always well-formed. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `renderSkillXML` escapes the attribute and resource file nodes, but it writes the skill body text verbatim inside XML. Any `<`, `&`, or similar markup in `SKILL.md` content produces malformed XML for `skill view`. +- Fix approach: Escape the body content with the existing XML text replacer and add a regression test covering embedded XML-sensitive characters. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_011.md b/.compozy/tasks/kb-refac/reviews-001/issue_011.md new file mode 100644 index 000000000..0d0731afb --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_011.md @@ -0,0 +1,46 @@ +--- +status: resolved +file: internal/hooks/dispatch_async.go +line: 26 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrXs,comment:PRRC_kwDOR5y4QM62twdJ +--- + +# Issue 011: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Async execution is detached from parent cancellation** + +At Lines 24-26, `baseCtx` is rooted in `poolCtx`, so cancellation of `parent` does not stop hook execution unless a timeout is set. This can leave orphaned async work after request/session cancellation. As per coding guidelines, "Every goroutine must have explicit ownership and shutdown via context.Context cancellation". + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/hooks/dispatch_async.go` around lines 24 - 26, The async hook's +baseCtx is currently rooted in poolCtx so it ignores parent cancellation; change +the base context to be derived from the parent's context (e.g., use +parent.Context() or the parent context variable) when building baseCtx before +calling h.enterDispatch(asyncHook.Event) so that cancellation of parent cancels +the async hook goroutine; update the lines creating baseCtx and the subsequent +WithValue call (which reference dispatchDepthContextKey{} and +dispatchChainContextKey{} and currentDispatchChain(parent)) to use the parent's +context as the root instead of poolCtx so enterDispatch and any spawned +goroutines inherit parent cancellation. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: Async hook execution currently roots its derived context in the pool context only, so canceling the parent dispatch does not stop the hook unless a hook timeout is configured. That violates the expectation that request/session cancellation propagates into async work. +- Fix approach: Build the async execution context so it is canceled by both the parent dispatch context and the pool lifecycle, then add regression coverage for parent cancellation. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_012.md b/.compozy/tasks/kb-refac/reviews-001/issue_012.md new file mode 100644 index 000000000..e9040df33 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_012.md @@ -0,0 +1,91 @@ +--- +status: resolved +file: internal/hooks/dispatch_async.go +line: 55 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrXm,comment:PRRC_kwDOR5y4QM62twdA +--- + +# Issue 012: _⚠️ Potential issue_ | _🔴 Critical_ +## Review Comment + +_⚠️ Potential issue_ | _🔴 Critical_ + +
+🧩 Analysis chain + +🏁 Script executed: + +```shell +#!/bin/bash +set -euo pipefail + +# 1) Locate pool Submit declarations/signatures. +rg -n -C3 --type=go 'func\s*\([^)]*\)\s*Submit\s*\(' + +# 2) Locate async pool Submit call sites to verify handling. +rg -n -C3 --type=go '\.Submit\s*\(' internal/hooks + +# 3) If submit methods are interface-based, show interface declarations too. +rg -n -C3 --type=go 'type\s+\w+\s+interface\s*{[^}]*Submit\s*\(' +``` + +Repository: compozy/agh + +Length of output: 6177 + +--- + +🏁 Script executed: + +```shell +#!/bin/bash +cat -n internal/hooks/pool.go | head -140 | tail -50 +``` + +Repository: compozy/agh + +Length of output: 1302 + +--- + + + +
+ +**Check the `Submit` return value and handle the failure path** + +`h.pool.Submit(...)` returns a `bool` and can fail (when the pool is nil, closed, or not started, or when the queue is full). The current code ignores the return value entirely, causing async hook submissions to fail silently without emitting an outcome event. Per coding guidelines, every error/failure must be handled explicitly. + +When `Submit` returns `false`, the task is never queued and never executes. The pool logs "hook.dispatch.async_dropped" internally, but the caller has no visibility. Emit a hook outcome event (e.g., `HookRunOutcomeSkipped`) when submission fails, matching the pattern used for other failure paths in this function. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/hooks/dispatch_async.go` around lines 21 - 55, The call to +h.pool.Submit(asyncTask{...}) currently ignores its boolean return; if Submit +returns false the asyncTask is dropped silently — update the code after calling +h.pool.Submit to check the returned bool and when it is false call +h.emitHookRun(...) with the same parameters used for other skipped paths (use +asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeSkipped, duration 0, nil +patch, appropriate err value or nil, and parentDepth) and also log/emit the +"hook.dispatch.async_dropped" outcome so callers get visibility; reference +h.pool.Submit, asyncTask, h.emitHookRun, HookRunOutcomeSkipped, asyncPayload, +asyncHook.RegisteredHook and parentDepth to locate where to add this conditional +handling. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `submitAsyncHooks` ignores the `bool` returned by `h.pool.Submit`. When the queue is full or the pool is unavailable, the hook is dropped and the caller receives no hook-run outcome record even though the system defines a `dropped` outcome for this exact case. +- Fix approach: Check the submit result, emit an explicit hook-run record for the drop path, and add a test that forces queue overflow and asserts the recorded outcome. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_013.md b/.compozy/tasks/kb-refac/reviews-001/issue_013.md new file mode 100644 index 000000000..f43611483 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_013.md @@ -0,0 +1,85 @@ +--- +status: resolved +file: internal/hooks/matcher.go +line: 191 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrXy,comment:PRRC_kwDOR5y4QM62twdP +--- + +# Issue 013: _⚠️ Potential issue_ | _🟡 Minor_ +## Review Comment + +_⚠️ Potential issue_ | _🟡 Minor_ + +**Handle unknown hook modes explicitly** + +At Line 186, the `switch` has no `default`; an unexpected mode is silently dropped and never dispatched. Please add an explicit fallback (or validation/error path) so this cannot fail quietly. + + + +
+Proposed defensive fix + +```diff + switch hook.Mode { + case HookModeAsync: + asyncHooks = append(asyncHooks, hook) + case HookModeSync: + syncHooks = append(syncHooks, hook) ++ default: ++ // Defensive fallback to avoid silent drops for unexpected mode values. ++ syncHooks = append(syncHooks, hook) + } +``` +
+ + + +
+📝 Committable suggestion + +> ‼️ **IMPORTANT** +> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. + +```suggestion + switch hook.Mode { + case HookModeAsync: + asyncHooks = append(asyncHooks, hook) + case HookModeSync: + syncHooks = append(syncHooks, hook) + default: + // Defensive fallback to avoid silent drops for unexpected mode values. + syncHooks = append(syncHooks, hook) + } +``` + +
+ + + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/hooks/matcher.go` around lines 171 - 191, selectMatchingHooks +currently drops hooks with unknown hook.Mode silently; add an explicit default +branch in the switch in selectMatchingHooks that handles unexpected modes: log a +clear warning (including hook identity and hook.Mode) and use a safe fallback +(e.g., append the hook to syncHooks) so the hook is not lost; reference the +switch on hook.Mode and constants HookModeAsync and HookModeSync when +implementing the default branch. +``` + +
+ + + + + +## Triage + +- Decision: `INVALID` +- Reasoning: Unknown hook modes are already rejected before they reach `selectMatchingHooks`. `HookMode.Validate`, `RegisteredHook.Validate`, and the normalization path enforce `sync` or `async`, so adding a default branch here would mask an invariant violation rather than fixing a reachable silent-drop bug. +- Fix approach: No code change. Keep the matcher strict and rely on the existing validation boundary to reject invalid modes. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_014.md b/.compozy/tasks/kb-refac/reviews-001/issue_014.md new file mode 100644 index 000000000..f6322598c --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_014.md @@ -0,0 +1,128 @@ +--- +status: resolved +file: internal/memory/store_test.go +line: 682 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrX6,comment:PRRC_kwDOR5y4QM62twdX +--- + +# Issue 014: _🛠️ Refactor suggestion_ | _🟠 Major_ +## Review Comment + +_🛠️ Refactor suggestion_ | _🟠 Major_ + +**Convert these helper assertions into table-driven subtests.** + +This block still bundles multiple cases into one test, so failures are less isolated and it does not match the repo’s default test shape. Please move these assertions into `t.Run("Should ...")` table cases, and keep `t.Parallel()` on independent subtests. + + + +
+Example refactor + +```diff +- if got := ageDays(today, now); got != 0 { +- t.Fatalf("ageDays(today) = %d, want 0", got) +- } +- if got := ageDays(yesterday, now); got != 1 { +- t.Fatalf("ageDays(yesterday) = %d, want 1", got) +- } +- if got := ageText(today, now); got != "today" { +- t.Fatalf("ageText(today) = %q, want %q", got, "today") +- } +- if got := ageText(yesterday, now); got != "yesterday" { +- t.Fatalf("ageText(yesterday) = %q, want %q", got, "yesterday") +- } +- if got := ageText(threeDaysAgo, now); got != "3 days ago" { +- t.Fatalf("ageText(threeDaysAgo) = %q, want %q", got, "3 days ago") +- } +- if got := freshnessWarning(today, now); got != "" { +- t.Fatalf("freshnessWarning(today) = %q, want empty", got) +- } +- if got := freshnessWarning(yesterday, now); got != "" { +- t.Fatalf("freshnessWarning(yesterday) = %q, want empty", got) +- } +- if got := freshnessWarning(threeDaysAgo, now); !strings.Contains(got, "3 days old") { +- t.Fatalf("freshnessWarning(threeDaysAgo) = %q, want age caveat", got) +- } ++ tests := []struct { ++ name string ++ run func(*testing.T) ++ }{ ++ { ++ name: "Should return zero days for today", ++ run: func(t *testing.T) { ++ t.Parallel() ++ if got := ageDays(today, now); got != 0 { ++ t.Fatalf("ageDays(today) = %d, want 0", got) ++ } ++ }, ++ }, ++ { ++ name: "Should return one day for yesterday", ++ run: func(t *testing.T) { ++ t.Parallel() ++ if got := ageDays(yesterday, now); got != 1 { ++ t.Fatalf("ageDays(yesterday) = %d, want 1", got) ++ } ++ }, ++ }, ++ { ++ name: "Should render relative age text", ++ run: func(t *testing.T) { ++ t.Parallel() ++ if got := ageText(threeDaysAgo, now); got != "3 days ago" { ++ t.Fatalf("ageText(threeDaysAgo) = %q, want %q", got, "3 days ago") ++ } ++ }, ++ }, ++ { ++ name: "Should emit warning only for stale memories", ++ run: func(t *testing.T) { ++ t.Parallel() ++ if got := freshnessWarning(threeDaysAgo, now); !strings.Contains(got, "3 days old") { ++ 
t.Fatalf("freshnessWarning(threeDaysAgo) = %q, want age caveat", got) ++ } ++ }, ++ }, ++ } ++ ++ for _, tt := range tests { ++ tt := tt ++ t.Run(tt.name, func(t *testing.T) { ++ tt.run(t) ++ }) ++ } +``` +
+ +As per coding guidelines, `**/*_test.go`: `Use table-driven tests with subtests (t.Run) as default in Go tests` and `MUST use t.Run("Should...") pattern for ALL test cases`. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/memory/store_test.go` around lines 660 - 682, Split the block of +assertions into a table-driven set of subtests using t.Run("Should ...") entries +that each test one expectation for ageDays, ageText, and freshnessWarning; +create a testCases slice referencing the inputs (today, yesterday, threeDaysAgo, +now) and expected outputs, loop over it and for each case call t.Run with a +descriptive "Should ..." name, run t.Parallel() inside each subtest, and perform +the single assertion (use equality checks for ageDays/ageText and +strings.Contains for freshnessWarning's "3 days old" expectation) against the +functions ageDays, ageText, and freshnessWarning so failures are isolated. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: The `TestStalenessHelpers` assertions are bundled into one block, which weakens failure isolation and diverges from the workspace testing convention for table-driven subtests. This is a test-structure issue rather than a product bug, but it is a legitimate scoped cleanup request. +- Fix approach: Convert the block into `t.Run("Should ...")` table-driven subtests with one assertion per case while preserving the current expectations. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_015.md b/.compozy/tasks/kb-refac/reviews-001/issue_015.md new file mode 100644 index 000000000..b9bc36ea0 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_015.md @@ -0,0 +1,24 @@ +--- +status: resolved +file: internal/session/hooks.go +line: 65 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4090986708,nitpick_hash:07c62188c14c +review_hash: 07c62188c14c +source_review_id: "4090986708" +source_review_submitted_at: "2026-04-10T16:14:03Z" +--- + +# Issue 015: Consider adding compile-time interface verification. +## Review Comment + +The accessor methods correctly return no-op implementations when fields are nil. Per coding guidelines, consider adding compile-time interface verification for the no-op types to catch signature mismatches early. + +As per coding guidelines: "Use compile-time interface verification: `var _ Interface = (*Type)(nil)`". + +## Triage + +- Decision: `VALID` +- Reasoning: The no-op hook implementations satisfy the grouped interfaces by construction, but there is no compile-time verification to catch signature drift if one interface changes later. +- Fix approach: Add interface assertion lines for the no-op hook implementations so mismatches fail at compile time. 
diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_016.md b/.compozy/tasks/kb-refac/reviews-001/issue_016.md new file mode 100644 index 000000000..2dc4e6a0f --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_016.md @@ -0,0 +1,193 @@ +--- +status: resolved +file: internal/skills/registry_workspace_cache.go +line: 51 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrX_,comment:PRRC_kwDOR5y4QM62twde +--- + +# Issue 016: _⚠️ Potential issue_ | _🟡 Minor_ +## Review Comment + +_⚠️ Potential issue_ | _🟡 Minor_ + +
+🧩 Analysis chain + +🏁 Script executed: + +```shell +#!/bin/bash +set -euo pipefail + +echo "workspaceCacheKey callsites:" +rg -n --type=go '\bworkspaceCacheKey\s*\(' -C2 + +echo +echo "ResolvedWorkspace constructors/usages:" +rg -n --type=go 'ResolvedWorkspace\s*{' -C4 + +echo +echo "Assignments/accesses for ID/RootDir on ResolvedWorkspace:" +rg -n --type=go '\.(ID|RootDir)\b' -C2 +``` + +Repository: compozy/agh + +Length of output: 1585 + +--- + +🏁 Script executed: + +```shell +# Read the full workspaceCacheKey function +sed -n '113,135p' internal/skills/registry_workspace_cache.go + +# Also check the context around line 48 and 151 +echo "=== Context at line 48 (cache read) ===" +sed -n '43,60p' internal/skills/registry_workspace_cache.go + +echo +echo "=== Context around line 151 (cache write) ===" +sed -n '140,165p' internal/skills/registry.go + +echo +echo "=== ResolvedWorkspace struct definition ===" +rg -n --type=go 'type ResolvedWorkspace struct' -A10 +``` + +Repository: compozy/agh + +Length of output: 2364 + +--- + +🏁 Script executed: + +```shell +# Check where ResolvedWorkspace is created/populated +echo "=== ResolvedWorkspace creation ===" +rg -n --type=go 'ResolvedWorkspace\s*{' -A5 | head -60 + +echo +echo "=== Check if ID/RootDir are set during resolution ===" +rg -n --type=go '\.ID\s*=' -B2 -A2 | grep -A2 -B2 'Resolved' + +echo +echo "=== Check workspace.Workspace struct for ID/RootDir ===" +rg -n --type=go 'type Workspace struct' -A15 +``` + +Repository: compozy/agh + +Length of output: 1289 + +--- + +🏁 Script executed: + +```shell +# Search for ResolvedWorkspace instantiation/return +echo "=== ResolvedWorkspace assignments/returns ===" +rg -n --type=go 'ResolvedWorkspace' | grep -E '(return|:=|=)' | head -30 + +echo +echo "=== Resolver interface/implementations ===" +rg -n --type=go 'type.*Resolver' -A5 | head -40 + +echo +echo "=== workspaceLoadFromResolved function (line 151 caller) ===" +rg -n --type=go 'func.*workspaceLoadFromResolved' -A20 
+``` + +Repository: compozy/agh + +Length of output: 7332 + +--- + +🏁 Script executed: + +```shell +# Check buildResolvedWorkspace function +echo "=== buildResolvedWorkspace function ===" +sed -n '236,280p' internal/workspace/resolver.go + +echo +echo "=== Check where workspaceSkillTargetLocked is called ===" +rg -n --type=go 'workspaceSkillTargetLocked' -B3 -A3 + +echo +echo "=== Check if workspace ID/RootDir are validated before use ===" +rg -n --type=go 'if.*ID.*==' -A2 -B2 | head -40 +``` + +Repository: compozy/agh + +Length of output: 5042 + +--- + +🏁 Script executed: + +```shell +# Check full workspaceSkillTargetLocked function signature +sed -n '43,60p' internal/skills/registry_workspace_cache.go + +# Check if ResolvedWorkspace.Skills is used elsewhere in the same file +echo "=== ResolvedWorkspace.Skills usage in registry_workspace_cache.go ===" +rg -n --type=go '\.Skills' internal/skills/registry_workspace_cache.go -B2 -A2 + +# Check workspaceLoad structure +echo "=== workspaceLoad struct ===" +rg -n --type=go 'type workspaceLoad struct' -A8 + +# Check how cache is populated vs accessed +echo "=== Cache population pattern ===" +sed -n '140,180p' internal/skills/registry.go +``` + +Repository: compozy/agh + +Length of output: 2656 + +--- + + + +
+ +**Derive cache key from Skills in read path to match write path.** + +At line 48, `workspaceCacheKey(*resolved, nil)` discards the workspace's Skills. When a workspace has empty `ID` and `RootDir`, the write path (line 151) generates a key using resolved skills paths, but the read path returns an empty key and skips cache lookup. This creates a cache miss for valid entries. + +Extract `resolved.Skills` into paths and pass them to `workspaceCacheKey` at line 48 to ensure read and write paths generate identical cache keys. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/skills/registry_workspace_cache.go` around lines 43 - 51, In +workspaceSkillTargetLocked, the read path calls workspaceCacheKey(*resolved, +nil) and drops resolved.Skills causing cache misses; modify +workspaceSkillTargetLocked to extract the skill paths from resolved.Skills +(e.g., map to []string or the same shape used by the write path) and pass those +paths as the second argument to workspaceCacheKey so the read-key generation +matches the write-path key generation (ensure you use the same transformation of +resolved.Skills that the write path uses). +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `workspaceSkillTargetLocked` computes the read-side cache key with `workspaceCacheKey(*resolved, nil)`, which discards resolver-provided skill paths. For workspaces that have neither `ID` nor `RootDir`, that makes valid cache entries unreachable even though the write path keys them by workspace skill paths. +- Fix approach: Derive the same `workspaceSkillPath` slice shape from `resolved.Skills` for the read path and add a regression test for skill-only workspace cache keys. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_017.md b/.compozy/tasks/kb-refac/reviews-001/issue_017.md new file mode 100644 index 000000000..1e2fdd139 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_017.md @@ -0,0 +1,90 @@ +--- +status: resolved +file: internal/skills/registry_workspace_cache.go +line: 75 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrYB,comment:PRRC_kwDOR5y4QM62twdh +--- + +# Issue 017: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Wrap propagated errors with local context.** + +At Line 69 and Line 74, errors are returned directly. Please wrap them so failures in workspace loading are diagnosable. + + +
+Suggested fix + +```diff + for _, skillPath := range resolved.Skills { + if err := checkRegistryContext(ctx); err != nil { +- return workspaceLoad{}, err ++ return workspaceLoad{}, fmt.Errorf("skills: check registry context while loading workspace skills: %w", err) + } + + source, include, err := skillSourceFromWorkspacePath(skillPath.Source) + if err != nil { +- return workspaceLoad{}, err ++ return workspaceLoad{}, fmt.Errorf("skills: resolve workspace skill source %q: %w", skillPath.Source, err) + } +``` +
+As per coding guidelines, `Use explicit error returns with wrapped context: fmt.Errorf("context: %w", err)`. + + + +
+📝 Committable suggestion + +> ‼️ **IMPORTANT** +> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. + +```suggestion + if err := checkRegistryContext(ctx); err != nil { + return workspaceLoad{}, fmt.Errorf("skills: check registry context while loading workspace skills: %w", err) + } + + source, include, err := skillSourceFromWorkspacePath(skillPath.Source) + if err != nil { + return workspaceLoad{}, fmt.Errorf("skills: resolve workspace skill source %q: %w", skillPath.Source, err) + } +``` + +
+ + + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/skills/registry_workspace_cache.go` around lines 68 - 75, The +returns from checkRegistryContext(ctx) and +skillSourceFromWorkspacePath(skillPath.Source) should be wrapped with local +context before propagating so failures in workspace loading are diagnosable; +replace direct returns like `return workspaceLoad{}, err` with wrapped errors +using fmt.Errorf to add context (e.g., "checking registry context failed: %w") +when handling the error from checkRegistryContext and similarly add context for +skillSourceFromWorkspacePath (e.g., "determining skill source from workspace +path failed: %w"), keeping references to the existing symbols +checkRegistryContext, skillSourceFromWorkspacePath, and the workspaceLoad return +value. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `workspaceLoadFromResolved` returns raw errors from `checkRegistryContext` and `skillSourceFromWorkspacePath`, which obscures whether the failure was cancellation or workspace-skill source resolution. +- Fix approach: Wrap both errors with local context so workspace loading failures are diagnosable. diff --git a/.compozy/tasks/kb-refac/reviews-001/issue_018.md b/.compozy/tasks/kb-refac/reviews-001/issue_018.md new file mode 100644 index 000000000..8df40e7d6 --- /dev/null +++ b/.compozy/tasks/kb-refac/reviews-001/issue_018.md @@ -0,0 +1,146 @@ +--- +status: resolved +file: internal/sse/decode.go +line: 52 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM56LrYI,comment:PRRC_kwDOR5y4QM62twdp +--- + +# Issue 018: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +
+🧩 Analysis chain + +🏁 Script executed: + +```shell +#!/bin/bash +rg -n -C2 '\bsse\.Decode\s*\(' --type go +rg -n -C2 'func Decode\(ctx context\.Context, body io\.Reader, handler Handler\) error' --type go +``` + +Repository: compozy/agh + +Length of output: 704 + +--- + +🏁 Script executed: + +```shell +#!/bin/bash +# Get the full Decode function implementation +rg -A 100 'func Decode\(ctx context\.Context, body io\.Reader, handler Handler\) error' internal/sse/decode.go + +# Check if other functions in the package validate nil parameters +rg -B 2 -A 5 'if.*== nil' internal/sse/ --type go + +# Check the imports and error handling patterns in the package +head -20 internal/sse/decode.go +``` + +Repository: compozy/agh + +Length of output: 1649 + +--- + + + +
+ +**Add nil parameter validation to prevent panics.** + +The `Decode` function calls `ctx.Err()` and `handler(event)` without validating inputs. A nil `ctx` interface or nil `handler` function will panic, violating the guideline against panics in production paths. Add guards at entry. + +
+Proposed fix + +```diff + func Decode(ctx context.Context, body io.Reader, handler Handler) error { ++ if ctx == nil { ++ return fmt.Errorf("sse: context is required") ++ } ++ if handler == nil { ++ return fmt.Errorf("sse: handler is required") ++ } ++ + scanner := bufio.NewScanner(body) + scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes) +``` +
+ + + +
+📝 Committable suggestion + +> ‼️ **IMPORTANT** +> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. + +```suggestion +func Decode(ctx context.Context, body io.Reader, handler Handler) error { + if ctx == nil { + return fmt.Errorf("sse: context is required") + } + if handler == nil { + return fmt.Errorf("sse: handler is required") + } + + scanner := bufio.NewScanner(body) + scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes) + + event := Event{} + dataLines := make([]string, 0, 4) + emit := func() error { + if event.ID == "" && event.Event == "" && len(dataLines) == 0 { + return nil + } + if len(dataLines) > 0 { + event.Data = json.RawMessage(strings.Join(dataLines, "\n")) + } + err := handler(event) + event = Event{} + dataLines = dataLines[:0] + return err + } + + for scanner.Scan() { + if err := ctx.Err(); err != nil { + return err +``` + +
+ + + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/sse/decode.go` around lines 31 - 52, The Decode function must guard +against nil inputs to avoid panics: at the start of Decode check that ctx != nil +and handler != nil (and optionally body != nil) and return a clear error (e.g., +errors.New or fmt.Errorf) if any are nil; this ensures subsequent calls to +ctx.Err() and handler(event) (and the emit closure) are safe. Update the Decode +function signature's entry checks to validate ctx and handler, add appropriate +error returns, and adjust imports if you use errors/fmt. Ensure the emit closure +and the loop assume handler is non-nil after these guards. +``` + +
+ + + + + +## Triage + +- Decision: `VALID` +- Reasoning: `Decode` dereferences `ctx` and calls `handler(event)` without entry validation. A nil context or nil handler function will panic in production code. +- Fix approach: Guard required inputs up front and add SSE decoder tests that cover nil arguments. diff --git a/bun.lock b/bun.lock index 5d5567759..c57a1fa80 100644 --- a/bun.lock +++ b/bun.lock @@ -39,23 +39,17 @@ "clsx": "^2.1.1", "cmdk": "^1.1.1", "date-fns": "^4.1.0", - "embla-carousel-react": "^8.6.0", - "input-otp": "^1.4.2", "lucide-react": "^1.7.0", "next-themes": "^0.4.6", "react": "^19.2.0", - "react-day-picker": "^9.14.0", "react-dom": "^19.2.0", "react-markdown": "^10.1.0", - "react-resizable-panels": "^4.9.0", "react-syntax-highlighter": "^16.1.1", - "recharts": "3.8.0", "remark-gfm": "^4.0.1", "sonner": "^2.0.7", "tailwind-merge": "^3.5.0", "tailwindcss": "^4.2.1", "tw-animate-css": "^1.4.0", - "vaul": "^1.1.2", "zod": "^4.3.0", "zustand": "^5.0.11", }, @@ -213,8 +207,6 @@ "@csstools/css-tokenizer": ["@csstools/css-tokenizer@4.0.0", "", {}, "sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA=="], - "@date-fns/tz": ["@date-fns/tz@1.4.1", "", {}, "sha512-P5LUNhtbj6YfI3iJjw5EL9eUAG6OitD0W3fWQcpQjDRc/QIsL0tRNuO1PcDvPccWL1fSTXXdE1ds+l95DV/OFA=="], - "@dotenvx/dotenvx": ["@dotenvx/dotenvx@1.59.1", "", { "dependencies": { "commander": "^11.1.0", "dotenv": "^17.2.1", "eciesjs": "^0.4.10", "execa": "^5.1.1", "fdir": "^6.2.0", "ignore": "^5.3.0", "object-treeify": "1.1.33", "picomatch": "^4.0.2", "which": "^4.0.0" }, "bin": { "dotenvx": "src/cli/dotenvx.js" } }, "sha512-Qg+meC+XFxliuVSDlEPkKnaUjdaJKK6FNx/Wwl2UxhQR8pyPIuLhMavsF7ePdB9qFZUWV1jEK3ckbJir/WmF4w=="], "@ecies/ciphers": ["@ecies/ciphers@0.2.6", "", { "peerDependencies": { "@noble/ciphers": "^1.0.0" } }, "sha512-patgsRPKGkhhoBjETV4XxD0En4ui5fbX0hzayqI3M8tvNMGUoUvmyYAIWwlxBc1KX5cturfqByYdj5bYGRpN9g=="], @@ -455,8 +447,6 @@ 
"@radix-ui/react-use-layout-effect": ["@radix-ui/react-use-layout-effect@1.1.1", "", { "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ=="], - "@reduxjs/toolkit": ["@reduxjs/toolkit@2.11.2", "", { "dependencies": { "@standard-schema/spec": "^1.0.0", "@standard-schema/utils": "^0.3.0", "immer": "^11.0.0", "redux": "^5.0.1", "redux-thunk": "^3.1.0", "reselect": "^5.1.0" }, "peerDependencies": { "react": "^16.9.0 || ^17.0.0 || ^18 || ^19", "react-redux": "^7.2.1 || ^8.1.3 || ^9.0.0" }, "optionalPeers": ["react", "react-redux"] }, "sha512-Kd6kAHTA6/nUpp8mySPqj3en3dm0tdMIgbttnQ1xFMVpufoj+ADi8pXLBsd4xzTRHQa7t/Jv8W5UnCuW4kuWMQ=="], - "@rolldown/binding-android-arm64": ["@rolldown/binding-android-arm64@1.0.0-rc.12", "", { "os": "android", "cpu": "arm64" }, "sha512-pv1y2Fv0JybcykuiiD3qBOBdz6RteYojRFY1d+b95WVuzx211CRh+ytI/+9iVyWQ6koTh5dawe4S/yRfOFjgaA=="], "@rolldown/binding-darwin-arm64": ["@rolldown/binding-darwin-arm64@1.0.0-rc.12", "", { "os": "darwin", "cpu": "arm64" }, "sha512-cFYr6zTG/3PXXF3pUO+umXxt1wkRK/0AYT8lDwuqvRC+LuKYWSAQAQZjCWDQpAH172ZV6ieYrNnFzVVcnSflAg=="], @@ -501,8 +491,6 @@ "@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="], - "@standard-schema/utils": ["@standard-schema/utils@0.3.0", "", {}, "sha512-e7Mew686owMaPJVNNLs55PUvgz371nKgwsc4vxE49zsODpJEnxgxRo2y/OKrqueavXgZNMDVj3DdHFlaSAeU8g=="], - "@storybook/addon-a11y": ["@storybook/addon-a11y@10.3.4", "", { "dependencies": { "@storybook/global": "^5.0.0", "axe-core": "^4.2.0" }, "peerDependencies": { "storybook": "^10.3.4" } }, "sha512-TylBS2+MUPRfgzBKiygL1JoUBnTqEKo5oCEfjHneJZKzYE1UNgdMdk/fiyanaGKTZBKBxWbShxZhT2gLs8kqMA=="], "@storybook/addon-docs": ["@storybook/addon-docs@10.3.4", "", { "dependencies": { 
"@mdx-js/react": "^3.0.0", "@storybook/csf-plugin": "10.3.4", "@storybook/icons": "^2.0.1", "@storybook/react-dom-shim": "10.3.4", "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", "ts-dedent": "^2.0.0" }, "peerDependencies": { "storybook": "^10.3.4" } }, "sha512-ohS8fX8UIP3LN6+mDZJLCDS4Qd2rsmGwes6V6fD0sbLOmIyCVY5y68r6NHMMGJKFRwadDQOmtOt8Vc6snExrIQ=="], @@ -523,8 +511,6 @@ "@storybook/react-vite": ["@storybook/react-vite@10.3.4", "", { "dependencies": { "@joshwooding/vite-plugin-react-docgen-typescript": "^0.7.0", "@rollup/pluginutils": "^5.0.2", "@storybook/builder-vite": "10.3.4", "@storybook/react": "10.3.4", "empathic": "^2.0.0", "magic-string": "^0.30.0", "react-docgen": "^8.0.0", "resolve": "^1.22.8", "tsconfig-paths": "^4.2.0" }, "peerDependencies": { "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", "storybook": "^10.3.4", "vite": "^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0" } }, "sha512-xaMt7NdvlAb+CwXn5TOiluQ+0WkkMN3mZhCThocpblWGoyfmHH7bgQ5ZwzT+IIp8DGOsAi/HkNmSyS7Z8HRLJg=="], - "@tabby_ai/hijri-converter": ["@tabby_ai/hijri-converter@1.0.5", "", {}, "sha512-r5bClKrcIusDoo049dSL8CawnHR6mRdDwhlQuIgZRNty68q0x8k3Lf1BtPAMxRf/GgnHBnIO4ujd3+GQdLWzxQ=="], - "@tailwindcss/node": ["@tailwindcss/node@4.2.2", "", { "dependencies": { "@jridgewell/remapping": "^2.3.5", "enhanced-resolve": "^5.19.0", "jiti": "^2.6.1", "lightningcss": "1.32.0", "magic-string": "^0.30.21", "source-map-js": "^1.2.1", "tailwindcss": "4.2.2" } }, "sha512-pXS+wJ2gZpVXqFaUEjojq7jzMpTGf8rU6ipJz5ovJV6PUGmlJ+jvIwGrzdHdQ80Sg+wmQxUFuoW1UAAwHNEdFA=="], "@tailwindcss/oxide": ["@tailwindcss/oxide@4.2.2", "", { "optionalDependencies": { "@tailwindcss/oxide-android-arm64": "4.2.2", "@tailwindcss/oxide-darwin-arm64": "4.2.2", "@tailwindcss/oxide-darwin-x64": "4.2.2", "@tailwindcss/oxide-freebsd-x64": "4.2.2", "@tailwindcss/oxide-linux-arm-gnueabihf": "4.2.2", 
"@tailwindcss/oxide-linux-arm64-gnu": "4.2.2", "@tailwindcss/oxide-linux-arm64-musl": "4.2.2", "@tailwindcss/oxide-linux-x64-gnu": "4.2.2", "@tailwindcss/oxide-linux-x64-musl": "4.2.2", "@tailwindcss/oxide-wasm32-wasi": "4.2.2", "@tailwindcss/oxide-win32-arm64-msvc": "4.2.2", "@tailwindcss/oxide-win32-x64-msvc": "4.2.2" } }, "sha512-qEUA07+E5kehxYp9BVMpq9E8vnJuBHfJEC0vPC5e7iL/hw7HR61aDKoVoKzrG+QKp56vhNZe4qwkRmMC0zDLvg=="], @@ -633,24 +619,6 @@ "@types/chai": ["@types/chai@5.2.3", "", { "dependencies": { "@types/deep-eql": "*", "assertion-error": "^2.0.1" } }, "sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA=="], - "@types/d3-array": ["@types/d3-array@3.2.2", "", {}, "sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw=="], - - "@types/d3-color": ["@types/d3-color@3.1.3", "", {}, "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A=="], - - "@types/d3-ease": ["@types/d3-ease@3.0.2", "", {}, "sha512-NcV1JjO5oDzoK26oMzbILE6HW7uVXOHLQvHshBUW4UMdZGfiY6v5BeQwh9a9tCzv+CeefZQHJt5SRgK154RtiA=="], - - "@types/d3-interpolate": ["@types/d3-interpolate@3.0.4", "", { "dependencies": { "@types/d3-color": "*" } }, "sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA=="], - - "@types/d3-path": ["@types/d3-path@3.1.1", "", {}, "sha512-VMZBYyQvbGmWyWVea0EHs/BwLgxc+MKi1zLDCONksozI4YJMcTt8ZEuIR4Sb1MMTE8MMW49v0IwI5+b7RmfWlg=="], - - "@types/d3-scale": ["@types/d3-scale@4.0.9", "", { "dependencies": { "@types/d3-time": "*" } }, "sha512-dLmtwB8zkAeO/juAMfnV+sItKjlsw2lKdZVVy6LRr0cBmegxSABiLEpGVmSJJ8O08i4+sGR6qQtb6WtuwJdvVw=="], - - "@types/d3-shape": ["@types/d3-shape@3.1.8", "", { "dependencies": { "@types/d3-path": "*" } }, "sha512-lae0iWfcDeR7qt7rA88BNiqdvPS5pFVPpo5OfjElwNaT2yyekbM0C9vK+yqBqEmHr6lDkRnYNoTBYlAgJa7a4w=="], - - "@types/d3-time": ["@types/d3-time@3.0.4", "", {}, 
"sha512-yuzZug1nkAAaBlBBikKZTgzCeA+k1uy4ZFwWANOfKw5z5LRhV0gNA7gNkKm7HoK+HRN0wX3EkxGk0fpbWhmB7g=="], - - "@types/d3-timer": ["@types/d3-timer@3.0.2", "", {}, "sha512-Ps3T8E8dZDam6fUyNiMkekK3XUsaUEik+idO9/YjPtfj2qruF8tFBXS7XhtE4iIXBLxhmLjP3SXpLhVf21I9Lw=="], - "@types/debug": ["@types/debug@4.1.13", "", { "dependencies": { "@types/ms": "*" } }, "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw=="], "@types/deep-eql": ["@types/deep-eql@4.0.2", "", {}, "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw=="], @@ -685,8 +653,6 @@ "@types/unist": ["@types/unist@3.0.3", "", {}, "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="], - "@types/use-sync-external-store": ["@types/use-sync-external-store@0.0.6", "", {}, "sha512-zFDAD+tlpf2r4asuHEj0XH6pY6i0g5NeAHPn+15wk3BV6JA69eERFXC1gyGThDkVa1zCyKr5jox1+2LbV/AMLg=="], - "@types/validate-npm-package-name": ["@types/validate-npm-package-name@4.0.2", "", {}, "sha512-lrpDziQipxCEeK5kWxvljWYhUvOiB2A9izZd9B2AFarYAkqZshb4lPbRs7zKEic6eGtH8V/2qJW+dPp9OtF6bw=="], "@typescript/native-preview": ["@typescript/native-preview@7.0.0-dev.20260403.1", "", { "optionalDependencies": { "@typescript/native-preview-darwin-arm64": "7.0.0-dev.20260403.1", "@typescript/native-preview-darwin-x64": "7.0.0-dev.20260403.1", "@typescript/native-preview-linux-arm": "7.0.0-dev.20260403.1", "@typescript/native-preview-linux-arm64": "7.0.0-dev.20260403.1", "@typescript/native-preview-linux-x64": "7.0.0-dev.20260403.1", "@typescript/native-preview-win32-arm64": "7.0.0-dev.20260403.1", "@typescript/native-preview-win32-x64": "7.0.0-dev.20260403.1" }, "bin": { "tsgo": "bin/tsgo.js" } }, "sha512-iy+90WygcMl3IPnvI3RLMfbieOCT5OW/RMPnnk+H1kY8AcqQGXqGZfrasUKH+/zfmtM+W+gsDs3nSNk2ScKy4g=="], @@ -883,42 +849,16 @@ "csstype": ["csstype@3.2.3", "", {}, 
"sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ=="], - "d3-array": ["d3-array@3.2.4", "", { "dependencies": { "internmap": "1 - 2" } }, "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg=="], - - "d3-color": ["d3-color@3.1.0", "", {}, "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA=="], - - "d3-ease": ["d3-ease@3.0.1", "", {}, "sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w=="], - - "d3-format": ["d3-format@3.1.2", "", {}, "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg=="], - - "d3-interpolate": ["d3-interpolate@3.0.1", "", { "dependencies": { "d3-color": "1 - 3" } }, "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g=="], - - "d3-path": ["d3-path@3.1.0", "", {}, "sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ=="], - - "d3-scale": ["d3-scale@4.0.2", "", { "dependencies": { "d3-array": "2.10.0 - 3", "d3-format": "1 - 3", "d3-interpolate": "1.2.0 - 3", "d3-time": "2.1.1 - 3", "d3-time-format": "2 - 4" } }, "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ=="], - - "d3-shape": ["d3-shape@3.2.0", "", { "dependencies": { "d3-path": "^3.1.0" } }, "sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA=="], - - "d3-time": ["d3-time@3.1.0", "", { "dependencies": { "d3-array": "2 - 3" } }, "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q=="], - - "d3-time-format": ["d3-time-format@4.1.0", "", { "dependencies": { "d3-time": "1 - 3" } }, "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg=="], - - "d3-timer": ["d3-timer@3.0.1", "", {}, 
"sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA=="], - "data-uri-to-buffer": ["data-uri-to-buffer@4.0.1", "", {}, "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A=="], "data-urls": ["data-urls@7.0.0", "", { "dependencies": { "whatwg-mimetype": "^5.0.0", "whatwg-url": "^16.0.0" } }, "sha512-23XHcCF+coGYevirZceTVD7NdJOqVn+49IHyxgszm+JIiHLoB2TkmPtsYkNWT1pvRSGkc35L6NHs0yHkN2SumA=="], "date-fns": ["date-fns@4.1.0", "", {}, "sha512-Ukq0owbQXxa/U3EGtsdVBkR1w7KOQ5gIBqdH2hkvknzZPYvBxb/aa6E8L7tmjFtkwZBu3UXBbjIgPo/Ez4xaNg=="], - "date-fns-jalali": ["date-fns-jalali@4.1.0-0", "", {}, "sha512-hTIP/z+t+qKwBDcmmsnmjWTduxCg+5KfdqWQvb2X/8C9+knYY6epN/pfxdDuyVlSVeFz0sM5eEfwIUQ70U4ckg=="], - "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="], "decimal.js": ["decimal.js@10.6.0", "", {}, "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg=="], - "decimal.js-light": ["decimal.js-light@2.5.1", "", {}, "sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg=="], - "decode-named-character-reference": ["decode-named-character-reference@1.3.0", "", { "dependencies": { "character-entities": "^2.0.0" } }, "sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q=="], "dedent": ["dedent@1.7.2", "", { "peerDependencies": { "babel-plugin-macros": "^3.1.0" }, "optionalPeers": ["babel-plugin-macros"] }, "sha512-WzMx3mW98SN+zn3hgemf4OzdmyNhhhKz5Ay0pUfQiMQ3e1g+xmTJWp/pKdwKVXhdSkAEGIIzqeuWrL3mV/AXbA=="], @@ -961,12 +901,6 @@ "electron-to-chromium": ["electron-to-chromium@1.5.331", "", {}, "sha512-IbxXrsTlD3hRodkLnbxAPP4OuJYdWCeM3IOdT+CpcMoIwIoDfCmRpEtSPfwBXxVkg9xmBeY7Lz2Eo2TDn/HC3Q=="], - "embla-carousel": ["embla-carousel@8.6.0", "", {}, 
"sha512-SjWyZBHJPbqxHOzckOfo8lHisEaJWmwd23XppYFYVh10bU66/Pn5tkVkbkCMZVdbUE5eTCI2nD8OyIP4Z+uwkA=="], - - "embla-carousel-react": ["embla-carousel-react@8.6.0", "", { "dependencies": { "embla-carousel": "8.6.0", "embla-carousel-reactive-utils": "8.6.0" }, "peerDependencies": { "react": "^16.8.0 || ^17.0.1 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" } }, "sha512-0/PjqU7geVmo6F734pmPqpyHqiM99olvyecY7zdweCw+6tKEXnrE90pBiBbMMU8s5tICemzpQ3hi5EpxzGW+JA=="], - - "embla-carousel-reactive-utils": ["embla-carousel-reactive-utils@8.6.0", "", { "peerDependencies": { "embla-carousel": "8.6.0" } }, "sha512-fMVUDUEx0/uIEDM0Mz3dHznDhfX+znCCDCeIophYb1QGVM7YThSWX+wz11zlYwWFOr74b4QLGg0hrGPJeG2s4A=="], - "emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], "empathic": ["empathic@2.0.0", "", {}, "sha512-i6UzDscO/XfAcNYD75CfICkmfLedpyPDdozrLMmQc5ORaQcdMoc21OnlEylMIqI7U8eniKrPMxxtj8k0vhmJhA=="], @@ -991,8 +925,6 @@ "es-object-atoms": ["es-object-atoms@1.1.1", "", { "dependencies": { "es-errors": "^1.3.0" } }, "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA=="], - "es-toolkit": ["es-toolkit@1.45.1", "", {}, "sha512-/jhoOj/Fx+A+IIyDNOvO3TItGmlMKhtX8ISAHKE90c4b/k1tqaqEZ+uUqfpU8DMnW5cgNJv606zS55jGvza0Xw=="], - "esbuild": ["esbuild@0.27.7", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.27.7", "@esbuild/android-arm": "0.27.7", "@esbuild/android-arm64": "0.27.7", "@esbuild/android-x64": "0.27.7", "@esbuild/darwin-arm64": "0.27.7", "@esbuild/darwin-x64": "0.27.7", "@esbuild/freebsd-arm64": "0.27.7", "@esbuild/freebsd-x64": "0.27.7", "@esbuild/linux-arm": "0.27.7", "@esbuild/linux-arm64": "0.27.7", "@esbuild/linux-ia32": "0.27.7", "@esbuild/linux-loong64": "0.27.7", "@esbuild/linux-mips64el": "0.27.7", "@esbuild/linux-ppc64": "0.27.7", "@esbuild/linux-riscv64": "0.27.7", "@esbuild/linux-s390x": "0.27.7", "@esbuild/linux-x64": "0.27.7", 
"@esbuild/netbsd-arm64": "0.27.7", "@esbuild/netbsd-x64": "0.27.7", "@esbuild/openbsd-arm64": "0.27.7", "@esbuild/openbsd-x64": "0.27.7", "@esbuild/openharmony-arm64": "0.27.7", "@esbuild/sunos-x64": "0.27.7", "@esbuild/win32-arm64": "0.27.7", "@esbuild/win32-ia32": "0.27.7", "@esbuild/win32-x64": "0.27.7" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w=="], "escalade": ["escalade@3.2.0", "", {}, "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA=="], @@ -1151,10 +1083,6 @@ "inline-style-parser": ["inline-style-parser@0.2.7", "", {}, "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA=="], - "input-otp": ["input-otp@1.4.2", "", { "peerDependencies": { "react": "^16.8 || ^17.0 || ^18.0 || ^19.0.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0.0 || ^19.0.0-rc" } }, "sha512-l3jWwYNvrEa6NTCt7BECfCm48GvwuZzkoeG3gBL2w4CHeOXW3eKFmf9UNYkNfYc3mxMrthMnxjIE07MT0zLBQA=="], - - "internmap": ["internmap@2.0.3", "", {}, "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg=="], - "ip-address": ["ip-address@10.1.0", "", {}, "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q=="], "ipaddr.js": ["ipaddr.js@1.9.1", "", {}, "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="], @@ -1543,8 +1471,6 @@ "react": ["react@19.2.4", "", {}, "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ=="], - "react-day-picker": ["react-day-picker@9.14.0", "", { "dependencies": { "@date-fns/tz": "^1.4.1", "@tabby_ai/hijri-converter": "1.0.5", "date-fns": "^4.1.0", "date-fns-jalali": "4.1.0-0" }, "peerDependencies": { "react": ">=16.8.0" } }, "sha512-tBaoDWjPwe0M5pGrum4H0SR6Lyk+BO9oHnp9JbKpGKW2mlraNPgP9BMfsg5pWpwrssARmeqk7YBl2oXutZTaHA=="], - 
"react-docgen": ["react-docgen@8.0.3", "", { "dependencies": { "@babel/core": "^7.28.0", "@babel/traverse": "^7.28.0", "@babel/types": "^7.28.2", "@types/babel__core": "^7.20.5", "@types/babel__traverse": "^7.20.7", "@types/doctrine": "^0.0.9", "@types/resolve": "^1.20.2", "doctrine": "^3.0.0", "resolve": "^1.22.1", "strip-indent": "^4.0.0" } }, "sha512-aEZ9qP+/M+58x2qgfSFEWH1BxLyHe5+qkLNJOZQb5iGS017jpbRnoKhNRrXPeA6RfBrZO5wZrT9DMC1UqE1f1w=="], "react-docgen-typescript": ["react-docgen-typescript@2.4.0", "", { "peerDependencies": { "typescript": ">= 4.3.x" } }, "sha512-ZtAp5XTO5HRzQctjPU0ybY0RRCQO19X/8fxn3w7y2VVTUbGHDKULPTL4ky3vB05euSgG5NpALhEhDPvQ56wvXg=="], @@ -1555,14 +1481,10 @@ "react-markdown": ["react-markdown@10.1.0", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "hast-util-to-jsx-runtime": "^2.0.0", "html-url-attributes": "^3.0.0", "mdast-util-to-hast": "^13.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.0.0", "unified": "^11.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" }, "peerDependencies": { "@types/react": ">=18", "react": ">=18" } }, "sha512-qKxVopLT/TyA6BX3Ue5NwabOsAzm0Q7kAPwq6L+wWDwisYs7R8vZ0nRXqq6rkueboxpkjvLGU9fWifiX/ZZFxQ=="], - "react-redux": ["react-redux@9.2.0", "", { "dependencies": { "@types/use-sync-external-store": "^0.0.6", "use-sync-external-store": "^1.4.0" }, "peerDependencies": { "@types/react": "^18.2.25 || ^19", "react": "^18.0 || ^19", "redux": "^5.0.0" }, "optionalPeers": ["@types/react", "redux"] }, "sha512-ROY9fvHhwOD9ySfrF0wmvu//bKCQ6AeZZq1nJNtbDC+kk5DuSuNX/n6YWYF/SYy7bSba4D4FSz8DJeKY/S/r+g=="], - "react-remove-scroll": ["react-remove-scroll@2.7.2", "", { "dependencies": { "react-remove-scroll-bar": "^2.3.7", "react-style-singleton": "^2.2.3", "tslib": "^2.1.0", "use-callback-ref": "^1.3.3", "use-sidecar": "^1.1.3" }, "peerDependencies": { "@types/react": "*", "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, 
"sha512-Iqb9NjCCTt6Hf+vOdNIZGdTiH1QSqr27H/Ek9sv/a97gfueI/5h1s3yRi1nngzMUaOOToin5dI1dXKdXiF+u0Q=="], "react-remove-scroll-bar": ["react-remove-scroll-bar@2.3.8", "", { "dependencies": { "react-style-singleton": "^2.2.2", "tslib": "^2.0.0" }, "peerDependencies": { "@types/react": "*", "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" }, "optionalPeers": ["@types/react"] }, "sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q=="], - "react-resizable-panels": ["react-resizable-panels@4.9.0", "", { "peerDependencies": { "react": "^18.0.0 || ^19.0.0", "react-dom": "^18.0.0 || ^19.0.0" } }, "sha512-sEl+hA6y9/kxa0aPlrUC+G1lcShAf/PiIjoeC8kWXxa53RfAVplVCIxEl01Nwa4L2iRa5JXBXq1/mI8ch6qOZQ=="], - "react-style-singleton": ["react-style-singleton@2.2.3", "", { "dependencies": { "get-nonce": "^1.0.0", "tslib": "^2.0.0" }, "peerDependencies": { "@types/react": "*", "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ=="], "react-syntax-highlighter": ["react-syntax-highlighter@16.1.1", "", { "dependencies": { "@babel/runtime": "^7.28.4", "highlight.js": "^10.4.1", "highlightjs-vue": "^1.0.0", "lowlight": "^1.17.0", "prismjs": "^1.30.0", "refractor": "^5.0.0" }, "peerDependencies": { "react": ">= 0.14.0" } }, "sha512-PjVawBGy80C6YbC5DDZJeUjBmC7skaoEUdvfFQediQHgCL7aKyVHe57SaJGfQsloGDac+gCpTfRdtxzWWKmCXA=="], @@ -1571,14 +1493,8 @@ "recast": ["recast@0.23.11", "", { "dependencies": { "ast-types": "^0.16.1", "esprima": "~4.0.0", "source-map": "~0.6.1", "tiny-invariant": "^1.3.3", "tslib": "^2.0.1" } }, "sha512-YTUo+Flmw4ZXiWfQKGcwwc11KnoRAYgzAE2E7mXKCjSviTKShtxBsN6YUUBB2gtaBzKzeKunxhUwNHQuRryhWA=="], - "recharts": ["recharts@3.8.0", "", { "dependencies": { "@reduxjs/toolkit": "^1.9.0 || 2.x.x", "clsx": "^2.1.1", "decimal.js-light": "^2.5.1", "es-toolkit": "^1.39.3", "eventemitter3": "^5.0.1", 
"immer": "^10.1.1", "react-redux": "8.x.x || 9.x.x", "reselect": "5.1.1", "tiny-invariant": "^1.3.3", "use-sync-external-store": "^1.2.2", "victory-vendor": "^37.0.2" }, "peerDependencies": { "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", "react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", "react-is": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-Z/m38DX3L73ExO4Tpc9/iZWHmHnlzWG4njQbxsF5aSjwqmHNDDIm0rdEBArkwsBvR8U6EirlEHiQNYWCVh9sGQ=="], - "redent": ["redent@3.0.0", "", { "dependencies": { "indent-string": "^4.0.0", "strip-indent": "^3.0.0" } }, "sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg=="], - "redux": ["redux@5.0.1", "", {}, "sha512-M9/ELqF6fy8FwmkpnF0S3YKOqMyoWJ4+CS5Efg2ct3oY9daQvd/Pc71FpGZsVsbl3Cpb+IIcjBDUnnyBdQbq4w=="], - - "redux-thunk": ["redux-thunk@3.1.0", "", { "peerDependencies": { "redux": "^5.0.0" } }, "sha512-NW2r5T6ksUKXCabzhL9z+h206HQw/NJkcLm1GPImRQ8IzfXwRGqjVhKJGauHirT0DAuyy6hjdnMZaRoAcy0Klw=="], - "refractor": ["refractor@5.0.0", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/prismjs": "^1.0.0", "hastscript": "^9.0.0", "parse-entities": "^4.0.0" } }, "sha512-QXOrHQF5jOpjjLfiNk5GFnWhRXvxjUVnlFxkeDmewR5sXkr3iM46Zo+CnRR8B+MDVqkULW4EcLVcRBNOPXHosw=="], "remark-gfm": ["remark-gfm@4.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-gfm": "^3.0.0", "micromark-extension-gfm": "^3.0.0", "remark-parse": "^11.0.0", "remark-stringify": "^11.0.0", "unified": "^11.0.0" } }, "sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg=="], @@ -1809,14 +1725,10 @@ "vary": ["vary@1.1.2", "", {}, "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="], - "vaul": ["vaul@1.1.2", "", { "dependencies": { "@radix-ui/react-dialog": "^1.1.1" }, "peerDependencies": { "react": "^16.8 || ^17.0 || ^18.0 || ^19.0.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0.0 || ^19.0.0-rc" } }, 
"sha512-ZFkClGpWyI2WUQjdLJ/BaGuV6AVQiJ3uELGk3OYtP+B6yCO7Cmn9vPFXVJkRaGkOJu3m8bQMgtyzNHixULceQA=="], - "vfile": ["vfile@6.0.3", "", { "dependencies": { "@types/unist": "^3.0.0", "vfile-message": "^4.0.0" } }, "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q=="], "vfile-message": ["vfile-message@4.0.3", "", { "dependencies": { "@types/unist": "^3.0.0", "unist-util-stringify-position": "^4.0.0" } }, "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw=="], - "victory-vendor": ["victory-vendor@37.3.6", "", { "dependencies": { "@types/d3-array": "^3.0.3", "@types/d3-ease": "^3.0.0", "@types/d3-interpolate": "^3.0.1", "@types/d3-scale": "^4.0.2", "@types/d3-shape": "^3.1.0", "@types/d3-time": "^3.0.0", "@types/d3-timer": "^3.0.0", "d3-array": "^3.1.6", "d3-ease": "^3.0.1", "d3-interpolate": "^3.0.1", "d3-scale": "^4.0.2", "d3-shape": "^3.1.0", "d3-time": "^3.0.0", "d3-timer": "^3.0.1" } }, "sha512-SbPDPdDBYp+5MJHhBCAyI7wKM3d5ivekigc2Dk2s7pgbZ9wIgIBYGVw4zGHBml/qTFbexrofXW6Gu4noGxrOwQ=="], - "vite": ["vite@8.0.3", "", { "dependencies": { "lightningcss": "^1.32.0", "picomatch": "^4.0.4", "postcss": "^8.5.8", "rolldown": "1.0.0-rc.12", "tinyglobby": "^0.2.15" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^20.19.0 || >=22.12.0", "@vitejs/devtools": "^0.1.0", "esbuild": "^0.27.0", "jiti": ">=1.21.0", "less": "^4.0.0", "sass": "^1.70.0", "sass-embedded": "^1.70.0", "stylus": ">=0.54.8", "sugarss": "^5.0.0", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["@types/node", "@vitejs/devtools", "esbuild", "jiti", "less", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": { "vite": "bin/vite.js" } }, "sha512-B9ifbFudT1TFhfltfaIPgjo9Z3mDynBTJSUYxTjOQruf/zHH+ezCQKcoqO+h7a9Pw9Nm/OtlXAiGT1axBgwqrQ=="], "vitest": ["vitest@4.1.2", "", { "dependencies": { "@vitest/expect": "4.1.2", "@vitest/mocker": 
"4.1.2", "@vitest/pretty-format": "4.1.2", "@vitest/runner": "4.1.2", "@vitest/snapshot": "4.1.2", "@vitest/spy": "4.1.2", "@vitest/utils": "4.1.2", "es-module-lexer": "^2.0.0", "expect-type": "^1.3.0", "magic-string": "^0.30.21", "obug": "^2.1.1", "pathe": "^2.0.3", "picomatch": "^4.0.3", "std-env": "^4.0.0-rc.1", "tinybench": "^2.9.0", "tinyexec": "^1.0.2", "tinyglobby": "^0.2.15", "tinyrainbow": "^3.1.0", "vite": "^6.0.0 || ^7.0.0 || ^8.0.0", "why-is-node-running": "^2.3.0" }, "peerDependencies": { "@edge-runtime/vm": "*", "@opentelemetry/api": "^1.9.0", "@types/node": "^20.0.0 || ^22.0.0 || >=24.0.0", "@vitest/browser-playwright": "4.1.2", "@vitest/browser-preview": "4.1.2", "@vitest/browser-webdriverio": "4.1.2", "@vitest/ui": "4.1.2", "happy-dom": "*", "jsdom": "*" }, "optionalPeers": ["@edge-runtime/vm", "@opentelemetry/api", "@types/node", "@vitest/browser-playwright", "@vitest/browser-preview", "@vitest/browser-webdriverio", "@vitest/ui", "happy-dom", "jsdom"], "bin": { "vitest": "vitest.mjs" } }, "sha512-xjR1dMTVHlFLh98JE3i/f/WePqJsah4A0FK9cc8Ehp9Udk0AZk6ccpIZhh1qJ/yxVWRZ+Q54ocnD8TXmkhspGg=="], @@ -1897,8 +1809,6 @@ "@radix-ui/react-primitive/@radix-ui/react-slot": ["@radix-ui/react-slot@1.2.4", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2" }, "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-Jl+bCv8HxKnlTLVrcDE8zTMJ09R9/ukw4qBs/oZClOfoQk/cOTbDn+NceXfV7j09YPVQUryJPHurafcSg6EVKA=="], - "@reduxjs/toolkit/immer": ["immer@11.1.4", "", {}, "sha512-XREFCPo6ksxVzP4E0ekD5aMdf8WMwmdNaz6vuvxgI40UaEiu6q3p8X52aU6GdyvLY3XXX/8R7JOTXStz/nBbRw=="], - "@rollup/pluginutils/estree-walker": ["estree-walker@2.0.2", "", {}, "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w=="], "@tailwindcss/oxide-wasm32-wasi/@emnapi/core": ["@emnapi/core@1.9.2", "", { "dependencies": { "@emnapi/wasi-threads": "1.2.1", "tslib": 
"^2.4.0" }, "bundled": true }, "sha512-UC+ZhH3XtczQYfOlu3lNEkdW/p4dsJ1r/bP7H8+rhao3TTTMO1ATq/4DdIi23XuGoFY+Cz0JmCbdVl0hz9jZcA=="], diff --git a/internal/acp/client.go b/internal/acp/client.go index 80c95f6c3..3f8964fa8 100644 --- a/internal/acp/client.go +++ b/internal/acp/client.go @@ -124,11 +124,31 @@ func (d *Driver) Start(ctx context.Context, opts StartOpts) (*AgentProcess, erro return nil, err } + process, err := d.spawnProcess(normalized) + if err != nil { + return nil, err + } + + if err := d.initializeConnection(ctx, process, normalized.AgentName); err != nil { + return nil, d.cleanupFailedStart(process, err) + } + if err := d.negotiateSession(ctx, process, normalized); err != nil { + return nil, d.cleanupFailedStart(process, err) + } + return process, nil +} + +func (d *Driver) spawnProcess(normalized StartOpts) (*AgentProcess, error) { command, args, err := parseCommandString(normalized.Command) if err != nil { return nil, err } + policy, err := newPermissionPolicy(normalized.Permissions, normalized.Cwd) + if err != nil { + return nil, err + } + procCtx, cancelProcess := context.WithCancel(context.Background()) cmd := exec.CommandContext(procCtx, command, args...) 
configureManagedCommand(cmd) @@ -158,12 +178,6 @@ func (d *Driver) Start(ctx context.Context, opts StartOpts) (*AgentProcess, erro return nil, fmt.Errorf("acp: start subprocess %q: %w", normalized.Command, err) } - policy, err := newPermissionPolicy(normalized.Permissions, normalized.Cwd) - if err != nil { - cancelProcess() - return nil, err - } - process := &AgentProcess{ PID: cmd.Process.Pid, AgentName: normalized.AgentName, @@ -186,6 +200,10 @@ func (d *Driver) Start(ctx context.Context, opts StartOpts) (*AgentProcess, erro go process.waitForExit() + return process, nil +} + +func (d *Driver) initializeConnection(ctx context.Context, process *AgentProcess, agentName string) error { initRequest := acpsdk.InitializeRequest{ ProtocolVersion: acpsdk.ProtocolVersionNumber, ClientCapabilities: acpsdk.ClientCapabilities{ @@ -202,41 +220,49 @@ func (d *Driver) Start(ctx context.Context, opts StartOpts) (*AgentProcess, erro } initializeResponse, err := acpsdk.SendRequest[acpsdk.InitializeResponse](process.conn, ctx, acpsdk.AgentMethodInitialize, initRequest) if err != nil { - return nil, d.cleanupFailedStart(process, fmt.Errorf("acp: initialize session for %q: %w", normalized.AgentName, err)) + return fmt.Errorf("acp: initialize session for %q: %w", agentName, err) } process.Caps = ACPCaps{ SupportsLoadSession: initializeResponse.AgentCapabilities.LoadSession, } + return nil +} +func (d *Driver) negotiateSession(ctx context.Context, process *AgentProcess, normalized StartOpts) error { if normalized.ResumeSessionID != "" { - if !process.Caps.SupportsLoadSession { - startErr := fmt.Errorf("%w: agent %q does not support session/load for resume %q", ErrAgentDoesNotSupportSession, normalized.AgentName, normalized.ResumeSessionID) - return nil, d.cleanupFailedStart(process, startErr) - } + return d.loadSession(ctx, process, normalized) + } + return d.createSession(ctx, process, normalized) +} - loadRequest := acpsdk.LoadSessionRequest{ - Cwd: normalized.Cwd, - McpServers: 
toSDKMCPServers(normalized.MCPServers), - SessionId: acpsdk.SessionId(normalized.ResumeSessionID), - } - loadWireRequest := wireLoadSessionRequest{ - Cwd: loadRequest.Cwd, - McpServers: loadRequest.McpServers, - AdditionalDirs: append([]string(nil), normalized.AdditionalDirs...), - SessionID: loadRequest.SessionId, - } - loadResponse, loadErr := acpsdk.SendRequest[acpsdk.LoadSessionResponse](process.conn, ctx, acpsdk.AgentMethodSessionLoad, loadWireRequest) - if loadErr != nil { - startErr := fmt.Errorf("%w: load session %q for %q: %w", ErrLoadSessionFailed, normalized.ResumeSessionID, normalized.AgentName, loadErr) - return nil, d.cleanupFailedStart(process, startErr) - } +func (d *Driver) loadSession(ctx context.Context, process *AgentProcess, normalized StartOpts) error { + if !process.Caps.SupportsLoadSession { + return fmt.Errorf("%w: agent %q does not support session/load for resume %q", ErrAgentDoesNotSupportSession, normalized.AgentName, normalized.ResumeSessionID) + } - process.SessionID = normalized.ResumeSessionID - process.Caps = captureCaps(process.Caps.SupportsLoadSession, loadResponse.Modes, loadResponse.Models) - return process, nil + loadRequest := acpsdk.LoadSessionRequest{ + Cwd: normalized.Cwd, + McpServers: toSDKMCPServers(normalized.MCPServers), + SessionId: acpsdk.SessionId(normalized.ResumeSessionID), + } + loadWireRequest := wireLoadSessionRequest{ + Cwd: loadRequest.Cwd, + McpServers: loadRequest.McpServers, + AdditionalDirs: append([]string(nil), normalized.AdditionalDirs...), + SessionID: loadRequest.SessionId, + } + loadResponse, err := acpsdk.SendRequest[acpsdk.LoadSessionResponse](process.conn, ctx, acpsdk.AgentMethodSessionLoad, loadWireRequest) + if err != nil { + return fmt.Errorf("%w: load session %q for %q: %w", ErrLoadSessionFailed, normalized.ResumeSessionID, normalized.AgentName, err) } + process.SessionID = normalized.ResumeSessionID + process.Caps = captureCaps(process.Caps.SupportsLoadSession, loadResponse.Modes, 
loadResponse.Models) + return nil +} + +func (d *Driver) createSession(ctx context.Context, process *AgentProcess, normalized StartOpts) error { newRequest := acpsdk.NewSessionRequest{ Cwd: normalized.Cwd, McpServers: toSDKMCPServers(normalized.MCPServers), @@ -248,12 +274,12 @@ func (d *Driver) Start(ctx context.Context, opts StartOpts) (*AgentProcess, erro } newResponse, err := acpsdk.SendRequest[acpsdk.NewSessionResponse](process.conn, ctx, acpsdk.AgentMethodSessionNew, newWireRequest) if err != nil { - return nil, d.cleanupFailedStart(process, fmt.Errorf("acp: create session for %q: %w", normalized.AgentName, err)) + return fmt.Errorf("acp: create session for %q: %w", normalized.AgentName, err) } process.SessionID = string(newResponse.SessionId) process.Caps = captureCaps(process.Caps.SupportsLoadSession, newResponse.Modes, newResponse.Models) - return process, nil + return nil } func (d *Driver) cleanupFailedStart(process *AgentProcess, startErr error) error { diff --git a/internal/acp/handlers.go b/internal/acp/handlers.go index acfec9676..4033e9f30 100644 --- a/internal/acp/handlers.go +++ b/internal/acp/handlers.go @@ -102,95 +102,70 @@ type terminalOutputWriter struct { } func (p *AgentProcess) handleInbound(ctx context.Context, method string, params json.RawMessage) (any, *acpsdk.RequestError) { - switch method { - case acpsdk.ClientMethodFsReadTextFile: - var request acpsdk.ReadTextFileRequest - if err := json.Unmarshal(params, &request); err != nil { - return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) - } - response, err := p.handleReadTextFile(ctx, request) - if err != nil { - return nil, requestError(err) - } - return response, nil - case acpsdk.ClientMethodFsWriteTextFile: - var request acpsdk.WriteTextFileRequest - if err := json.Unmarshal(params, &request); err != nil { - return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) - } - response, err := p.handleWriteTextFile(ctx, request) - if err != nil { - 
return nil, requestError(err) - } - return response, nil - case acpsdk.ClientMethodSessionRequestPermission: - var request acpsdk.RequestPermissionRequest - if err := json.Unmarshal(params, &request); err != nil { - return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) - } - response, err := p.handleRequestPermission(ctx, request) - if err != nil { - return nil, requestError(err) - } - return response, nil - case acpsdk.ClientMethodSessionUpdate: - if err := p.handleSessionUpdate(params); err != nil { - return nil, requestError(err) - } - return nil, nil - case acpsdk.ClientMethodTerminalCreate: - var request acpsdk.CreateTerminalRequest - if err := json.Unmarshal(params, &request); err != nil { - return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) - } - response, err := p.handleCreateTerminal(request) - if err != nil { - return nil, requestError(err) - } - return response, nil - case acpsdk.ClientMethodTerminalKill: - var request acpsdk.KillTerminalCommandRequest - if err := json.Unmarshal(params, &request); err != nil { - return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) - } - response, err := p.handleKillTerminal(request) - if err != nil { - return nil, requestError(err) - } - return response, nil - case acpsdk.ClientMethodTerminalOutput: - var request acpsdk.TerminalOutputRequest - if err := json.Unmarshal(params, &request); err != nil { - return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) - } - response, err := p.handleTerminalOutput(request) - if err != nil { - return nil, requestError(err) - } - return response, nil - case acpsdk.ClientMethodTerminalWaitForExit: - var request acpsdk.WaitForTerminalExitRequest - if err := json.Unmarshal(params, &request); err != nil { - return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) - } - response, err := p.handleWaitForTerminalExit(ctx, request) - if err != nil { - return nil, requestError(err) - } - return 
response, nil - case acpsdk.ClientMethodTerminalRelease: - var request acpsdk.ReleaseTerminalRequest - if err := json.Unmarshal(params, &request); err != nil { - return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) - } - response, err := p.handleReleaseTerminal(request) - if err != nil { - return nil, requestError(err) - } - return response, nil - default: + handlers := map[string]func(context.Context, json.RawMessage) (any, *acpsdk.RequestError){ + acpsdk.ClientMethodFsReadTextFile: func(ctx context.Context, params json.RawMessage) (any, *acpsdk.RequestError) { + return handleInboundRequest(ctx, params, p.handleReadTextFile) + }, + acpsdk.ClientMethodFsWriteTextFile: func(ctx context.Context, params json.RawMessage) (any, *acpsdk.RequestError) { + return handleInboundRequest(ctx, params, p.handleWriteTextFile) + }, + acpsdk.ClientMethodSessionRequestPermission: func(ctx context.Context, params json.RawMessage) (any, *acpsdk.RequestError) { + return handleInboundRequest(ctx, params, p.handleRequestPermission) + }, + acpsdk.ClientMethodSessionUpdate: func(_ context.Context, params json.RawMessage) (any, *acpsdk.RequestError) { + if err := p.handleSessionUpdate(params); err != nil { + return nil, requestError(err) + } + return nil, nil + }, + acpsdk.ClientMethodTerminalCreate: func(_ context.Context, params json.RawMessage) (any, *acpsdk.RequestError) { + return handleInboundRequestNoContext(params, p.handleCreateTerminal) + }, + acpsdk.ClientMethodTerminalKill: func(_ context.Context, params json.RawMessage) (any, *acpsdk.RequestError) { + return handleInboundRequestNoContext(params, p.handleKillTerminal) + }, + acpsdk.ClientMethodTerminalOutput: func(_ context.Context, params json.RawMessage) (any, *acpsdk.RequestError) { + return handleInboundRequestNoContext(params, p.handleTerminalOutput) + }, + acpsdk.ClientMethodTerminalWaitForExit: func(ctx context.Context, params json.RawMessage) (any, *acpsdk.RequestError) { + return 
handleInboundRequest(ctx, params, p.handleWaitForTerminalExit) + }, + acpsdk.ClientMethodTerminalRelease: func(_ context.Context, params json.RawMessage) (any, *acpsdk.RequestError) { + return handleInboundRequestNoContext(params, p.handleReleaseTerminal) + }, + } + + handler, ok := handlers[method] + if !ok { return nil, acpsdk.NewMethodNotFound(method) } + return handler(ctx, params) +} + +func handleInboundRequest[Req any, Resp any](ctx context.Context, params json.RawMessage, fn func(context.Context, Req) (Resp, error)) (any, *acpsdk.RequestError) { + var request Req + if err := json.Unmarshal(params, &request); err != nil { + return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) + } + + response, err := fn(ctx, request) + if err != nil { + return nil, requestError(err) + } + return response, nil +} + +func handleInboundRequestNoContext[Req any, Resp any](params json.RawMessage, fn func(Req) (Resp, error)) (any, *acpsdk.RequestError) { + var request Req + if err := json.Unmarshal(params, &request); err != nil { + return nil, acpsdk.NewInvalidParams(map[string]any{"error": err.Error()}) + } + + response, err := fn(request) + if err != nil { + return nil, requestError(err) + } + return response, nil } func (p *AgentProcess) handleReadTextFile(_ context.Context, request acpsdk.ReadTextFileRequest) (acpsdk.ReadTextFileResponse, error) { diff --git a/internal/api/core/conversions.go b/internal/api/core/conversions.go index 67db0a10b..59f72dd4a 100644 --- a/internal/api/core/conversions.go +++ b/internal/api/core/conversions.go @@ -11,6 +11,7 @@ import ( "github.com/pedronauck/agh/internal/session" "github.com/pedronauck/agh/internal/skills" "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/workref" workspacepkg "github.com/pedronauck/agh/internal/workspace" ) @@ -21,12 +22,13 @@ func SessionPayloadFromInfo(info *session.SessionInfo) contract.SessionPayload { return payload } + ref := 
workref.NewPath(info.WorkspaceID, info.Workspace) payload = contract.SessionPayload{ ID: info.ID, Name: info.Name, AgentName: info.AgentName, - WorkspaceID: info.WorkspaceID, - WorkspacePath: info.Workspace, + WorkspaceID: ref.WorkspaceID, + WorkspacePath: ref.WorkspacePath, State: string(info.State), StopReason: string(info.StopReason), StopDetail: info.StopDetail, @@ -67,7 +69,7 @@ func ACPCapsPayloadFromInfo(caps acp.ACPCaps) *contract.ACPCapsPayload { // SessionEventPayloadFromEvent converts a session event into the shared payload. func SessionEventPayloadFromEvent(event store.SessionEvent, info *session.SessionInfo) contract.SessionEventPayload { - workspaceID, workspacePath := sessionWorkspaceFromInfo(info) + ref := workref.NewPath(sessionWorkspaceFromInfo(info)) return contract.SessionEventPayload{ ID: event.ID, SessionID: event.SessionID, @@ -75,8 +77,8 @@ func SessionEventPayloadFromEvent(event store.SessionEvent, info *session.Sessio TurnID: event.TurnID, Type: event.Type, AgentName: event.AgentName, - WorkspaceID: workspaceID, - WorkspacePath: workspacePath, + WorkspaceID: ref.WorkspaceID, + WorkspacePath: ref.WorkspacePath, Content: PayloadJSON(event.Content), Timestamp: event.Timestamp, } diff --git a/internal/api/core/handlers.go b/internal/api/core/handlers.go index 3bbfe8449..7e76767ef 100644 --- a/internal/api/core/handlers.go +++ b/internal/api/core/handlers.go @@ -8,7 +8,6 @@ import ( "net/http" "os" "sort" - "strconv" "strings" "sync" "sync/atomic" @@ -318,13 +317,9 @@ func (h *BaseHandlers) StreamSession(c *gin.Context) { h.respondError(c, http.StatusBadRequest, err) return } - if lastEventID := strings.TrimSpace(c.GetHeader("Last-Event-ID")); lastEventID != "" { - after, parseErr := strconv.ParseInt(lastEventID, 10, 64) - if parseErr != nil { - h.respondError(c, http.StatusBadRequest, fmt.Errorf("%s: invalid Last-Event-ID %q: %w", h.transportName(), lastEventID, parseErr)) - return - } - query.AfterSequence = after + if query.AfterSequence, 
err = parseLastEventID(c.GetHeader("Last-Event-ID"), h.transportName()); err != nil { + h.respondError(c, http.StatusBadRequest, err) + return } initial, err := h.Sessions.Events(c.Request.Context(), c.Param("id"), query) @@ -339,79 +334,14 @@ func (h *BaseHandlers) StreamSession(c *gin.Context) { return } - afterSequence := query.AfterSequence - for _, event := range initial { - afterSequence = event.Sequence - if err := WriteSSE(writer, SSEMessage{ - ID: strconv.FormatInt(event.Sequence, 10), - Name: event.Type, - Data: SessionEventPayloadFromEvent(event, info), - }); err != nil { - return - } + afterSequence, err := h.writeSessionEventBatch(writer, initial, info) + if err != nil { + return } pollQuery := query pollQuery.Limit = 0 - pollQuery.AfterSequence = afterSequence - - ticker := time.NewTicker(h.PollInterval) - defer ticker.Stop() - - for { - select { - case <-c.Request.Context().Done(): - return - case <-h.StreamDoneChannel(): - return - case <-ticker.C: - pollQuery.AfterSequence = afterSequence - events, pollErr := h.Sessions.Events(c.Request.Context(), c.Param("id"), pollQuery) - if pollErr != nil { - _ = WriteSSE(writer, SSEMessage{ - Name: "error", - Data: contract.ErrorPayload{Error: pollErr.Error()}, - }) - return - } - for _, event := range events { - afterSequence = event.Sequence - if err := WriteSSE(writer, SSEMessage{ - ID: strconv.FormatInt(event.Sequence, 10), - Name: event.Type, - Data: SessionEventPayloadFromEvent(event, info), - }); err != nil { - return - } - } - if len(events) == 0 { - latest, statusErr := h.Sessions.Status(c.Request.Context(), c.Param("id")) - if statusErr != nil { - _ = WriteSSE(writer, SSEMessage{ - Name: "error", - Data: contract.ErrorPayload{Error: statusErr.Error()}, - }) - return - } - if latest != nil && latest.State == session.StateStopped { - _ = WriteSSE(writer, SSEMessage{ - Name: session.EventTypeSessionStopped, - Data: contract.SessionEventPayload{ - SessionID: latest.ID, - Type: 
session.EventTypeSessionStopped, - WorkspaceID: strings.TrimSpace(latest.WorkspaceID), - WorkspacePath: strings.TrimSpace(latest.Workspace), - Timestamp: latest.UpdatedAt, - }, - }) - return - } - if h.IncludeSessionWorkspaceInSSE { - info = latest - } - } - } - } + h.pollAndStreamSessionEvents(c, writer, c.Param("id"), info, pollQuery, afterSequence) } // ListAgents returns all readable agent definitions in home paths. diff --git a/internal/api/core/session_stream.go b/internal/api/core/session_stream.go new file mode 100644 index 000000000..812ac6bdc --- /dev/null +++ b/internal/api/core/session_stream.go @@ -0,0 +1,137 @@ +package core + +import ( + "fmt" + "strconv" + "strings" + "time" + + "github.com/gin-gonic/gin" + "github.com/pedronauck/agh/internal/api/contract" + "github.com/pedronauck/agh/internal/session" + "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/workref" +) + +func parseLastEventID(lastEventID string, transportName string) (int64, error) { + trimmed := strings.TrimSpace(lastEventID) + if trimmed == "" { + return 0, nil + } + + after, err := strconv.ParseInt(trimmed, 10, 64) + if err != nil { + return 0, fmt.Errorf("%s: invalid Last-Event-ID %q: %w", transportName, trimmed, err) + } + return after, nil +} + +func (h *BaseHandlers) writeSessionEventBatch(writer FlushWriter, events []store.SessionEvent, info *session.SessionInfo) (int64, error) { + var afterSequence int64 + for _, event := range events { + afterSequence = event.Sequence + if err := WriteSSE(writer, SSEMessage{ + ID: strconv.FormatInt(event.Sequence, 10), + Name: event.Type, + Data: SessionEventPayloadFromEvent(event, info), + }); err != nil { + return afterSequence, err + } + } + return afterSequence, nil +} + +func (h *BaseHandlers) writeSessionStoppedEvent(writer FlushWriter, latest *session.SessionInfo) error { + if latest == nil || latest.State != session.StateStopped { + return nil + } + + ref := workref.NewPath(latest.WorkspaceID, 
latest.Workspace) + return WriteSSE(writer, SSEMessage{ + Name: session.EventTypeSessionStopped, + Data: contract.SessionEventPayload{ + SessionID: latest.ID, + Type: session.EventTypeSessionStopped, + WorkspaceID: ref.WorkspaceID, + WorkspacePath: ref.WorkspacePath, + Timestamp: latest.UpdatedAt, + }, + }) +} + +func (h *BaseHandlers) pollAndStreamSessionEvents( + c *gin.Context, + writer FlushWriter, + sessionID string, + info *session.SessionInfo, + pollQuery store.EventQuery, + afterSequence int64, +) { + ticker := time.NewTicker(h.PollInterval) + defer ticker.Stop() + + currentInfo := info + for { + select { + case <-c.Request.Context().Done(): + return + case <-h.StreamDoneChannel(): + return + case <-ticker.C: + var done bool + afterSequence, currentInfo, done = h.pollSessionStreamTick(c, writer, sessionID, currentInfo, pollQuery, afterSequence) + if done { + return + } + } + } +} + +func (h *BaseHandlers) pollSessionStreamTick( + c *gin.Context, + writer FlushWriter, + sessionID string, + info *session.SessionInfo, + pollQuery store.EventQuery, + afterSequence int64, +) (int64, *session.SessionInfo, bool) { + pollQuery.AfterSequence = afterSequence + + events, pollErr := h.Sessions.Events(c.Request.Context(), sessionID, pollQuery) + if pollErr != nil { + // Best-effort notification; the SSE client may already be disconnected. + _ = WriteSSE(writer, SSEMessage{ + Name: "error", + Data: contract.ErrorPayload{Error: pollErr.Error()}, + }) + return afterSequence, info, true + } + + nextSequence, err := h.writeSessionEventBatch(writer, events, info) + if err != nil { + return nextSequence, info, true + } + if nextSequence > afterSequence { + return nextSequence, info, false + } + + latest, statusErr := h.Sessions.Status(c.Request.Context(), sessionID) + if statusErr != nil { + // Best-effort notification; the SSE client may already be disconnected. 
+ _ = WriteSSE(writer, SSEMessage{ + Name: "error", + Data: contract.ErrorPayload{Error: statusErr.Error()}, + }) + return afterSequence, info, true + } + if latest != nil && latest.State == session.StateStopped { + // Best-effort terminal event; there is nothing else to do if the stream is closed. + _ = h.writeSessionStoppedEvent(writer, latest) + return afterSequence, latest, true + } + if h.IncludeSessionWorkspaceInSSE { + info = latest + } + + return afterSequence, info, false +} diff --git a/internal/api/httpapi/handlers.go b/internal/api/httpapi/handlers.go new file mode 100644 index 000000000..3a060b913 --- /dev/null +++ b/internal/api/httpapi/handlers.go @@ -0,0 +1,79 @@ +package httpapi + +import ( + "io/fs" + "log/slog" + "time" + + "github.com/pedronauck/agh/internal/api/core" + aghconfig "github.com/pedronauck/agh/internal/config" + "github.com/pedronauck/agh/internal/memory" +) + +type handlerConfig struct { + sessions core.SessionManager + observer core.Observer + workspaces core.WorkspaceService + skillsRegistry core.SkillsRegistry + memoryStore *memory.Store + dreamTrigger core.DreamTrigger + staticFS fs.FS + homePaths aghconfig.HomePaths + config aghconfig.Config + logger *slog.Logger + startedAt time.Time + now func() time.Time + pollInterval time.Duration + agentLoader core.AgentLoader + httpPort int +} + +// Handlers expose request/response and SSE endpoints for the AGH API. 
+type Handlers struct { + *core.BaseHandlers + staticFS fs.FS +} + +func newHandlers(cfg handlerConfig) *Handlers { + if cfg.pollInterval <= 0 { + cfg.pollInterval = defaultPollInterval + } + if cfg.httpPort <= 0 { + cfg.httpPort = cfg.config.HTTP.Port + } + + return &Handlers{ + BaseHandlers: core.NewBaseHandlers(core.BaseHandlerConfig{ + TransportName: "httpapi", + MaskInternalErrors: true, + IncludeSessionWorkspaceInSSE: false, + Sessions: cfg.sessions, + Observer: cfg.observer, + Workspaces: cfg.workspaces, + SkillsRegistry: cfg.skillsRegistry, + MemoryStore: cfg.memoryStore, + DreamTrigger: cfg.dreamTrigger, + HomePaths: cfg.homePaths, + Config: cfg.config, + Logger: cfg.logger, + StartedAt: cfg.startedAt, + Now: cfg.now, + PollInterval: cfg.pollInterval, + AgentLoader: cfg.agentLoader, + HTTPPort: cfg.httpPort, + }), + staticFS: cfg.staticFS, + } +} + +func (h *Handlers) setStreamDone(done <-chan struct{}) { + if h != nil && h.BaseHandlers != nil { + h.SetStreamDone(done) + } +} + +func (h *Handlers) setHTTPPort(port int) { + if h != nil && h.BaseHandlers != nil { + h.SetHTTPPort(port) + } +} diff --git a/internal/api/httpapi/handlers_error_test.go b/internal/api/httpapi/handlers_error_test.go index 217ee5384..58305bd49 100644 --- a/internal/api/httpapi/handlers_error_test.go +++ b/internal/api/httpapi/handlers_error_test.go @@ -333,6 +333,15 @@ func TestCORSMiddlewareAllowsLoopbackOrigins(t *testing.T) { } } +func TestResolveAllowedOriginRejectsSameHostDifferentPort(t *testing.T) { + t.Parallel() + + allowedOrigin, ok := resolveAllowedOrigin("http://example.com:3000", "http", "example.com:2123", "example.com") + if ok { + t.Fatalf("resolveAllowedOrigin() = %q, true, want rejection for same-host different-port origin", allowedOrigin) + } +} + func TestRespondErrorSanitizesInternalFailures(t *testing.T) { homePaths := newTestHomePaths(t) engine := newTestRouter(t, newTestHandlers(t, stubSessionManager{ diff --git a/internal/api/httpapi/handlers_test.go 
b/internal/api/httpapi/handlers_test.go index 0bef9baa9..6f7f3d548 100644 --- a/internal/api/httpapi/handlers_test.go +++ b/internal/api/httpapi/handlers_test.go @@ -12,6 +12,7 @@ import ( "testing" "time" + "github.com/gin-gonic/gin" "github.com/pedronauck/agh/internal/acp" "github.com/pedronauck/agh/internal/api/contract" core "github.com/pedronauck/agh/internal/api/core" @@ -85,6 +86,19 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { } } +func TestRegisterRoutesSkipsNilHandlers(t *testing.T) { + t.Parallel() + + gin.SetMode(gin.TestMode) + engine := gin.New() + + RegisterRoutes(engine, nil) + + if got := len(engine.Routes()); got != 0 { + t.Fatalf("len(routes) = %d, want 0", got) + } +} + func TestCreateSessionHandlerReturnsSessionID(t *testing.T) { homePaths := newTestHomePaths(t) manager := stubSessionManager{ diff --git a/internal/api/httpapi/middleware.go b/internal/api/httpapi/middleware.go new file mode 100644 index 000000000..d1e6db97c --- /dev/null +++ b/internal/api/httpapi/middleware.go @@ -0,0 +1,228 @@ +package httpapi + +import ( + "log/slog" + "net" + "net/http" + "net/url" + "strings" + "time" + + "github.com/gin-gonic/gin" + "github.com/pedronauck/agh/internal/api/contract" + "github.com/pedronauck/agh/internal/api/core" +) + +func requestLoggingMiddleware(logger *slog.Logger) gin.HandlerFunc { + if logger == nil { + logger = slog.Default() + } + + return func(c *gin.Context) { + started := time.Now() + c.Next() + + logger.Info( + "httpapi: request", + "method", c.Request.Method, + "path", c.FullPath(), + "status", c.Writer.Status(), + "latency_ms", time.Since(started).Milliseconds(), + "client_ip", c.ClientIP(), + ) + } +} + +func corsMiddleware(boundHost string) gin.HandlerFunc { + return func(c *gin.Context) { + origin := strings.TrimSpace(c.GetHeader("Origin")) + headers := c.Writer.Header() + headers.Set("Access-Control-Allow-Headers", "Content-Type, Last-Event-ID, Accept") + headers.Set("Access-Control-Allow-Methods", 
"GET, POST, PUT, DELETE, OPTIONS") + headers.Set("Access-Control-Expose-Headers", "Content-Type, Last-Event-ID, x-vercel-ai-ui-message-stream") + headers.Set("Vary", "Origin") + if origin != "" { + allowedOrigin, ok := resolveAllowedOrigin(origin, requestScheme(c.Request), c.Request.Host, boundHost) + if !ok { + c.AbortWithStatusJSON(http.StatusForbidden, contract.ErrorPayload{Error: "origin not allowed"}) + return + } + headers.Set("Access-Control-Allow-Origin", allowedOrigin) + } + + if c.Request.Method == http.MethodOptions { + c.AbortWithStatus(http.StatusNoContent) + return + } + c.Next() + } +} + +func resolveAllowedOrigin(origin string, requestScheme string, requestHost string, boundHost string) (string, bool) { + parsed, err := url.Parse(strings.TrimSpace(origin)) + if err != nil || parsed.Scheme == "" || parsed.Host == "" { + return "", false + } + + originSpec, ok := canonicalOriginFromURL(parsed) + if !ok { + return "", false + } + + requestSpec, ok := canonicalOriginFromHost(requestHost, requestScheme, "") + if !ok { + return "", false + } + + boundSpec, ok := canonicalOriginFromHost(boundHost, requestSpec.scheme, requestSpec.port) + switch { + case originSpec.canonical == requestSpec.canonical: + return origin, true + case originSpec.loopback && requestSpec.loopback && originSpec.scheme == requestSpec.scheme: + return origin, true + case ok && !boundSpec.wildcard && originSpec.canonical == boundSpec.canonical: + return origin, true + default: + return "", false + } +} + +type canonicalOrigin struct { + scheme string + hostname string + port string + canonical string + loopback bool + wildcard bool +} + +func requestScheme(r *http.Request) string { + if r == nil { + return "http" + } + if forwarded := strings.TrimSpace(r.Header.Get("X-Forwarded-Proto")); forwarded != "" { + return strings.ToLower(forwarded) + } + if r.TLS != nil { + return "https" + } + if scheme := strings.TrimSpace(r.URL.Scheme); scheme != "" { + return strings.ToLower(scheme) + } + 
return "http" +} + +func canonicalOriginFromURL(parsed *url.URL) (canonicalOrigin, bool) { + scheme := strings.ToLower(strings.TrimSpace(parsed.Scheme)) + hostname := canonicalHost(parsed.Hostname()) + port := normalizePort(scheme, parsed.Port()) + if scheme == "" || hostname == "" || port == "" { + return canonicalOrigin{}, false + } + + return canonicalOrigin{ + scheme: scheme, + hostname: hostname, + port: port, + canonical: scheme + "://" + net.JoinHostPort(hostname, port), + loopback: isLoopbackHost(hostname), + wildcard: isWildcardHost(hostname), + }, true +} + +func canonicalOriginFromHost(host string, scheme string, fallbackPort string) (canonicalOrigin, bool) { + trimmedHost := strings.TrimSpace(host) + scheme = strings.ToLower(strings.TrimSpace(scheme)) + if trimmedHost == "" || scheme == "" { + return canonicalOrigin{}, false + } + + hostname := canonicalHost(trimmedHost) + port := "" + if parsedHost, parsedPort, err := net.SplitHostPort(trimmedHost); err == nil { + hostname = canonicalHost(parsedHost) + port = parsedPort + } + if hostname == "" { + return canonicalOrigin{}, false + } + + port = normalizePort(scheme, firstNonEmptyString(port, fallbackPort)) + if port == "" { + return canonicalOrigin{}, false + } + + return canonicalOrigin{ + scheme: scheme, + hostname: hostname, + port: port, + canonical: scheme + "://" + net.JoinHostPort(hostname, port), + loopback: isLoopbackHost(hostname), + wildcard: isWildcardHost(hostname), + }, true +} + +func normalizePort(scheme string, port string) string { + trimmed := strings.TrimSpace(port) + if trimmed != "" { + return trimmed + } + + switch strings.ToLower(strings.TrimSpace(scheme)) { + case "http": + return "80" + case "https": + return "443" + default: + return "" + } +} + +func firstNonEmptyString(values ...string) string { + for _, value := range values { + if trimmed := strings.TrimSpace(value); trimmed != "" { + return trimmed + } + } + return "" +} + +func canonicalHost(value string) string { + host 
:= strings.TrimSpace(value) + if host == "" { + return "" + } + if strings.Contains(host, "://") { + if parsed, err := url.Parse(host); err == nil { + host = parsed.Hostname() + } + } + return strings.Trim(strings.TrimSpace(host), "[]") +} + +func isLoopbackHost(host string) bool { + if strings.EqualFold(host, "localhost") { + return true + } + ip := net.ParseIP(host) + return ip != nil && ip.IsLoopback() +} + +func isWildcardHost(host string) bool { + switch host { + case "", "0.0.0.0", "::": + return true + default: + return false + } +} + +func errorMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + c.Next() + if len(c.Errors) == 0 || c.Writer.Written() { + return + } + core.RespondError(c, http.StatusInternalServerError, c.Errors.Last(), true) + } +} diff --git a/internal/api/httpapi/routes.go b/internal/api/httpapi/routes.go new file mode 100644 index 000000000..bcd1fdc2b --- /dev/null +++ b/internal/api/httpapi/routes.go @@ -0,0 +1,93 @@ +package httpapi + +import "github.com/gin-gonic/gin" + +// RegisterRoutes registers the shared AGH API routes on the supplied Gin router. 
+func RegisterRoutes(router gin.IRouter, handlers *Handlers) { + if handlers == nil { + return + } + + api := router.Group("/api") + + registerWorkspaceRoutes(api, handlers) + registerSessionRoutes(api, handlers) + registerAgentRoutes(api, handlers) + registerObserveRoutes(api, handlers) + registerHookRoutes(api, handlers) + registerSkillRoutes(api, handlers) + registerMemoryRoutes(api, handlers) + registerDaemonRoutes(api, handlers) + + if engine, ok := router.(*gin.Engine); ok { + engine.NoRoute(handlers.serveStaticRoute) + } +} + +func registerWorkspaceRoutes(api gin.IRouter, handlers *Handlers) { + workspaces := api.Group("/workspaces") + workspaces.POST("", handlers.CreateWorkspace) + workspaces.GET("", handlers.ListWorkspaces) + workspaces.GET("/:id", handlers.GetWorkspace) + workspaces.PATCH("/:id", handlers.UpdateWorkspace) + workspaces.DELETE("/:id", handlers.DeleteWorkspace) + workspaces.POST("/resolve", handlers.ResolveWorkspace) +} + +func registerSessionRoutes(api gin.IRouter, handlers *Handlers) { + sessions := api.Group("/sessions") + sessions.GET("", handlers.ListSessions) + sessions.POST("", handlers.CreateSession) + sessions.GET("/:id", handlers.GetSession) + sessions.DELETE("/:id", handlers.StopSession) + sessions.POST("/:id/resume", handlers.ResumeSession) + sessions.POST("/:id/prompt", handlers.promptSession) + sessions.GET("/:id/events", handlers.SessionEvents) + sessions.GET("/:id/history", handlers.SessionHistory) + sessions.GET("/:id/transcript", handlers.SessionTranscript) + sessions.GET("/:id/stream", handlers.StreamSession) + sessions.POST("/:id/approve", handlers.approveSession) +} + +func registerAgentRoutes(api gin.IRouter, handlers *Handlers) { + agents := api.Group("/agents") + agents.GET("", handlers.ListAgents) + agents.GET("/:name", handlers.GetAgent) +} + +func registerObserveRoutes(api gin.IRouter, handlers *Handlers) { + observeGroup := api.Group("/observe") + observeGroup.GET("/events", handlers.ObserveEvents) + 
observeGroup.GET("/events/stream", handlers.StreamObserveEvents) + observeGroup.GET("/health", handlers.Health) +} + +func registerHookRoutes(api gin.IRouter, handlers *Handlers) { + hooksGroup := api.Group("/hooks") + hooksGroup.GET("/catalog", handlers.HookCatalog) + hooksGroup.GET("/runs", handlers.HookRuns) + hooksGroup.GET("/events", handlers.HookEvents) +} + +func registerSkillRoutes(api gin.IRouter, handlers *Handlers) { + skillsGroup := api.Group("/skills") + skillsGroup.GET("", handlers.ListSkills) + skillsGroup.GET("/:name", handlers.GetSkill) + skillsGroup.GET("/:name/content", handlers.GetSkillContent) + skillsGroup.POST("/:name/enable", handlers.EnableSkill) + skillsGroup.POST("/:name/disable", handlers.DisableSkill) +} + +func registerMemoryRoutes(api gin.IRouter, handlers *Handlers) { + memoryGroup := api.Group("/memory") + memoryGroup.GET("", handlers.ListMemory) + memoryGroup.GET("/:filename", handlers.ReadMemory) + memoryGroup.PUT("/:filename", handlers.WriteMemory) + memoryGroup.DELETE("/:filename", handlers.DeleteMemory) + memoryGroup.POST("/consolidate", handlers.ConsolidateMemory) +} + +func registerDaemonRoutes(api gin.IRouter, handlers *Handlers) { + daemonGroup := api.Group("/daemon") + daemonGroup.GET("/status", handlers.DaemonStatus) +} diff --git a/internal/api/httpapi/server.go b/internal/api/httpapi/server.go index aec5cb7e7..0d4728c16 100644 --- a/internal/api/httpapi/server.go +++ b/internal/api/httpapi/server.go @@ -5,18 +5,15 @@ import ( "context" "errors" "fmt" - "io/fs" "log/slog" "net" "net/http" - "net/url" "strconv" "strings" "sync" "time" "github.com/gin-gonic/gin" - "github.com/pedronauck/agh/internal/api/contract" "github.com/pedronauck/agh/internal/api/core" aghconfig "github.com/pedronauck/agh/internal/config" "github.com/pedronauck/agh/internal/memory" @@ -62,30 +59,6 @@ type Server struct { actualPort int } -type handlerConfig struct { - sessions core.SessionManager - observer core.Observer - workspaces 
core.WorkspaceService - skillsRegistry core.SkillsRegistry - memoryStore *memory.Store - dreamTrigger core.DreamTrigger - staticFS fs.FS - homePaths aghconfig.HomePaths - config aghconfig.Config - logger *slog.Logger - startedAt time.Time - now func() time.Time - pollInterval time.Duration - agentLoader core.AgentLoader - httpPort int -} - -// Handlers expose request/response and SSE endpoints for the AGH API. -type Handlers struct { - *core.BaseHandlers - staticFS fs.FS -} - // WithHomePaths overrides the resolved AGH home layout. func WithHomePaths(homePaths aghconfig.HomePaths) Option { return func(server *Server) { @@ -418,238 +391,6 @@ func (s *Server) Shutdown(ctx context.Context) error { return errors.Join(errs...) } -// RegisterRoutes registers the shared AGH API routes on the supplied Gin router. -func RegisterRoutes(router gin.IRouter, handlers *Handlers) { - api := router.Group("/api") - - workspaces := api.Group("/workspaces") - { - workspaces.POST("", handlers.CreateWorkspace) - workspaces.GET("", handlers.ListWorkspaces) - workspaces.GET("/:id", handlers.GetWorkspace) - workspaces.PATCH("/:id", handlers.UpdateWorkspace) - workspaces.DELETE("/:id", handlers.DeleteWorkspace) - workspaces.POST("/resolve", handlers.ResolveWorkspace) - } - - sessions := api.Group("/sessions") - { - sessions.GET("", handlers.ListSessions) - sessions.POST("", handlers.CreateSession) - sessions.GET("/:id", handlers.GetSession) - sessions.DELETE("/:id", handlers.StopSession) - sessions.POST("/:id/resume", handlers.ResumeSession) - sessions.POST("/:id/prompt", handlers.promptSession) - sessions.GET("/:id/events", handlers.SessionEvents) - sessions.GET("/:id/history", handlers.SessionHistory) - sessions.GET("/:id/transcript", handlers.SessionTranscript) - sessions.GET("/:id/stream", handlers.StreamSession) - sessions.POST("/:id/approve", handlers.approveSession) - } - - agents := api.Group("/agents") - { - agents.GET("", handlers.ListAgents) - agents.GET("/:name", 
handlers.GetAgent) - } - - observeGroup := api.Group("/observe") - { - observeGroup.GET("/events", handlers.ObserveEvents) - observeGroup.GET("/events/stream", handlers.StreamObserveEvents) - observeGroup.GET("/health", handlers.Health) - } - - hooksGroup := api.Group("/hooks") - { - hooksGroup.GET("/catalog", handlers.HookCatalog) - hooksGroup.GET("/runs", handlers.HookRuns) - hooksGroup.GET("/events", handlers.HookEvents) - } - - skillsGroup := api.Group("/skills") - { - skillsGroup.GET("", handlers.ListSkills) - skillsGroup.GET("/:name", handlers.GetSkill) - skillsGroup.GET("/:name/content", handlers.GetSkillContent) - skillsGroup.POST("/:name/enable", handlers.EnableSkill) - skillsGroup.POST("/:name/disable", handlers.DisableSkill) - } - - memoryGroup := api.Group("/memory") - { - memoryGroup.GET("", handlers.ListMemory) - memoryGroup.GET("/:filename", handlers.ReadMemory) - memoryGroup.PUT("/:filename", handlers.WriteMemory) - memoryGroup.DELETE("/:filename", handlers.DeleteMemory) - memoryGroup.POST("/consolidate", handlers.ConsolidateMemory) - } - - daemonGroup := api.Group("/daemon") - { - daemonGroup.GET("/status", handlers.DaemonStatus) - } - - if engine, ok := router.(*gin.Engine); ok && handlers != nil { - engine.NoRoute(handlers.serveStaticRoute) - } -} - -func newHandlers(cfg handlerConfig) *Handlers { - if cfg.pollInterval <= 0 { - cfg.pollInterval = defaultPollInterval - } - if cfg.httpPort <= 0 { - cfg.httpPort = cfg.config.HTTP.Port - } - - return &Handlers{ - BaseHandlers: core.NewBaseHandlers(core.BaseHandlerConfig{ - TransportName: "httpapi", - MaskInternalErrors: true, - IncludeSessionWorkspaceInSSE: false, - Sessions: cfg.sessions, - Observer: cfg.observer, - Workspaces: cfg.workspaces, - SkillsRegistry: cfg.skillsRegistry, - MemoryStore: cfg.memoryStore, - DreamTrigger: cfg.dreamTrigger, - HomePaths: cfg.homePaths, - Config: cfg.config, - Logger: cfg.logger, - StartedAt: cfg.startedAt, - Now: cfg.now, - PollInterval: cfg.pollInterval, - 
AgentLoader: cfg.agentLoader, - HTTPPort: cfg.httpPort, - }), - staticFS: cfg.staticFS, - } -} - -func (h *Handlers) setStreamDone(done <-chan struct{}) { - if h != nil && h.BaseHandlers != nil { - h.SetStreamDone(done) - } -} - -func (h *Handlers) setHTTPPort(port int) { - if h != nil && h.BaseHandlers != nil { - h.SetHTTPPort(port) - } -} - -func requestLoggingMiddleware(logger *slog.Logger) gin.HandlerFunc { - if logger == nil { - logger = slog.Default() - } - - return func(c *gin.Context) { - started := time.Now() - c.Next() - - logger.Info( - "httpapi: request", - "method", c.Request.Method, - "path", c.FullPath(), - "status", c.Writer.Status(), - "latency_ms", time.Since(started).Milliseconds(), - "client_ip", c.ClientIP(), - ) - } -} - -func corsMiddleware(boundHost string) gin.HandlerFunc { - return func(c *gin.Context) { - origin := strings.TrimSpace(c.GetHeader("Origin")) - headers := c.Writer.Header() - headers.Set("Access-Control-Allow-Headers", "Content-Type, Last-Event-ID, Accept") - headers.Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS") - headers.Set("Access-Control-Expose-Headers", "Content-Type, Last-Event-ID, x-vercel-ai-ui-message-stream") - headers.Set("Vary", "Origin") - if origin != "" { - allowedOrigin, ok := resolveAllowedOrigin(origin, c.Request.Host, boundHost) - if !ok { - c.AbortWithStatusJSON(http.StatusForbidden, contract.ErrorPayload{Error: "origin not allowed"}) - return - } - headers.Set("Access-Control-Allow-Origin", allowedOrigin) - } - - if c.Request.Method == http.MethodOptions { - c.AbortWithStatus(http.StatusNoContent) - return - } - c.Next() - } -} - -func resolveAllowedOrigin(origin string, requestHost string, boundHost string) (string, bool) { - parsed, err := url.Parse(strings.TrimSpace(origin)) - if err != nil || parsed.Scheme == "" || parsed.Host == "" { - return "", false - } - - originHost := canonicalHost(parsed.Hostname()) - requestHostname := canonicalHost(hostOnly(requestHost)) - 
boundHostname := canonicalHost(hostOnly(boundHost)) - - switch { - case originHost == "" || requestHostname == "": - return "", false - case originHost == requestHostname: - return origin, true - case isLoopbackHost(originHost) && isLoopbackHost(requestHostname): - return origin, true - case boundHostname != "" && !isWildcardHost(boundHostname) && originHost == boundHostname: - return origin, true - default: - return "", false - } -} - -func hostOnly(value string) string { - host := strings.TrimSpace(value) - if host == "" { - return "" - } - if parsedHost, _, err := net.SplitHostPort(host); err == nil { - return parsedHost - } - return host -} - -func canonicalHost(value string) string { - return strings.Trim(strings.TrimSpace(value), "[]") -} - -func isLoopbackHost(host string) bool { - if strings.EqualFold(host, "localhost") { - return true - } - ip := net.ParseIP(host) - return ip != nil && ip.IsLoopback() -} - -func isWildcardHost(host string) bool { - switch host { - case "", "0.0.0.0", "::": - return true - default: - return false - } -} - -func errorMiddleware() gin.HandlerFunc { - return func(c *gin.Context) { - c.Next() - if len(c.Errors) == 0 || c.Writer.Written() { - return - } - core.RespondError(c, http.StatusInternalServerError, c.Errors.Last(), true) - } -} - func waitForServeDone(ctx context.Context, done <-chan struct{}) error { if done == nil { return nil diff --git a/internal/api/udsapi/server.go b/internal/api/udsapi/server.go index 98ec3d97a..4800d053e 100644 --- a/internal/api/udsapi/server.go +++ b/internal/api/udsapi/server.go @@ -233,9 +233,6 @@ func New(opts ...Option) (*Server, error) { if server.workspaces == nil { return nil, errors.New("udsapi: workspace resolver is required") } - if server.skillsRegistry == nil { - return nil, errors.New("udsapi: skills registry is required") - } if strings.TrimSpace(server.config.Daemon.Socket) == "" { server.config.Daemon.Socket = server.homePaths.DaemonSocket } diff --git 
a/internal/api/udsapi/server_test.go b/internal/api/udsapi/server_test.go index c0013b1b1..096ec3269 100644 --- a/internal/api/udsapi/server_test.go +++ b/internal/api/udsapi/server_test.go @@ -100,8 +100,8 @@ func TestNewRequiresSessionManagerObserverAndWorkspaceResolver(t *testing.T) { WithSessionManager(stubSessionManager{}), WithObserver(stubObserver{}), WithWorkspaceResolver(stubWorkspaceService{}), - ); err == nil { - t.Fatal("New() without skills registry error = nil, want non-nil") + ); err != nil { + t.Fatalf("New() without skills registry error = %v, want nil", err) } } diff --git a/internal/cli/client.go b/internal/cli/client.go index 301478a2b..03e747e4a 100644 --- a/internal/cli/client.go +++ b/internal/cli/client.go @@ -1,7 +1,6 @@ package cli import ( - "bufio" "bytes" "context" "encoding/json" @@ -17,11 +16,11 @@ import ( "github.com/pedronauck/agh/internal/api/contract" "github.com/pedronauck/agh/internal/memory" + "github.com/pedronauck/agh/internal/sse" ) const ( baseURL = "http://unix" - maxSSELineBytes = 1024 * 1024 defaultUserAgentName = "agh-cli" ) @@ -173,21 +172,15 @@ type IdentityRecord struct { } // SSEEvent is one parsed server-sent event frame. -type SSEEvent struct { - ID string - Event string - Data json.RawMessage -} - -// SSEHandler consumes parsed SSE frames. -type SSEHandler func(SSEEvent) error +type SSEEvent = sse.Event +type SSEHandler = sse.Handler type unixSocketClient struct { socketPath string httpClient *http.Client } -var errStopSSE = errors.New("cli: stop sse stream") +var errStopSSE = sse.ErrStop // NewClient constructs a daemon client that talks HTTP over a Unix domain socket. 
func NewClient(socketPath string) (DaemonClient, error) { @@ -549,69 +542,7 @@ func (c *unixSocketClient) doRequest(ctx context.Context, method string, path st } func decodeSSE(ctx context.Context, body io.Reader, handler SSEHandler) error { - scanner := bufio.NewScanner(body) - scanner.Buffer(make([]byte, 0, 64*1024), maxSSELineBytes) - - event := SSEEvent{} - dataLines := make([]string, 0, 4) - emit := func() error { - if event.ID == "" && event.Event == "" && len(dataLines) == 0 { - return nil - } - if len(dataLines) > 0 { - event.Data = json.RawMessage(strings.Join(dataLines, "\n")) - } - err := handler(event) - event = SSEEvent{} - dataLines = dataLines[:0] - return err - } - - for scanner.Scan() { - if err := ctx.Err(); err != nil { - return err - } - - line := scanner.Text() - if line == "" { - if err := emit(); err != nil { - if errors.Is(err, errStopSSE) { - return nil - } - return err - } - continue - } - if strings.HasPrefix(line, ":") { - continue - } - - field, value, found := strings.Cut(line, ":") - if !found { - continue - } - value = strings.TrimPrefix(value, " ") - - switch field { - case "id": - event.ID = value - case "event": - event.Event = value - case "data": - dataLines = append(dataLines, value) - } - } - - if err := scanner.Err(); err != nil { - return fmt.Errorf("cli: read sse stream: %w", err) - } - if err := emit(); err != nil { - if errors.Is(err, errStopSSE) { - return nil - } - return err - } - return nil + return sse.Decode(ctx, body, handler) } func sessionListValues(query SessionListQuery) url.Values { diff --git a/internal/cli/install_test.go b/internal/cli/install_test.go index 98cdb0ccf..4ce98519c 100644 --- a/internal/cli/install_test.go +++ b/internal/cli/install_test.go @@ -72,10 +72,7 @@ func TestInstallCommandWritesBootstrapConfigAndAgent(t *testing.T) { func TestBuildInstallWizardInputAndBundleFormats(t *testing.T) { t.Parallel() - cfg, err := aghconfig.Default() - if err != nil { - t.Fatalf("aghconfig.Default() error = 
%v", err) - } + cfg := aghconfig.DefaultWithHome(aghconfig.HomePaths{}) cfg.Defaults.Provider = "codex" cfg.Providers["custom"] = aghconfig.ProviderConfig{DefaultModel: "custom-model"} diff --git a/internal/cli/skill.go b/internal/cli/skill.go index 6dcf99d21..684a36421 100644 --- a/internal/cli/skill.go +++ b/internal/cli/skill.go @@ -1,31 +1,11 @@ package cli import ( - "archive/tar" - "compress/gzip" - "context" - "encoding/json" - "errors" - "fmt" - "io" "io/fs" - "os" - "path" - "path/filepath" "regexp" - "sort" - "strconv" "strings" - "time" - aghconfig "github.com/pedronauck/agh/internal/config" "github.com/pedronauck/agh/internal/skills" - skillbundled "github.com/pedronauck/agh/internal/skills/bundled" - "github.com/pedronauck/agh/internal/skills/marketplace" - "github.com/pedronauck/agh/internal/skills/marketplace/clawhub" - "github.com/pedronauck/agh/internal/store/globaldb" - workspacepkg "github.com/pedronauck/agh/internal/workspace" - "github.com/spf13/cobra" ) const ( @@ -109,1670 +89,3 @@ type skillUpdateItem struct { Path string `json:"path"` Status string `json:"status"` } - -type installedMarketplaceSkill struct { - Name string - Dir string - FilePath string - Provenance skills.Provenance -} - -func newSkillCommand(deps commandDeps) *cobra.Command { - cmd := &cobra.Command{ - Use: "skill", - Short: "Manage local AgentSkills", - } - - cmd.AddCommand(newSkillListCommand(deps)) - cmd.AddCommand(newSkillViewCommand(deps)) - cmd.AddCommand(newSkillInfoCommand(deps)) - cmd.AddCommand(newSkillSearchCommand(deps)) - cmd.AddCommand(newSkillInstallCommand(deps)) - cmd.AddCommand(newSkillRemoveCommand(deps)) - cmd.AddCommand(newSkillUpdateCommand(deps)) - cmd.AddCommand(newSkillCreateCommand(deps)) - return cmd -} - -func newSkillListCommand(deps commandDeps) *cobra.Command { - var sourceFilter string - - cmd := &cobra.Command{ - Use: "list", - Short: "List locally available skills", - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, _ []string) error { 
- ctx, err := loadSkillCommandContext(cmd.Context(), deps) - if err != nil { - return err - } - - items, err := skillListItems(ctx.skills, sourceFilter) - if err != nil { - return err - } - - return writeCommandOutput(cmd, skillListBundle(items)) - }, - } - cmd.Flags().StringVar(&sourceFilter, "source", "", "Filter by source: bundled, user, additional, or workspace") - return cmd -} - -func newSkillViewCommand(deps commandDeps) *cobra.Command { - var filePath string - - cmd := &cobra.Command{ - Use: "view ", - Short: "Read a skill or one of its resource files", - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - ctx, err := loadSkillCommandContext(cmd.Context(), deps) - if err != nil { - return err - } - - skill, err := findSkillByName(ctx.skills, args[0]) - if err != nil { - return err - } - - if strings.TrimSpace(filePath) != "" { - content, err := readSkillResource(skill, ctx.bundledFS, filePath) - if err != nil { - return err - } - - item := skillViewItem{ - Name: skill.Meta.Name, - Source: skillSourceLabel(skill.Source), - Path: skill.FilePath, - File: strings.TrimSpace(filePath), - Content: content, - } - return writeCommandOutput(cmd, skillViewBundle(item, content)) - } - - resources, err := listSkillResources(skill, ctx.bundledFS) - if err != nil { - return err - } - - content, err := ctx.registry.LoadContent(cmd.Context(), skill) - if err != nil { - return err - } - - rendered, err := renderSkillXML(skill, content, resources) - if err != nil { - return err - } - - item := skillViewItem{ - Name: skill.Meta.Name, - Source: skillSourceLabel(skill.Source), - Path: skill.FilePath, - Content: rendered, - Resources: resources, - } - return writeCommandOutput(cmd, skillViewBundle(item, rendered)) - }, - } - cmd.Flags().StringVar(&filePath, "file", "", "Relative file path inside the skill directory") - return cmd -} - -func newSkillInfoCommand(deps commandDeps) *cobra.Command { - return &cobra.Command{ - Use: "info ", - Short: 
"Show detailed metadata for one skill", - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - ctx, err := loadSkillCommandContext(cmd.Context(), deps) - if err != nil { - return err - } - - skill, err := findSkillByName(ctx.skills, args[0]) - if err != nil { - return err - } - - resources, err := listSkillResources(skill, ctx.bundledFS) - if err != nil { - return err - } - - item := skillInfoItem{ - Name: skill.Meta.Name, - Description: skill.Meta.Description, - Version: skill.Meta.Version, - Source: skillSourceLabel(skill.Source), - Path: skill.FilePath, - Enabled: skill.Enabled, - Metadata: cloneMetadata(skill.Meta.Metadata), - Resources: resources, - } - - return writeCommandOutput(cmd, skillInfoBundle(item)) - }, - } -} - -func newSkillCreateCommand(deps commandDeps) *cobra.Command { - return &cobra.Command{ - Use: "create [name]", - Short: "Scaffold a new workspace skill", - Args: cobra.MaximumNArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - name := defaultSkillName - if len(args) == 1 { - name = args[0] - } - - skillName, err := normalizeSkillName(name) - if err != nil { - return err - } - - workspace, err := resolveCLIWorkspaceRoot(deps) - if err != nil { - return err - } - - skillDir := filepath.Join(workspace, aghconfig.DirName, aghconfig.SkillsDirName, skillName) - if _, err := os.Stat(skillDir); err == nil { - return fmt.Errorf("skill %q already exists at %s", skillName, skillDir) - } else if !errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("cli: inspect skill directory %q: %w", skillDir, err) - } - - if err := os.MkdirAll(skillDir, 0o755); err != nil { - return fmt.Errorf("cli: create skill directory %q: %w", skillDir, err) - } - - skillFilePath := filepath.Join(skillDir, skillMarkdownFileName) - content := defaultSkillTemplate(skillName) - if err := os.WriteFile(skillFilePath, []byte(content), 0o644); err != nil { - return fmt.Errorf("cli: write skill template %q: %w", skillFilePath, err) - } - - 
if _, err := skills.ParseSkillFile(skillFilePath); err != nil { - return fmt.Errorf("cli: validate generated skill %q: %w", skillFilePath, err) - } - - return writeCommandOutput(cmd, skillCreateBundle(skillCreateItem{ - Name: skillName, - Path: skillDir, - File: skillFilePath, - Source: "workspace", - Status: "created", - })) - }, - } -} - -func newSkillSearchCommand(deps commandDeps) *cobra.Command { - limit := defaultMarketplaceSearchLim - - cmd := &cobra.Command{ - Use: "search ", - Short: "Search marketplace skills", - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - if limit <= 0 { - return fmt.Errorf("cli: search limit must be positive: %d", limit) - } - - _, registry, _, err := loadMarketplaceRegistry(deps) - if err != nil { - return err - } - - results, err := registry.Search(cmd.Context(), args[0], marketplace.SearchOpts{Limit: limit}) - if err != nil { - return err - } - - return writeCommandOutput(cmd, skillSearchBundle(results)) - }, - } - cmd.Flags().IntVar(&limit, "limit", defaultMarketplaceSearchLim, "Maximum number of marketplace results to return") - return cmd -} - -func newSkillInstallCommand(deps commandDeps) *cobra.Command { - return &cobra.Command{ - Use: "install ", - Short: "Install a marketplace skill", - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - slug, err := normalizeSkillSlug(args[0]) - if err != nil { - return err - } - - runtime, registry, registryName, err := loadMarketplaceRegistry(deps) - if err != nil { - return err - } - - item, err := installMarketplaceSkill(cmd.Context(), runtime, registry, registryName, slug, false) - if err != nil { - return err - } - - return writeCommandOutput(cmd, skillInstallBundle(item)) - }, - } -} - -func newSkillRemoveCommand(deps commandDeps) *cobra.Command { - return &cobra.Command{ - Use: "remove ", - Short: "Remove an installed marketplace skill", - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) 
error { - name, err := normalizeSkillName(args[0]) - if err != nil { - return err - } - - runtime, err := loadRuntimeContext(deps) - if err != nil { - return err - } - - item, err := removeMarketplaceSkill(runtime.HomePaths.SkillsDir, name) - if err != nil { - return err - } - - return writeCommandOutput(cmd, skillRemoveBundle(item)) - }, - } -} - -func newSkillUpdateCommand(deps commandDeps) *cobra.Command { - updateAll := false - - cmd := &cobra.Command{ - Use: "update [name]", - Short: "Update installed marketplace skills", - Args: func(_ *cobra.Command, args []string) error { - if updateAll && len(args) > 0 { - return errors.New("cli: update accepts either a skill name or --all, not both") - } - if !updateAll && len(args) != 1 { - return errors.New("cli: update requires a skill name unless --all is set") - } - if len(args) == 1 { - _, err := normalizeSkillName(args[0]) - return err - } - return nil - }, - RunE: func(cmd *cobra.Command, args []string) error { - runtime, registry, registryName, err := loadMarketplaceRegistry(deps) - if err != nil { - return err - } - - items, err := updateMarketplaceSkills(cmd.Context(), runtime, registry, registryName, args, updateAll) - if err != nil { - return err - } - - return writeCommandOutput(cmd, skillUpdateBundle(items)) - }, - } - cmd.Flags().BoolVar(&updateAll, "all", false, "Update every installed marketplace skill") - return cmd -} - -func loadMarketplaceRegistry(deps commandDeps) (runtimeContext, marketplace.Registry, string, error) { - runtime, err := loadRuntimeContext(deps) - if err != nil { - return runtimeContext{}, nil, "", err - } - - registryCfg := runtime.Config.Skills.Marketplace - registryName := strings.ToLower(strings.TrimSpace(registryCfg.Registry)) - if registryName == "" { - registryName = defaultMarketplaceRegistry - } - - switch registryName { - case defaultMarketplaceRegistry: - return runtime, clawhub.NewClient(registryCfg.BaseURL), registryName, nil - default: - return runtimeContext{}, nil, 
"", fmt.Errorf("cli: unsupported marketplace registry %q", registryCfg.Registry) - } -} - -func normalizeSkillSlug(slug string) (string, error) { - trimmed := strings.TrimSpace(slug) - if trimmed == "" { - return "", errors.New("skill slug is required") - } - if !validSkillSlugPattern.MatchString(trimmed) { - return "", errors.New(`skill slug must match "@author/name"`) - } - return trimmed, nil -} - -func installMarketplaceSkill( - ctx context.Context, - runtime runtimeContext, - registry marketplace.Registry, - registryName string, - slug string, - replaceExisting bool, -) (skillInstallItem, error) { - if err := os.MkdirAll(runtime.HomePaths.SkillsDir, 0o755); err != nil { - return skillInstallItem{}, fmt.Errorf("cli: create skills directory %q: %w", runtime.HomePaths.SkillsDir, err) - } - - archive, err := registry.Download(ctx, slug) - if err != nil { - return skillInstallItem{}, err - } - if archive == nil { - return skillInstallItem{}, fmt.Errorf("cli: marketplace download returned no archive for %q", slug) - } - if archive.Data == nil { - return skillInstallItem{}, fmt.Errorf("cli: marketplace download returned no archive stream for %q", slug) - } - defer func() { - _ = archive.Data.Close() - }() - - tempRoot, err := os.MkdirTemp(runtime.HomePaths.SkillsDir, ".agh-skill-install-*") - if err != nil { - return skillInstallItem{}, fmt.Errorf("cli: create temporary install directory: %w", err) - } - defer func() { - _ = os.RemoveAll(tempRoot) - }() - - if err := extractMarketplaceArchive(archive.Data, tempRoot); err != nil { - return skillInstallItem{}, fmt.Errorf("cli: extract skill archive for %q: %w", slug, err) - } - - skillFile, err := locateExtractedSkillFile(tempRoot) - if err != nil { - return skillInstallItem{}, fmt.Errorf("cli: locate extracted skill for %q: %w", slug, err) - } - - parsedSkill, err := skills.ParseSkillFile(skillFile) - if err != nil { - return skillInstallItem{}, fmt.Errorf("cli: parse extracted skill for %q: %w", slug, err) - } - - 
content, err := skills.ReadSkillContent(skillFile) - if err != nil { - return skillInstallItem{}, fmt.Errorf("cli: read extracted skill content for %q: %w", slug, err) - } - - if critical := criticalWarnings(skills.VerifyContent(content)); len(critical) > 0 { - return skillInstallItem{}, fmt.Errorf( - "cli: install blocked for %q due to critical verification findings: %s", - slug, - strings.Join(critical, "; "), - ) - } - - hash, err := skills.ComputeDirectoryHash(parsedSkill.Dir) - if err != nil { - return skillInstallItem{}, fmt.Errorf("cli: compute extracted skill hash for %q: %w", slug, err) - } - - version := firstNonEmpty(archive.Version, parsedSkill.Meta.Version) - targetDir, err := pathWithinRoot(runtime.HomePaths.SkillsDir, parsedSkill.Meta.Name) - if err != nil { - return skillInstallItem{}, fmt.Errorf("cli: resolve install path for %q: %w", parsedSkill.Meta.Name, err) - } - - if err := skills.WriteSidecar(parsedSkill.Dir, skills.Provenance{ - Hash: hash, - Registry: registryName, - Slug: slug, - Version: version, - InstalledAt: time.Now().UTC(), - }); err != nil { - return skillInstallItem{}, fmt.Errorf("cli: write provenance for %q: %w", slug, err) - } - - if err := moveInstalledSkillDir(parsedSkill.Dir, targetDir, replaceExisting); err != nil { - return skillInstallItem{}, err - } - - return skillInstallItem{ - Name: parsedSkill.Meta.Name, - Slug: slug, - Version: version, - Registry: registryName, - Path: targetDir, - Hash: hash, - Status: "installed", - }, nil -} - -func updateMarketplaceSkills( - ctx context.Context, - runtime runtimeContext, - registry marketplace.Registry, - registryName string, - args []string, - updateAll bool, -) ([]skillUpdateItem, error) { - if updateAll { - installedSkills, err := listInstalledMarketplaceSkills(runtime.HomePaths.SkillsDir) - if err != nil { - return nil, err - } - - items := make([]skillUpdateItem, 0, len(installedSkills)) - for _, installed := range installedSkills { - item, err := 
updateMarketplaceSkill(ctx, runtime, registry, registryName, installed) - if err != nil { - return nil, err - } - items = append(items, item) - } - return items, nil - } - - name, err := normalizeSkillName(args[0]) - if err != nil { - return nil, err - } - - installed, err := findInstalledMarketplaceSkill(runtime.HomePaths.SkillsDir, name) - if err != nil { - return nil, err - } - - item, err := updateMarketplaceSkill(ctx, runtime, registry, registryName, installed) - if err != nil { - return nil, err - } - return []skillUpdateItem{item}, nil -} - -func updateMarketplaceSkill( - ctx context.Context, - runtime runtimeContext, - registry marketplace.Registry, - registryName string, - installed installedMarketplaceSkill, -) (skillUpdateItem, error) { - slug := strings.TrimSpace(installed.Provenance.Slug) - if slug == "" { - return skillUpdateItem{}, fmt.Errorf("cli: marketplace skill %q is missing registry slug metadata", installed.Name) - } - - detail, err := registry.Info(ctx, slug) - if err != nil { - return skillUpdateItem{}, err - } - - currentVersion := strings.TrimSpace(installed.Provenance.Version) - latestVersion := "" - if detail != nil { - latestVersion = strings.TrimSpace(detail.Version) - } - if !versionIsNewer(currentVersion, latestVersion) { - return skillUpdateItem{ - Name: installed.Name, - Slug: slug, - CurrentVersion: currentVersion, - LatestVersion: firstNonEmpty(latestVersion, currentVersion), - Path: installed.Dir, - Status: "already up to date", - }, nil - } - - installedItem, err := installMarketplaceSkill(ctx, runtime, registry, registryName, slug, true) - if err != nil { - return skillUpdateItem{}, err - } - - return skillUpdateItem{ - Name: installedItem.Name, - Slug: slug, - CurrentVersion: currentVersion, - LatestVersion: firstNonEmpty(installedItem.Version, latestVersion), - Path: installedItem.Path, - Status: "updated", - }, nil -} - -func removeMarketplaceSkill(skillsDir string, name string) (skillRemoveItem, error) { - installed, err 
:= findInstalledMarketplaceSkill(skillsDir, name) - if err != nil { - return skillRemoveItem{}, err - } - - if err := os.RemoveAll(installed.Dir); err != nil { - return skillRemoveItem{}, fmt.Errorf("cli: remove marketplace skill %q: %w", name, err) - } - - return skillRemoveItem{ - Name: installed.Name, - Slug: installed.Provenance.Slug, - Path: installed.Dir, - Status: "removed", - }, nil -} - -func findInstalledMarketplaceSkill(skillsDir string, name string) (installedMarketplaceSkill, error) { - skillDir, err := pathWithinRoot(skillsDir, name) - if err != nil { - return installedMarketplaceSkill{}, fmt.Errorf("cli: resolve skill path for %q: %w", name, err) - } - - info, err := os.Stat(skillDir) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - return installedMarketplaceSkill{}, fmt.Errorf("skill %q not found", name) - } - return installedMarketplaceSkill{}, fmt.Errorf("cli: inspect skill directory %q: %w", skillDir, err) - } - if !info.IsDir() { - return installedMarketplaceSkill{}, fmt.Errorf("skill %q is not a directory", name) - } - - hasSidecar, err := skills.HasSidecar(skillDir) - if err != nil { - return installedMarketplaceSkill{}, err - } - if !hasSidecar { - return installedMarketplaceSkill{}, fmt.Errorf("skill %q is not a marketplace-installed skill", name) - } - - return readInstalledMarketplaceSkill(skillDir) -} - -func listInstalledMarketplaceSkills(skillsDir string) ([]installedMarketplaceSkill, error) { - entries, err := os.ReadDir(skillsDir) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - return []installedMarketplaceSkill{}, nil - } - return nil, fmt.Errorf("cli: read installed skills directory %q: %w", skillsDir, err) - } - - items := make([]installedMarketplaceSkill, 0, len(entries)) - for _, entry := range entries { - if !entry.IsDir() { - continue - } - - skillDir, err := pathWithinRoot(skillsDir, entry.Name()) - if err != nil { - return nil, fmt.Errorf("cli: resolve installed skill path for %q: %w", entry.Name(), err) - 
} - - hasSidecar, err := skills.HasSidecar(skillDir) - if err != nil { - return nil, err - } - if !hasSidecar { - continue - } - - item, err := readInstalledMarketplaceSkill(skillDir) - if err != nil { - return nil, err - } - items = append(items, item) - } - - sort.Slice(items, func(i, j int) bool { - return items[i].Name < items[j].Name - }) - return items, nil -} - -func readInstalledMarketplaceSkill(skillDir string) (installedMarketplaceSkill, error) { - provenance, err := skills.ReadSidecar(skillDir) - if err != nil { - return installedMarketplaceSkill{}, err - } - if provenance == nil { - return installedMarketplaceSkill{}, fmt.Errorf("cli: missing provenance for %q", skillDir) - } - - skillFile, err := pathWithinRoot(skillDir, skillMarkdownFileName) - if err != nil { - return installedMarketplaceSkill{}, fmt.Errorf("cli: resolve skill file in %q: %w", skillDir, err) - } - - parsedSkill, err := skills.ParseSkillFile(skillFile) - if err != nil { - return installedMarketplaceSkill{}, err - } - - return installedMarketplaceSkill{ - Name: parsedSkill.Meta.Name, - Dir: parsedSkill.Dir, - FilePath: parsedSkill.FilePath, - Provenance: *provenance, - }, nil -} - -func extractMarketplaceArchive(reader io.Reader, destRoot string) error { - if strings.TrimSpace(destRoot) == "" { - return errors.New("destination root is required") - } - if err := os.MkdirAll(destRoot, 0o755); err != nil { - return fmt.Errorf("create destination root %q: %w", destRoot, err) - } - - gzipReader, err := gzip.NewReader(reader) - if err != nil { - return fmt.Errorf("open gzip stream: %w", err) - } - defer func() { - _ = gzipReader.Close() - }() - - tarReader := tar.NewReader(gzipReader) - for { - header, err := tarReader.Next() - if errors.Is(err, io.EOF) { - return nil - } - if err != nil { - return fmt.Errorf("read tar entry: %w", err) - } - - entryName, err := cleanArchiveEntryPath(header.Name) - if err != nil { - return err - } - targetPath, err := pathWithinRoot(destRoot, 
filepath.FromSlash(entryName)) - if err != nil { - return fmt.Errorf("resolve archive entry %q: %w", header.Name, err) - } - - switch header.Typeflag { - case tar.TypeDir: - if err := os.MkdirAll(targetPath, 0o755); err != nil { - return fmt.Errorf("create archive directory %q: %w", targetPath, err) - } - case tar.TypeReg: - if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil { - return fmt.Errorf("create archive parent %q: %w", filepath.Dir(targetPath), err) - } - - file, err := os.OpenFile(targetPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644) - if err != nil { - return fmt.Errorf("create archive file %q: %w", targetPath, err) - } - if _, err := io.Copy(file, tarReader); err != nil { - _ = file.Close() - return fmt.Errorf("write archive file %q: %w", targetPath, err) - } - if err := file.Close(); err != nil { - return fmt.Errorf("close archive file %q: %w", targetPath, err) - } - default: - return fmt.Errorf("unsupported archive entry type %d for %q", header.Typeflag, header.Name) - } - } -} - -func locateExtractedSkillFile(root string) (string, error) { - var matches []string - - err := filepath.WalkDir(root, func(current string, entry fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } - if entry.IsDir() { - return nil - } - if entry.Name() != skillMarkdownFileName { - return nil - } - matches = append(matches, current) - if len(matches) > 1 { - return errors.New("multiple SKILL.md files found in archive") - } - return nil - }) - if err != nil { - return "", err - } - if len(matches) == 0 { - return "", errors.New("archive did not contain SKILL.md") - } - return matches[0], nil -} - -func moveInstalledSkillDir(extractedDir string, targetDir string, replaceExisting bool) error { - if !replaceExisting { - if _, err := os.Stat(targetDir); err == nil { - return fmt.Errorf("skill %q already exists at %s", filepath.Base(targetDir), targetDir) - } else if !errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("cli: inspect 
target skill directory %q: %w", targetDir, err) - } - - if err := os.Rename(extractedDir, targetDir); err != nil { - return fmt.Errorf("cli: install skill into %q: %w", targetDir, err) - } - return nil - } - - if _, err := os.Stat(targetDir); err != nil { - if !errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("cli: inspect target skill directory %q: %w", targetDir, err) - } - if err := os.Rename(extractedDir, targetDir); err != nil { - return fmt.Errorf("cli: install updated skill into %q: %w", targetDir, err) - } - return nil - } - - backupDir := fmt.Sprintf("%s.backup-%d", targetDir, time.Now().UTC().UnixNano()) - if err := os.Rename(targetDir, backupDir); err != nil { - return fmt.Errorf("cli: stage existing skill backup %q: %w", targetDir, err) - } - - if err := os.Rename(extractedDir, targetDir); err != nil { - revertErr := os.Rename(backupDir, targetDir) - if revertErr != nil { - return errors.Join( - fmt.Errorf("cli: install updated skill into %q: %w", targetDir, err), - fmt.Errorf("cli: restore original skill from %q: %w", backupDir, revertErr), - ) - } - return fmt.Errorf("cli: install updated skill into %q: %w", targetDir, err) - } - - if err := os.RemoveAll(backupDir); err != nil { - return fmt.Errorf("cli: remove backup skill directory %q: %w", backupDir, err) - } - return nil -} - -func cleanArchiveEntryPath(entry string) (string, error) { - cleaned := path.Clean(strings.TrimSpace(strings.ReplaceAll(entry, "\\", "/"))) - switch { - case cleaned == ".", cleaned == "": - return "", errors.New("archive entry path is required") - case strings.HasPrefix(cleaned, "/"): - return "", fmt.Errorf("archive entry %q must be relative", entry) - case cleaned == "..", strings.HasPrefix(cleaned, "../"): - return "", fmt.Errorf("archive entry %q escapes the extraction root", entry) - default: - return cleaned, nil - } -} - -func pathWithinRoot(root string, child string) (string, error) { - absRoot, err := filepath.Abs(strings.TrimSpace(root)) - if err != nil { - 
return "", fmt.Errorf("resolve root %q: %w", root, err) - } - targetPath := filepath.Join(absRoot, child) - absTarget, err := filepath.Abs(targetPath) - if err != nil { - return "", fmt.Errorf("resolve target %q: %w", targetPath, err) - } - relative, err := filepath.Rel(absRoot, absTarget) - if err != nil { - return "", fmt.Errorf("resolve target %q within %q: %w", absTarget, absRoot, err) - } - if relative == ".." || strings.HasPrefix(relative, ".."+string(filepath.Separator)) { - return "", errors.New("path must stay within the root directory") - } - return absTarget, nil -} - -func criticalWarnings(warnings []skills.Warning) []string { - items := make([]string, 0, len(warnings)) - for _, warning := range warnings { - if warning.Severity != skills.SeverityCritical { - continue - } - items = append(items, firstNonEmpty(warning.Message, warning.Pattern)) - } - return items -} - -func versionIsNewer(current string, latest string) bool { - normalizedCurrent := normalizeVersion(current) - normalizedLatest := normalizeVersion(latest) - if normalizedLatest == "" { - return false - } - if normalizedCurrent == "" { - return true - } - - currentParts, currentNumeric := parseVersionParts(normalizedCurrent) - latestParts, latestNumeric := parseVersionParts(normalizedLatest) - if currentNumeric && latestNumeric { - for i := 0; i < max(len(currentParts), len(latestParts)); i++ { - currentPart := versionPartAt(currentParts, i) - latestPart := versionPartAt(latestParts, i) - switch { - case latestPart > currentPart: - return true - case latestPart < currentPart: - return false - } - } - return false - } - - return normalizedLatest > normalizedCurrent -} - -func normalizeVersion(version string) string { - trimmed := strings.TrimSpace(version) - trimmed = strings.TrimPrefix(trimmed, "v") - trimmed = strings.TrimPrefix(trimmed, "V") - return trimmed -} - -func parseVersionParts(version string) ([]int, bool) { - segments := strings.Split(version, ".") - if len(segments) == 0 { - 
return nil, false - } - - parts := make([]int, 0, len(segments)) - for _, segment := range segments { - if segment == "" { - return nil, false - } - value, err := strconv.Atoi(segment) - if err != nil { - return nil, false - } - parts = append(parts, value) - } - return parts, true -} - -func versionPartAt(parts []int, index int) int { - if index < 0 || index >= len(parts) { - return 0 - } - return parts[index] -} - -func loadSkillCommandContext(ctx context.Context, deps commandDeps) (skillCommandContext, error) { - runtime, err := loadRuntimeContext(deps) - if err != nil { - return skillCommandContext{}, err - } - - workspace, err := resolveCLIWorkspaceRoot(deps) - if err != nil { - return skillCommandContext{}, err - } - - userAgentsDir, err := aghconfig.ResolveUserAgentsSkillsDir(deps.getenv) - if err != nil { - return skillCommandContext{}, err - } - - registry := skills.NewRegistry(skills.RegistryConfig{ - BundledFS: skillbundled.FS(), - UserSkillsDir: runtime.HomePaths.SkillsDir, - UserAgentsDir: userAgentsDir, - DisabledSkills: append([]string(nil), runtime.Config.Skills.DisabledSkills...), - }) - if err := registry.LoadAll(ctx); err != nil { - return skillCommandContext{}, err - } - - resolvedWorkspace, err := resolveSkillWorkspace(ctx, runtime, workspace) - if err != nil { - return skillCommandContext{}, err - } - - skillList, err := registry.ForWorkspace(ctx, resolvedWorkspace) - if err != nil { - return skillCommandContext{}, err - } - - return skillCommandContext{ - workspace: workspace, - bundledFS: skillbundled.FS(), - registry: registry, - skills: skillList, - }, nil -} - -func resolveSkillWorkspace(ctx context.Context, runtime runtimeContext, workspaceRoot string) (workspacepkg.ResolvedWorkspace, error) { - fallback, err := cliResolvedWorkspace(workspaceRoot) - if err != nil { - return workspacepkg.ResolvedWorkspace{}, err - } - - if strings.TrimSpace(workspaceRoot) == "" { - return fallback, nil - } - - if _, err := 
os.Stat(runtime.HomePaths.DatabaseFile); err != nil { - if errors.Is(err, os.ErrNotExist) { - return fallback, nil - } - return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: stat workspace database %q: %w", runtime.HomePaths.DatabaseFile, err) - } - - resolved, err := resolveRegisteredSkillWorkspace(ctx, runtime, workspaceRoot) - if err != nil { - if errors.Is(err, workspacepkg.ErrWorkspaceNotFound) { - return fallback, nil - } - return workspacepkg.ResolvedWorkspace{}, err - } - - return resolved, nil -} - -func resolveRegisteredSkillWorkspace(ctx context.Context, runtime runtimeContext, workspaceRoot string) (resolved workspacepkg.ResolvedWorkspace, err error) { - globalDB, err := globaldb.OpenGlobalDB(ctx, runtime.HomePaths.DatabaseFile) - if err != nil { - return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: open workspace database %q: %w", runtime.HomePaths.DatabaseFile, err) - } - defer func() { - if closeErr := globalDB.Close(ctx); closeErr != nil { - closeErr = fmt.Errorf("cli: close workspace database %q: %w", runtime.HomePaths.DatabaseFile, closeErr) - if err == nil { - err = closeErr - return - } - err = errors.Join(err, closeErr) - } - }() - - resolver, err := workspacepkg.NewResolver( - globalDB, - workspacepkg.WithHomePaths(runtime.HomePaths), - workspacepkg.WithConfigLoader(func(rootDir string) (aghconfig.Config, error) { - return aghconfig.LoadForHome(runtime.HomePaths, aghconfig.WithWorkspaceRoot(rootDir)) - }), - ) - if err != nil { - return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: create workspace resolver: %w", err) - } - - resolved, err = resolver.Resolve(ctx, workspaceRoot) - if err != nil { - return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: resolve workspace %q: %w", workspaceRoot, err) - } - - return resolved, nil -} - -func cliResolvedWorkspace(root string) (workspacepkg.ResolvedWorkspace, error) { - workspaceRoot := strings.TrimSpace(root) - if workspaceRoot == "" { - return workspacepkg.ResolvedWorkspace{}, 
nil - } - - skillRoots, err := os.ReadDir(filepath.Join(workspaceRoot, aghconfig.DirName, aghconfig.SkillsDirName)) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - return workspacepkg.ResolvedWorkspace{ - Workspace: workspacepkg.Workspace{RootDir: workspaceRoot}, - }, nil - } - return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: read workspace skills %q: %w", workspaceRoot, err) - } - - skillPaths := make([]workspacepkg.SkillPath, 0, len(skillRoots)) - for _, entry := range skillRoots { - if !entry.IsDir() { - continue - } - - skillDir := filepath.Join(workspaceRoot, aghconfig.DirName, aghconfig.SkillsDirName, entry.Name()) - skillFile := filepath.Join(skillDir, skillMarkdownFileName) - if _, err := os.Stat(skillFile); err != nil { - if errors.Is(err, os.ErrNotExist) { - continue - } - return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: inspect workspace skill %q: %w", skillFile, err) - } - - skillPaths = append(skillPaths, workspacepkg.SkillPath{ - Dir: skillDir, - Source: "workspace", - }) - } - - return workspacepkg.ResolvedWorkspace{ - Workspace: workspacepkg.Workspace{RootDir: workspaceRoot}, - Skills: skillPaths, - }, nil -} - -func resolveCLIWorkspaceRoot(deps commandDeps) (string, error) { - workspace, err := currentWorkingDirectory(deps) - if err != nil { - return "", err - } - - absWorkspace, err := filepath.Abs(workspace) - if err != nil { - return "", fmt.Errorf("cli: resolve workspace root %q: %w", workspace, err) - } - return absWorkspace, nil -} - -func skillListItems(allSkills []*skills.Skill, sourceFilter string) ([]skillListItem, error) { - filter, err := normalizeSkillSourceFilter(sourceFilter) - if err != nil { - return nil, err - } - - items := make([]skillListItem, 0, len(allSkills)) - for _, skill := range allSkills { - if skill == nil { - continue - } - - source := skillSourceLabel(skill.Source) - if filter != "" && source != filter { - continue - } - - items = append(items, skillListItem{ - Name: skill.Meta.Name, - 
Description: skill.Meta.Description, - Source: source, - Enabled: skill.Enabled, - }) - } - - return items, nil -} - -func normalizeSkillSourceFilter(sourceFilter string) (string, error) { - filter := strings.ToLower(strings.TrimSpace(sourceFilter)) - switch filter { - case "": - return "", nil - case "bundled", "marketplace", "user", "additional", "workspace": - return filter, nil - case "agents", ".agents": - return "additional", nil - default: - return "", fmt.Errorf("cli: invalid skill source %q", sourceFilter) - } -} - -func findSkillByName(allSkills []*skills.Skill, name string) (*skills.Skill, error) { - skillName := strings.TrimSpace(name) - if skillName == "" { - return nil, errors.New("skill name is required") - } - - for _, skill := range allSkills { - if skill == nil { - continue - } - if skill.Meta.Name == skillName { - return skill, nil - } - } - - return nil, fmt.Errorf("skill %q not found", skillName) -} - -func listSkillResources(skill *skills.Skill, bundledFS fs.FS) ([]string, error) { - if skill == nil { - return nil, errors.New("skill is required") - } - - resources := make([]string, 0) - switch skill.Source { - case skills.SourceBundled: - if bundledFS == nil { - return nil, errors.New("bundled skills filesystem is required") - } - - root := strings.TrimSpace(skill.Dir) - if root == "" { - return []string{}, nil - } - - err := fs.WalkDir(bundledFS, root, func(resourcePath string, entry fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } - if entry.IsDir() { - return nil - } - - relative := strings.TrimPrefix(resourcePath, root+"/") - if resourcePath == root { - relative = skillMarkdownFileName - } - if relative == skillMarkdownFileName { - return nil - } - - resources = append(resources, relative) - return nil - }) - if err != nil { - return nil, fmt.Errorf("cli: list bundled skill resources for %q: %w", skill.Meta.Name, err) - } - default: - root := strings.TrimSpace(skill.Dir) - if root == "" { - return []string{}, 
nil - } - - err := filepath.WalkDir(root, func(resourcePath string, entry fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } - if entry.IsDir() { - return nil - } - - relative, err := filepath.Rel(root, resourcePath) - if err != nil { - return err - } - if filepath.Clean(relative) == skillMarkdownFileName { - return nil - } - - resources = append(resources, filepath.ToSlash(relative)) - return nil - }) - if err != nil { - return nil, fmt.Errorf("cli: list skill resources for %q: %w", skill.Meta.Name, err) - } - } - - sort.Strings(resources) - return resources, nil -} - -func readSkillResource(skill *skills.Skill, bundledFS fs.FS, relativePath string) (string, error) { - if skill == nil { - return "", errors.New("skill is required") - } - - switch skill.Source { - case skills.SourceBundled: - if bundledFS == nil { - return "", errors.New("bundled skills filesystem is required") - } - - cleanPath, err := cleanBundledSkillRelativePath(relativePath) - if err != nil { - return "", err - } - root := strings.TrimSpace(skill.Dir) - if root == "" { - return "", errors.New("skill directory is required") - } - - content, err := fs.ReadFile(bundledFS, path.Join(root, cleanPath)) - if err != nil { - return "", fmt.Errorf("cli: read bundled skill file %q: %w", cleanPath, err) - } - return string(content), nil - default: - cleanPath, err := cleanFilesystemSkillRelativePath(relativePath) - if err != nil { - return "", err - } - - root := strings.TrimSpace(skill.Dir) - if root == "" { - return "", errors.New("skill directory is required") - } - - targetPath := filepath.Join(root, cleanPath) - absRoot, err := filepath.Abs(root) - if err != nil { - return "", fmt.Errorf("cli: resolve skill directory %q: %w", root, err) - } - resolvedRoot, err := filepath.EvalSymlinks(absRoot) - if err != nil { - return "", fmt.Errorf("cli: resolve skill directory %q: %w", absRoot, err) - } - absTarget, err := filepath.Abs(targetPath) - if err != nil { - return "", 
fmt.Errorf("cli: resolve skill file %q: %w", targetPath, err) - } - resolvedTarget, err := filepath.EvalSymlinks(absTarget) - if err != nil { - return "", fmt.Errorf("cli: resolve skill file %q: %w", absTarget, err) - } - - relativeToRoot, err := filepath.Rel(resolvedRoot, resolvedTarget) - if err != nil { - return "", fmt.Errorf("cli: resolve skill file %q within %q: %w", resolvedTarget, resolvedRoot, err) - } - if relativeToRoot == ".." || strings.HasPrefix(relativeToRoot, ".."+string(filepath.Separator)) { - return "", errors.New("skill file path must stay within the skill directory") - } - - content, err := os.ReadFile(resolvedTarget) - if err != nil { - return "", fmt.Errorf("cli: read skill file %q: %w", cleanPath, err) - } - return string(content), nil - } -} - -func cleanBundledSkillRelativePath(relativePath string) (string, error) { - cleaned := path.Clean(strings.TrimSpace(strings.ReplaceAll(relativePath, "\\", "/"))) - switch { - case cleaned == ".", cleaned == "": - return "", errors.New("skill file path is required") - case strings.HasPrefix(cleaned, "/"): - return "", errors.New("skill file path must be relative") - case cleaned == "..", strings.HasPrefix(cleaned, "../"): - return "", errors.New("skill file path must stay within the skill directory") - default: - return cleaned, nil - } -} - -func cleanFilesystemSkillRelativePath(relativePath string) (string, error) { - cleaned := filepath.Clean(strings.TrimSpace(relativePath)) - switch { - case cleaned == ".", cleaned == "": - return "", errors.New("skill file path is required") - case filepath.IsAbs(cleaned): - return "", errors.New("skill file path must be relative") - case cleaned == "..", strings.HasPrefix(cleaned, ".."+string(filepath.Separator)): - return "", errors.New("skill file path must stay within the skill directory") - default: - return cleaned, nil - } -} - -func renderSkillXML(skill *skills.Skill, content string, resources []string) (string, error) { - if skill == nil { - return "", 
errors.New("skill is required") - } - - var builder strings.Builder - builder.WriteString(``) - builder.WriteString("\n") - builder.WriteString(content) - if !strings.HasSuffix(content, "\n") { - builder.WriteString("\n") - } - builder.WriteString("\n\n") - for _, resource := range resources { - builder.WriteString(" ") - builder.WriteString(skillXMLTextReplacer.Replace(resource)) - builder.WriteString("\n") - } - builder.WriteString("\n") - builder.WriteString("") - return builder.String(), nil -} - -func normalizeSkillName(name string) (string, error) { - trimmed := strings.TrimSpace(name) - switch { - case trimmed == "": - return "", errors.New("skill name is required") - case trimmed == ".", trimmed == "..": - return "", errors.New("skill name must not be a relative path segment") - case filepath.IsAbs(trimmed): - return "", errors.New("skill name must be relative") - case strings.Contains(trimmed, "/"), strings.Contains(trimmed, `\`): - return "", errors.New("skill name must not include path separators") - case !validSkillNamePattern.MatchString(trimmed): - return "", errors.New("skill name must contain only letters, numbers, dots, underscores, and hyphens") - default: - return trimmed, nil - } -} - -func defaultSkillTemplate(name string) string { - trimmedName := strings.TrimSpace(name) - if trimmedName == "" { - trimmedName = defaultSkillName - } - - return fmt.Sprintf(`--- -name: %q -description: Describe when to use this skill. ---- - -# %s - -Describe the workflow, constraints, and expected outcome for this skill. 
-`, trimmedName, titleizeSkillName(trimmedName)) -} - -func titleizeSkillName(name string) string { - parts := strings.FieldsFunc(name, func(r rune) bool { - return r == '-' || r == '_' || r == ' ' - }) - if len(parts) == 0 { - return "New Skill" - } - - titled := make([]string, 0, len(parts)) - for _, part := range parts { - if part == "" { - continue - } - - lower := strings.ToLower(part) - titled = append(titled, strings.ToUpper(lower[:1])+lower[1:]) - } - if len(titled) == 0 { - return "New Skill" - } - return strings.Join(titled, " ") -} - -func skillSourceLabel(source skills.SkillSource) string { - switch source { - case skills.SourceBundled: - return "bundled" - case skills.SourceMarketplace: - return "marketplace" - case skills.SourceUser: - return "user" - case skills.SourceAdditional: - return "additional" - case skills.SourceWorkspace: - return "workspace" - default: - return "unknown" - } -} - -func skillSearchBundle(items []marketplace.SkillListing) outputBundle { - return listBundle( - items, - items, - "Marketplace Skills", - []string{"Slug", "Name", "Description", "Author", "Version", "Downloads"}, - "skills", - []string{"slug", "name", "description", "author", "version", "downloads"}, - func(item marketplace.SkillListing) []string { - return []string{ - stringOrDash(item.Slug), - stringOrDash(item.Name), - stringOrDash(item.Description), - stringOrDash(item.Author), - stringOrDash(item.Version), - strconv.Itoa(item.Downloads), - } - }, - func(item marketplace.SkillListing) []string { - return []string{ - item.Slug, - item.Name, - item.Description, - item.Author, - item.Version, - strconv.Itoa(item.Downloads), - } - }, - ) -} - -func skillListBundle(items []skillListItem) outputBundle { - return listBundle( - items, - items, - "Skills", - []string{"Name", "Description", "Source", "Enabled"}, - "skills", - []string{"name", "description", "source", "enabled"}, - func(item skillListItem) []string { - return []string{ - stringOrDash(item.Name), - 
stringOrDash(item.Description), - stringOrDash(item.Source), - strconv.FormatBool(item.Enabled), - } - }, - func(item skillListItem) []string { - return []string{ - item.Name, - item.Description, - item.Source, - strconv.FormatBool(item.Enabled), - } - }, - ) -} - -func skillViewBundle(item skillViewItem, rendered string) outputBundle { - return outputBundle{ - jsonValue: item, - human: func() (string, error) { - return rendered, nil - }, - toon: func() (string, error) { - return rendered, nil - }, - } -} - -func skillInfoBundle(item skillInfoItem) outputBundle { - return outputBundle{ - jsonValue: item, - human: func() (string, error) { - base := renderHumanSection("Skill", []keyValue{ - {Label: "Name", Value: stringOrDash(item.Name)}, - {Label: "Description", Value: stringOrDash(item.Description)}, - {Label: "Version", Value: stringOrDash(item.Version)}, - {Label: "Source", Value: stringOrDash(item.Source)}, - {Label: "Path", Value: stringOrDash(item.Path)}, - {Label: "Enabled", Value: strconv.FormatBool(item.Enabled)}, - }) - - metadataRows := make([][]string, 0, len(item.Metadata)) - for _, entry := range sortedSkillMetadataEntries(item.Metadata) { - metadataRows = append(metadataRows, []string{entry.Label, entry.Value}) - } - metadata := renderHumanTable("Metadata", []string{"Key", "Value"}, metadataRows) - - resourceRows := make([][]string, 0, len(item.Resources)) - for _, resource := range item.Resources { - resourceRows = append(resourceRows, []string{resource}) - } - resources := renderHumanTable("Resources", []string{"Path"}, resourceRows) - - return renderHumanBlocks(base, metadata, resources), nil - }, - toon: func() (string, error) { - metadataRows := make([][]string, 0, len(item.Metadata)) - for _, entry := range sortedSkillMetadataEntries(item.Metadata) { - metadataRows = append(metadataRows, []string{entry.Label, entry.Value}) - } - - resourceRows := make([][]string, 0, len(item.Resources)) - for _, resource := range item.Resources { - resourceRows 
= append(resourceRows, []string{resource}) - } - - return renderHumanBlocks( - renderToonObject("skill", []string{"name", "description", "version", "source", "path", "enabled"}, []string{ - item.Name, - item.Description, - item.Version, - item.Source, - item.Path, - strconv.FormatBool(item.Enabled), - }), - renderToonArray("metadata", []string{"key", "value"}, metadataRows), - renderToonArray("resources", []string{"path"}, resourceRows), - ), nil - }, - } -} - -func skillCreateBundle(item skillCreateItem) outputBundle { - return outputBundle{ - jsonValue: item, - human: func() (string, error) { - return renderHumanSection("Skill", []keyValue{ - {Label: "Name", Value: stringOrDash(item.Name)}, - {Label: "Source", Value: stringOrDash(item.Source)}, - {Label: "Path", Value: stringOrDash(item.Path)}, - {Label: "File", Value: stringOrDash(item.File)}, - {Label: "Status", Value: stringOrDash(item.Status)}, - }), nil - }, - toon: func() (string, error) { - return renderToonObject("skill", []string{"name", "source", "path", "file", "status"}, []string{ - item.Name, - item.Source, - item.Path, - item.File, - item.Status, - }), nil - }, - } -} - -func skillInstallBundle(item skillInstallItem) outputBundle { - return outputBundle{ - jsonValue: item, - human: func() (string, error) { - return renderHumanSection("Skill Install", []keyValue{ - {Label: "Name", Value: stringOrDash(item.Name)}, - {Label: "Slug", Value: stringOrDash(item.Slug)}, - {Label: "Version", Value: stringOrDash(item.Version)}, - {Label: "Registry", Value: stringOrDash(item.Registry)}, - {Label: "Path", Value: stringOrDash(item.Path)}, - {Label: "Hash", Value: stringOrDash(item.Hash)}, - {Label: "Status", Value: stringOrDash(item.Status)}, - }), nil - }, - toon: func() (string, error) { - return renderToonObject("skill_install", []string{"name", "slug", "version", "registry", "path", "hash", "status"}, []string{ - item.Name, - item.Slug, - item.Version, - item.Registry, - item.Path, - item.Hash, - 
item.Status, - }), nil - }, - } -} - -func skillRemoveBundle(item skillRemoveItem) outputBundle { - return outputBundle{ - jsonValue: item, - human: func() (string, error) { - return renderHumanSection("Skill Remove", []keyValue{ - {Label: "Name", Value: stringOrDash(item.Name)}, - {Label: "Slug", Value: stringOrDash(item.Slug)}, - {Label: "Path", Value: stringOrDash(item.Path)}, - {Label: "Status", Value: stringOrDash(item.Status)}, - }), nil - }, - toon: func() (string, error) { - return renderToonObject("skill_remove", []string{"name", "slug", "path", "status"}, []string{ - item.Name, - item.Slug, - item.Path, - item.Status, - }), nil - }, - } -} - -func skillUpdateBundle(items []skillUpdateItem) outputBundle { - return listBundle( - items, - items, - "Skill Updates", - []string{"Name", "Slug", "Current", "Latest", "Path", "Status"}, - "skill_updates", - []string{"name", "slug", "current_version", "latest_version", "path", "status"}, - func(item skillUpdateItem) []string { - return []string{ - stringOrDash(item.Name), - stringOrDash(item.Slug), - stringOrDash(item.CurrentVersion), - stringOrDash(item.LatestVersion), - stringOrDash(item.Path), - stringOrDash(item.Status), - } - }, - func(item skillUpdateItem) []string { - return []string{ - item.Name, - item.Slug, - item.CurrentVersion, - item.LatestVersion, - item.Path, - item.Status, - } - }, - ) -} - -func sortedSkillMetadataEntries(metadata map[string]any) []keyValue { - if len(metadata) == 0 { - return nil - } - - keys := make([]string, 0, len(metadata)) - for key := range metadata { - keys = append(keys, key) - } - sort.Strings(keys) - - entries := make([]keyValue, 0, len(keys)) - for _, key := range keys { - entries = append(entries, keyValue{ - Label: key, - Value: formatSkillMetadataValue(metadata[key]), - }) - } - return entries -} - -func formatSkillMetadataValue(value any) string { - switch typed := value.(type) { - case nil: - return "" - case string: - return typed - default: - payload, err := 
json.Marshal(typed) - if err != nil { - return fmt.Sprint(typed) - } - return compactJSON(payload) - } -} - -func cloneMetadata(metadata map[string]any) map[string]any { - if metadata == nil { - return nil - } - - clone := make(map[string]any, len(metadata)) - for key, value := range metadata { - clone[key] = value - } - return clone -} diff --git a/internal/cli/skill_commands.go b/internal/cli/skill_commands.go new file mode 100644 index 000000000..e76247fcc --- /dev/null +++ b/internal/cli/skill_commands.go @@ -0,0 +1,327 @@ +package cli + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + aghconfig "github.com/pedronauck/agh/internal/config" + "github.com/pedronauck/agh/internal/skills" + "github.com/pedronauck/agh/internal/skills/marketplace" + "github.com/spf13/cobra" +) + +func newSkillCommand(deps commandDeps) *cobra.Command { + cmd := &cobra.Command{ + Use: "skill", + Short: "Manage local AgentSkills", + } + + cmd.AddCommand(newSkillListCommand(deps)) + cmd.AddCommand(newSkillViewCommand(deps)) + cmd.AddCommand(newSkillInfoCommand(deps)) + cmd.AddCommand(newSkillSearchCommand(deps)) + cmd.AddCommand(newSkillInstallCommand(deps)) + cmd.AddCommand(newSkillRemoveCommand(deps)) + cmd.AddCommand(newSkillUpdateCommand(deps)) + cmd.AddCommand(newSkillCreateCommand(deps)) + return cmd +} + +func newSkillListCommand(deps commandDeps) *cobra.Command { + var sourceFilter string + + cmd := &cobra.Command{ + Use: "list", + Short: "List locally available skills", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + ctx, err := loadSkillCommandContext(cmd.Context(), deps) + if err != nil { + return err + } + + items, err := skillListItems(ctx.skills, sourceFilter) + if err != nil { + return err + } + + return writeCommandOutput(cmd, skillListBundle(items)) + }, + } + cmd.Flags().StringVar(&sourceFilter, "source", "", "Filter by source: bundled, marketplace, user, additional (or agents/.agents), or workspace") + return cmd +} + 
+func newSkillViewCommand(deps commandDeps) *cobra.Command { + var filePath string + + cmd := &cobra.Command{ + Use: "view ", + Short: "Read a skill or one of its resource files", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + ctx, err := loadSkillCommandContext(cmd.Context(), deps) + if err != nil { + return err + } + + skill, err := findSkillByName(ctx.skills, args[0]) + if err != nil { + return err + } + + if strings.TrimSpace(filePath) != "" { + content, err := readSkillResource(skill, ctx.bundledFS, filePath) + if err != nil { + return err + } + + item := skillViewItem{ + Name: skill.Meta.Name, + Source: skillSourceLabel(skill.Source), + Path: skill.FilePath, + File: strings.TrimSpace(filePath), + Content: content, + } + return writeCommandOutput(cmd, skillViewBundle(item, content)) + } + + resources, err := listSkillResources(skill, ctx.bundledFS) + if err != nil { + return err + } + + content, err := ctx.registry.LoadContent(cmd.Context(), skill) + if err != nil { + return err + } + + rendered, err := renderSkillXML(skill, content, resources) + if err != nil { + return err + } + + item := skillViewItem{ + Name: skill.Meta.Name, + Source: skillSourceLabel(skill.Source), + Path: skill.FilePath, + Content: rendered, + Resources: resources, + } + return writeCommandOutput(cmd, skillViewBundle(item, rendered)) + }, + } + cmd.Flags().StringVar(&filePath, "file", "", "Relative file path inside the skill directory") + return cmd +} + +func newSkillInfoCommand(deps commandDeps) *cobra.Command { + return &cobra.Command{ + Use: "info ", + Short: "Show detailed metadata for one skill", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + ctx, err := loadSkillCommandContext(cmd.Context(), deps) + if err != nil { + return err + } + + skill, err := findSkillByName(ctx.skills, args[0]) + if err != nil { + return err + } + + resources, err := listSkillResources(skill, ctx.bundledFS) + if err != nil { + 
return err + } + + item := skillInfoItem{ + Name: skill.Meta.Name, + Description: skill.Meta.Description, + Version: skill.Meta.Version, + Source: skillSourceLabel(skill.Source), + Path: skill.FilePath, + Enabled: skill.Enabled, + Metadata: cloneMetadata(skill.Meta.Metadata), + Resources: resources, + } + + return writeCommandOutput(cmd, skillInfoBundle(item)) + }, + } +} + +func newSkillCreateCommand(deps commandDeps) *cobra.Command { + return &cobra.Command{ + Use: "create [name]", + Short: "Scaffold a new workspace skill", + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + name := defaultSkillName + if len(args) == 1 { + name = args[0] + } + + skillName, err := normalizeSkillName(name) + if err != nil { + return err + } + + workspace, err := resolveCLIWorkspaceRoot(deps) + if err != nil { + return err + } + + skillDir := filepath.Join(workspace, aghconfig.DirName, aghconfig.SkillsDirName, skillName) + if _, err := os.Stat(skillDir); err == nil { + return fmt.Errorf("skill %q already exists at %s", skillName, skillDir) + } else if !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("cli: inspect skill directory %q: %w", skillDir, err) + } + + if err := os.MkdirAll(skillDir, 0o755); err != nil { + return fmt.Errorf("cli: create skill directory %q: %w", skillDir, err) + } + + skillFilePath := filepath.Join(skillDir, skillMarkdownFileName) + content := defaultSkillTemplate(skillName) + if err := os.WriteFile(skillFilePath, []byte(content), 0o644); err != nil { + return fmt.Errorf("cli: write skill template %q: %w", skillFilePath, err) + } + + if _, err := skills.ParseSkillFile(skillFilePath); err != nil { + return fmt.Errorf("cli: validate generated skill %q: %w", skillFilePath, err) + } + + return writeCommandOutput(cmd, skillCreateBundle(skillCreateItem{ + Name: skillName, + Path: skillDir, + File: skillFilePath, + Source: "workspace", + Status: "created", + })) + }, + } +} + +func newSkillSearchCommand(deps commandDeps) 
*cobra.Command { + limit := defaultMarketplaceSearchLim + + cmd := &cobra.Command{ + Use: "search ", + Short: "Search marketplace skills", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + if limit <= 0 { + return fmt.Errorf("cli: search limit must be positive: %d", limit) + } + + _, registry, _, err := loadMarketplaceRegistry(deps) + if err != nil { + return err + } + + results, err := registry.Search(cmd.Context(), args[0], marketplace.SearchOpts{Limit: limit}) + if err != nil { + return err + } + + return writeCommandOutput(cmd, skillSearchBundle(results)) + }, + } + cmd.Flags().IntVar(&limit, "limit", defaultMarketplaceSearchLim, "Maximum number of marketplace results to return") + return cmd +} + +func newSkillInstallCommand(deps commandDeps) *cobra.Command { + return &cobra.Command{ + Use: "install ", + Short: "Install a marketplace skill", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + slug, err := normalizeSkillSlug(args[0]) + if err != nil { + return err + } + + runtime, registry, registryName, err := loadMarketplaceRegistry(deps) + if err != nil { + return err + } + + item, err := installMarketplaceSkill(cmd.Context(), runtime, registry, registryName, slug, false, "") + if err != nil { + return err + } + + return writeCommandOutput(cmd, skillInstallBundle(item)) + }, + } +} + +func newSkillRemoveCommand(deps commandDeps) *cobra.Command { + return &cobra.Command{ + Use: "remove ", + Short: "Remove an installed marketplace skill", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + name, err := normalizeSkillName(args[0]) + if err != nil { + return err + } + + runtime, err := loadRuntimeContext(deps) + if err != nil { + return err + } + + item, err := removeMarketplaceSkill(runtime.HomePaths.SkillsDir, name) + if err != nil { + return err + } + + return writeCommandOutput(cmd, skillRemoveBundle(item)) + }, + } +} + +func newSkillUpdateCommand(deps 
commandDeps) *cobra.Command { + updateAll := false + + cmd := &cobra.Command{ + Use: "update [name]", + Short: "Update installed marketplace skills", + Args: func(_ *cobra.Command, args []string) error { + if updateAll && len(args) > 0 { + return errors.New("cli: update accepts either a skill name or --all, not both") + } + if !updateAll && len(args) != 1 { + return errors.New("cli: update requires a skill name unless --all is set") + } + if len(args) == 1 { + _, err := normalizeSkillName(args[0]) + return err + } + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + runtime, registry, registryName, err := loadMarketplaceRegistry(deps) + if err != nil { + return err + } + + items, err := updateMarketplaceSkills(cmd.Context(), runtime, registry, registryName, args, updateAll) + if err != nil { + return err + } + + return writeCommandOutput(cmd, skillUpdateBundle(items)) + }, + } + cmd.Flags().BoolVar(&updateAll, "all", false, "Update every installed marketplace skill") + return cmd +} diff --git a/internal/cli/skill_marketplace.go b/internal/cli/skill_marketplace.go new file mode 100644 index 000000000..398649f81 --- /dev/null +++ b/internal/cli/skill_marketplace.go @@ -0,0 +1,762 @@ +package cli + +import ( + "archive/tar" + "compress/gzip" + "context" + "errors" + "fmt" + "io" + "io/fs" + "os" + "path" + "path/filepath" + "sort" + "strconv" + "strings" + "time" + + "github.com/pedronauck/agh/internal/skills" + "github.com/pedronauck/agh/internal/skills/marketplace" + "github.com/pedronauck/agh/internal/skills/marketplace/clawhub" +) + +type installedMarketplaceSkill struct { + Name string + Dir string + FilePath string + Provenance skills.Provenance +} + +func loadMarketplaceRegistry(deps commandDeps) (runtimeContext, marketplace.Registry, string, error) { + runtime, err := loadRuntimeContext(deps) + if err != nil { + return runtimeContext{}, nil, "", err + } + + registryCfg := runtime.Config.Skills.Marketplace + registryName := 
strings.ToLower(strings.TrimSpace(registryCfg.Registry)) + if registryName == "" { + registryName = defaultMarketplaceRegistry + } + + switch registryName { + case defaultMarketplaceRegistry: + return runtime, clawhub.NewClient(registryCfg.BaseURL), registryName, nil + default: + return runtimeContext{}, nil, "", fmt.Errorf("cli: unsupported marketplace registry %q", registryCfg.Registry) + } +} + +func normalizeSkillSlug(slug string) (string, error) { + trimmed := strings.TrimSpace(slug) + if trimmed == "" { + return "", errors.New("skill slug is required") + } + if !validSkillSlugPattern.MatchString(trimmed) { + return "", errors.New(`skill slug must match "@author/name"`) + } + return trimmed, nil +} + +func installMarketplaceSkill( + ctx context.Context, + runtime runtimeContext, + registry marketplace.Registry, + registryName string, + slug string, + replaceExisting bool, + targetDirOverride string, +) (item skillInstallItem, err error) { + if err := os.MkdirAll(runtime.HomePaths.SkillsDir, 0o755); err != nil { + return skillInstallItem{}, fmt.Errorf("cli: create skills directory %q: %w", runtime.HomePaths.SkillsDir, err) + } + + archive, err := registry.Download(ctx, slug) + if err != nil { + return skillInstallItem{}, err + } + if archive == nil { + return skillInstallItem{}, fmt.Errorf("cli: marketplace download returned no archive for %q", slug) + } + if archive.Data == nil { + return skillInstallItem{}, fmt.Errorf("cli: marketplace download returned no archive stream for %q", slug) + } + defer func() { + err = joinContextError(err, archive.Data.Close(), "cli: close marketplace archive for %q: %w", slug) + }() + + tempRoot, err := os.MkdirTemp(runtime.HomePaths.SkillsDir, ".agh-skill-install-*") + if err != nil { + return skillInstallItem{}, fmt.Errorf("cli: create temporary install directory: %w", err) + } + defer func() { + // Best-effort cleanup; install correctness is determined by the primary result. 
+ _ = os.RemoveAll(tempRoot) + }() + + if err := extractMarketplaceArchive(archive.Data, tempRoot); err != nil { + return skillInstallItem{}, fmt.Errorf("cli: extract skill archive for %q: %w", slug, err) + } + + skillFile, err := locateExtractedSkillFile(tempRoot) + if err != nil { + return skillInstallItem{}, fmt.Errorf("cli: locate extracted skill for %q: %w", slug, err) + } + + parsedSkill, err := skills.ParseSkillFile(skillFile) + if err != nil { + return skillInstallItem{}, fmt.Errorf("cli: parse extracted skill for %q: %w", slug, err) + } + + content, err := skills.ReadSkillContent(skillFile) + if err != nil { + return skillInstallItem{}, fmt.Errorf("cli: read extracted skill content for %q: %w", slug, err) + } + + if critical := criticalWarnings(skills.VerifyContent(content)); len(critical) > 0 { + return skillInstallItem{}, fmt.Errorf( + "cli: install blocked for %q due to critical verification findings: %s", + slug, + strings.Join(critical, "; "), + ) + } + + hash, err := skills.ComputeDirectoryHash(parsedSkill.Dir) + if err != nil { + return skillInstallItem{}, fmt.Errorf("cli: compute extracted skill hash for %q: %w", slug, err) + } + + version := firstNonEmpty(archive.Version, parsedSkill.Meta.Version) + targetDir, err := resolveMarketplaceInstallTarget(runtime.HomePaths.SkillsDir, parsedSkill.Meta.Name, targetDirOverride) + if err != nil { + return skillInstallItem{}, fmt.Errorf("cli: resolve install path for %q: %w", slug, err) + } + + if err := skills.WriteSidecar(parsedSkill.Dir, skills.Provenance{ + Hash: hash, + Registry: registryName, + Slug: slug, + Version: version, + InstalledAt: time.Now().UTC(), + }); err != nil { + return skillInstallItem{}, fmt.Errorf("cli: write provenance for %q: %w", slug, err) + } + + if err := moveInstalledSkillDir(parsedSkill.Dir, targetDir, replaceExisting); err != nil { + return skillInstallItem{}, err + } + + return skillInstallItem{ + Name: parsedSkill.Meta.Name, + Slug: slug, + Version: version, + Registry: 
registryName, + Path: targetDir, + Hash: hash, + Status: "installed", + }, nil +} + +func updateMarketplaceSkills( + ctx context.Context, + runtime runtimeContext, + registry marketplace.Registry, + registryName string, + args []string, + updateAll bool, +) ([]skillUpdateItem, error) { + if updateAll { + installedSkills, err := listInstalledMarketplaceSkills(runtime.HomePaths.SkillsDir) + if err != nil { + return nil, err + } + + items := make([]skillUpdateItem, 0, len(installedSkills)) + for _, installed := range installedSkills { + item, err := updateMarketplaceSkill(ctx, runtime, registry, registryName, installed) + if err != nil { + return nil, err + } + items = append(items, item) + } + return items, nil + } + + name, err := normalizeSkillName(args[0]) + if err != nil { + return nil, err + } + + installed, err := findInstalledMarketplaceSkill(runtime.HomePaths.SkillsDir, name) + if err != nil { + return nil, err + } + + item, err := updateMarketplaceSkill(ctx, runtime, registry, registryName, installed) + if err != nil { + return nil, err + } + return []skillUpdateItem{item}, nil +} + +func updateMarketplaceSkill( + ctx context.Context, + runtime runtimeContext, + registry marketplace.Registry, + registryName string, + installed installedMarketplaceSkill, +) (skillUpdateItem, error) { + slug := strings.TrimSpace(installed.Provenance.Slug) + if slug == "" { + return skillUpdateItem{}, fmt.Errorf("cli: marketplace skill %q is missing registry slug metadata", installed.Name) + } + + detail, err := registry.Info(ctx, slug) + if err != nil { + return skillUpdateItem{}, err + } + + currentVersion := strings.TrimSpace(installed.Provenance.Version) + latestVersion := "" + if detail != nil { + latestVersion = strings.TrimSpace(detail.Version) + } + if !versionIsNewer(currentVersion, latestVersion) { + return skillUpdateItem{ + Name: installed.Name, + Slug: slug, + CurrentVersion: currentVersion, + LatestVersion: firstNonEmpty(latestVersion, currentVersion), + Path: 
installed.Dir, + Status: "already up to date", + }, nil + } + + installedItem, err := installMarketplaceSkill(ctx, runtime, registry, registryName, slug, true, installed.Dir) + if err != nil { + return skillUpdateItem{}, err + } + + return skillUpdateItem{ + Name: installedItem.Name, + Slug: slug, + CurrentVersion: currentVersion, + LatestVersion: firstNonEmpty(installedItem.Version, latestVersion), + Path: installedItem.Path, + Status: "updated", + }, nil +} + +func removeMarketplaceSkill(skillsDir string, name string) (skillRemoveItem, error) { + installed, err := findInstalledMarketplaceSkill(skillsDir, name) + if err != nil { + return skillRemoveItem{}, err + } + + if err := os.RemoveAll(installed.Dir); err != nil { + return skillRemoveItem{}, fmt.Errorf("cli: remove marketplace skill %q: %w", name, err) + } + + return skillRemoveItem{ + Name: installed.Name, + Slug: installed.Provenance.Slug, + Path: installed.Dir, + Status: "removed", + }, nil +} + +func findInstalledMarketplaceSkill(skillsDir string, name string) (installedMarketplaceSkill, error) { + skillDir, err := pathWithinRoot(skillsDir, name) + if err != nil { + return installedMarketplaceSkill{}, fmt.Errorf("cli: resolve skill path for %q: %w", name, err) + } + + info, err := os.Stat(skillDir) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return installedMarketplaceSkill{}, fmt.Errorf("skill %q not found", name) + } + return installedMarketplaceSkill{}, fmt.Errorf("cli: inspect skill directory %q: %w", skillDir, err) + } + if !info.IsDir() { + return installedMarketplaceSkill{}, fmt.Errorf("skill %q is not a directory", name) + } + + hasSidecar, err := skills.HasSidecar(skillDir) + if err != nil { + return installedMarketplaceSkill{}, err + } + if !hasSidecar { + return installedMarketplaceSkill{}, fmt.Errorf("skill %q is not a marketplace-installed skill", name) + } + + return readInstalledMarketplaceSkill(skillDir) +} + +func listInstalledMarketplaceSkills(skillsDir string) 
([]installedMarketplaceSkill, error) { + entries, err := os.ReadDir(skillsDir) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return []installedMarketplaceSkill{}, nil + } + return nil, fmt.Errorf("cli: read installed skills directory %q: %w", skillsDir, err) + } + + items := make([]installedMarketplaceSkill, 0, len(entries)) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + skillDir, err := pathWithinRoot(skillsDir, entry.Name()) + if err != nil { + return nil, fmt.Errorf("cli: resolve installed skill path for %q: %w", entry.Name(), err) + } + + hasSidecar, err := skills.HasSidecar(skillDir) + if err != nil { + return nil, err + } + if !hasSidecar { + continue + } + + item, err := readInstalledMarketplaceSkill(skillDir) + if err != nil { + return nil, err + } + items = append(items, item) + } + + sort.Slice(items, func(i, j int) bool { + return items[i].Name < items[j].Name + }) + return items, nil +} + +func readInstalledMarketplaceSkill(skillDir string) (installedMarketplaceSkill, error) { + provenance, err := skills.ReadSidecar(skillDir) + if err != nil { + return installedMarketplaceSkill{}, err + } + if provenance == nil { + return installedMarketplaceSkill{}, fmt.Errorf("cli: missing provenance for %q", skillDir) + } + + skillFile, err := pathWithinRoot(skillDir, skillMarkdownFileName) + if err != nil { + return installedMarketplaceSkill{}, fmt.Errorf("cli: resolve skill file in %q: %w", skillDir, err) + } + + parsedSkill, err := skills.ParseSkillFile(skillFile) + if err != nil { + return installedMarketplaceSkill{}, err + } + + return installedMarketplaceSkill{ + Name: parsedSkill.Meta.Name, + Dir: parsedSkill.Dir, + FilePath: parsedSkill.FilePath, + Provenance: *provenance, + }, nil +} + +func extractMarketplaceArchive(reader io.Reader, destRoot string) (err error) { + if strings.TrimSpace(destRoot) == "" { + return errors.New("destination root is required") + } + if err := os.MkdirAll(destRoot, 0o755); err != nil { + 
return fmt.Errorf("create destination root %q: %w", destRoot, err) + } + + gzipReader, err := gzip.NewReader(reader) + if err != nil { + return fmt.Errorf("open gzip stream: %w", err) + } + defer func() { + err = joinContextError(err, gzipReader.Close(), "close gzip stream: %w") + }() + + tarReader := tar.NewReader(gzipReader) + for { + header, err := tarReader.Next() + if errors.Is(err, io.EOF) { + return nil + } + if err != nil { + return fmt.Errorf("read tar entry: %w", err) + } + + entryName, err := cleanArchiveEntryPath(header.Name) + if err != nil { + return err + } + targetPath, err := pathWithinRoot(destRoot, filepath.FromSlash(entryName)) + if err != nil { + return fmt.Errorf("resolve archive entry %q: %w", header.Name, err) + } + + switch header.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(targetPath, 0o755); err != nil { + return fmt.Errorf("create archive directory %q: %w", targetPath, err) + } + case tar.TypeReg: + if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil { + return fmt.Errorf("create archive parent %q: %w", filepath.Dir(targetPath), err) + } + + file, err := os.OpenFile(targetPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644) + if err != nil { + return fmt.Errorf("create archive file %q: %w", targetPath, err) + } + if _, err := io.Copy(file, tarReader); err != nil { + writeErr := fmt.Errorf("write archive file %q: %w", targetPath, err) + if closeErr := file.Close(); closeErr != nil { + return errors.Join(writeErr, fmt.Errorf("close archive file %q after write failure: %w", targetPath, closeErr)) + } + return writeErr + } + if err := file.Close(); err != nil { + return fmt.Errorf("close archive file %q: %w", targetPath, err) + } + default: + return fmt.Errorf("unsupported archive entry type %d for %q", header.Typeflag, header.Name) + } + } +} + +func locateExtractedSkillFile(root string) (string, error) { + var matches []string + + err := filepath.WalkDir(root, func(current string, entry fs.DirEntry, walkErr error) 
error { + if walkErr != nil { + return walkErr + } + if entry.IsDir() { + return nil + } + if entry.Name() != skillMarkdownFileName { + return nil + } + matches = append(matches, current) + if len(matches) > 1 { + return errors.New("multiple SKILL.md files found in archive") + } + return nil + }) + if err != nil { + return "", err + } + if len(matches) == 0 { + return "", errors.New("archive did not contain SKILL.md") + } + return matches[0], nil +} + +func moveInstalledSkillDir(extractedDir string, targetDir string, replaceExisting bool) error { + if !replaceExisting { + if _, err := os.Stat(targetDir); err == nil { + return fmt.Errorf("skill %q already exists at %s", filepath.Base(targetDir), targetDir) + } else if !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("cli: inspect target skill directory %q: %w", targetDir, err) + } + + if err := os.Rename(extractedDir, targetDir); err != nil { + return fmt.Errorf("cli: install skill into %q: %w", targetDir, err) + } + return nil + } + + if _, err := os.Stat(targetDir); err != nil { + if !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("cli: inspect target skill directory %q: %w", targetDir, err) + } + if err := os.Rename(extractedDir, targetDir); err != nil { + return fmt.Errorf("cli: install updated skill into %q: %w", targetDir, err) + } + return nil + } + + backupDir := fmt.Sprintf("%s.backup-%d", targetDir, time.Now().UTC().UnixNano()) + if err := os.Rename(targetDir, backupDir); err != nil { + return fmt.Errorf("cli: stage existing skill backup %q: %w", targetDir, err) + } + + if err := os.Rename(extractedDir, targetDir); err != nil { + revertErr := os.Rename(backupDir, targetDir) + if revertErr != nil { + return errors.Join( + fmt.Errorf("cli: install updated skill into %q: %w", targetDir, err), + fmt.Errorf("cli: restore original skill from %q: %w", backupDir, revertErr), + ) + } + return fmt.Errorf("cli: install updated skill into %q: %w", targetDir, err) + } + + if err := os.RemoveAll(backupDir); 
err != nil { + return fmt.Errorf("cli: remove backup skill directory %q: %w", backupDir, err) + } + return nil +} + +func cleanArchiveEntryPath(entry string) (string, error) { + cleaned := path.Clean(strings.TrimSpace(strings.ReplaceAll(entry, "\\", "/"))) + switch { + case cleaned == ".", cleaned == "": + return "", errors.New("archive entry path is required") + case strings.HasPrefix(cleaned, "/"): + return "", fmt.Errorf("archive entry %q must be relative", entry) + case cleaned == "..", strings.HasPrefix(cleaned, "../"): + return "", fmt.Errorf("archive entry %q escapes the extraction root", entry) + default: + return cleaned, nil + } +} + +func pathWithinRoot(root string, child string) (string, error) { + absRoot, err := filepath.Abs(strings.TrimSpace(root)) + if err != nil { + return "", fmt.Errorf("resolve root %q: %w", root, err) + } + targetPath := filepath.Join(absRoot, child) + absTarget, err := filepath.Abs(targetPath) + if err != nil { + return "", fmt.Errorf("resolve target %q: %w", targetPath, err) + } + relative, err := filepath.Rel(absRoot, absTarget) + if err != nil { + return "", fmt.Errorf("resolve target %q within %q: %w", absTarget, absRoot, err) + } + if relative == ".." || strings.HasPrefix(relative, ".."+string(filepath.Separator)) { + return "", errors.New("path must stay within the root directory") + } + return absTarget, nil +} + +func pathInsideRoot(root string, target string) (string, error) { + absRoot, err := filepath.Abs(strings.TrimSpace(root)) + if err != nil { + return "", fmt.Errorf("resolve root %q: %w", root, err) + } + + absTarget, err := filepath.Abs(strings.TrimSpace(target)) + if err != nil { + return "", fmt.Errorf("resolve target %q: %w", target, err) + } + + relative, err := filepath.Rel(absRoot, absTarget) + if err != nil { + return "", fmt.Errorf("resolve target %q within %q: %w", absTarget, absRoot, err) + } + if relative == ".." 
|| strings.HasPrefix(relative, ".."+string(filepath.Separator)) { + return "", errors.New("path must stay within the root directory") + } + return absTarget, nil +} + +func resolveMarketplaceInstallTarget(skillsDir string, parsedName string, targetDirOverride string) (string, error) { + if trimmedOverride := strings.TrimSpace(targetDirOverride); trimmedOverride != "" { + return pathInsideRoot(skillsDir, trimmedOverride) + } + return pathWithinRoot(skillsDir, parsedName) +} + +func joinContextError(base error, extra error, format string, args ...any) error { + if extra == nil { + return base + } + + args = append(args, extra) + wrapped := fmt.Errorf(format, args...) + if base == nil { + return wrapped + } + return errors.Join(base, wrapped) +} + +func criticalWarnings(warnings []skills.Warning) []string { + items := make([]string, 0, len(warnings)) + for _, warning := range warnings { + if warning.Severity != skills.SeverityCritical { + continue + } + items = append(items, firstNonEmpty(warning.Message, warning.Pattern)) + } + return items +} + +func versionIsNewer(current string, latest string) bool { + normalizedCurrent := normalizeVersion(current) + normalizedLatest := normalizeVersion(latest) + if normalizedLatest == "" { + return false + } + if normalizedCurrent == "" { + return true + } + + currentVersion, currentOK := parseSemanticVersion(normalizedCurrent) + latestVersion, latestOK := parseSemanticVersion(normalizedLatest) + if currentOK && latestOK { + return compareSemanticVersions(currentVersion, latestVersion) < 0 + } + + return normalizedLatest > normalizedCurrent +} + +func normalizeVersion(version string) string { + trimmed := strings.TrimSpace(version) + trimmed = strings.TrimPrefix(trimmed, "v") + trimmed = strings.TrimPrefix(trimmed, "V") + return trimmed +} + +func parseVersionParts(version string) ([]int, bool) { + segments := strings.Split(version, ".") + if len(segments) == 0 { + return nil, false + } + + parts := make([]int, 0, len(segments)) 
+ for _, segment := range segments { + if segment == "" { + return nil, false + } + value, err := strconv.Atoi(segment) + if err != nil { + return nil, false + } + parts = append(parts, value) + } + return parts, true +} + +func versionPartAt(parts []int, index int) int { + if index < 0 || index >= len(parts) { + return 0 + } + return parts[index] +} + +type semanticVersion struct { + core []int + prerelease []string +} + +func parseSemanticVersion(version string) (semanticVersion, bool) { + trimmed := strings.TrimSpace(version) + if trimmed == "" { + return semanticVersion{}, false + } + + corePart, _, _ := strings.Cut(trimmed, "+") + corePart, prereleasePart, hasPrerelease := strings.Cut(corePart, "-") + + core, ok := parseVersionParts(corePart) + if !ok { + return semanticVersion{}, false + } + + parsed := semanticVersion{core: core} + if !hasPrerelease { + return parsed, true + } + + identifiers, ok := parsePrereleaseIdentifiers(prereleasePart) + if !ok { + return semanticVersion{}, false + } + parsed.prerelease = identifiers + return parsed, true +} + +func parsePrereleaseIdentifiers(value string) ([]string, bool) { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return nil, false + } + + parts := strings.Split(trimmed, ".") + identifiers := make([]string, 0, len(parts)) + for _, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + return nil, false + } + identifiers = append(identifiers, part) + } + return identifiers, true +} + +func compareSemanticVersions(current semanticVersion, latest semanticVersion) int { + for i := 0; i < max(len(current.core), len(latest.core)); i++ { + currentPart := versionPartAt(current.core, i) + latestPart := versionPartAt(latest.core, i) + switch { + case currentPart < latestPart: + return -1 + case currentPart > latestPart: + return 1 + } + } + + switch { + case len(current.prerelease) == 0 && len(latest.prerelease) == 0: + return 0 + case len(current.prerelease) == 0: + return 1 + case 
len(latest.prerelease) == 0: + return -1 + default: + return comparePrereleaseIdentifiers(current.prerelease, latest.prerelease) + } +} + +func comparePrereleaseIdentifiers(current []string, latest []string) int { + for i := 0; i < max(len(current), len(latest)); i++ { + switch { + case i >= len(current): + return -1 + case i >= len(latest): + return 1 + } + + currentID := current[i] + latestID := latest[i] + currentNumber, currentNumeric := parseNumericIdentifier(currentID) + latestNumber, latestNumeric := parseNumericIdentifier(latestID) + + switch { + case currentNumeric && latestNumeric: + switch { + case currentNumber < latestNumber: + return -1 + case currentNumber > latestNumber: + return 1 + } + case currentNumeric: + return -1 + case latestNumeric: + return 1 + case currentID < latestID: + return -1 + case currentID > latestID: + return 1 + } + } + + return 0 +} + +func parseNumericIdentifier(value string) (int, bool) { + if value == "" { + return 0, false + } + number, err := strconv.Atoi(value) + if err != nil { + return 0, false + } + return number, true +} diff --git a/internal/cli/skill_output.go b/internal/cli/skill_output.go new file mode 100644 index 000000000..a1227b5db --- /dev/null +++ b/internal/cli/skill_output.go @@ -0,0 +1,236 @@ +package cli + +import ( + "strconv" + + "github.com/pedronauck/agh/internal/skills/marketplace" +) + +func skillSearchBundle(items []marketplace.SkillListing) outputBundle { + return listBundle( + items, + items, + "Marketplace Skills", + []string{"Slug", "Name", "Description", "Author", "Version", "Downloads"}, + "skills", + []string{"slug", "name", "description", "author", "version", "downloads"}, + func(item marketplace.SkillListing) []string { + return []string{ + stringOrDash(item.Slug), + stringOrDash(item.Name), + stringOrDash(item.Description), + stringOrDash(item.Author), + stringOrDash(item.Version), + strconv.Itoa(item.Downloads), + } + }, + func(item marketplace.SkillListing) []string { + return 
[]string{ + item.Slug, + item.Name, + item.Description, + item.Author, + item.Version, + strconv.Itoa(item.Downloads), + } + }, + ) +} + +func skillListBundle(items []skillListItem) outputBundle { + return listBundle( + items, + items, + "Skills", + []string{"Name", "Description", "Source", "Enabled"}, + "skills", + []string{"name", "description", "source", "enabled"}, + func(item skillListItem) []string { + return []string{ + stringOrDash(item.Name), + stringOrDash(item.Description), + stringOrDash(item.Source), + strconv.FormatBool(item.Enabled), + } + }, + func(item skillListItem) []string { + return []string{ + item.Name, + item.Description, + item.Source, + strconv.FormatBool(item.Enabled), + } + }, + ) +} + +func skillViewBundle(item skillViewItem, rendered string) outputBundle { + return outputBundle{ + jsonValue: item, + human: func() (string, error) { + return rendered, nil + }, + toon: func() (string, error) { + return rendered, nil + }, + } +} + +func skillInfoBundle(item skillInfoItem) outputBundle { + return outputBundle{ + jsonValue: item, + human: func() (string, error) { + base := renderHumanSection("Skill", []keyValue{ + {Label: "Name", Value: stringOrDash(item.Name)}, + {Label: "Description", Value: stringOrDash(item.Description)}, + {Label: "Version", Value: stringOrDash(item.Version)}, + {Label: "Source", Value: stringOrDash(item.Source)}, + {Label: "Path", Value: stringOrDash(item.Path)}, + {Label: "Enabled", Value: strconv.FormatBool(item.Enabled)}, + }) + + metadataRows := make([][]string, 0, len(item.Metadata)) + for _, entry := range sortedSkillMetadataEntries(item.Metadata) { + metadataRows = append(metadataRows, []string{entry.Label, entry.Value}) + } + metadata := renderHumanTable("Metadata", []string{"Key", "Value"}, metadataRows) + + resourceRows := make([][]string, 0, len(item.Resources)) + for _, resource := range item.Resources { + resourceRows = append(resourceRows, []string{resource}) + } + resources := 
renderHumanTable("Resources", []string{"Path"}, resourceRows) + + return renderHumanBlocks(base, metadata, resources), nil + }, + toon: func() (string, error) { + metadataRows := make([][]string, 0, len(item.Metadata)) + for _, entry := range sortedSkillMetadataEntries(item.Metadata) { + metadataRows = append(metadataRows, []string{entry.Label, entry.Value}) + } + + resourceRows := make([][]string, 0, len(item.Resources)) + for _, resource := range item.Resources { + resourceRows = append(resourceRows, []string{resource}) + } + + return renderHumanBlocks( + renderToonObject("skill", []string{"name", "description", "version", "source", "path", "enabled"}, []string{ + item.Name, + item.Description, + item.Version, + item.Source, + item.Path, + strconv.FormatBool(item.Enabled), + }), + renderToonArray("metadata", []string{"key", "value"}, metadataRows), + renderToonArray("resources", []string{"path"}, resourceRows), + ), nil + }, + } +} + +func skillCreateBundle(item skillCreateItem) outputBundle { + return outputBundle{ + jsonValue: item, + human: func() (string, error) { + return renderHumanSection("Skill", []keyValue{ + {Label: "Name", Value: stringOrDash(item.Name)}, + {Label: "Source", Value: stringOrDash(item.Source)}, + {Label: "Path", Value: stringOrDash(item.Path)}, + {Label: "File", Value: stringOrDash(item.File)}, + {Label: "Status", Value: stringOrDash(item.Status)}, + }), nil + }, + toon: func() (string, error) { + return renderToonObject("skill", []string{"name", "source", "path", "file", "status"}, []string{ + item.Name, + item.Source, + item.Path, + item.File, + item.Status, + }), nil + }, + } +} + +func skillInstallBundle(item skillInstallItem) outputBundle { + return outputBundle{ + jsonValue: item, + human: func() (string, error) { + return renderHumanSection("Skill Install", []keyValue{ + {Label: "Name", Value: stringOrDash(item.Name)}, + {Label: "Slug", Value: stringOrDash(item.Slug)}, + {Label: "Version", Value: stringOrDash(item.Version)}, + 
{Label: "Registry", Value: stringOrDash(item.Registry)}, + {Label: "Path", Value: stringOrDash(item.Path)}, + {Label: "Hash", Value: stringOrDash(item.Hash)}, + {Label: "Status", Value: stringOrDash(item.Status)}, + }), nil + }, + toon: func() (string, error) { + return renderToonObject("skill_install", []string{"name", "slug", "version", "registry", "path", "hash", "status"}, []string{ + item.Name, + item.Slug, + item.Version, + item.Registry, + item.Path, + item.Hash, + item.Status, + }), nil + }, + } +} + +func skillRemoveBundle(item skillRemoveItem) outputBundle { + return outputBundle{ + jsonValue: item, + human: func() (string, error) { + return renderHumanSection("Skill Remove", []keyValue{ + {Label: "Name", Value: stringOrDash(item.Name)}, + {Label: "Slug", Value: stringOrDash(item.Slug)}, + {Label: "Path", Value: stringOrDash(item.Path)}, + {Label: "Status", Value: stringOrDash(item.Status)}, + }), nil + }, + toon: func() (string, error) { + return renderToonObject("skill_remove", []string{"name", "slug", "path", "status"}, []string{ + item.Name, + item.Slug, + item.Path, + item.Status, + }), nil + }, + } +} + +func skillUpdateBundle(items []skillUpdateItem) outputBundle { + return listBundle( + items, + items, + "Skill Updates", + []string{"Name", "Slug", "Current", "Latest", "Path", "Status"}, + "skill_updates", + []string{"name", "slug", "current_version", "latest_version", "path", "status"}, + func(item skillUpdateItem) []string { + return []string{ + stringOrDash(item.Name), + stringOrDash(item.Slug), + stringOrDash(item.CurrentVersion), + stringOrDash(item.LatestVersion), + stringOrDash(item.Path), + stringOrDash(item.Status), + } + }, + func(item skillUpdateItem) []string { + return []string{ + item.Name, + item.Slug, + item.CurrentVersion, + item.LatestVersion, + item.Path, + item.Status, + } + }, + ) +} diff --git a/internal/cli/skill_test.go b/internal/cli/skill_test.go index 437e6d407..03b4eeb4c 100644 --- a/internal/cli/skill_test.go +++ 
b/internal/cli/skill_test.go @@ -8,6 +8,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "net/http" "net/http/httptest" "os" @@ -38,6 +39,15 @@ type marketplaceRegistryStub struct { infoFn func(context.Context, string) (*marketplace.SkillDetail, error) } +type errorReadCloser struct { + io.Reader + closeErr error +} + +func (r errorReadCloser) Close() error { + return r.closeErr +} + func (s marketplaceRegistryStub) Search(ctx context.Context, query string, opts marketplace.SearchOpts) ([]marketplace.SkillListing, error) { if s.searchFn == nil { return nil, nil @@ -199,6 +209,19 @@ func TestSkillListCommandFiltersBySource(t *testing.T) { } } +func TestSkillListCommandSourceHelpIncludesMarketplaceAndAgents(t *testing.T) { + t.Parallel() + + cmd := newSkillListCommand(newSkillTestEnv(t, nil).deps) + usage := cmd.Flags().Lookup("source").Usage + + for _, expected := range []string{"marketplace", "agents", ".agents"} { + if !strings.Contains(usage, expected) { + t.Fatalf("source flag usage = %q, want mention of %q", usage, expected) + } + } +} + func TestSkillViewCommandReturnsXMLLikeContent(t *testing.T) { t.Parallel() @@ -1065,7 +1088,7 @@ func TestSkillMarketplaceHelpers(t *testing.T) { t.Fatalf("loadMarketplaceRegistry() error = %v", err) } - item, err := installMarketplaceSkill(testutil.Context(t), runtime, registry, registryName, "@agh/review", true) + item, err := installMarketplaceSkill(testutil.Context(t), runtime, registry, registryName, "@agh/review", true, "") if err != nil { t.Fatalf("installMarketplaceSkill(replace) error = %v", err) } @@ -1108,7 +1131,7 @@ func TestSkillMarketplaceHelpers(t *testing.T) { t.Fatalf("loadMarketplaceRegistry() error = %v", err) } - if _, err := installMarketplaceSkill(testutil.Context(t), runtime, registry, registryName, "@agh/review", false); err == nil { + if _, err := installMarketplaceSkill(testutil.Context(t), runtime, registry, registryName, "@agh/review", false, ""); err == nil { t.Fatal("installMarketplaceSkill(no 
replace) error = nil, want existing-target failure") } }) @@ -1122,7 +1145,7 @@ func TestSkillMarketplaceHelpers(t *testing.T) { downloadFn: func(context.Context, string) (*marketplace.SkillArchive, error) { return nil, nil }, - }, "clawhub", "@agh/review", false) + }, "clawhub", "@agh/review", false, "") if err == nil { t.Fatal("installMarketplaceSkill(nil archive) error = nil, want failure") } @@ -1140,7 +1163,7 @@ func TestSkillMarketplaceHelpers(t *testing.T) { downloadFn: func(context.Context, string) (*marketplace.SkillArchive, error) { return &marketplace.SkillArchive{Version: "1.0.0"}, nil }, - }, "clawhub", "@agh/review", false) + }, "clawhub", "@agh/review", false, "") if err == nil { t.Fatal("installMarketplaceSkill(nil stream) error = nil, want failure") } @@ -1174,13 +1197,37 @@ func TestSkillMarketplaceHelpers(t *testing.T) { t.Fatalf("loadMarketplaceRegistry() error = %v", err) } - if _, err := installMarketplaceSkill(testutil.Context(t), runtime, registry, registryName, "@agh/review", false); err == nil { + if _, err := installMarketplaceSkill(testutil.Context(t), runtime, registry, registryName, "@agh/review", false, ""); err == nil { t.Fatal("installMarketplaceSkill(missing skill file) error = nil, want failure") } else if !strings.Contains(err.Error(), "archive did not contain SKILL.md") { t.Fatalf("installMarketplaceSkill(missing skill file) error = %v, want missing-skill-file context", err) } }) + t.Run("install-marketplace-skill-surfaces-archive-close-errors", func(t *testing.T) { + env := newSkillTestEnv(t, nil) + + _, err := installMarketplaceSkill(testutil.Context(t), runtimeContext{ + HomePaths: env.homePaths, + }, marketplaceRegistryStub{ + downloadFn: func(context.Context, string) (*marketplace.SkillArchive, error) { + return &marketplace.SkillArchive{ + Version: "1.0.0", + Data: errorReadCloser{ + Reader: bytes.NewReader(mustTarGz(t, map[string]string{"review/SKILL.md": skillDocument("review", "Review helper", "body")})), + closeErr: 
errors.New("stream close failed"), + }, + }, nil + }, + }, "clawhub", "@agh/review", false, "") + if err == nil { + t.Fatal("installMarketplaceSkill(close error) error = nil, want failure") + } + if !strings.Contains(err.Error(), "close marketplace archive") { + t.Fatalf("installMarketplaceSkill(close error) error = %v, want archive close context", err) + } + }) + t.Run("list-installed-marketplace-skills-missing-dir", func(t *testing.T) { items, err := listInstalledMarketplaceSkills(filepath.Join(t.TempDir(), "missing")) if err != nil { @@ -1210,6 +1257,62 @@ func TestSkillMarketplaceHelpers(t *testing.T) { t.Fatalf("updateMarketplaceSkill(missing slug) error = %v, want slug-metadata validation", err) } }) + + t.Run("update-marketplace-skill-keeps-existing-directory-when-package-name-changes", func(t *testing.T) { + server := newMarketplaceTestServer(t, marketplaceServerFixture{ + info: map[string]marketplace.SkillDetail{ + "@agh/review": {SkillListing: marketplace.SkillListing{Slug: "@agh/review", Name: "review", Version: "2.0.0"}}, + }, + downloads: map[string]marketplaceDownloadFixture{ + "@agh/review": { + version: "2.0.0", + files: map[string]string{ + "renamed-review/SKILL.md": skillDocument("renamed-review", "Renamed review helper", "renamed body"), + }, + }, + }, + }) + defer server.Close() + + env := newSkillTestEnv(t, func(cfg *aghconfig.Config) { + cfg.Skills.Marketplace = aghconfig.MarketplaceConfig{ + Registry: "clawhub", + BaseURL: server.URL(), + } + }) + writeInstalledMarketplaceSkill(t, env.homePaths, "review", "@agh/review", "1.0.0", skillDocument("review", "Review helper", "old body")) + + runtime, registry, registryName, err := loadMarketplaceRegistry(env.deps) + if err != nil { + t.Fatalf("loadMarketplaceRegistry() error = %v", err) + } + + installed, err := findInstalledMarketplaceSkill(env.homePaths.SkillsDir, "review") + if err != nil { + t.Fatalf("findInstalledMarketplaceSkill() error = %v", err) + } + + item, err := 
updateMarketplaceSkill(testutil.Context(t), runtime, registry, registryName, installed) + if err != nil { + t.Fatalf("updateMarketplaceSkill(rename) error = %v", err) + } + + expectedDir := filepath.Join(env.homePaths.SkillsDir, "review") + if item.Path != expectedDir { + t.Fatalf("updateMarketplaceSkill(rename) path = %q, want %q", item.Path, expectedDir) + } + if _, statErr := os.Stat(filepath.Join(env.homePaths.SkillsDir, "renamed-review")); !errors.Is(statErr, os.ErrNotExist) { + t.Fatalf("renamed install directory stat error = %v, want not-exist", statErr) + } + + content, err := os.ReadFile(filepath.Join(expectedDir, skillMarkdownFileName)) + if err != nil { + t.Fatalf("ReadFile(updated renamed skill) error = %v", err) + } + if !strings.Contains(string(content), "renamed body") { + t.Fatalf("updated renamed skill content = %q, want replacement content", string(content)) + } + }) } func TestSkillHelpersAndBundles(t *testing.T) { @@ -1270,10 +1373,36 @@ func TestSkillHelpersAndBundles(t *testing.T) { if got := versionIsNewer("1.0.1", "1.0.0"); got { t.Fatal("versionIsNewer(1.0.1, 1.0.0) = true, want false") } + if got := versionIsNewer("1.0.0-rc1", "1.0.0"); !got { + t.Fatal("versionIsNewer(1.0.0-rc1, 1.0.0) = false, want true") + } + if got := versionIsNewer("1.0.0", "1.0.0-rc1"); got { + t.Fatal("versionIsNewer(1.0.0, 1.0.0-rc1) = true, want false") + } if got := criticalWarnings([]skills.Warning{{Severity: skills.SeverityCritical, Message: "bad"}}); len(got) != 1 || got[0] != "bad" { t.Fatalf("criticalWarnings() = %#v, want bad", got) } + if _, err := loadSkillCommandContext(testutil.Context(t), func() commandDeps { + deps := env.deps + deps.getwd = func() (string, error) { + return "", errors.New("boom") + } + return deps + }()); err == nil || !strings.Contains(err.Error(), "cli: resolve skill workspace root") { + t.Fatalf("loadSkillCommandContext(getwd failure) error = %v, want wrapped workspace-root context", err) + } + + rendered, err := 
renderSkillXML(&skills.Skill{ + Meta: skills.SkillMeta{Name: "xml-skill"}, + }, "&body", []string{"refs/checklist.md"}) + if err != nil { + t.Fatalf("renderSkillXML() error = %v", err) + } + if !strings.Contains(rendered, "<skill>&body</skill>") { + t.Fatalf("renderSkillXML() = %q, want escaped body", rendered) + } + if got := formatSkillMetadataValue(map[string]any{"alpha": 1}); got != `{"alpha":1}` { t.Fatalf("formatSkillMetadataValue(map) = %q, want compact JSON", got) } diff --git a/internal/cli/skill_workspace.go b/internal/cli/skill_workspace.go new file mode 100644 index 000000000..7743a871a --- /dev/null +++ b/internal/cli/skill_workspace.go @@ -0,0 +1,562 @@ +package cli + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "path" + "path/filepath" + "sort" + "strings" + + aghconfig "github.com/pedronauck/agh/internal/config" + "github.com/pedronauck/agh/internal/skills" + skillbundled "github.com/pedronauck/agh/internal/skills/bundled" + "github.com/pedronauck/agh/internal/store/globaldb" + workspacepkg "github.com/pedronauck/agh/internal/workspace" +) + +func loadSkillCommandContext(ctx context.Context, deps commandDeps) (skillCommandContext, error) { + runtime, err := loadRuntimeContext(deps) + if err != nil { + return skillCommandContext{}, fmt.Errorf("cli: load skill runtime context: %w", err) + } + + workspace, err := resolveCLIWorkspaceRoot(deps) + if err != nil { + return skillCommandContext{}, fmt.Errorf("cli: resolve skill workspace root: %w", err) + } + + userAgentsDir, err := aghconfig.ResolveUserAgentsSkillsDir(deps.getenv) + if err != nil { + return skillCommandContext{}, fmt.Errorf("cli: resolve user agent skills directory: %w", err) + } + + registry := skills.NewRegistry(skills.RegistryConfig{ + BundledFS: skillbundled.FS(), + UserSkillsDir: runtime.HomePaths.SkillsDir, + UserAgentsDir: userAgentsDir, + DisabledSkills: append([]string(nil), runtime.Config.Skills.DisabledSkills...), + }) + if err := 
registry.LoadAll(ctx); err != nil { + return skillCommandContext{}, fmt.Errorf("cli: load skill registry: %w", err) + } + + resolvedWorkspace, err := resolveSkillWorkspace(ctx, runtime, workspace) + if err != nil { + return skillCommandContext{}, fmt.Errorf("cli: resolve skill workspace: %w", err) + } + + skillList, err := registry.ForWorkspace(ctx, resolvedWorkspace) + if err != nil { + return skillCommandContext{}, fmt.Errorf("cli: load workspace skills: %w", err) + } + + return skillCommandContext{ + workspace: workspace, + bundledFS: skillbundled.FS(), + registry: registry, + skills: skillList, + }, nil +} + +func resolveSkillWorkspace(ctx context.Context, runtime runtimeContext, workspaceRoot string) (workspacepkg.ResolvedWorkspace, error) { + fallback, err := cliResolvedWorkspace(workspaceRoot) + if err != nil { + return workspacepkg.ResolvedWorkspace{}, err + } + + if strings.TrimSpace(workspaceRoot) == "" { + return fallback, nil + } + + if _, err := os.Stat(runtime.HomePaths.DatabaseFile); err != nil { + if errors.Is(err, os.ErrNotExist) { + return fallback, nil + } + return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: stat workspace database %q: %w", runtime.HomePaths.DatabaseFile, err) + } + + resolved, err := resolveRegisteredSkillWorkspace(ctx, runtime, workspaceRoot) + if err != nil { + if errors.Is(err, workspacepkg.ErrWorkspaceNotFound) { + return fallback, nil + } + return workspacepkg.ResolvedWorkspace{}, err + } + + return resolved, nil +} + +func resolveRegisteredSkillWorkspace(ctx context.Context, runtime runtimeContext, workspaceRoot string) (resolved workspacepkg.ResolvedWorkspace, err error) { + globalDB, err := globaldb.OpenGlobalDB(ctx, runtime.HomePaths.DatabaseFile) + if err != nil { + return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: open workspace database %q: %w", runtime.HomePaths.DatabaseFile, err) + } + defer func() { + if closeErr := globalDB.Close(ctx); closeErr != nil { + closeErr = fmt.Errorf("cli: close workspace 
database %q: %w", runtime.HomePaths.DatabaseFile, closeErr) + if err == nil { + err = closeErr + return + } + err = errors.Join(err, closeErr) + } + }() + + resolver, err := workspacepkg.NewResolver( + globalDB, + workspacepkg.WithHomePaths(runtime.HomePaths), + workspacepkg.WithConfigLoader(func(rootDir string) (aghconfig.Config, error) { + return aghconfig.LoadForHome(runtime.HomePaths, aghconfig.WithWorkspaceRoot(rootDir)) + }), + ) + if err != nil { + return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: create workspace resolver: %w", err) + } + + resolved, err = resolver.Resolve(ctx, workspaceRoot) + if err != nil { + return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: resolve workspace %q: %w", workspaceRoot, err) + } + + return resolved, nil +} + +func cliResolvedWorkspace(root string) (workspacepkg.ResolvedWorkspace, error) { + workspaceRoot := strings.TrimSpace(root) + if workspaceRoot == "" { + return workspacepkg.ResolvedWorkspace{}, nil + } + + skillRoots, err := os.ReadDir(filepath.Join(workspaceRoot, aghconfig.DirName, aghconfig.SkillsDirName)) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return workspacepkg.ResolvedWorkspace{ + Workspace: workspacepkg.Workspace{RootDir: workspaceRoot}, + }, nil + } + return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: read workspace skills %q: %w", workspaceRoot, err) + } + + skillPaths := make([]workspacepkg.SkillPath, 0, len(skillRoots)) + for _, entry := range skillRoots { + if !entry.IsDir() { + continue + } + + skillDir := filepath.Join(workspaceRoot, aghconfig.DirName, aghconfig.SkillsDirName, entry.Name()) + skillFile := filepath.Join(skillDir, skillMarkdownFileName) + if _, err := os.Stat(skillFile); err != nil { + if errors.Is(err, os.ErrNotExist) { + continue + } + return workspacepkg.ResolvedWorkspace{}, fmt.Errorf("cli: inspect workspace skill %q: %w", skillFile, err) + } + + skillPaths = append(skillPaths, workspacepkg.SkillPath{ + Dir: skillDir, + Source: "workspace", + }) 
+ } + + return workspacepkg.ResolvedWorkspace{ + Workspace: workspacepkg.Workspace{RootDir: workspaceRoot}, + Skills: skillPaths, + }, nil +} + +func resolveCLIWorkspaceRoot(deps commandDeps) (string, error) { + workspace, err := currentWorkingDirectory(deps) + if err != nil { + return "", err + } + + absWorkspace, err := filepath.Abs(workspace) + if err != nil { + return "", fmt.Errorf("cli: resolve workspace root %q: %w", workspace, err) + } + return absWorkspace, nil +} + +func skillListItems(allSkills []*skills.Skill, sourceFilter string) ([]skillListItem, error) { + filter, err := normalizeSkillSourceFilter(sourceFilter) + if err != nil { + return nil, err + } + + items := make([]skillListItem, 0, len(allSkills)) + for _, skill := range allSkills { + if skill == nil { + continue + } + + source := skillSourceLabel(skill.Source) + if filter != "" && source != filter { + continue + } + + items = append(items, skillListItem{ + Name: skill.Meta.Name, + Description: skill.Meta.Description, + Source: source, + Enabled: skill.Enabled, + }) + } + + return items, nil +} + +func normalizeSkillSourceFilter(sourceFilter string) (string, error) { + filter := strings.ToLower(strings.TrimSpace(sourceFilter)) + switch filter { + case "": + return "", nil + case "bundled", "marketplace", "user", "additional", "workspace": + return filter, nil + case "agents", ".agents": + return "additional", nil + default: + return "", fmt.Errorf("cli: invalid skill source %q", sourceFilter) + } +} + +func findSkillByName(allSkills []*skills.Skill, name string) (*skills.Skill, error) { + skillName := strings.TrimSpace(name) + if skillName == "" { + return nil, errors.New("skill name is required") + } + + for _, skill := range allSkills { + if skill == nil { + continue + } + if skill.Meta.Name == skillName { + return skill, nil + } + } + + return nil, fmt.Errorf("skill %q not found", skillName) +} + +func listSkillResources(skill *skills.Skill, bundledFS fs.FS) ([]string, error) { + if skill == 
nil { + return nil, errors.New("skill is required") + } + + resources := make([]string, 0) + switch skill.Source { + case skills.SourceBundled: + if bundledFS == nil { + return nil, errors.New("bundled skills filesystem is required") + } + + root := strings.TrimSpace(skill.Dir) + if root == "" { + return []string{}, nil + } + + err := fs.WalkDir(bundledFS, root, func(resourcePath string, entry fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if entry.IsDir() { + return nil + } + + relative := strings.TrimPrefix(resourcePath, root+"/") + if resourcePath == root { + relative = skillMarkdownFileName + } + if relative == skillMarkdownFileName { + return nil + } + + resources = append(resources, relative) + return nil + }) + if err != nil { + return nil, fmt.Errorf("cli: list bundled skill resources for %q: %w", skill.Meta.Name, err) + } + default: + root := strings.TrimSpace(skill.Dir) + if root == "" { + return []string{}, nil + } + + err := filepath.WalkDir(root, func(resourcePath string, entry fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if entry.IsDir() { + return nil + } + + relative, err := filepath.Rel(root, resourcePath) + if err != nil { + return err + } + if filepath.Clean(relative) == skillMarkdownFileName { + return nil + } + + resources = append(resources, filepath.ToSlash(relative)) + return nil + }) + if err != nil { + return nil, fmt.Errorf("cli: list skill resources for %q: %w", skill.Meta.Name, err) + } + } + + sort.Strings(resources) + return resources, nil +} + +func readSkillResource(skill *skills.Skill, bundledFS fs.FS, relativePath string) (string, error) { + if skill == nil { + return "", errors.New("skill is required") + } + + switch skill.Source { + case skills.SourceBundled: + if bundledFS == nil { + return "", errors.New("bundled skills filesystem is required") + } + + cleanPath, err := cleanBundledSkillRelativePath(relativePath) + if err != nil { + return "", err + } + root 
:= strings.TrimSpace(skill.Dir) + if root == "" { + return "", errors.New("skill directory is required") + } + + content, err := fs.ReadFile(bundledFS, path.Join(root, cleanPath)) + if err != nil { + return "", fmt.Errorf("cli: read bundled skill file %q: %w", cleanPath, err) + } + return string(content), nil + default: + cleanPath, err := cleanFilesystemSkillRelativePath(relativePath) + if err != nil { + return "", err + } + + root := strings.TrimSpace(skill.Dir) + if root == "" { + return "", errors.New("skill directory is required") + } + + targetPath := filepath.Join(root, cleanPath) + absRoot, err := filepath.Abs(root) + if err != nil { + return "", fmt.Errorf("cli: resolve skill directory %q: %w", root, err) + } + resolvedRoot, err := filepath.EvalSymlinks(absRoot) + if err != nil { + return "", fmt.Errorf("cli: resolve skill directory %q: %w", absRoot, err) + } + absTarget, err := filepath.Abs(targetPath) + if err != nil { + return "", fmt.Errorf("cli: resolve skill file %q: %w", targetPath, err) + } + resolvedTarget, err := filepath.EvalSymlinks(absTarget) + if err != nil { + return "", fmt.Errorf("cli: resolve skill file %q: %w", absTarget, err) + } + + relativeToRoot, err := filepath.Rel(resolvedRoot, resolvedTarget) + if err != nil { + return "", fmt.Errorf("cli: resolve skill file %q within %q: %w", resolvedTarget, resolvedRoot, err) + } + if relativeToRoot == ".." 
|| strings.HasPrefix(relativeToRoot, ".."+string(filepath.Separator)) { + return "", errors.New("skill file path must stay within the skill directory") + } + + content, err := os.ReadFile(resolvedTarget) + if err != nil { + return "", fmt.Errorf("cli: read skill file %q: %w", cleanPath, err) + } + return string(content), nil + } +} + +func cleanBundledSkillRelativePath(relativePath string) (string, error) { + cleaned := path.Clean(strings.TrimSpace(strings.ReplaceAll(relativePath, "\\", "/"))) + switch { + case cleaned == ".", cleaned == "": + return "", errors.New("skill file path is required") + case strings.HasPrefix(cleaned, "/"): + return "", errors.New("skill file path must be relative") + case cleaned == "..", strings.HasPrefix(cleaned, "../"): + return "", errors.New("skill file path must stay within the skill directory") + default: + return cleaned, nil + } +} + +func cleanFilesystemSkillRelativePath(relativePath string) (string, error) { + cleaned := filepath.Clean(strings.TrimSpace(relativePath)) + switch { + case cleaned == ".", cleaned == "": + return "", errors.New("skill file path is required") + case filepath.IsAbs(cleaned): + return "", errors.New("skill file path must be relative") + case cleaned == "..", strings.HasPrefix(cleaned, ".."+string(filepath.Separator)): + return "", errors.New("skill file path must stay within the skill directory") + default: + return cleaned, nil + } +} + +func renderSkillXML(skill *skills.Skill, content string, resources []string) (string, error) { + if skill == nil { + return "", errors.New("skill is required") + } + + var builder strings.Builder + builder.WriteString(``) + builder.WriteString("\n") + builder.WriteString(skillXMLTextReplacer.Replace(content)) + if !strings.HasSuffix(content, "\n") { + builder.WriteString("\n") + } + builder.WriteString("\n\n") + for _, resource := range resources { + builder.WriteString(" ") + builder.WriteString(skillXMLTextReplacer.Replace(resource)) + builder.WriteString("\n") + 
} + builder.WriteString("\n") + builder.WriteString("") + return builder.String(), nil +} + +func normalizeSkillName(name string) (string, error) { + trimmed := strings.TrimSpace(name) + switch { + case trimmed == "": + return "", errors.New("skill name is required") + case trimmed == ".", trimmed == "..": + return "", errors.New("skill name must not be a relative path segment") + case filepath.IsAbs(trimmed): + return "", errors.New("skill name must be relative") + case strings.Contains(trimmed, "/"), strings.Contains(trimmed, `\`): + return "", errors.New("skill name must not include path separators") + case !validSkillNamePattern.MatchString(trimmed): + return "", errors.New("skill name must contain only letters, numbers, dots, underscores, and hyphens") + default: + return trimmed, nil + } +} + +func defaultSkillTemplate(name string) string { + trimmedName := strings.TrimSpace(name) + if trimmedName == "" { + trimmedName = defaultSkillName + } + + return fmt.Sprintf(`--- +name: %q +description: Describe when to use this skill. +--- + +# %s + +Describe the workflow, constraints, and expected outcome for this skill. 
+`, trimmedName, titleizeSkillName(trimmedName)) +} + +func titleizeSkillName(name string) string { + parts := strings.FieldsFunc(name, func(r rune) bool { + return r == '-' || r == '_' || r == ' ' + }) + if len(parts) == 0 { + return "New Skill" + } + + titled := make([]string, 0, len(parts)) + for _, part := range parts { + if part == "" { + continue + } + + lower := strings.ToLower(part) + titled = append(titled, strings.ToUpper(lower[:1])+lower[1:]) + } + if len(titled) == 0 { + return "New Skill" + } + return strings.Join(titled, " ") +} + +func skillSourceLabel(source skills.SkillSource) string { + switch source { + case skills.SourceBundled: + return "bundled" + case skills.SourceMarketplace: + return "marketplace" + case skills.SourceUser: + return "user" + case skills.SourceAdditional: + return "additional" + case skills.SourceWorkspace: + return "workspace" + default: + return "unknown" + } +} + +func sortedSkillMetadataEntries(metadata map[string]any) []keyValue { + if len(metadata) == 0 { + return nil + } + + keys := make([]string, 0, len(metadata)) + for key := range metadata { + keys = append(keys, key) + } + sort.Strings(keys) + + entries := make([]keyValue, 0, len(keys)) + for _, key := range keys { + entries = append(entries, keyValue{ + Label: key, + Value: formatSkillMetadataValue(metadata[key]), + }) + } + return entries +} + +func formatSkillMetadataValue(value any) string { + switch typed := value.(type) { + case nil: + return "" + case string: + return typed + default: + payload, err := json.Marshal(typed) + if err != nil { + return fmt.Sprint(typed) + } + return compactJSON(payload) + } +} + +func cloneMetadata(metadata map[string]any) map[string]any { + if metadata == nil { + return nil + } + + clone := make(map[string]any, len(metadata)) + for key, value := range metadata { + clone[key] = value + } + return clone +} diff --git a/internal/config/config.go b/internal/config/config.go index 0a7010d03..07a33188f 100644 --- 
a/internal/config/config.go +++ b/internal/config/config.go @@ -154,15 +154,13 @@ func WithWorkspaceRoot(root string) LoadOption { } } -// WithoutDotEnv disables automatic `.env` loading during config load. -func WithoutDotEnv() LoadOption { +func withoutDotEnv() LoadOption { return func(opts *loadOptions) { opts.skipDotEnv = true } } -// WithoutValidation returns the merged config without validating it. -func WithoutValidation() LoadOption { +func withoutValidation() LoadOption { return func(opts *loadOptions) { opts.skipValidate = true } @@ -244,8 +242,7 @@ func loadWithHome(homePaths HomePaths, workspaceRoot string, skipValidate bool) return cfg, nil } -// Default returns the built-in default configuration for the resolved AGH home. -func Default() (Config, error) { +func defaultConfig() (Config, error) { homePaths, err := ResolveHomePaths() if err != nil { return Config{}, err diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 07179736a..85dd38908 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -766,7 +766,7 @@ func TestLoadWithoutDotEnvOptionIgnoresDotEnv(t *testing.T) { t.Fatalf("ResolveHomePathsFrom() error = %v", err) } - cfg, err := Load(WithWorkspaceRoot(workspaceRoot), WithoutDotEnv()) + cfg, err := Load(WithWorkspaceRoot(workspaceRoot), withoutDotEnv()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -794,7 +794,7 @@ host = "localhost" port = 0 `) - cfg, err := Load(WithWorkspaceRoot(workspaceRoot), WithoutValidation()) + cfg, err := Load(WithWorkspaceRoot(workspaceRoot), withoutValidation()) if err != nil { t.Fatalf("Load() error = %v", err) } @@ -833,27 +833,27 @@ func TestLoadMissingConfigReturnsDefaults(t *testing.T) { } } -func TestDefaultUsesResolvedHomePaths(t *testing.T) { +func TestDefaultConfigUsesResolvedHomePaths(t *testing.T) { t.Setenv("AGH_HOME", "") - cfg, err := Default() + cfg, err := defaultConfig() if err != nil { - t.Fatalf("Default() error = %v", err) + 
t.Fatalf("defaultConfig() error = %v", err) } if cfg.HTTP.Port != 2123 || cfg.Defaults.Agent != DefaultAgentName { - t.Fatalf("Default() = %#v", cfg) + t.Fatalf("defaultConfig() = %#v", cfg) } if cfg.Permissions.Mode != PermissionModeApproveAll { - t.Fatalf("Default() Permissions.Mode = %q, want %q", cfg.Permissions.Mode, PermissionModeApproveAll) + t.Fatalf("defaultConfig() Permissions.Mode = %q, want %q", cfg.Permissions.Mode, PermissionModeApproveAll) } if cfg.Memory.Dream.Agent != DefaultAgentName { - t.Fatalf("Default() Memory.Dream.Agent = %q, want %q", cfg.Memory.Dream.Agent, DefaultAgentName) + t.Fatalf("defaultConfig() Memory.Dream.Agent = %q, want %q", cfg.Memory.Dream.Agent, DefaultAgentName) } if !cfg.Skills.Enabled { - t.Fatal("Default() Skills.Enabled = false, want true") + t.Fatal("defaultConfig() Skills.Enabled = false, want true") } if got, want := cfg.Skills.PollInterval, 3*time.Second; got != want { - t.Fatalf("Default() Skills.PollInterval = %s, want %s", got, want) + t.Fatalf("defaultConfig() Skills.PollInterval = %s, want %s", got, want) } } diff --git a/internal/config/provider.go b/internal/config/provider.go index 6c04a7cb7..934fdd98a 100644 --- a/internal/config/provider.go +++ b/internal/config/provider.go @@ -82,7 +82,7 @@ func (c Config) ResolveProvider(name string) (ProviderConfig, error) { resolved, hasBuiltin := builtinProviders[providerName] if override, ok := c.Providers[providerName]; ok { - resolved = MergeProvider(resolved, override) + resolved = mergeProvider(resolved, override) } if !hasBuiltin { @@ -161,8 +161,7 @@ func (c Config) ResolveAgent(agent AgentDef) (ResolvedAgent, error) { return resolved, nil } -// MergeProvider merges an override provider config into a base provider config. 
-func MergeProvider(base ProviderConfig, override ProviderConfig) ProviderConfig { +func mergeProvider(base ProviderConfig, override ProviderConfig) ProviderConfig { merged := cloneProvider(base) if strings.TrimSpace(override.Command) != "" { merged.Command = override.Command diff --git a/internal/daemon/boot.go b/internal/daemon/boot.go index cb88f563c..4955dacfe 100644 --- a/internal/daemon/boot.go +++ b/internal/daemon/boot.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "log/slog" "os" "strings" "time" @@ -21,27 +22,116 @@ import ( workspacepkg "github.com/pedronauck/agh/internal/workspace" ) +type bootState struct { + cfg aghconfig.Config + logger *slog.Logger + closeLogger func() error + lock *Lock + memoryStore *memory.Store + skillsRegistry *skills.Registry + mcpResolver *skills.MCPResolver + dreamSvc consolidation.Service + dreamRuntime *consolidation.Runtime + globalMemoryDir string + promptAssembler session.PromptAssembler + notifier *hooksNotifier + registry Registry + workspaceResolver workspacepkg.WorkspaceResolver + sessions SessionManager + observer Observer + hooks hookRuntime + httpServer Server + udsServer Server + skillsCancel context.CancelFunc + skillsDone chan struct{} + startedAt time.Time + info Info + deps RuntimeDeps +} + +type bootCleanup struct { + fns []func(context.Context) error +} + +func (c *bootCleanup) add(fn func(context.Context) error) { + if fn == nil { + return + } + c.fns = append(c.fns, fn) +} + +func (c *bootCleanup) run(err *error) { + if err == nil || *err == nil { + return + } + + var cleanupErrs []error + for i := len(c.fns) - 1; i >= 0; i-- { + if cleanupErr := c.fns[i](context.Background()); cleanupErr != nil { + cleanupErrs = append(cleanupErrs, cleanupErr) + } + } + *err = errors.Join(*err, errors.Join(cleanupErrs...)) +} + func (d *Daemon) boot(ctx context.Context) (err error) { if ctx == nil { return errors.New("daemon: boot context is required") } + if err := d.beginBoot(); err != nil { + return err + } + 
defer d.finishBoot(&err) + + state := &bootState{} + cleanup := &bootCleanup{} + defer cleanup.run(&err) + + if err := d.bootConfig(state, cleanup); err != nil { + return err + } + if err := d.bootPromptProviders(ctx, state); err != nil { + return err + } + if err := d.bootRuntime(ctx, state, cleanup); err != nil { + return err + } + if err := d.bootHooks(ctx, state, cleanup); err != nil { + return err + } + if err := d.bootServers(ctx, state, cleanup); err != nil { + return err + } + if err := d.bootFinalize(ctx, state); err != nil { + return err + } + + d.publishBootState(state) + return nil +} + +func (d *Daemon) beginBoot() error { d.mu.Lock() + defer d.mu.Unlock() + if d.booting || d.lock != nil || d.registry != nil || d.sessions != nil || d.observer != nil { - d.mu.Unlock() return errors.New("daemon: already booted") } d.booting = true + return nil +} + +func (d *Daemon) finishBoot(err *error) { + if err == nil || *err == nil { + return + } + d.mu.Lock() + d.booting = false d.mu.Unlock() - defer func() { - if err == nil { - return - } - d.mu.Lock() - d.booting = false - d.mu.Unlock() - }() +} +func (d *Daemon) bootConfig(state *bootState, cleanup *bootCleanup) error { cfg, err := d.loadConfig() if err != nil { return err @@ -68,74 +158,62 @@ func (d *Daemon) boot(ctx context.Context) (err error) { closeLogger = func() error { return nil } } - var ( - memoryStore *memory.Store - skillsRegistry *skills.Registry - mcpResolver *skills.MCPResolver - dreamSvc consolidation.Service - dreamRuntime *consolidation.Runtime - globalMemoryDir string - skillsCancel context.CancelFunc - skillsDone chan struct{} - prependProviders []session.PromptProvider - appendProviders []session.PromptProvider - ) - if cfg.Memory.Enabled { - globalMemoryDir = strings.TrimSpace(cfg.Memory.GlobalDir) - if globalMemoryDir == "" { - globalMemoryDir = d.homePaths.MemoryDir + state.cfg = cfg + state.logger = logger + state.closeLogger = closeLogger + cleanup.add(func(context.Context) error { + 
return closeLogger() + }) + return nil +} + +func (d *Daemon) bootPromptProviders(ctx context.Context, state *bootState) error { + var prependProviders []session.PromptProvider + var appendProviders []session.PromptProvider + + if state.cfg.Memory.Enabled { + state.globalMemoryDir = strings.TrimSpace(state.cfg.Memory.GlobalDir) + if state.globalMemoryDir == "" { + state.globalMemoryDir = d.homePaths.MemoryDir } - memoryStore = memory.NewStore(globalMemoryDir) - if err := memoryStore.EnsureDirs(); err != nil { + state.memoryStore = memory.NewStore(state.globalMemoryDir) + if err := state.memoryStore.EnsureDirs(); err != nil { return fmt.Errorf("daemon: ensure memory store directories: %w", err) } - prependProviders = append(prependProviders, memory.NewAssembler(memoryStore)) + prependProviders = append(prependProviders, memory.NewAssembler(state.memoryStore)) } - cleanupFns := make([]func(context.Context) error, 0, 8) - defer func() { - if err == nil { - return - } - var cleanupErrs []error - for i := len(cleanupFns) - 1; i >= 0; i-- { - if cleanupErr := cleanupFns[i](context.Background()); cleanupErr != nil { - cleanupErrs = append(cleanupErrs, cleanupErr) - } - } - err = errors.Join(err, errors.Join(cleanupErrs...)) - }() - cleanupFns = append(cleanupFns, func(context.Context) error { - return closeLogger() - }) - - if cfg.Skills.Enabled { - skillsCfg, err := d.skillsRegistryConfig(cfg) + if state.cfg.Skills.Enabled { + skillsCfg, err := d.skillsRegistryConfig(state.cfg) if err != nil { return err } - skillsRegistry = skills.NewRegistry(skillsCfg, skills.WithLogger(logger)) - if err := skillsRegistry.LoadAll(ctx); err != nil { + state.skillsRegistry = skills.NewRegistry(skillsCfg, skills.WithLogger(state.logger)) + if err := state.skillsRegistry.LoadAll(ctx); err != nil { return fmt.Errorf("daemon: load skills registry: %w", err) } - mcpResolver = skills.NewMCPResolver(cfg.Skills, logger) - appendProviders = append(appendProviders, 
skills.NewCatalogProvider(skillsRegistry)) + state.mcpResolver = skills.NewMCPResolver(state.cfg.Skills, state.logger) + appendProviders = append(appendProviders, skills.NewCatalogProvider(state.skillsRegistry)) } - promptAssembler := NewComposedAssembler( + state.promptAssembler = NewComposedAssembler( WithPrependPromptProviders(prependProviders...), WithAppendPromptProviders(appendProviders...), ) + return nil +} +func (d *Daemon) bootRuntime(ctx context.Context, state *bootState, cleanup *bootCleanup) error { pid := d.pid() lock, err := d.acquireLock(d.homePaths.DaemonLock, pid) if err != nil { return err } - cleanupFns = append(cleanupFns, func(context.Context) error { + cleanup.add(func(context.Context) error { return lock.Release() }) + state.lock = lock stalePID := lock.StalePID() if stalePID == 0 { @@ -144,16 +222,16 @@ func (d *Daemon) boot(ctx context.Context) (err error) { case readErr == nil && existingInfo.PID > 0 && existingInfo.PID != pid && !d.processAlive(existingInfo.PID): stalePID = existingInfo.PID case readErr != nil && !errors.Is(readErr, os.ErrNotExist): - logger.Warn("daemon: read stale daemon info failed", "path", d.homePaths.DaemonInfo, "error", readErr) + state.logger.Warn("daemon: read stale daemon info failed", "path", d.homePaths.DaemonInfo, "error", readErr) } } if stalePID > 0 { if cleanupErr := d.cleanupOrphans(ctx, stalePID); cleanupErr != nil { - logger.Warn("daemon: cleanup orphan processes failed", "stale_pid", stalePID, "error", cleanupErr) + state.logger.Warn("daemon: cleanup orphan processes failed", "stale_pid", stalePID, "error", cleanupErr) } } - if err := removeStaleSocket(cfg.Daemon.Socket); err != nil { + if err := removeStaleSocket(state.cfg.Daemon.Socket); err != nil { return err } @@ -161,14 +239,14 @@ func (d *Daemon) boot(ctx context.Context) (err error) { if err != nil { return fmt.Errorf("daemon: open global database %q: %w", d.homePaths.DatabaseFile, err) } - cleanupFns = append(cleanupFns, func(ctx 
context.Context) error { + cleanup.add(func(ctx context.Context) error { return registry.Close(ctx) }) workspaceResolver, err := workspacepkg.NewResolver( registry, workspacepkg.WithHomePaths(d.homePaths), - workspacepkg.WithLogger(logger), + workspacepkg.WithLogger(state.logger), workspacepkg.WithConfigLoader(func(rootDir string) (aghconfig.Config, error) { return aghconfig.LoadForHome(d.homePaths, aghconfig.WithWorkspaceRoot(rootDir)) }), @@ -177,33 +255,42 @@ func (d *Daemon) boot(ctx context.Context) (err error) { return fmt.Errorf("daemon: create workspace resolver: %w", err) } - if cfg.Memory.Enabled && cfg.Memory.Dream.Enabled { - dreamSvc = d.newDreamService( - memory.WithMemoryStore(memoryStore), + if state.cfg.Memory.Enabled && state.cfg.Memory.Dream.Enabled { + state.dreamSvc = d.newDreamService( + memory.WithMemoryStore(state.memoryStore), memory.WithSessionsDir(d.homePaths.SessionsDir), - memory.WithMinHours(cfg.Memory.Dream.MinHours), - memory.WithMinSessions(cfg.Memory.Dream.MinSessions), - memory.WithLogger(logger), + memory.WithMinHours(state.cfg.Memory.Dream.MinHours), + memory.WithMinSessions(state.cfg.Memory.Dream.MinSessions), + memory.WithLogger(state.logger), memory.WithWorkspaceResolver(workspaceResolver), ) } - startedAt := d.now().UTC() - notifier := newHooksNotifier(logger, d.now) + state.startedAt = d.now().UTC() + state.notifier = newHooksNotifier(state.logger, d.now) + var skillRegistryDep session.SkillRegistry - if skillsRegistry != nil { - skillRegistryDep = skillsRegistry + if state.skillsRegistry != nil { + skillRegistryDep = state.skillsRegistry } var mcpResolverDep session.MCPResolver - if mcpResolver != nil { - mcpResolverDep = mcpResolver + if state.mcpResolver != nil { + mcpResolverDep = state.mcpResolver } + sessions, err := d.newSessionManager(ctx, SessionManagerDeps{ - HomePaths: d.homePaths, - Logger: logger, - Notifier: notifier, - Hooks: notifier, - PromptAssembler: promptAssembler, + HomePaths: d.homePaths, + Logger: 
state.logger, + Notifier: state.notifier, + Hooks: session.HookSet{ + Session: state.notifier, + Prompt: state.notifier, + Events: state.notifier, + Agent: state.notifier, + Conversation: state.notifier, + Compaction: state.notifier, + }, + PromptAssembler: state.promptAssembler, SkillRegistry: skillRegistryDep, MCPResolver: mcpResolverDep, WorkspaceResolver: workspaceResolver, @@ -212,127 +299,148 @@ func (d *Daemon) boot(ctx context.Context) (err error) { return fmt.Errorf("daemon: create session manager: %w", err) } - dreamSpawner := consolidation.NewSessionSpawner(sessions, workspaceResolver, cfg, globalMemoryDir) + dreamSpawner := consolidation.NewSessionSpawner(sessions, workspaceResolver, state.cfg, state.globalMemoryDir) var dreamTrigger DreamTrigger - if dreamSvc != nil { - lockPath := memory.ConsolidationLockPath(globalMemoryDir) - dreamRuntime = consolidation.NewRuntime( - cfg.Memory.Dream.Enabled, - dreamSvc, + if state.dreamSvc != nil { + lockPath := memory.ConsolidationLockPath(state.globalMemoryDir) + state.dreamRuntime = consolidation.NewRuntime( + state.cfg.Memory.Dream.Enabled, + state.dreamSvc, dreamSpawner, - cfg.Memory.Dream.CheckInterval, - logger, + state.cfg.Memory.Dream.CheckInterval, + state.logger, func() (time.Time, error) { return memory.NewConsolidationLock(lockPath).LastConsolidatedAt() }, ) - dreamTrigger = dreamRuntime + dreamTrigger = state.dreamRuntime } var skillsRegistryAPI core.SkillsRegistry - if skillsRegistry != nil { - skillsRegistryAPI = skillsRegistry + if state.skillsRegistry != nil { + skillsRegistryAPI = state.skillsRegistry } - deps := RuntimeDeps{ - Config: cfg, + state.deps = RuntimeDeps{ + Config: state.cfg, HomePaths: d.homePaths, - Logger: logger, + Logger: state.logger, Sessions: sessions, Registry: registry, - MemoryStore: memoryStore, + MemoryStore: state.memoryStore, WorkspaceResolver: workspaceResolver, WorkspaceService: workspaceResolver, SkillsRegistry: skillsRegistryAPI, DreamTrigger: dreamTrigger, - 
StartedAt: startedAt, + StartedAt: state.startedAt, } - observer, err := d.newObserver(ctx, deps) + observer, err := d.newObserver(ctx, state.deps) if err != nil { return fmt.Errorf("daemon: create observer: %w", err) } - deps.Observer = observer - nativeDecls, nativeExecutors := daemonNativeHooks(observer, dreamRuntime) + state.registry = registry + state.workspaceResolver = workspaceResolver + state.sessions = sessions + state.observer = observer + state.deps.Observer = observer + return nil +} + +func (d *Daemon) bootHooks(ctx context.Context, state *bootState, cleanup *bootCleanup) error { + nativeDecls, nativeExecutors := daemonNativeHooks(state.observer, state.dreamRuntime) hookOptions := []hookspkg.Option{ - hookspkg.WithLogger(logger), + hookspkg.WithLogger(state.logger), hookspkg.WithNow(d.now), - hookspkg.WithDebugPatchAudit(strings.EqualFold(cfg.Log.Level, "debug")), + hookspkg.WithDebugPatchAudit(strings.EqualFold(state.cfg.Log.Level, "debug")), hookspkg.WithExecutorResolver(daemonExecutorResolver(nativeExecutors)), hookspkg.WithNativeDeclarations(nativeDecls), - hookspkg.WithConfigDeclarationProvider(configDeclarationProvider(registry, workspaceResolver, logger)), - hookspkg.WithAgentDeclarationProvider(agentDeclarationProvider(registry, workspaceResolver, logger)), - hookspkg.WithSkillDeclarationProvider(skillDeclarationProvider(skillsRegistry, registry, workspaceResolver, cfg.Skills.AllowedMarketplaceHooks, logger)), + hookspkg.WithConfigDeclarationProvider(configDeclarationProvider(state.registry, state.workspaceResolver, state.logger)), + hookspkg.WithAgentDeclarationProvider(agentDeclarationProvider(state.registry, state.workspaceResolver, state.logger)), + hookspkg.WithSkillDeclarationProvider(skillDeclarationProvider(state.skillsRegistry, state.registry, state.workspaceResolver, state.cfg.Skills.AllowedMarketplaceHooks, state.logger)), } - if sink, ok := observer.(hookspkg.TelemetrySink); ok { + if sink, ok := 
state.observer.(hookspkg.TelemetrySink); ok { hookOptions = append(hookOptions, hookspkg.WithTelemetrySink(sink)) } + hooks := hookspkg.NewHooks(hookOptions...) if err := hooks.Rebuild(ctx); err != nil { hooks.Close() return fmt.Errorf("daemon: rebuild hooks: %w", err) } - if hookAwareObserver, ok := observer.(interface { + if hookAwareObserver, ok := state.observer.(interface { AttachHooks(observe.HookCatalogSource) }); ok { hookAwareObserver.AttachHooks(hooks) } - notifier.setRuntime(hooks, observer) - cleanupFns = append(cleanupFns, func(context.Context) error { + state.notifier.setRuntime(hooks, state.observer) + cleanup.add(func(context.Context) error { hooks.Close() return nil }) - if skillsRegistry != nil { - skillsCancel, skillsDone = startSkillsWatcher(ctx, skillsRegistry, cfg.Skills.PollInterval, func(refreshCtx context.Context) error { + if state.skillsRegistry != nil { + state.skillsCancel, state.skillsDone = startSkillsWatcher(ctx, state.skillsRegistry, state.cfg.Skills.PollInterval, func(refreshCtx context.Context) error { return hooks.Rebuild(refreshCtx) }) - cleanupFns = append(cleanupFns, func(context.Context) error { - stopSkillsWatcher(skillsCancel, skillsDone) + cleanup.add(func(context.Context) error { + stopSkillsWatcher(state.skillsCancel, state.skillsDone) return nil }) } - httpServer, err := d.httpFactory(ctx, deps) + state.hooks = hooks + return nil +} + +func (d *Daemon) bootServers(ctx context.Context, state *bootState, cleanup *bootCleanup) error { + httpServer, err := d.httpFactory(ctx, state.deps) if err != nil { return fmt.Errorf("daemon: create http server: %w", err) } if err := httpServer.Start(ctx); err != nil { return fmt.Errorf("daemon: start http server: %w", err) } - cleanupFns = append(cleanupFns, func(ctx context.Context) error { + cleanup.add(func(ctx context.Context) error { return httpServer.Shutdown(ctx) }) - udsServer, err := d.udsFactory(ctx, deps) + udsServer, err := d.udsFactory(ctx, state.deps) if err != nil { 
return fmt.Errorf("daemon: create uds server: %w", err) } if err := udsServer.Start(ctx); err != nil { return fmt.Errorf("daemon: start uds server: %w", err) } - cleanupFns = append(cleanupFns, func(ctx context.Context) error { + cleanup.add(func(ctx context.Context) error { return udsServer.Shutdown(ctx) }) info := Info{ - PID: pid, - Port: resolveDaemonPort(cfg.HTTP.Port, httpServer), - StartedAt: startedAt, + PID: d.pid(), + Port: resolveDaemonPort(state.cfg.HTTP.Port, httpServer), + StartedAt: state.startedAt, } if err := WriteInfo(d.homePaths.DaemonInfo, info); err != nil { return err } - cleanupFns = append(cleanupFns, func(context.Context) error { + cleanup.add(func(context.Context) error { return RemoveInfo(d.homePaths.DaemonInfo) }) - reconcileResult, err := observer.Reconcile(ctx) + state.httpServer = httpServer + state.udsServer = udsServer + state.info = info + return nil +} + +func (d *Daemon) bootFinalize(ctx context.Context, state *bootState) error { + reconcileResult, err := state.observer.Reconcile(ctx) if err != nil { return fmt.Errorf("daemon: reconcile sessions: %w", err) } - logger.Info( + state.logger.Info( "daemon: boot reconciliation complete", "indexed_sessions", len(reconcileResult.Indexed), "orphaned_sessions", len(reconcileResult.Orphaned), @@ -340,37 +448,39 @@ func (d *Daemon) boot(ctx context.Context) (err error) { if d.shouldVerifyBoundaries() { if boundaryErr := d.Boundaries(ctx); boundaryErr != nil { - logger.Warn("daemon: boundary verification warning", "error", boundaryErr) + state.logger.Warn("daemon: boundary verification warning", "error", boundaryErr) } } + return nil +} +func (d *Daemon) publishBootState(state *bootState) { d.mu.Lock() - d.config = cfg - d.logger = logger - d.closeLogger = closeLogger + defer d.mu.Unlock() + + d.config = state.cfg + d.logger = state.logger + d.closeLogger = state.closeLogger d.booting = false - d.lock = lock - d.registry = registry - d.memoryStore = memoryStore - d.sessions = sessions - 
d.hooks = hooks - d.observer = observer - d.httpServer = httpServer - d.udsServer = udsServer - d.dreamRuntime = dreamRuntime - d.workspaceResolver = workspaceResolver - d.skillsRegistry = skillsRegistry - d.skillsCancel = skillsCancel - d.skillsDone = skillsDone - d.startedAt = startedAt - d.info = info + d.lock = state.lock + d.registry = state.registry + d.memoryStore = state.memoryStore + d.sessions = state.sessions + d.hooks = state.hooks + d.observer = state.observer + d.httpServer = state.httpServer + d.udsServer = state.udsServer + d.dreamRuntime = state.dreamRuntime + d.workspaceResolver = state.workspaceResolver + d.skillsRegistry = state.skillsRegistry + d.skillsCancel = state.skillsCancel + d.skillsDone = state.skillsDone + d.startedAt = state.startedAt + d.info = state.info if !d.readyClosed { close(d.readyCh) d.readyClosed = true } - d.mu.Unlock() - - return nil } func (d *Daemon) skillsRegistryConfig(cfg aghconfig.Config) (skills.RegistryConfig, error) { diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 089c83c53..4bdf2a0ce 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -91,7 +91,7 @@ type SessionManagerDeps struct { HomePaths aghconfig.HomePaths Logger *slog.Logger Notifier session.Notifier - Hooks session.HookDispatcher + Hooks session.HookSet PromptAssembler session.PromptAssembler SkillRegistry session.SkillRegistry MCPResolver session.MCPResolver @@ -228,6 +228,14 @@ func New(opts ...Option) (*Daemon, error) { } } + if err := d.applyDefaults(); err != nil { + return nil, err + } + + return d, nil +} + +func (d *Daemon) applyDefaults() error { if d.now == nil { d.now = func() time.Time { return time.Now().UTC() @@ -251,7 +259,7 @@ func New(opts ...Option) (*Daemon, error) { session.WithLifecycleContext(ctx), session.WithLogger(deps.Logger), session.WithNotifier(deps.Notifier), - session.WithHookDispatcher(deps.Hooks), + session.WithHookSet(deps.Hooks), 
session.WithPromptAssembler(deps.PromptAssembler), session.WithSkillRegistry(deps.SkillRegistry), session.WithMCPResolver(deps.MCPResolver), @@ -340,7 +348,7 @@ func New(opts ...Option) (*Daemon, error) { d.orphanPollWait = orphanCleanupPollWait } - return d, nil + return nil } // Run boots the daemon, blocks until signal or context cancellation, then performs graceful shutdown. diff --git a/internal/daemon/daemon_integration_test.go b/internal/daemon/daemon_integration_test.go index e9084ccef..bd2ed6fa8 100644 --- a/internal/daemon/daemon_integration_test.go +++ b/internal/daemon/daemon_integration_test.go @@ -472,7 +472,7 @@ body if capturedDeps.Notifier == nil { t.Fatal("boot() did not inject the hooks notifier") } - if capturedDeps.Hooks == nil { + if capturedDeps.Hooks.Session == nil { t.Fatal("boot() did not inject the hooks dispatcher") } @@ -488,10 +488,10 @@ body UpdatedAt: time.Date(2026, 4, 9, 11, 0, 0, 0, time.UTC), } - if _, err := capturedDeps.Hooks.DispatchSessionPostCreate(testutil.Context(t), hookspkg.SessionPostCreatePayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostCreate, time.Now().UTC()))); err != nil { + if _, err := capturedDeps.Hooks.Session.DispatchSessionPostCreate(testutil.Context(t), hookspkg.SessionPostCreatePayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostCreate, time.Now().UTC()))); err != nil { t.Fatalf("DispatchSessionPostCreate() error = %v", err) } - if _, err := capturedDeps.Hooks.DispatchSessionPostStop(testutil.Context(t), hookspkg.SessionPostStopPayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostStop, time.Now().UTC()))); err != nil { + if _, err := capturedDeps.Hooks.Session.DispatchSessionPostStop(testutil.Context(t), hookspkg.SessionPostStopPayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostStop, time.Now().UTC()))); err != nil { t.Fatalf("DispatchSessionPostStop() error = %v", err) } @@ -543,7 +543,7 @@ func 
TestBootSkillsWatcherRebuildsHooksBeforeNextDispatch(t *testing.T) { t.Fatalf("Shutdown() error = %v", err) } }) - if capturedDeps.Hooks == nil { + if capturedDeps.Hooks.Session == nil { t.Fatal("boot() did not inject the hooks dispatcher") } @@ -582,7 +582,7 @@ body UpdatedAt: time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC), } - if _, err := capturedDeps.Hooks.DispatchSessionPostCreate(testutil.Context(t), hookspkg.SessionPostCreatePayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostCreate, time.Now().UTC()))); err != nil { + if _, err := capturedDeps.Hooks.Session.DispatchSessionPostCreate(testutil.Context(t), hookspkg.SessionPostCreatePayload(hookSessionLifecyclePayload(sess, hookspkg.HookSessionPostCreate, time.Now().UTC()))); err != nil { t.Fatalf("DispatchSessionPostCreate() error = %v", err) } assertLifecycleHookPayload(t, outputPath, hookspkg.HookSessionPostCreate, resolvedWorkspace) diff --git a/internal/daemon/daemon_test.go b/internal/daemon/daemon_test.go index ceb7d1601..7d772ffa1 100644 --- a/internal/daemon/daemon_test.go +++ b/internal/daemon/daemon_test.go @@ -1349,7 +1349,7 @@ func TestSessionStopNotifierQueuesDreamCheck(t *testing.T) { return spawn(ctx, "memory-consolidation", "session-stop prompt", workspace) }, } - var dispatcher session.HookDispatcher + var dispatcher session.HookSet d := newTestDaemon(t, homePaths, cfg) d.newSessionManager = func(_ context.Context, deps SessionManagerDeps) (SessionManager, error) { @@ -1381,12 +1381,12 @@ func TestSessionStopNotifierQueuesDreamCheck(t *testing.T) { defer d.mu.Unlock() return d.dreamRuntime != nil }) - if dispatcher == nil { - t.Fatal("session manager hook dispatcher = nil") + if dispatcher.Session == nil { + t.Fatal("session manager hook set = nil") } resolved := resolveDaemonWorkspace(t, d.workspaceResolver, workspace) - if _, err := dispatcher.DispatchSessionPostStop(context.Background(), hookspkg.SessionPostStopPayload{ + if _, err := 
dispatcher.Session.DispatchSessionPostStop(context.Background(), hookspkg.SessionPostStopPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookSessionPostStop, Timestamp: time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC), @@ -1413,7 +1413,7 @@ func TestSessionStopNotifierQueuesDreamCheck(t *testing.T) { t.Fatalf("Create() workspace_path = %q, want empty", got) } - if _, err := dispatcher.DispatchSessionPostStop(context.Background(), hookspkg.SessionPostStopPayload{ + if _, err := dispatcher.Session.DispatchSessionPostStop(context.Background(), hookspkg.SessionPostStopPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookSessionPostStop, Timestamp: time.Date(2026, 4, 9, 12, 0, 0, 0, time.UTC), diff --git a/internal/daemon/hooks_bridge.go b/internal/daemon/hooks_bridge.go index 3e06eb8de..3af16c9d0 100644 --- a/internal/daemon/hooks_bridge.go +++ b/internal/daemon/hooks_bridge.go @@ -64,7 +64,12 @@ type hooksNotifier struct { } var _ session.Notifier = (*hooksNotifier)(nil) -var _ session.HookDispatcher = (*hooksNotifier)(nil) +var _ session.SessionLifecycleHooks = (*hooksNotifier)(nil) +var _ session.PromptHooks = (*hooksNotifier)(nil) +var _ session.EventHooks = (*hooksNotifier)(nil) +var _ session.AgentHooks = (*hooksNotifier)(nil) +var _ session.ConversationHooks = (*hooksNotifier)(nil) +var _ session.CompactionHooks = (*hooksNotifier)(nil) func newHooksNotifier(logger *slog.Logger, now func() time.Time) *hooksNotifier { if logger == nil { diff --git a/internal/hooks/dispatch.go b/internal/hooks/dispatch.go index bd1b516ba..e2e5f7866 100644 --- a/internal/hooks/dispatch.go +++ b/internal/hooks/dispatch.go @@ -7,8 +7,6 @@ import ( "time" ) -type matcherFunc[P any] func(HookMatcher, P) bool - type dispatchConfig[P any, R any] struct { match matcherFunc[P] apply func(P, R) P @@ -529,147 +527,10 @@ func executeDispatch[P any, R any]( return result, dispatchErr } -func selectMatchingHooks[P any]( - snapshot []*ResolvedHook, - payload P, - match 
matcherFunc[P], -) ([]*ResolvedHook, []*ResolvedHook) { - syncHooks := make([]*ResolvedHook, 0, len(snapshot)) - asyncHooks := make([]*ResolvedHook, 0, len(snapshot)) - - for _, hook := range snapshot { - if hook == nil { - continue - } - if match != nil && !match(hook.Matcher, payload) { - continue - } - switch hook.Mode { - case HookModeAsync: - asyncHooks = append(asyncHooks, hook) - case HookModeSync: - syncHooks = append(syncHooks, hook) - } - } - - return syncHooks, asyncHooks -} - -func submitAsyncHooks[P any, R any](h *Hooks, parent context.Context, payload P, hooks []*ResolvedHook, pipe pipeline[P, R]) { - if h == nil || h.pool == nil { - return - } - - parentDepth := currentDispatchDepth(parent) - for _, hook := range hooks { - if hook == nil { - continue - } - - asyncHook := *hook - asyncPayload := payload - h.pool.Submit(asyncTask{ - hook: asyncHook.RegisteredHook, - run: func(poolCtx context.Context) { - baseCtx := context.WithValue(poolCtx, dispatchDepthContextKey{}, parentDepth) - baseCtx = context.WithValue(baseCtx, dispatchChainContextKey{}, currentDispatchChain(parent)) - hookCtx, depth, err := h.enterDispatch(baseCtx, asyncHook.Event) - if err != nil { - h.emitHookRun(poolCtx, asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeSkipped, 0, nil, err, parentDepth) - return - } - - cancel := func() {} - if asyncHook.Timeout > 0 { - hookCtx, cancel = context.WithTimeout(hookCtx, asyncHook.Timeout) - } - defer cancel() - - started := time.Now() - _, rawPatch, err := pipe.runHook(hookCtx, asyncHook.RegisteredHook, asyncPayload) - duration := time.Since(started) - if err != nil { - h.emitHookRun(hookCtx, asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeFailed, duration, rawPatch, err, depth) - h.logger.WarnContext( - hookCtx, - "hook.dispatch.async_failed", - "hook", asyncHook.Name, - "event", asyncHook.Event.String(), - "source", asyncHook.Source.String(), - "error", err, - ) - return - } - h.emitHookRun(hookCtx, asyncPayload, 
asyncHook.RegisteredHook, HookRunOutcomeApplied, duration, rawPatch, nil, depth) - }, - }) - } -} - func applyNoop[P any, R any](payload P, _ R) P { return payload } -func matchSessionPreCreate(matcher HookMatcher, payload SessionPreCreatePayload) bool { - return matcher.MatchesSession(payload.SessionContext) -} - -func matchSessionLifecycle(matcher HookMatcher, payload SessionLifecyclePayload) bool { - return matcher.MatchesSession(payload.SessionContext) -} - -func matchInputPreSubmit(matcher HookMatcher, payload InputPreSubmitPayload) bool { - return matcher.MatchesInput(payload) -} - -func matchPrompt(matcher HookMatcher, payload PromptPayload) bool { - return matcher.MatchesPrompt(payload) -} - -func matchEventRecord(matcher HookMatcher, payload EventRecordPayload) bool { - return matcher.MatchesEvent(payload) -} - -func matchAgentPreStart(matcher HookMatcher, payload AgentPreStartPayload) bool { - return matcher.MatchesAgentPreStart(payload) -} - -func matchAgentLifecycle(matcher HookMatcher, payload AgentLifecyclePayload) bool { - return matcher.MatchesAgentLifecycle(payload) -} - -func matchTurn(matcher HookMatcher, payload TurnPayload) bool { - return matcher.MatchesTurn(payload) -} - -func matchMessage(matcher HookMatcher, payload MessagePayload) bool { - return matcher.MatchesMessage(payload) -} - -func matchToolPreCall(matcher HookMatcher, payload ToolPreCallPayload) bool { - return matcher.MatchesToolPreCall(payload) -} - -func matchToolPostCall(matcher HookMatcher, payload ToolPostCallPayload) bool { - return matcher.MatchesToolPostCall(payload) -} - -func matchToolPostError(matcher HookMatcher, payload ToolPostErrorPayload) bool { - return matcher.MatchesToolPostError(payload) -} - -func matchPermissionRequest(matcher HookMatcher, payload PermissionRequestPayload) bool { - return matcher.MatchesPermissionRequest(payload) -} - -func matchPermissionResolution(matcher HookMatcher, payload PermissionResolutionPayload) bool { - return 
matcher.MatchesPermissionResolution(payload) -} - -func matchContextCompact(matcher HookMatcher, payload ContextCompactPayload) bool { - return matcher.MatchesContextCompact(payload) -} - func applySessionContextPatch(payload SessionContext, patch SessionCreatePatch) SessionContext { if patch.SessionName != nil { payload.SessionName = *patch.SessionName diff --git a/internal/hooks/dispatch_async.go b/internal/hooks/dispatch_async.go new file mode 100644 index 000000000..9e6010273 --- /dev/null +++ b/internal/hooks/dispatch_async.go @@ -0,0 +1,69 @@ +package hooks + +import ( + "context" + "errors" + "time" +) + +var errAsyncHookDropped = errors.New("hooks: async hook submission dropped") + +func submitAsyncHooks[P any, R any](h *Hooks, parent context.Context, payload P, hooks []*ResolvedHook, pipe pipeline[P, R]) { + if h == nil || h.pool == nil { + return + } + + parentDepth := currentDispatchDepth(parent) + for _, hook := range hooks { + if hook == nil { + continue + } + + asyncHook := *hook + asyncPayload := payload + if !h.pool.Submit(asyncTask{ + hook: asyncHook.RegisteredHook, + run: func(poolCtx context.Context) { + baseCtx, cancelBase := context.WithCancel(parent) + stopPoolCancel := context.AfterFunc(poolCtx, cancelBase) + defer func() { + stopPoolCancel() + cancelBase() + }() + + baseCtx = context.WithValue(baseCtx, dispatchDepthContextKey{}, parentDepth) + baseCtx = context.WithValue(baseCtx, dispatchChainContextKey{}, currentDispatchChain(parent)) + hookCtx, depth, err := h.enterDispatch(baseCtx, asyncHook.Event) + if err != nil { + h.emitHookRun(baseCtx, asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeSkipped, 0, nil, err, parentDepth) + return + } + + cancel := func() {} + if asyncHook.Timeout > 0 { + hookCtx, cancel = context.WithTimeout(hookCtx, asyncHook.Timeout) + } + defer cancel() + + started := time.Now() + _, rawPatch, err := pipe.runHook(hookCtx, asyncHook.RegisteredHook, asyncPayload) + duration := time.Since(started) + if err != nil { 
+ h.emitHookRun(hookCtx, asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeFailed, duration, rawPatch, err, depth) + h.logger.WarnContext( + hookCtx, + "hook.dispatch.async_failed", + "hook", asyncHook.Name, + "event", asyncHook.Event.String(), + "source", asyncHook.Source.String(), + "error", err, + ) + return + } + h.emitHookRun(hookCtx, asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeApplied, duration, rawPatch, nil, depth) + }, + }) { + h.emitHookRun(parent, asyncPayload, asyncHook.RegisteredHook, HookRunOutcomeDropped, 0, nil, errAsyncHookDropped, parentDepth) + } + } +} diff --git a/internal/hooks/hooks_test.go b/internal/hooks/hooks_test.go index ee441b7aa..f1347d38f 100644 --- a/internal/hooks/hooks_test.go +++ b/internal/hooks/hooks_test.go @@ -737,6 +737,52 @@ func TestDispatchEventPreRecordRunsAsyncHook(t *testing.T) { } } +func TestDispatchEventPreRecordAsyncHookUsesParentCancellation(t *testing.T) { + t.Parallel() + + canceled := make(chan error, 1) + hooks := newTestHooks( + t, + WithNativeDeclarations([]HookDecl{ + { + Name: "event-observer", + Event: HookEventPreRecord, + Mode: HookModeAsync, + ExecutorKind: HookExecutorNative, + }, + }), + WithExecutorResolver(testExecutorResolver(map[string]Executor{ + "event-observer": NewTypedNativeExecutor(func(ctx context.Context, _ RegisteredHook, _ EventPreRecordPayload) (EventPreRecordPatch, error) { + <-ctx.Done() + canceled <- ctx.Err() + return EventPreRecordPatch{}, ctx.Err() + }), + })), + ) + + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v, want nil", err) + } + + ctx, cancel := context.WithCancel(t.Context()) + if _, err := hooks.DispatchEventPreRecord(ctx, EventPreRecordPayload{ + PayloadBase: PayloadBase{Event: HookEventPreRecord}, + RecordType: "agent_message", + }); err != nil { + t.Fatalf("DispatchEventPreRecord() error = %v, want nil", err) + } + cancel() + + select { + case err := <-canceled: + if !errors.Is(err, context.Canceled) { + 
t.Fatalf("async hook ctx err = %v, want context.Canceled", err) + } + case <-time.After(time.Second): + t.Fatal("async hook did not observe parent cancellation") + } +} + func TestDispatchInputPreSubmitSkipsAsyncHooksWhenSyncPhaseDoesNotSucceed(t *testing.T) { t.Parallel() diff --git a/internal/hooks/matcher.go b/internal/hooks/matcher.go index 873dc27d6..840c3dbd2 100644 --- a/internal/hooks/matcher.go +++ b/internal/hooks/matcher.go @@ -7,6 +7,8 @@ import ( "strings" ) +type matcherFunc[P any] func(HookMatcher, P) bool + var allowedMatcherFieldsByFamily = map[HookEventFamily]map[string]struct{}{ HookEventFamilySession: { "agent_name": {}, @@ -166,6 +168,92 @@ func (m HookMatcher) MatchesContextCompact(payload ContextCompactPayload) bool { matchStringField(m.CompactionStrategy, payload.Strategy) } +func selectMatchingHooks[P any]( + snapshot []*ResolvedHook, + payload P, + match matcherFunc[P], +) ([]*ResolvedHook, []*ResolvedHook) { + syncHooks := make([]*ResolvedHook, 0, len(snapshot)) + asyncHooks := make([]*ResolvedHook, 0, len(snapshot)) + + for _, hook := range snapshot { + if hook == nil { + continue + } + if match != nil && !match(hook.Matcher, payload) { + continue + } + switch hook.Mode { + case HookModeAsync: + asyncHooks = append(asyncHooks, hook) + case HookModeSync: + syncHooks = append(syncHooks, hook) + } + } + + return syncHooks, asyncHooks +} + +func matchSessionPreCreate(matcher HookMatcher, payload SessionPreCreatePayload) bool { + return matcher.MatchesSession(payload.SessionContext) +} + +func matchSessionLifecycle(matcher HookMatcher, payload SessionLifecyclePayload) bool { + return matcher.MatchesSession(payload.SessionContext) +} + +func matchInputPreSubmit(matcher HookMatcher, payload InputPreSubmitPayload) bool { + return matcher.MatchesInput(payload) +} + +func matchPrompt(matcher HookMatcher, payload PromptPayload) bool { + return matcher.MatchesPrompt(payload) +} + +func matchEventRecord(matcher HookMatcher, payload 
EventRecordPayload) bool { + return matcher.MatchesEvent(payload) +} + +func matchAgentPreStart(matcher HookMatcher, payload AgentPreStartPayload) bool { + return matcher.MatchesAgentPreStart(payload) +} + +func matchAgentLifecycle(matcher HookMatcher, payload AgentLifecyclePayload) bool { + return matcher.MatchesAgentLifecycle(payload) +} + +func matchTurn(matcher HookMatcher, payload TurnPayload) bool { + return matcher.MatchesTurn(payload) +} + +func matchMessage(matcher HookMatcher, payload MessagePayload) bool { + return matcher.MatchesMessage(payload) +} + +func matchToolPreCall(matcher HookMatcher, payload ToolPreCallPayload) bool { + return matcher.MatchesToolPreCall(payload) +} + +func matchToolPostCall(matcher HookMatcher, payload ToolPostCallPayload) bool { + return matcher.MatchesToolPostCall(payload) +} + +func matchToolPostError(matcher HookMatcher, payload ToolPostErrorPayload) bool { + return matcher.MatchesToolPostError(payload) +} + +func matchPermissionRequest(matcher HookMatcher, payload PermissionRequestPayload) bool { + return matcher.MatchesPermissionRequest(payload) +} + +func matchPermissionResolution(matcher HookMatcher, payload PermissionResolutionPayload) bool { + return matcher.MatchesPermissionResolution(payload) +} + +func matchContextCompact(matcher HookMatcher, payload ContextCompactPayload) bool { + return matcher.MatchesContextCompact(payload) +} + func (m HookMatcher) matchSessionContext(payload SessionContext, includeSessionType bool) bool { if !matchStringField(m.AgentName, payload.AgentName) { return false diff --git a/internal/hooks/telemetry_test.go b/internal/hooks/telemetry_test.go index 1add85e26..049457c1a 100644 --- a/internal/hooks/telemetry_test.go +++ b/internal/hooks/telemetry_test.go @@ -4,7 +4,9 @@ import ( "context" "encoding/json" "errors" + "sync" "testing" + "time" ) func TestHookTelemetrySecurityPatchPersistsAllFields(t *testing.T) { @@ -169,23 +171,101 @@ func 
TestHookTelemetryRecordsFailureOutcomeAndDuration(t *testing.T) { } } +func TestHookTelemetryRecordsDroppedAsyncSubmission(t *testing.T) { + t.Parallel() + + writer := &captureHookRunWriter{} + blocked := make(chan struct{}) + hooks := NewHooks( + WithLogger(discardPoolLogger()), + WithAsyncWorkerCount(1), + WithAsyncQueueCapacity(1), + WithNativeDeclarations([]HookDecl{{ + Name: "async-event", + Event: HookEventPreRecord, + Mode: HookModeAsync, + ExecutorKind: HookExecutorNative, + }}), + WithExecutorResolver(func(decl HookDecl) (Executor, error) { + if decl.Name != "async-event" { + return nil, errors.New("missing executor") + } + return NewTypedNativeExecutor(func(context.Context, RegisteredHook, EventPreRecordPayload) (EventPreRecordPatch, error) { + <-blocked + return EventPreRecordPatch{}, nil + }), nil + }), + ) + t.Cleanup(hooks.Close) + if err := hooks.Rebuild(t.Context()); err != nil { + t.Fatalf("Rebuild() error = %v", err) + } + + ctx := WithHookRunWriter(t.Context(), writer) + payload := EventPreRecordPayload{PayloadBase: PayloadBase{Event: HookEventPreRecord}, RecordType: "agent_message"} + for i := 0; i < 3; i++ { + if _, err := hooks.DispatchEventPreRecord(ctx, payload); err != nil { + t.Fatalf("DispatchEventPreRecord() #%d error = %v", i+1, err) + } + } + + deadline := time.After(time.Second) + for { + records := writer.recordsSnapshot() + if len(records) > 0 { + record := records[0] + if record.Outcome != HookRunOutcomeDropped { + t.Fatalf("record.Outcome = %q, want %q", record.Outcome, HookRunOutcomeDropped) + } + if record.Error != errAsyncHookDropped.Error() { + t.Fatalf("record.Error = %q, want %q", record.Error, errAsyncHookDropped.Error()) + } + close(blocked) + return + } + + select { + case <-deadline: + close(blocked) + t.Fatal("expected dropped async hook telemetry record") + case <-time.After(10 * time.Millisecond): + } + } +} + type captureHookRunWriter struct { + mu sync.Mutex records []HookRunRecord } func (c *captureHookRunWriter) 
RecordHookRun(_ context.Context, record HookRunRecord) error { + c.mu.Lock() + defer c.mu.Unlock() c.records = append(c.records, cloneTelemetryRecord(record)) return nil } func (c *captureHookRunWriter) singleRecord(t *testing.T) HookRunRecord { t.Helper() + c.mu.Lock() + defer c.mu.Unlock() if got, want := len(c.records), 1; got != want { t.Fatalf("len(records) = %d, want %d", got, want) } return c.records[0] } +func (c *captureHookRunWriter) recordsSnapshot() []HookRunRecord { + c.mu.Lock() + defer c.mu.Unlock() + + records := make([]HookRunRecord, len(c.records)) + for i, record := range c.records { + records[i] = cloneTelemetryRecord(record) + } + return records +} + func newTelemetryTestHooks(t *testing.T, debug bool, decl HookDecl, executors map[string]Executor) *Hooks { t.Helper() diff --git a/internal/memory/consolidation/runtime.go b/internal/memory/consolidation/runtime.go index a032b21b4..9b5322425 100644 --- a/internal/memory/consolidation/runtime.go +++ b/internal/memory/consolidation/runtime.go @@ -57,22 +57,6 @@ type checkRequest struct { const defaultSessionStopTimeout = 10 * time.Second -type sessionSpawnerConfig struct { - stopTimeout time.Duration -} - -// SessionSpawnerOption customizes dream session spawning. -type SessionSpawnerOption func(*sessionSpawnerConfig) - -// WithSessionStopTimeout overrides the timeout used when stopping dream sessions after prompting. -func WithSessionStopTimeout(timeout time.Duration) SessionSpawnerOption { - return func(cfg *sessionSpawnerConfig) { - if timeout > 0 { - cfg.stopTimeout = timeout - } - } -} - // NewRuntime constructs a dream runtime that can be started by the daemon. 
func NewRuntime( enabled bool, @@ -253,19 +237,11 @@ func NewSessionSpawner( resolver workspacepkg.WorkspaceResolver, cfg aghconfig.Config, globalMemoryDir string, - opts ...SessionSpawnerOption, ) memory.SessionSpawner { if !cfg.Memory.Enabled || !cfg.Memory.Dream.Enabled || sessions == nil || resolver == nil { return nil } - spawnerCfg := sessionSpawnerConfig{stopTimeout: defaultSessionStopTimeout} - for _, opt := range opts { - if opt != nil { - opt(&spawnerCfg) - } - } - return func(ctx context.Context, goal, prompt, workspace string) error { workspaces, err := resolveWorkspaces(ctx, sessions, resolver, globalMemoryDir, workspace) if err != nil { @@ -273,7 +249,7 @@ func NewSessionSpawner( } for _, workspaceID := range workspaces { - if err := spawnSession(ctx, sessions, cfg.Memory.Dream.Agent, goal, prompt, workspaceID, spawnerCfg.stopTimeout); err != nil { + if err := spawnSession(ctx, sessions, cfg.Memory.Dream.Agent, goal, prompt, workspaceID, defaultSessionStopTimeout); err != nil { return err } } diff --git a/internal/memory/dream.go b/internal/memory/dream.go index f537c0def..885749a2c 100644 --- a/internal/memory/dream.go +++ b/internal/memory/dream.go @@ -160,8 +160,7 @@ func WithLogger(logger *slog.Logger) Option { } } -// WithGoal overrides the goal passed to the session spawner. 
-func WithGoal(goal string) Option { +func withGoal(goal string) Option { return func(service *Service) { if trimmed := strings.TrimSpace(goal); trimmed != "" { service.goal = trimmed diff --git a/internal/memory/dream_test.go b/internal/memory/dream_test.go index 85efb5c11..66ee77f0a 100644 --- a/internal/memory/dream_test.go +++ b/internal/memory/dream_test.go @@ -56,7 +56,7 @@ func TestServiceConstructionOverridesDefaults(t *testing.T) { WithMinHours(12), WithMinSessions(5), WithLogger(logger), - WithGoal("custom-goal"), + withGoal("custom-goal"), ) if service.memStore != store { @@ -254,7 +254,7 @@ func TestServiceRunCallsSessionSpawnerWithGoalPromptAndWorkspaceID(t *testing.T) workspaceID := "ws-dream" service := NewService( withLock(lock), - WithGoal("custom-goal"), + withGoal("custom-goal"), WithMemoryStore(NewStore(globalMemoryDir)), WithWorkspaceResolver(&fakeDreamWorkspaceResolver{ resolved: workspacepkg.ResolvedWorkspace{ diff --git a/internal/memory/staleness.go b/internal/memory/staleness.go index 53a126df5..32569eb47 100644 --- a/internal/memory/staleness.go +++ b/internal/memory/staleness.go @@ -5,8 +5,7 @@ import ( "time" ) -// AgeDays reports the number of elapsed calendar-day boundaries since modTime using the supplied clock value. -func AgeDays(modTime time.Time, now time.Time) int { +func ageDays(modTime time.Time, now time.Time) int { days := calendarDayNumber(now.In(modTime.Location())) - calendarDayNumber(modTime.In(modTime.Location())) if days < 0 { return 0 @@ -15,9 +14,8 @@ func AgeDays(modTime time.Time, now time.Time) int { return days } -// AgeText returns a human-readable label for the memory age using the supplied clock value. 
-func AgeText(modTime time.Time, now time.Time) string { - switch age := AgeDays(modTime, now); age { +func ageText(modTime time.Time, now time.Time) string { + switch age := ageDays(modTime, now); age { case 0: return "today" case 1: @@ -27,9 +25,8 @@ func AgeText(modTime time.Time, now time.Time) string { } } -// FreshnessWarning returns a staleness caveat for memories older than one day using the supplied clock value. -func FreshnessWarning(modTime time.Time, now time.Time) string { - age := AgeDays(modTime, now) +func freshnessWarning(modTime time.Time, now time.Time) string { + age := ageDays(modTime, now) if age <= 1 { return "" } diff --git a/internal/memory/store_test.go b/internal/memory/store_test.go index a242ddc04..abb5d0399 100644 --- a/internal/memory/store_test.go +++ b/internal/memory/store_test.go @@ -657,29 +657,89 @@ func TestStalenessHelpers(t *testing.T) { yesterday := today.Add(-24 * time.Hour) threeDaysAgo := today.Add(-72 * time.Hour) - if got := AgeDays(today, now); got != 0 { - t.Fatalf("AgeDays(today) = %d, want 0", got) - } - if got := AgeDays(yesterday, now); got != 1 { - t.Fatalf("AgeDays(yesterday) = %d, want 1", got) - } - if got := AgeText(today, now); got != "today" { - t.Fatalf("AgeText(today) = %q, want %q", got, "today") - } - if got := AgeText(yesterday, now); got != "yesterday" { - t.Fatalf("AgeText(yesterday) = %q, want %q", got, "yesterday") - } - if got := AgeText(threeDaysAgo, now); got != "3 days ago" { - t.Fatalf("AgeText(threeDaysAgo) = %q, want %q", got, "3 days ago") - } - if got := FreshnessWarning(today, now); got != "" { - t.Fatalf("FreshnessWarning(today) = %q, want empty", got) - } - if got := FreshnessWarning(yesterday, now); got != "" { - t.Fatalf("FreshnessWarning(yesterday) = %q, want empty", got) + testCases := []struct { + name string + run func(*testing.T) + }{ + { + name: "Should return zero days for today", + run: func(t *testing.T) { + t.Parallel() + if got := ageDays(today, now); got != 0 { + 
t.Fatalf("ageDays(today) = %d, want 0", got) + } + }, + }, + { + name: "Should return one day for yesterday", + run: func(t *testing.T) { + t.Parallel() + if got := ageDays(yesterday, now); got != 1 { + t.Fatalf("ageDays(yesterday) = %d, want 1", got) + } + }, + }, + { + name: "Should render today age text", + run: func(t *testing.T) { + t.Parallel() + if got := ageText(today, now); got != "today" { + t.Fatalf("ageText(today) = %q, want %q", got, "today") + } + }, + }, + { + name: "Should render yesterday age text", + run: func(t *testing.T) { + t.Parallel() + if got := ageText(yesterday, now); got != "yesterday" { + t.Fatalf("ageText(yesterday) = %q, want %q", got, "yesterday") + } + }, + }, + { + name: "Should render multi-day age text", + run: func(t *testing.T) { + t.Parallel() + if got := ageText(threeDaysAgo, now); got != "3 days ago" { + t.Fatalf("ageText(threeDaysAgo) = %q, want %q", got, "3 days ago") + } + }, + }, + { + name: "Should omit freshness warning for today", + run: func(t *testing.T) { + t.Parallel() + if got := freshnessWarning(today, now); got != "" { + t.Fatalf("freshnessWarning(today) = %q, want empty", got) + } + }, + }, + { + name: "Should omit freshness warning for yesterday", + run: func(t *testing.T) { + t.Parallel() + if got := freshnessWarning(yesterday, now); got != "" { + t.Fatalf("freshnessWarning(yesterday) = %q, want empty", got) + } + }, + }, + { + name: "Should warn for stale memories", + run: func(t *testing.T) { + t.Parallel() + if got := freshnessWarning(threeDaysAgo, now); !strings.Contains(got, "3 days old") { + t.Fatalf("freshnessWarning(threeDaysAgo) = %q, want age caveat", got) + } + }, + }, } - if got := FreshnessWarning(threeDaysAgo, now); !strings.Contains(got, "3 days old") { - t.Fatalf("FreshnessWarning(threeDaysAgo) = %q, want age caveat", got) + + for _, tt := range testCases { + tt := tt + t.Run(tt.name, func(t *testing.T) { + tt.run(t) + }) } } diff --git a/internal/session/hooks.go b/internal/session/hooks.go 
new file mode 100644 index 000000000..1b793b8cf --- /dev/null +++ b/internal/session/hooks.go @@ -0,0 +1,208 @@ +package session + +import ( + "context" + + hookspkg "github.com/pedronauck/agh/internal/hooks" +) + +// SessionLifecycleHooks groups create/resume/stop session lifecycle hook dispatch. +type SessionLifecycleHooks interface { + DispatchSessionPreCreate(context.Context, hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) + DispatchSessionPostCreate(context.Context, hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) + DispatchSessionPreResume(context.Context, hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) + DispatchSessionPostResume(context.Context, hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error) + DispatchSessionPreStop(context.Context, hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) + DispatchSessionPostStop(context.Context, hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) +} + +// PromptHooks groups prompt assembly and user-input hook dispatch. +type PromptHooks interface { + DispatchInputPreSubmit(context.Context, hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) + DispatchPromptPostAssemble(context.Context, hookspkg.PromptPayload) (hookspkg.PromptPayload, error) +} + +// EventHooks groups event-record persistence hook dispatch. +type EventHooks interface { + DispatchEventPreRecord(context.Context, hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) + DispatchEventPostRecord(context.Context, hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) +} + +// AgentHooks groups agent start and stop lifecycle hook dispatch. 
+type AgentHooks interface { + DispatchAgentPreStart(context.Context, hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) + DispatchAgentSpawned(context.Context, hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) + DispatchAgentCrashed(context.Context, hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error) + DispatchAgentStopped(context.Context, hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) +} + +// ConversationHooks groups turn/message hook dispatch used during prompt streaming. +type ConversationHooks interface { + DispatchTurnStart(context.Context, hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) + DispatchTurnEnd(context.Context, hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) + DispatchMessageStart(context.Context, hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) + DispatchMessageDelta(context.Context, hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) + DispatchMessageEnd(context.Context, hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) +} + +// CompactionHooks groups context compaction hook dispatch. +type CompactionHooks interface { + DispatchContextPreCompact(context.Context, hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) + DispatchContextPostCompact(context.Context, hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) +} + +// HookSet collects the grouped session hook domains. Nil groups are treated as +// no-op implementations so callers only provide the domains they exercise. 
+type HookSet struct { + Session SessionLifecycleHooks + Prompt PromptHooks + Events EventHooks + Agent AgentHooks + Conversation ConversationHooks + Compaction CompactionHooks +} + +var _ SessionLifecycleHooks = noopSessionLifecycleHooks{} +var _ PromptHooks = noopPromptHooks{} +var _ EventHooks = noopEventHooks{} +var _ AgentHooks = noopAgentHooks{} +var _ ConversationHooks = noopConversationHooks{} +var _ CompactionHooks = noopCompactionHooks{} + +func (h HookSet) session() SessionLifecycleHooks { + if h.Session != nil { + return h.Session + } + return noopSessionLifecycleHooks{} +} + +func (h HookSet) prompt() PromptHooks { + if h.Prompt != nil { + return h.Prompt + } + return noopPromptHooks{} +} + +func (h HookSet) events() EventHooks { + if h.Events != nil { + return h.Events + } + return noopEventHooks{} +} + +func (h HookSet) agent() AgentHooks { + if h.Agent != nil { + return h.Agent + } + return noopAgentHooks{} +} + +func (h HookSet) conversation() ConversationHooks { + if h.Conversation != nil { + return h.Conversation + } + return noopConversationHooks{} +} + +func (h HookSet) compaction() CompactionHooks { + if h.Compaction != nil { + return h.Compaction + } + return noopCompactionHooks{} +} + +type noopSessionLifecycleHooks struct{} + +func (noopSessionLifecycleHooks) DispatchSessionPreCreate(_ context.Context, payload hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) { + return payload, nil +} + +func (noopSessionLifecycleHooks) DispatchSessionPostCreate(_ context.Context, payload hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) { + return payload, nil +} + +func (noopSessionLifecycleHooks) DispatchSessionPreResume(_ context.Context, payload hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) { + return payload, nil +} + +func (noopSessionLifecycleHooks) DispatchSessionPostResume(_ context.Context, payload hookspkg.SessionPostResumePayload) 
(hookspkg.SessionPostResumePayload, error) { + return payload, nil +} + +func (noopSessionLifecycleHooks) DispatchSessionPreStop(_ context.Context, payload hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) { + return payload, nil +} + +func (noopSessionLifecycleHooks) DispatchSessionPostStop(_ context.Context, payload hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) { + return payload, nil +} + +type noopPromptHooks struct{} + +func (noopPromptHooks) DispatchInputPreSubmit(_ context.Context, payload hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) { + return payload, nil +} + +func (noopPromptHooks) DispatchPromptPostAssemble(_ context.Context, payload hookspkg.PromptPayload) (hookspkg.PromptPayload, error) { + return payload, nil +} + +type noopEventHooks struct{} + +func (noopEventHooks) DispatchEventPreRecord(_ context.Context, payload hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) { + return payload, nil +} + +func (noopEventHooks) DispatchEventPostRecord(_ context.Context, payload hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) { + return payload, nil +} + +type noopAgentHooks struct{} + +func (noopAgentHooks) DispatchAgentPreStart(_ context.Context, payload hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) { + return payload, nil +} + +func (noopAgentHooks) DispatchAgentSpawned(_ context.Context, payload hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) { + return payload, nil +} + +func (noopAgentHooks) DispatchAgentCrashed(_ context.Context, payload hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error) { + return payload, nil +} + +func (noopAgentHooks) DispatchAgentStopped(_ context.Context, payload hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) { + return payload, nil +} + +type noopConversationHooks struct{} + +func (noopConversationHooks) DispatchTurnStart(_ 
context.Context, payload hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) { + return payload, nil +} + +func (noopConversationHooks) DispatchTurnEnd(_ context.Context, payload hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) { + return payload, nil +} + +func (noopConversationHooks) DispatchMessageStart(_ context.Context, payload hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) { + return payload, nil +} + +func (noopConversationHooks) DispatchMessageDelta(_ context.Context, payload hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) { + return payload, nil +} + +func (noopConversationHooks) DispatchMessageEnd(_ context.Context, payload hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) { + return payload, nil +} + +type noopCompactionHooks struct{} + +func (noopCompactionHooks) DispatchContextPreCompact(_ context.Context, payload hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) { + return payload, nil +} + +func (noopCompactionHooks) DispatchContextPostCompact(_ context.Context, payload hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) { + return payload, nil +} diff --git a/internal/session/interfaces.go b/internal/session/interfaces.go index a4aaba405..580362d85 100644 --- a/internal/session/interfaces.go +++ b/internal/session/interfaces.go @@ -8,7 +8,6 @@ import ( "github.com/pedronauck/agh/internal/acp" aghconfig "github.com/pedronauck/agh/internal/config" - hookspkg "github.com/pedronauck/agh/internal/hooks" skillspkg "github.com/pedronauck/agh/internal/skills" "github.com/pedronauck/agh/internal/store" workspacepkg "github.com/pedronauck/agh/internal/workspace" @@ -155,32 +154,6 @@ type Notifier interface { OnAgentEvent(ctx context.Context, sessionID string, event any) } -// HookDispatcher exposes the typed hook dispatch surface consumed directly by -// the session manager. 
-type HookDispatcher interface { - DispatchSessionPreCreate(context.Context, hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) - DispatchSessionPostCreate(context.Context, hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) - DispatchSessionPreResume(context.Context, hookspkg.SessionPreResumePayload) (hookspkg.SessionPreResumePayload, error) - DispatchSessionPostResume(context.Context, hookspkg.SessionPostResumePayload) (hookspkg.SessionPostResumePayload, error) - DispatchSessionPreStop(context.Context, hookspkg.SessionPreStopPayload) (hookspkg.SessionPreStopPayload, error) - DispatchSessionPostStop(context.Context, hookspkg.SessionPostStopPayload) (hookspkg.SessionPostStopPayload, error) - DispatchInputPreSubmit(context.Context, hookspkg.InputPreSubmitPayload) (hookspkg.InputPreSubmitPayload, error) - DispatchPromptPostAssemble(context.Context, hookspkg.PromptPayload) (hookspkg.PromptPayload, error) - DispatchEventPreRecord(context.Context, hookspkg.EventPreRecordPayload) (hookspkg.EventPreRecordPayload, error) - DispatchEventPostRecord(context.Context, hookspkg.EventPostRecordPayload) (hookspkg.EventPostRecordPayload, error) - DispatchAgentPreStart(context.Context, hookspkg.AgentPreStartPayload) (hookspkg.AgentPreStartPayload, error) - DispatchAgentSpawned(context.Context, hookspkg.AgentSpawnedPayload) (hookspkg.AgentSpawnedPayload, error) - DispatchAgentCrashed(context.Context, hookspkg.AgentCrashedPayload) (hookspkg.AgentCrashedPayload, error) - DispatchAgentStopped(context.Context, hookspkg.AgentStoppedPayload) (hookspkg.AgentStoppedPayload, error) - DispatchTurnStart(context.Context, hookspkg.TurnStartPayload) (hookspkg.TurnStartPayload, error) - DispatchTurnEnd(context.Context, hookspkg.TurnEndPayload) (hookspkg.TurnEndPayload, error) - DispatchMessageStart(context.Context, hookspkg.MessageStartPayload) (hookspkg.MessageStartPayload, error) - DispatchMessageDelta(context.Context, 
hookspkg.MessageDeltaPayload) (hookspkg.MessageDeltaPayload, error) - DispatchMessageEnd(context.Context, hookspkg.MessageEndPayload) (hookspkg.MessageEndPayload, error) - DispatchContextPreCompact(context.Context, hookspkg.ContextPreCompactPayload) (hookspkg.ContextPreCompactPayload, error) - DispatchContextPostCompact(context.Context, hookspkg.ContextPostCompactPayload) (hookspkg.ContextPostCompactPayload, error) -} - // PromptAssembler assembles the prompt context for a new session start. type PromptAssembler interface { Assemble(ctx context.Context, agent aghconfig.AgentDef, workspace workspacepkg.ResolvedWorkspace) (string, error) diff --git a/internal/session/manager.go b/internal/session/manager.go index 539a39de0..c77cc28f8 100644 --- a/internal/session/manager.go +++ b/internal/session/manager.go @@ -62,7 +62,7 @@ type Manager struct { logger *slog.Logger driver AgentDriver notifier Notifier - hooks HookDispatcher + hooks HookSet skillRegistry SkillRegistry mcpResolver MCPResolver homePaths aghconfig.HomePaths @@ -112,11 +112,11 @@ func WithNotifier(notifier Notifier) Option { } } -// WithHookDispatcher injects the typed hook dispatch surface used by the -// session manager for lifecycle and runtime hook points. -func WithHookDispatcher(dispatcher HookDispatcher) Option { +// WithHookSet injects the grouped hook dispatch surface used by the session +// manager for lifecycle and runtime hook points. 
+func WithHookSet(hooks HookSet) Option { return func(manager *Manager) { - manager.hooks = dispatcher + manager.hooks = hooks } } diff --git a/internal/session/manager_hooks.go b/internal/session/manager_hooks.go index 1e76cf11a..2c659e090 100644 --- a/internal/session/manager_hooks.go +++ b/internal/session/manager_hooks.go @@ -13,6 +13,7 @@ import ( aghconfig "github.com/pedronauck/agh/internal/config" hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/workref" ) const ( @@ -53,11 +54,11 @@ func newPromptTurnDispatchState(session *Session, turnID string, inputClass stri } func (m *Manager) dispatchSessionPreCreate(ctx context.Context, opts CreateOpts) (CreateOpts, error) { - if m == nil || m.hooks == nil { + if m == nil { return opts, nil } - payload, err := m.hooks.DispatchSessionPreCreate(ctx, hookspkg.SessionPreCreatePayload{ + payload, err := m.hooks.session().DispatchSessionPreCreate(ctx, hookspkg.SessionPreCreatePayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookSessionPreCreate, Timestamp: m.now(), @@ -100,11 +101,11 @@ func (m *Manager) dispatchSessionPreCreate(ctx context.Context, opts CreateOpts) } func (m *Manager) dispatchSessionPreResume(ctx context.Context, meta store.SessionMeta) (store.SessionMeta, error) { - if m == nil || m.hooks == nil { + if m == nil { return meta, nil } - payload, err := m.hooks.DispatchSessionPreResume(ctx, hookspkg.SessionPreResumePayload{ + payload, err := m.hooks.session().DispatchSessionPreResume(ctx, hookspkg.SessionPreResumePayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookSessionPreResume, Timestamp: m.now(), @@ -142,12 +143,12 @@ func (m *Manager) dispatchSessionPostResume(ctx context.Context, session *Sessio } func (m *Manager) dispatchSessionPreStop(ctx context.Context, session *Session) error { - if m == nil || m.hooks == nil || session == nil { + if m == nil || session == nil { return nil } ctx = 
hookDispatchContext(ctx, session) - payload, err := m.hooks.DispatchSessionPreStop(ctx, hookSessionLifecyclePayload(session, hookspkg.HookSessionPreStop, m.now())) + payload, err := m.hooks.session().DispatchSessionPreStop(ctx, hookSessionLifecyclePayload(session, hookspkg.HookSessionPreStop, m.now())) if err != nil { return fmt.Errorf("session: dispatch session.pre_stop: %w", err) } @@ -161,20 +162,21 @@ func (m *Manager) dispatchSessionPostStop(ctx context.Context, session *Session) } func (m *Manager) dispatchSessionLifecycleObservation(ctx context.Context, session *Session, event hookspkg.HookEvent) { - if m == nil || m.hooks == nil || session == nil { + if m == nil || session == nil { return } ctx = hookDispatchContext(ctx, session) payload := hookSessionLifecyclePayload(session, event, m.now()) var err error + lifecycleHooks := m.hooks.session() switch event { case hookspkg.HookSessionPostCreate: - _, err = m.hooks.DispatchSessionPostCreate(ctx, hookspkg.SessionPostCreatePayload(payload)) + _, err = lifecycleHooks.DispatchSessionPostCreate(ctx, hookspkg.SessionPostCreatePayload(payload)) case hookspkg.HookSessionPostResume: - _, err = m.hooks.DispatchSessionPostResume(ctx, hookspkg.SessionPostResumePayload(payload)) + _, err = lifecycleHooks.DispatchSessionPostResume(ctx, hookspkg.SessionPostResumePayload(payload)) case hookspkg.HookSessionPostStop: - _, err = m.hooks.DispatchSessionPostStop(ctx, hookspkg.SessionPostStopPayload(payload)) + _, err = lifecycleHooks.DispatchSessionPostStop(ctx, hookspkg.SessionPostStopPayload(payload)) default: return } @@ -184,12 +186,12 @@ func (m *Manager) dispatchSessionLifecycleObservation(ctx context.Context, sessi } func (m *Manager) dispatchInputPreSubmit(ctx context.Context, session *Session, turnID string, message string) (string, error) { - if m == nil || m.hooks == nil { + if m == nil { return message, nil } ctx = hookDispatchContext(ctx, session) - payload, err := m.hooks.DispatchInputPreSubmit(ctx, 
hookspkg.InputPreSubmitPayload{ + payload, err := m.hooks.prompt().DispatchInputPreSubmit(ctx, hookspkg.InputPreSubmitPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookInputPreSubmit, Timestamp: m.now(), @@ -207,11 +209,11 @@ func (m *Manager) dispatchInputPreSubmit(ctx context.Context, session *Session, } func (m *Manager) dispatchPromptPostAssemble(ctx context.Context, sessionCtx hookspkg.SessionContext, prompt string) (string, error) { - if m == nil || m.hooks == nil { + if m == nil { return prompt, nil } - payload, err := m.hooks.DispatchPromptPostAssemble(ctx, hookspkg.PromptPayload{ + payload, err := m.hooks.prompt().DispatchPromptPostAssemble(ctx, hookspkg.PromptPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookPromptPostAssemble, Timestamp: m.now(), @@ -228,12 +230,12 @@ func (m *Manager) dispatchPromptPostAssemble(ctx context.Context, sessionCtx hoo } func (m *Manager) dispatchTurnStart(ctx context.Context, state *promptTurnDispatchState) error { - if m == nil || m.hooks == nil || state == nil { + if m == nil || state == nil { return nil } ctx = hookDispatchContext(ctx, state.session) - _, err := m.hooks.DispatchTurnStart(ctx, hookspkg.TurnStartPayload{ + _, err := m.hooks.conversation().DispatchTurnStart(ctx, hookspkg.TurnStartPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookTurnStart, Timestamp: m.now(), @@ -255,12 +257,12 @@ func (m *Manager) dispatchTurnEnd(ctx context.Context, state *promptTurnDispatch return } state.turnEnded = true - if m == nil || m.hooks == nil { + if m == nil { return } ctx = hookDispatchContext(ctx, state.session) - _, err := m.hooks.DispatchTurnEnd(ctx, hookspkg.TurnEndPayload{ + _, err := m.hooks.conversation().DispatchTurnEnd(ctx, hookspkg.TurnEndPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookTurnEnd, Timestamp: hookTimestamp(m.now(), eventTime), @@ -311,12 +313,12 @@ func (m *Manager) dispatchMessageStart(ctx context.Context, state *promptTurnDis role: 
strings.TrimSpace(role), } state.openMessage = message - if m == nil || m.hooks == nil { + if m == nil { return event } ctx = hookDispatchContext(ctx, state.session) - payload, err := m.hooks.DispatchMessageStart(ctx, hookspkg.MessageStartPayload{ + payload, err := m.hooks.conversation().DispatchMessageStart(ctx, hookspkg.MessageStartPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookMessageStart, Timestamp: hookTimestamp(m.now(), event.Timestamp), @@ -340,12 +342,12 @@ func (m *Manager) dispatchMessageStart(ctx context.Context, state *promptTurnDis } func (m *Manager) dispatchMessageDelta(ctx context.Context, state *promptTurnDispatchState, event acp.AgentEvent, deltaType string) { - if m == nil || m.hooks == nil || state == nil || state.openMessage == nil { + if m == nil || state == nil || state.openMessage == nil { return } ctx = hookDispatchContext(ctx, state.session) - _, err := m.hooks.DispatchMessageDelta(ctx, hookspkg.MessageDeltaPayload{ + _, err := m.hooks.conversation().DispatchMessageDelta(ctx, hookspkg.MessageDeltaPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookMessageDelta, Timestamp: hookTimestamp(m.now(), event.Timestamp), @@ -370,12 +372,12 @@ func (m *Manager) finishPromptMessage(ctx context.Context, state *promptTurnDisp message := state.openMessage state.openMessage = nil - if m == nil || m.hooks == nil { + if m == nil { return } ctx = hookDispatchContext(ctx, state.session) - _, err := m.hooks.DispatchMessageEnd(ctx, hookspkg.MessageEndPayload{ + _, err := m.hooks.conversation().DispatchMessageEnd(ctx, hookspkg.MessageEndPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookMessageEnd, Timestamp: hookTimestamp(m.now(), eventTime), @@ -394,12 +396,12 @@ func (m *Manager) finishPromptMessage(ctx context.Context, state *promptTurnDisp } func (m *Manager) dispatchEventPreRecord(ctx context.Context, session *Session, event acp.AgentEvent, content string) { - if m == nil || m.hooks == nil { + if m == nil { 
return } ctx = hookDispatchContext(ctx, session) - _, err := m.hooks.DispatchEventPreRecord(ctx, hookspkg.EventPreRecordPayload{ + _, err := m.hooks.events().DispatchEventPreRecord(ctx, hookspkg.EventPreRecordPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookEventPreRecord, Timestamp: hookTimestamp(m.now(), event.Timestamp), @@ -448,8 +450,8 @@ func (m *Manager) runContextCompaction( } var err error - if m != nil && m.hooks != nil { - prePayload, err = m.hooks.DispatchContextPreCompact(ctx, prePayload) + if m != nil { + prePayload, err = m.hooks.compaction().DispatchContextPreCompact(ctx, prePayload) if err != nil { return hookspkg.ContextPostCompactPayload{}, fmt.Errorf("session: dispatch context.pre_compact: %w", err) } @@ -480,8 +482,8 @@ func (m *Manager) runContextCompaction( postPayload.ContextBlocks = cloneSessionContextBlocks(prePayload.ContextBlocks) } - if m != nil && m.hooks != nil { - if _, err := m.hooks.DispatchContextPostCompact(ctx, postPayload); err != nil { + if m != nil { + if _, err := m.hooks.compaction().DispatchContextPostCompact(ctx, postPayload); err != nil { m.warnHookDispatch(ctx, session, hookspkg.HookContextPostCompact, err) } } @@ -490,12 +492,12 @@ func (m *Manager) runContextCompaction( } func (m *Manager) dispatchEventPostRecord(ctx context.Context, session *Session, event acp.AgentEvent, content string) { - if m == nil || m.hooks == nil { + if m == nil { return } ctx = hookDispatchContext(ctx, session) - _, err := m.hooks.DispatchEventPostRecord(ctx, hookspkg.EventPostRecordPayload{ + _, err := m.hooks.events().DispatchEventPostRecord(ctx, hookspkg.EventPostRecordPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookEventPostRecord, Timestamp: hookTimestamp(m.now(), event.Timestamp), @@ -511,13 +513,13 @@ func (m *Manager) dispatchEventPostRecord(ctx context.Context, session *Session, } func (m *Manager) dispatchAgentPreStart(ctx context.Context, session *Session, resolved aghconfig.ResolvedAgent, opts 
acp.StartOpts) (acp.StartOpts, error) { - if m == nil || m.hooks == nil { + if m == nil { return opts, nil } ctx = hookDispatchContext(ctx, session) command, args := splitCommand(opts.Command) - payload, err := m.hooks.DispatchAgentPreStart(ctx, hookspkg.AgentPreStartPayload{ + payload, err := m.hooks.agent().DispatchAgentPreStart(ctx, hookspkg.AgentPreStartPayload{ PayloadBase: hookspkg.PayloadBase{ Event: hookspkg.HookAgentPreStart, Timestamp: m.now(), @@ -552,7 +554,7 @@ func (m *Manager) dispatchAgentStopped(ctx context.Context, session *Session, pr } func (m *Manager) dispatchAgentObservation(ctx context.Context, session *Session, proc *AgentProcess, resolved aghconfig.ResolvedAgent, waitErr error, event hookspkg.HookEvent) { - if m == nil || m.hooks == nil { + if m == nil { return } ctx = hookDispatchContext(ctx, session) @@ -576,13 +578,14 @@ func (m *Manager) dispatchAgentObservation(ctx context.Context, session *Session } var err error + agentHooks := m.hooks.agent() switch event { case hookspkg.HookAgentSpawned: - _, err = m.hooks.DispatchAgentSpawned(ctx, hookspkg.AgentSpawnedPayload(payload)) + _, err = agentHooks.DispatchAgentSpawned(ctx, hookspkg.AgentSpawnedPayload(payload)) case hookspkg.HookAgentCrashed: - _, err = m.hooks.DispatchAgentCrashed(ctx, hookspkg.AgentCrashedPayload(payload)) + _, err = agentHooks.DispatchAgentCrashed(ctx, hookspkg.AgentCrashedPayload(payload)) case hookspkg.HookAgentStopped: - _, err = m.hooks.DispatchAgentStopped(ctx, hookspkg.AgentStoppedPayload(payload)) + _, err = agentHooks.DispatchAgentStopped(ctx, hookspkg.AgentStoppedPayload(payload)) default: return } @@ -611,13 +614,14 @@ func hookSessionContext(session *Session) hookspkg.SessionContext { return hookspkg.SessionContext{} } + ref := workref.NewRoot(info.WorkspaceID, info.Workspace) return hookspkg.SessionContext{ SessionID: strings.TrimSpace(info.ID), SessionName: strings.TrimSpace(info.Name), SessionType: string(info.Type), AgentName: 
strings.TrimSpace(info.AgentName), - WorkspaceID: strings.TrimSpace(info.WorkspaceID), - Workspace: strings.TrimSpace(info.Workspace), + WorkspaceID: ref.WorkspaceID, + Workspace: ref.Workspace, ACPSessionID: strings.TrimSpace(info.ACPSessionID), State: string(info.State), CreatedAt: info.CreatedAt, diff --git a/internal/session/manager_hooks_test.go b/internal/session/manager_hooks_test.go index df6f37567..7a0e92440 100644 --- a/internal/session/manager_hooks_test.go +++ b/internal/session/manager_hooks_test.go @@ -38,7 +38,7 @@ func TestCreateFailsWhenSessionPreCreateDenied(t *testing.T) { }, ) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) _, err := h.manager.Create(testutil.Context(t), CreateOpts{ AgentName: "coder", Workspace: h.workspaceID, @@ -77,7 +77,7 @@ func TestCreateUsesPatchedSessionPreCreatePayload(t *testing.T) { }, ) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) session, err := h.manager.Create(testutil.Context(t), CreateOpts{ AgentName: "coder", Name: "original", @@ -121,7 +121,7 @@ func TestPostCreateHookFiresAfterSessionActive(t *testing.T) { }, ) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) session := createSession(t, h) t.Cleanup(func() { _ = h.manager.Stop(testutil.Context(t), session.ID) @@ -165,7 +165,7 @@ func TestResumeUsesPatchedPreResumePayloadAndFiresPostResume(t *testing.T) { }, } - h.manager = newManagerWithHarness(t, h, WithHookDispatcher(dispatcher)) + h.manager = newManagerWithHarness(t, h, WithHookSet(fullHookSet(dispatcher))) resumed, err := h.manager.Resume(testutil.Context(t), session.ID) if err != nil { t.Fatalf("Resume() error = %v", err) @@ -209,7 +209,7 @@ func TestPromptUsesPatchedInputMessage(t *testing.T) { }, ) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) session := createSession(t, h) 
t.Cleanup(func() { _ = h.manager.Stop(testutil.Context(t), session.ID) @@ -252,7 +252,7 @@ func TestCreateUsesPatchedPrompt(t *testing.T) { }, ) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) session := createSession(t, h) t.Cleanup(func() { _ = h.manager.Stop(testutil.Context(t), session.ID) @@ -282,7 +282,7 @@ func TestAgentCrashedHookFiresOnProcessCrash(t *testing.T) { }, ) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) session := createSession(t, h) h.driver.lastProcess().crash(errors.New("boom"), "stderr trace") @@ -318,7 +318,7 @@ func TestRecordEventDispatchesAroundPersistence(t *testing.T) { return payload, nil }, } - h := newHarness(t, WithHookDispatcher(dispatcher)) + h := newHarness(t, WithHookSet(fullHookSet(dispatcher))) recorder := &orderedRecorder{ onRecord: func(event store.SessionEvent) { @@ -394,7 +394,7 @@ func TestPromptDispatchesTurnAndMessageHooksAtACPBoundaries(t *testing.T) { }, } - h := newHarness(t, WithHookDispatcher(dispatcher)) + h := newHarness(t, WithHookSet(fullHookSet(dispatcher))) session := createSession(t, h) t.Cleanup(func() { _ = h.manager.Stop(testutil.Context(t), session.ID) @@ -470,7 +470,7 @@ func TestMessageStartPatchUpdatesFirstAssistantChunk(t *testing.T) { }, ) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) session := createSession(t, h) t.Cleanup(func() { _ = h.manager.Stop(testutil.Context(t), session.ID) @@ -535,7 +535,7 @@ func TestMessageDeltaAsyncHooksDoNotBlockPromptStreaming(t *testing.T) { } t.Cleanup(hooks.Close) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) session := createSession(t, h) t.Cleanup(func() { _ = h.manager.Stop(testutil.Context(t), session.ID) @@ -621,7 +621,7 @@ func TestContextCompactionDispatchesHooksAndUsesPatchedParams(t *testing.T) { }, } - h := newHarness(t, 
WithHookDispatcher(dispatcher)) + h := newHarness(t, WithHookSet(fullHookSet(dispatcher))) result, err := h.manager.runContextCompaction( testutil.Context(t), session, @@ -683,6 +683,24 @@ func newNativeHookDispatcher(t *testing.T, decls []hookspkg.HookDecl, executors return hooks } +func fullHookSet(runtime interface { + SessionLifecycleHooks + PromptHooks + EventHooks + AgentHooks + ConversationHooks + CompactionHooks +}) HookSet { + return HookSet{ + Session: runtime, + Prompt: runtime, + Events: runtime, + Agent: runtime, + Conversation: runtime, + Compaction: runtime, + } +} + type spyHookDispatcher struct { dispatchSessionPreCreateFn func(context.Context, hookspkg.SessionPreCreatePayload) (hookspkg.SessionPreCreatePayload, error) dispatchSessionPostCreateFn func(context.Context, hookspkg.SessionPostCreatePayload) (hookspkg.SessionPostCreatePayload, error) diff --git a/internal/session/manager_integration_test.go b/internal/session/manager_integration_test.go index 4a70b309c..684586cdb 100644 --- a/internal/session/manager_integration_test.go +++ b/internal/session/manager_integration_test.go @@ -200,7 +200,7 @@ func TestManagerIntegrationFullLifecycleHooksFireInOrder(t *testing.T) { }, } - h := newHarness(t, WithHookDispatcher(dispatcher)) + h := newHarness(t, WithHookSet(fullHookSet(dispatcher))) session := createSession(t, h) eventsCh, err := h.manager.Prompt(testutil.Context(t), session.ID, "hello") @@ -279,7 +279,7 @@ func TestManagerIntegrationContextCompactionUsesPatchedParams(t *testing.T) { }, ) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) session := createSession(t, h) t.Cleanup(func() { _ = h.manager.Stop(testutil.Context(t), session.ID) @@ -336,7 +336,7 @@ func TestManagerIntegrationPreStopRequiredHookErrorPreventsCleanStop(t *testing. 
}, ) - h := newHarness(t, WithHookDispatcher(hooks)) + h := newHarness(t, WithHookSet(fullHookSet(hooks))) session := createSession(t, h) err := h.manager.Stop(testutil.Context(t), session.ID) @@ -350,7 +350,7 @@ func TestManagerIntegrationPreStopRequiredHookErrorPreventsCleanStop(t *testing. t.Fatalf("Get(%q) = missing, want active session after failed stop", session.ID) } - h.manager.hooks = nil + h.manager.hooks = HookSet{} if cleanupErr := h.manager.Stop(testutil.Context(t), session.ID); cleanupErr != nil { t.Fatalf("cleanup Stop() error = %v", cleanupErr) } diff --git a/internal/session/manager_lifecycle.go b/internal/session/manager_lifecycle.go index c8a569d34..2aedce4ce 100644 --- a/internal/session/manager_lifecycle.go +++ b/internal/session/manager_lifecycle.go @@ -10,7 +10,6 @@ import ( "github.com/pedronauck/agh/internal/acp" aghconfig "github.com/pedronauck/agh/internal/config" - hookspkg "github.com/pedronauck/agh/internal/hooks" "github.com/pedronauck/agh/internal/store" workspacepkg "github.com/pedronauck/agh/internal/workspace" ) @@ -21,128 +20,12 @@ func (m *Manager) Create(ctx context.Context, opts CreateOpts) (_ *Session, err return nil, errors.New("session: create context is required") } - opts, err = m.dispatchSessionPreCreate(ctx, opts) + spec, err := m.prepareCreateStart(ctx, opts) if err != nil { return nil, err } - resolvedWorkspace, err := m.resolveCreateWorkspace(ctx, opts) - if err != nil { - return nil, err - } - - agentName, err := aghconfig.ResolveAgentName(opts.AgentName, resolvedWorkspace.Config) - if err != nil { - return nil, fmt.Errorf("session: resolve agent name: %w", err) - } - - agentDef, err := resolveWorkspaceAgent(agentName, resolvedWorkspace) - if err != nil { - return nil, fmt.Errorf("session: resolve workspace agent %q: %w", agentName, err) - } - - sessionID := strings.TrimSpace(m.newSessionID()) - if sessionID == "" { - return nil, errors.New("session: session id generator returned empty id") - } - - startupPrompt, 
err := m.startupPrompt(ctx, hookspkg.SessionContext{ - SessionID: sessionID, - SessionName: strings.TrimSpace(opts.Name), - SessionType: string(normalizeSessionType(opts.Type)), - AgentName: strings.TrimSpace(agentName), - WorkspaceID: strings.TrimSpace(resolvedWorkspace.ID), - Workspace: strings.TrimSpace(resolvedWorkspace.RootDir), - State: string(StateStarting), - }, agentDef, resolvedWorkspace) - if err != nil { - return nil, err - } - agentDef.Prompt = startupPrompt - - resolved, err := resolvedWorkspace.Config.ResolveAgent(agentDef) - if err != nil { - return nil, fmt.Errorf("session: resolve agent %q: %w", agentName, err) - } - - startMCPServers, err := m.resolveStartMCPServers(ctx, resolvedWorkspace, resolved.MCPServers) - if err != nil { - return nil, err - } - - if err := m.reserve(sessionID, m.effectiveMaxSessions(resolvedWorkspace.Config)); err != nil { - return nil, err - } - defer func() { - if err != nil { - m.releaseReservation(sessionID) - } - }() - - sessionDir := filepath.Join(m.homePaths.SessionsDir, sessionID) - if err := os.MkdirAll(sessionDir, 0o755); err != nil { - return nil, fmt.Errorf("session: create session directory %q: %w", sessionDir, err) - } - - dbPath := store.SessionDBFile(sessionDir) - recorder, err := m.openStore(ctx, sessionID, dbPath) - if err != nil { - return nil, fmt.Errorf("session: open session store %q: %w", dbPath, err) - } - - var proc *AgentProcess - defer func() { - if err == nil { - return - } - err = errors.Join(err, m.cleanupFailedStart(sessionDir, recorder, proc)) - }() - - now := m.now() - session := &Session{ - ID: sessionID, - Name: strings.TrimSpace(opts.Name), - AgentName: resolved.Name, - WorkspaceID: resolvedWorkspace.ID, - Workspace: resolvedWorkspace.RootDir, - Type: normalizeSessionType(opts.Type), - State: StateStarting, - CreatedAt: now, - UpdatedAt: now, - sessionDir: sessionDir, - metaPath: store.SessionMetaFile(sessionDir), - dbPath: dbPath, - recorder: recorder, - } - - startOpts := 
acp.StartOpts{ - AgentName: resolved.Name, - Command: resolved.Command, - Cwd: resolvedWorkspace.RootDir, - AdditionalDirs: append([]string(nil), resolvedWorkspace.AdditionalDirs...), - MCPServers: startMCPServers, - Permissions: m.startPermissions(session.Type, resolved.Permissions), - SystemPrompt: resolved.Prompt, - } - startOpts, err = m.dispatchAgentPreStart(ctx, session, resolved, startOpts) - if err != nil { - return nil, err - } - - if err := m.writeMeta(session); err != nil { - return nil, err - } - - proc, err = m.driver.Start(ctx, startOpts) - if err != nil { - return nil, fmt.Errorf("session: start agent for %q: %w", sessionID, err) - } - - if err := m.activateAndWatch(ctx, session, proc, resolved, hookspkg.HookSessionPostCreate); err != nil { - return nil, err - } - - return session, nil + return m.startSession(ctx, spec) } // Stop stops an active session and persists the stopped state to disk. @@ -188,122 +71,12 @@ func (m *Manager) Resume(ctx context.Context, id string) (_ *Session, err error) ) } - meta, err = m.dispatchSessionPreResume(ctx, meta) - if err != nil { - return nil, err - } - - resolvedWorkspace, err := m.resolveResumeWorkspace(ctx, meta) - if err != nil { - return nil, err - } - - agentDef, err := resolveWorkspaceAgent(meta.AgentName, resolvedWorkspace) - if err != nil { - return nil, fmt.Errorf("session: resolve workspace agent %q: %w", meta.AgentName, err) - } - startupPrompt, err := m.startupPrompt(ctx, hookspkg.SessionContext{ - SessionID: strings.TrimSpace(meta.ID), - SessionName: strings.TrimSpace(meta.Name), - SessionType: string(normalizeSessionType(SessionType(meta.SessionType))), - AgentName: strings.TrimSpace(meta.AgentName), - WorkspaceID: strings.TrimSpace(resolvedWorkspace.ID), - Workspace: strings.TrimSpace(resolvedWorkspace.RootDir), - ACPSessionID: strings.TrimSpace(derefString(meta.ACPSessionID)), - State: string(StateStarting), - CreatedAt: meta.CreatedAt, - UpdatedAt: m.now(), - }, agentDef, resolvedWorkspace) - if 
err != nil { - return nil, err - } - agentDef.Prompt = startupPrompt - - resolved, err := resolvedWorkspace.Config.ResolveAgent(agentDef) - if err != nil { - return nil, fmt.Errorf("session: resolve agent %q: %w", meta.AgentName, err) - } - - startMCPServers, err := m.resolveStartMCPServers(ctx, resolvedWorkspace, resolved.MCPServers) + spec, err := m.prepareResumeStart(ctx, meta) if err != nil { return nil, err } - if err := m.reserve(meta.ID, m.effectiveMaxSessions(resolvedWorkspace.Config)); err != nil { - return nil, err - } - defer func() { - if err != nil { - m.releaseReservation(meta.ID) - } - }() - - dbPath := store.SessionDBFile(sessionDir) - recorder, err := m.openStore(ctx, meta.ID, dbPath) - if err != nil { - return nil, fmt.Errorf("session: open session store %q: %w", dbPath, err) - } - - var proc *AgentProcess - defer func() { - if err == nil { - return - } - err = errors.Join(err, m.cleanupFailedStart("", recorder, proc)) - }() - - createdAt := meta.CreatedAt - if createdAt.IsZero() { - createdAt = m.now() - } - session := &Session{ - ID: meta.ID, - Name: meta.Name, - AgentName: meta.AgentName, - WorkspaceID: strings.TrimSpace(meta.WorkspaceID), - Workspace: resolvedWorkspace.RootDir, - Type: normalizeSessionType(SessionType(meta.SessionType)), - State: StateStarting, - stopReason: sessionMetaStopReason(meta), - stopDetail: strings.TrimSpace(meta.StopDetail), - ACPSessionID: derefString(meta.ACPSessionID), - CreatedAt: createdAt, - UpdatedAt: m.now(), - sessionDir: sessionDir, - metaPath: metaPath, - dbPath: dbPath, - recorder: recorder, - } - - startOpts := acp.StartOpts{ - AgentName: resolved.Name, - Command: resolved.Command, - Cwd: resolvedWorkspace.RootDir, - AdditionalDirs: append([]string(nil), resolvedWorkspace.AdditionalDirs...), - MCPServers: startMCPServers, - Permissions: m.startPermissions(session.Type, resolved.Permissions), - SystemPrompt: resolved.Prompt, - ResumeSessionID: derefString(meta.ACPSessionID), - } - startOpts, err = 
m.dispatchAgentPreStart(ctx, session, resolved, startOpts) - if err != nil { - return nil, err - } - - if err := m.writeMeta(session); err != nil { - return nil, err - } - - proc, err = m.driver.Start(ctx, startOpts) - if err != nil { - return nil, fmt.Errorf("session: resume agent for %q: %w", meta.ID, err) - } - - if err := m.activateAndWatch(ctx, session, proc, resolved, hookspkg.HookSessionPostResume); err != nil { - return nil, err - } - - return session, nil + return m.startSession(ctx, spec) } func (m *Manager) watchProcess(ctx context.Context, session *Session) { diff --git a/internal/session/manager_start.go b/internal/session/manager_start.go new file mode 100644 index 000000000..e0225a182 --- /dev/null +++ b/internal/session/manager_start.go @@ -0,0 +1,227 @@ +package session + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/pedronauck/agh/internal/acp" + aghconfig "github.com/pedronauck/agh/internal/config" + hookspkg "github.com/pedronauck/agh/internal/hooks" + "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/workref" + workspacepkg "github.com/pedronauck/agh/internal/workspace" +) + +type sessionStartSpec struct { + sessionID string + sessionName string + agentName string + workspace workspacepkg.ResolvedWorkspace + sessionType SessionType + postEvent hookspkg.HookEvent + startAction string + cleanupSessionDir bool + includePromptUpdatedAt bool + createdAt time.Time + acpSessionID string + stopReason store.StopReason + stopDetail string +} + +func (m *Manager) prepareCreateStart(ctx context.Context, opts CreateOpts) (sessionStartSpec, error) { + opts, err := m.dispatchSessionPreCreate(ctx, opts) + if err != nil { + return sessionStartSpec{}, err + } + + resolvedWorkspace, err := m.resolveCreateWorkspace(ctx, opts) + if err != nil { + return sessionStartSpec{}, err + } + + agentName, err := aghconfig.ResolveAgentName(opts.AgentName, resolvedWorkspace.Config) + if err 
!= nil { + return sessionStartSpec{}, fmt.Errorf("session: resolve agent name: %w", err) + } + + sessionID := strings.TrimSpace(m.newSessionID()) + if sessionID == "" { + return sessionStartSpec{}, errors.New("session: session id generator returned empty id") + } + + return sessionStartSpec{ + sessionID: sessionID, + sessionName: strings.TrimSpace(opts.Name), + agentName: strings.TrimSpace(agentName), + workspace: resolvedWorkspace, + sessionType: normalizeSessionType(opts.Type), + postEvent: hookspkg.HookSessionPostCreate, + startAction: "start", + cleanupSessionDir: true, + }, nil +} + +func (m *Manager) prepareResumeStart(ctx context.Context, meta store.SessionMeta) (sessionStartSpec, error) { + meta, err := m.dispatchSessionPreResume(ctx, meta) + if err != nil { + return sessionStartSpec{}, err + } + + resolvedWorkspace, err := m.resolveResumeWorkspace(ctx, meta) + if err != nil { + return sessionStartSpec{}, err + } + + return sessionStartSpec{ + sessionID: meta.ID, + sessionName: meta.Name, + agentName: meta.AgentName, + workspace: resolvedWorkspace, + sessionType: normalizeSessionType(SessionType(meta.SessionType)), + postEvent: hookspkg.HookSessionPostResume, + startAction: "resume", + includePromptUpdatedAt: true, + createdAt: meta.CreatedAt, + acpSessionID: derefString(meta.ACPSessionID), + stopReason: sessionMetaStopReason(meta), + stopDetail: strings.TrimSpace(meta.StopDetail), + }, nil +} + +func (m *Manager) startSession(ctx context.Context, spec sessionStartSpec) (_ *Session, err error) { + agentDef, err := resolveWorkspaceAgent(spec.agentName, spec.workspace) + if err != nil { + return nil, fmt.Errorf("session: resolve workspace agent %q: %w", spec.agentName, err) + } + + startupPrompt, err := m.startupPrompt(ctx, spec.startupSessionContext(m.now()), agentDef, spec.workspace) + if err != nil { + return nil, err + } + agentDef.Prompt = startupPrompt + + resolved, err := spec.workspace.Config.ResolveAgent(agentDef) + if err != nil { + return nil, 
fmt.Errorf("session: resolve agent %q: %w", spec.agentName, err) + } + + startMCPServers, err := m.resolveStartMCPServers(ctx, spec.workspace, resolved.MCPServers) + if err != nil { + return nil, err + } + + if err := m.reserve(spec.sessionID, m.effectiveMaxSessions(spec.workspace.Config)); err != nil { + return nil, err + } + defer func() { + if err != nil { + m.releaseReservation(spec.sessionID) + } + }() + + sessionDir := filepath.Join(m.homePaths.SessionsDir, spec.sessionID) + if spec.cleanupSessionDir { + if err := os.MkdirAll(sessionDir, 0o755); err != nil { + return nil, fmt.Errorf("session: create session directory %q: %w", sessionDir, err) + } + } + + metaPath := store.SessionMetaFile(sessionDir) + dbPath := store.SessionDBFile(sessionDir) + recorder, err := m.openStore(ctx, spec.sessionID, dbPath) + if err != nil { + return nil, fmt.Errorf("session: open session store %q: %w", dbPath, err) + } + + var proc *AgentProcess + defer func() { + if err == nil { + return + } + + cleanupDir := "" + if spec.cleanupSessionDir { + cleanupDir = sessionDir + } + err = errors.Join(err, m.cleanupFailedStart(cleanupDir, recorder, proc)) + }() + + now := m.now() + createdAt := spec.createdAt + if createdAt.IsZero() { + createdAt = now + } + + session := &Session{ + ID: spec.sessionID, + Name: spec.sessionName, + AgentName: resolved.Name, + WorkspaceID: spec.workspace.ID, + Workspace: spec.workspace.RootDir, + Type: normalizeSessionType(spec.sessionType), + State: StateStarting, + stopReason: spec.stopReason, + stopDetail: spec.stopDetail, + ACPSessionID: spec.acpSessionID, + CreatedAt: createdAt, + UpdatedAt: now, + sessionDir: sessionDir, + metaPath: metaPath, + dbPath: dbPath, + recorder: recorder, + } + + startOpts := acp.StartOpts{ + AgentName: resolved.Name, + Command: resolved.Command, + Cwd: spec.workspace.RootDir, + AdditionalDirs: append([]string(nil), spec.workspace.AdditionalDirs...), + MCPServers: startMCPServers, + Permissions: m.startPermissions(session.Type, 
resolved.Permissions), + SystemPrompt: resolved.Prompt, + ResumeSessionID: spec.acpSessionID, + } + startOpts, err = m.dispatchAgentPreStart(ctx, session, resolved, startOpts) + if err != nil { + return nil, err + } + + if err := m.writeMeta(session); err != nil { + return nil, err + } + + proc, err = m.driver.Start(ctx, startOpts) + if err != nil { + return nil, fmt.Errorf("session: %s agent for %q: %w", spec.startAction, spec.sessionID, err) + } + + if err := m.activateAndWatch(ctx, session, proc, resolved, spec.postEvent); err != nil { + return nil, err + } + + return session, nil +} + +func (s sessionStartSpec) startupSessionContext(updatedAt time.Time) hookspkg.SessionContext { + ref := workref.NewRoot(s.workspace.ID, s.workspace.RootDir) + ctx := hookspkg.SessionContext{ + SessionID: strings.TrimSpace(s.sessionID), + SessionName: strings.TrimSpace(s.sessionName), + SessionType: string(normalizeSessionType(s.sessionType)), + AgentName: strings.TrimSpace(s.agentName), + WorkspaceID: ref.WorkspaceID, + Workspace: ref.Workspace, + ACPSessionID: strings.TrimSpace(s.acpSessionID), + State: string(StateStarting), + CreatedAt: s.createdAt, + } + if s.includePromptUpdatedAt { + ctx.UpdatedAt = updatedAt + } + return ctx +} diff --git a/internal/skills/registry.go b/internal/skills/registry.go index 654a74df8..a8ca453de 100644 --- a/internal/skills/registry.go +++ b/internal/skills/registry.go @@ -4,10 +4,7 @@ import ( "context" "errors" "fmt" - "io/fs" "log/slog" - "path" - "path/filepath" "slices" "strings" "sync" @@ -15,7 +12,6 @@ import ( "time" "github.com/pedronauck/agh/internal/filesnap" - hookspkg "github.com/pedronauck/agh/internal/hooks" workspacepkg "github.com/pedronauck/agh/internal/workspace" ) @@ -40,22 +36,6 @@ type Registry struct { now func() time.Time } -type wsCache struct { - skills map[string]*Skill - snapshots map[string]filesnap.Snapshot - lastAccess time.Time -} - -type workspaceLoad struct { - paths []workspaceSkillPath - snapshots 
map[string]filesnap.Snapshot -} - -type workspaceSkillPath struct { - filePath string - source SkillSource -} - // WithLogger injects the logger used for registry diagnostics. func WithLogger(logger *slog.Logger) Option { return func(registry *Registry) { @@ -452,24 +432,6 @@ func (r *Registry) verifyMarketplaceSkill(skill *Skill) error { return err } -func recordSidecarSnapshots(paths []string, snapshots map[string]filesnap.Snapshot) error { - for _, skillPath := range paths { - sidecarPath := filepath.Join(filepath.Dir(skillPath), sidecarFileName) - snapshot, err := filesnap.FromPath(sidecarPath) - if err != nil { - if errors.Is(err, fs.ErrNotExist) { - continue - } - - return fmt.Errorf("skills: snapshot provenance sidecar %q: %w", sidecarPath, err) - } - - snapshots[sidecarPath] = snapshot - } - - return nil -} - func (r *Registry) applyDisabled(skill *Skill, disabledSkills []string) { if skill == nil { return @@ -524,17 +486,6 @@ func (r *Registry) globalDisabledSkillsSnapshot() []string { return slices.Clone(r.cfg.DisabledSkills) } -func (r *Registry) workspaceDisabledSkillsSnapshot(cacheKey string, configured []string) []string { - r.mu.RLock() - defer r.mu.RUnlock() - - disabledSkills := mergeDisabledSkills(r.cfg.DisabledSkills, configured) - if cacheKey == "" { - return disabledSkills - } - return mergeDisabledSkills(disabledSkills, r.workspaceDisabled[cacheKey]) -} - func mergeDisabledSkills(base []string, extra []string) []string { merged := slices.Clone(base) for _, name := range extra { @@ -543,24 +494,6 @@ func mergeDisabledSkills(base []string, extra []string) []string { return merged } -func (r *Registry) workspaceSkillTargetLocked(name string, resolved *workspacepkg.ResolvedWorkspace) (string, *Skill) { - if resolved == nil { - return "", nil - } - - cacheKey := workspaceCacheKey(*resolved, nil) - if cacheKey == "" { - return "", nil - } - - cached := r.wsCache[cacheKey] - if cached == nil { - return cacheKey, nil - } - - return cacheKey, 
cached.skills[name] -} - func (r *Registry) overlaySkill(dst map[string]*Skill, skill *Skill) { if existing, ok := dst[skill.Meta.Name]; ok { r.logger.Warn( @@ -603,58 +536,6 @@ func (r *Registry) logVerificationWarnings(skill *Skill, warnings []Warning) { } } -func (r *Registry) workspaceLoadFromResolved(ctx context.Context, resolved workspacepkg.ResolvedWorkspace) (workspaceLoad, error) { - load := workspaceLoad{ - paths: make([]workspaceSkillPath, 0, len(resolved.Skills)), - snapshots: make(map[string]filesnap.Snapshot, len(resolved.Skills)), - } - - for _, skillPath := range resolved.Skills { - if err := checkRegistryContext(ctx); err != nil { - return workspaceLoad{}, err - } - - source, include, err := skillSourceFromWorkspacePath(skillPath.Source) - if err != nil { - return workspaceLoad{}, err - } - if !include { - continue - } - - skillDir := strings.TrimSpace(skillPath.Dir) - if skillDir == "" { - continue - } - - skillFile := filepath.Join(skillDir, skillFileName) - snapshot, err := filesnap.FromPath(skillFile) - if err != nil { - if errors.Is(err, fs.ErrNotExist) { - continue - } - return workspaceLoad{}, fmt.Errorf("skills: snapshot workspace skill %q: %w", skillFile, err) - } - - load.snapshots[skillFile] = snapshot - load.paths = append(load.paths, workspaceSkillPath{ - filePath: skillFile, - source: source, - }) - } - - return load, nil -} - -func (r *Registry) evictExpiredWorkspaceLocked(now time.Time) { - cutoff := now.Add(-workspaceCacheTTL) - for workspace, entry := range r.wsCache { - if entry.lastAccess.Before(cutoff) { - delete(r.wsCache, workspace) - } - } -} - func checkRegistryContext(ctx context.Context) error { if ctx == nil { return errors.New("skills: context is required") @@ -662,240 +543,6 @@ func checkRegistryContext(ctx context.Context) error { return ctx.Err() } -func hasCriticalWarning(warnings []Warning) bool { - for _, warning := range warnings { - if warning.Severity == SeverityCritical { - return true - } - } - return false 
-} - -func mergedSkillList(globalSkills, workspaceSkills map[string]*Skill) []*Skill { - if len(globalSkills) == 0 && len(workspaceSkills) == 0 { - return nil - } - - merged := make(map[string]*Skill, len(globalSkills)+len(workspaceSkills)) - for name, skill := range globalSkills { - merged[name] = skill - } - for name, skill := range workspaceSkills { - merged[name] = skill - } - - names := make([]string, 0, len(merged)) - for name := range merged { - names = append(names, name) - } - slices.Sort(names) - - skills := make([]*Skill, 0, len(names)) - for _, name := range names { - skills = append(skills, cloneSkill(merged[name])) - } - - return skills -} - -func cloneSkill(skill *Skill) *Skill { - if skill == nil { - return nil - } - - clone := *skill - clone.Meta = cloneSkillMeta(skill.Meta) - clone.MCPServers = cloneMCPServerDecls(skill.MCPServers) - if len(skill.Hooks) > 0 { - clone.Hooks = make([]hookspkg.HookDecl, 0, len(skill.Hooks)) - for idx, decl := range skill.Hooks { - cloned := decl - cloned.Args = append([]string(nil), decl.Args...) 
- cloned.Env = cloneStringMap(decl.Env) - cloned.Metadata = cloneStringMap(decl.Metadata) - if decl.Matcher.ToolReadOnly != nil { - value := *decl.Matcher.ToolReadOnly - cloned.Matcher.ToolReadOnly = &value - } - clone.Hooks = append(clone.Hooks, normalizeSkillHookDecl(skill, cloned, idx, len(skill.Hooks))) - } - } - clone.Provenance = cloneProvenance(skill.Provenance) - - return &clone -} - -func cloneSkillMeta(meta SkillMeta) SkillMeta { - clone := meta - clone.Metadata = cloneMetadataMap(meta.Metadata) - return clone -} - -func cloneMetadataMap(metadata map[string]any) map[string]any { - if metadata == nil { - return nil - } - - clone := make(map[string]any, len(metadata)) - for key, value := range metadata { - clone[key] = cloneMetadataValue(value) - } - - return clone -} - -func cloneMetadataValue(value any) any { - switch typed := value.(type) { - case map[string]any: - return cloneMetadataMap(typed) - case []any: - clone := make([]any, len(typed)) - for i := range typed { - clone[i] = cloneMetadataValue(typed[i]) - } - return clone - default: - return typed - } -} - -func cloneMCPServerDecls(decls []MCPServerDecl) []MCPServerDecl { - if decls == nil { - return nil - } - - clone := make([]MCPServerDecl, len(decls)) - for i, decl := range decls { - clone[i] = MCPServerDecl{ - Name: decl.Name, - Command: decl.Command, - Args: append([]string(nil), decl.Args...), - Env: cloneStringMap(decl.Env), - } - } - - return clone -} - -func cloneStringMap(input map[string]string) map[string]string { - if input == nil { - return nil - } - - clone := make(map[string]string, len(input)) - for key, value := range input { - clone[key] = value - } - - return clone -} - -func cloneProvenance(provenance *Provenance) *Provenance { - if provenance == nil { - return nil - } - - clone := *provenance - return &clone -} - -func (r *Registry) globalSnapshotState() (map[string]filesnap.Snapshot, bool) { - if r == nil { - return make(map[string]filesnap.Snapshot), false - } - - 
r.mu.RLock() - defer r.mu.RUnlock() - - return filesnap.Clone(r.globalSnapshots), r.globalLoaded -} - -func parseBundledSkill(fsys fs.FS, skillPath string) (*Skill, error) { - skill, _, err := parseBundledSkillDocument(fsys, skillPath) - return skill, err -} - -func readBundledSkillContent(fsys fs.FS, skillPath string) (string, error) { - _, body, err := parseBundledSkillDocument(fsys, skillPath) - if err != nil { - return "", err - } - return body, nil -} - -func parseBundledSkillDocument(fsys fs.FS, skillPath string) (*Skill, string, error) { - content, err := fs.ReadFile(fsys, skillPath) - if err != nil { - return nil, "", fmt.Errorf("skills: read bundled skill %q: %w", skillPath, err) - } - - dir := path.Dir(skillPath) - if dir == "." { - dir = "" - } - - return parseSkillDocument(skillPath, dir, content, SourceBundled) -} - -func scanBundledFS(fsys fs.FS) ([]string, error) { - paths := make([]string, 0, maxScanCandidates) - - walkErr := fs.WalkDir(fsys, ".", func(current string, entry fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } - if current == "." 
{ - return nil - } - - depth := fsPathDepth(current, entry.IsDir()) - if entry.IsDir() { - if shouldSkipDir(path.Base(current)) { - return fs.SkipDir - } - if depth > maxScanDepth { - return fs.SkipDir - } - return nil - } - - if path.Base(current) != skillFileName || depth > maxScanDepth { - return nil - } - - if _, err := fs.Stat(fsys, current); err != nil { - return err - } - - paths = append(paths, current) - if len(paths) >= maxScanCandidates { - return errScanLimitReached - } - - return nil - }) - if walkErr != nil && !errors.Is(walkErr, errScanLimitReached) { - return nil, fmt.Errorf("skills: scan bundled skills: %w", walkErr) - } - - slices.Sort(paths) - return paths, nil -} - -func fsPathDepth(current string, isDir bool) int { - trimmed := strings.Trim(current, "/") - if trimmed == "" { - return 0 - } - - parts := strings.Split(trimmed, "/") - if isDir { - return len(parts) - } - - return max(len(parts)-1, 0) -} - // SkillSourceName returns the canonical string label for a skill source. 
func SkillSourceName(source SkillSource) string { return skillSourceName(source) @@ -933,30 +580,6 @@ func skillSourceFromWorkspacePath(source string) (SkillSource, bool, error) { } } -func workspaceCacheKey(resolved workspacepkg.ResolvedWorkspace, paths []workspaceSkillPath) string { - if id := strings.TrimSpace(resolved.ID); id != "" { - return "id:" + id - } - if root := strings.TrimSpace(resolved.RootDir); root != "" { - return "root:" + root - } - if len(paths) == 0 { - return "" - } - - var builder strings.Builder - for _, path := range paths { - if builder.Len() > 0 { - builder.WriteByte('|') - } - builder.WriteString(skillSourceName(path.source)) - builder.WriteByte(':') - builder.WriteString(path.filePath) - } - - return builder.String() -} - func warningSeverityName(severity WarningSeverity) string { switch severity { case SeverityInfo: diff --git a/internal/skills/registry_snapshot.go b/internal/skills/registry_snapshot.go new file mode 100644 index 000000000..bb999b5f2 --- /dev/null +++ b/internal/skills/registry_snapshot.go @@ -0,0 +1,266 @@ +package skills + +import ( + "errors" + "fmt" + "io/fs" + "path" + "path/filepath" + "slices" + "strings" + + "github.com/pedronauck/agh/internal/filesnap" + hookspkg "github.com/pedronauck/agh/internal/hooks" +) + +func recordSidecarSnapshots(paths []string, snapshots map[string]filesnap.Snapshot) error { + for _, skillPath := range paths { + sidecarPath := filepath.Join(filepath.Dir(skillPath), sidecarFileName) + snapshot, err := filesnap.FromPath(sidecarPath) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + continue + } + + return fmt.Errorf("skills: snapshot provenance sidecar %q: %w", sidecarPath, err) + } + + snapshots[sidecarPath] = snapshot + } + + return nil +} + +func hasCriticalWarning(warnings []Warning) bool { + for _, warning := range warnings { + if warning.Severity == SeverityCritical { + return true + } + } + return false +} + +func mergedSkillList(globalSkills, workspaceSkills 
map[string]*Skill) []*Skill { + if len(globalSkills) == 0 && len(workspaceSkills) == 0 { + return nil + } + + merged := make(map[string]*Skill, len(globalSkills)+len(workspaceSkills)) + for name, skill := range globalSkills { + merged[name] = skill + } + for name, skill := range workspaceSkills { + merged[name] = skill + } + + names := make([]string, 0, len(merged)) + for name := range merged { + names = append(names, name) + } + slices.Sort(names) + + skills := make([]*Skill, 0, len(names)) + for _, name := range names { + skills = append(skills, cloneSkill(merged[name])) + } + + return skills +} + +func cloneSkill(skill *Skill) *Skill { + if skill == nil { + return nil + } + + clone := *skill + clone.Meta = cloneSkillMeta(skill.Meta) + clone.MCPServers = cloneMCPServerDecls(skill.MCPServers) + if len(skill.Hooks) > 0 { + clone.Hooks = make([]hookspkg.HookDecl, 0, len(skill.Hooks)) + for idx, decl := range skill.Hooks { + cloned := decl + cloned.Args = append([]string(nil), decl.Args...) 
+ cloned.Env = cloneStringMap(decl.Env) + cloned.Metadata = cloneStringMap(decl.Metadata) + if decl.Matcher.ToolReadOnly != nil { + value := *decl.Matcher.ToolReadOnly + cloned.Matcher.ToolReadOnly = &value + } + clone.Hooks = append(clone.Hooks, normalizeSkillHookDecl(skill, cloned, idx, len(skill.Hooks))) + } + } + clone.Provenance = cloneProvenance(skill.Provenance) + + return &clone +} + +func cloneSkillMeta(meta SkillMeta) SkillMeta { + clone := meta + clone.Metadata = cloneMetadataMap(meta.Metadata) + return clone +} + +func cloneMetadataMap(metadata map[string]any) map[string]any { + if metadata == nil { + return nil + } + + clone := make(map[string]any, len(metadata)) + for key, value := range metadata { + clone[key] = cloneMetadataValue(value) + } + + return clone +} + +func cloneMetadataValue(value any) any { + switch typed := value.(type) { + case map[string]any: + return cloneMetadataMap(typed) + case []any: + clone := make([]any, len(typed)) + for i := range typed { + clone[i] = cloneMetadataValue(typed[i]) + } + return clone + default: + return typed + } +} + +func cloneMCPServerDecls(decls []MCPServerDecl) []MCPServerDecl { + if decls == nil { + return nil + } + + clone := make([]MCPServerDecl, len(decls)) + for i, decl := range decls { + clone[i] = MCPServerDecl{ + Name: decl.Name, + Command: decl.Command, + Args: append([]string(nil), decl.Args...), + Env: cloneStringMap(decl.Env), + } + } + + return clone +} + +func cloneStringMap(input map[string]string) map[string]string { + if input == nil { + return nil + } + + clone := make(map[string]string, len(input)) + for key, value := range input { + clone[key] = value + } + + return clone +} + +func cloneProvenance(provenance *Provenance) *Provenance { + if provenance == nil { + return nil + } + + clone := *provenance + return &clone +} + +func (r *Registry) globalSnapshotState() (map[string]filesnap.Snapshot, bool) { + if r == nil { + return make(map[string]filesnap.Snapshot), false + } + + 
r.mu.RLock() + defer r.mu.RUnlock() + + return filesnap.Clone(r.globalSnapshots), r.globalLoaded +} + +func parseBundledSkill(fsys fs.FS, skillPath string) (*Skill, error) { + skill, _, err := parseBundledSkillDocument(fsys, skillPath) + return skill, err +} + +func readBundledSkillContent(fsys fs.FS, skillPath string) (string, error) { + _, body, err := parseBundledSkillDocument(fsys, skillPath) + if err != nil { + return "", err + } + return body, nil +} + +func parseBundledSkillDocument(fsys fs.FS, skillPath string) (*Skill, string, error) { + content, err := fs.ReadFile(fsys, skillPath) + if err != nil { + return nil, "", fmt.Errorf("skills: read bundled skill %q: %w", skillPath, err) + } + + dir := path.Dir(skillPath) + if dir == "." { + dir = "" + } + + return parseSkillDocument(skillPath, dir, content, SourceBundled) +} + +func scanBundledFS(fsys fs.FS) ([]string, error) { + paths := make([]string, 0, maxScanCandidates) + + walkErr := fs.WalkDir(fsys, ".", func(current string, entry fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if current == "." 
{ + return nil + } + + depth := fsPathDepth(current, entry.IsDir()) + if entry.IsDir() { + if shouldSkipDir(path.Base(current)) { + return fs.SkipDir + } + if depth > maxScanDepth { + return fs.SkipDir + } + return nil + } + + if path.Base(current) != skillFileName || depth > maxScanDepth { + return nil + } + + if _, err := fs.Stat(fsys, current); err != nil { + return err + } + + paths = append(paths, current) + if len(paths) >= maxScanCandidates { + return errScanLimitReached + } + + return nil + }) + if walkErr != nil && !errors.Is(walkErr, errScanLimitReached) { + return nil, fmt.Errorf("skills: scan bundled skills: %w", walkErr) + } + + slices.Sort(paths) + return paths, nil +} + +func fsPathDepth(current string, isDir bool) int { + trimmed := strings.Trim(current, "/") + if trimmed == "" { + return 0 + } + + parts := strings.Split(trimmed, "/") + if isDir { + return len(parts) + } + + return max(len(parts)-1, 0) +} diff --git a/internal/skills/registry_test.go b/internal/skills/registry_test.go index 34fc7893a..dd917771d 100644 --- a/internal/skills/registry_test.go +++ b/internal/skills/registry_test.go @@ -1286,6 +1286,52 @@ func TestRegistrySetEnabled(t *testing.T) { }) } +func TestRegistrySetEnabledUsesSkillOnlyWorkspaceCacheKey(t *testing.T) { + t.Parallel() + + workspaceDir := t.TempDir() + writeSkillFile(t, filepath.Join(workspaceDir, ".agh", "skills"), filepath.Join("workspace-skill", skillFileName), skillWithBody("workspace-skill", "Workspace skill", "body")) + + registry := newTestRegistry(t, RegistryConfig{}) + resolved := resolvedWorkspaceForTest("", "", + resolvedSkillPath(filepath.Join(workspaceDir, ".agh", "skills", "workspace-skill"), "workspace"), + ) + + if _, err := registry.ForWorkspace(context.Background(), resolved); err != nil { + t.Fatalf("ForWorkspace(skill-only) error = %v", err) + } + if entry := cacheEntryForWorkspace(t, registry, resolved); entry == nil { + t.Fatal("cacheEntryForWorkspace(skill-only) = nil, want cached workspace 
entry") + } + + if err := registry.SetEnabled("workspace-skill", &resolved, false); err != nil { + t.Fatalf("SetEnabled(skill-only workspace) error = %v", err) + } + + entry := cacheEntryForWorkspace(t, registry, resolved) + if entry == nil || entry.skills["workspace-skill"] == nil { + t.Fatalf("workspace cache entry = %#v, want workspace-skill override", entry) + } + if entry.skills["workspace-skill"].Enabled { + t.Fatal("workspace-skill enabled = true, want false after SetEnabled") + } +} + +func TestWorkspaceLoadFromResolvedWrapsWorkspaceSourceErrors(t *testing.T) { + t.Parallel() + + registry := newTestRegistry(t, RegistryConfig{}) + _, err := registry.workspaceLoadFromResolved(context.Background(), resolvedWorkspaceForTest("ws-invalid-source", "", + resolvedSkillPath(t.TempDir(), "unknown-source"), + )) + if err == nil { + t.Fatal("workspaceLoadFromResolved(invalid source) error = nil, want failure") + } + if !strings.Contains(err.Error(), `skills: resolve workspace skill source "unknown-source"`) { + t.Fatalf("workspaceLoadFromResolved(invalid source) error = %v, want wrapped source context", err) + } +} + func newTestRegistry(t *testing.T, cfg RegistryConfig, opts ...Option) *Registry { t.Helper() @@ -1362,7 +1408,11 @@ func cacheEntryForWorkspace(t *testing.T, registry *Registry, workspace workspac registry.mu.RLock() defer registry.mu.RUnlock() - return registry.wsCache[workspaceCacheKey(workspace, nil)] + paths, ok := workspaceCacheKeyPaths(workspace) + if !ok { + return nil + } + return registry.wsCache[workspaceCacheKey(workspace, paths)] } func resolvedWorkspaceForTest(id string, root string, skills ...workspacepkg.SkillPath) workspacepkg.ResolvedWorkspace { diff --git a/internal/skills/registry_workspace_cache.go b/internal/skills/registry_workspace_cache.go new file mode 100644 index 000000000..5d8f36408 --- /dev/null +++ b/internal/skills/registry_workspace_cache.go @@ -0,0 +1,165 @@ +package skills + +import ( + "context" + "errors" + "fmt" + 
"io/fs" + "path/filepath" + "strings" + "time" + + "github.com/pedronauck/agh/internal/filesnap" + workspacepkg "github.com/pedronauck/agh/internal/workspace" +) + +type wsCache struct { + skills map[string]*Skill + snapshots map[string]filesnap.Snapshot + lastAccess time.Time +} + +type workspaceLoad struct { + paths []workspaceSkillPath + snapshots map[string]filesnap.Snapshot +} + +type workspaceSkillPath struct { + filePath string + source SkillSource +} + +func (r *Registry) workspaceDisabledSkillsSnapshot(cacheKey string, configured []string) []string { + r.mu.RLock() + defer r.mu.RUnlock() + + disabledSkills := mergeDisabledSkills(r.cfg.DisabledSkills, configured) + if cacheKey == "" { + return disabledSkills + } + return mergeDisabledSkills(disabledSkills, r.workspaceDisabled[cacheKey]) +} + +func (r *Registry) workspaceSkillTargetLocked(name string, resolved *workspacepkg.ResolvedWorkspace) (string, *Skill) { + if resolved == nil { + return "", nil + } + + paths, ok := workspaceCacheKeyPaths(*resolved) + if !ok { + return "", nil + } + + cacheKey := workspaceCacheKey(*resolved, paths) + if cacheKey == "" { + return "", nil + } + + cached := r.wsCache[cacheKey] + if cached == nil { + return cacheKey, nil + } + + return cacheKey, cached.skills[name] +} + +func (r *Registry) workspaceLoadFromResolved(ctx context.Context, resolved workspacepkg.ResolvedWorkspace) (workspaceLoad, error) { + load := workspaceLoad{ + paths: make([]workspaceSkillPath, 0, len(resolved.Skills)), + snapshots: make(map[string]filesnap.Snapshot, len(resolved.Skills)), + } + + for _, skillPath := range resolved.Skills { + if err := checkRegistryContext(ctx); err != nil { + return workspaceLoad{}, fmt.Errorf("skills: check registry context while loading workspace skills: %w", err) + } + + path, include, err := workspaceSkillLoadPath(skillPath) + if err != nil { + return workspaceLoad{}, err + } + if !include { + continue + } + + snapshot, err := filesnap.FromPath(path.filePath) + if err 
!= nil { + if errors.Is(err, fs.ErrNotExist) { + continue + } + return workspaceLoad{}, fmt.Errorf("skills: snapshot workspace skill %q: %w", path.filePath, err) + } + + load.snapshots[path.filePath] = snapshot + load.paths = append(load.paths, path) + } + + return load, nil +} + +func workspaceCacheKeyPaths(resolved workspacepkg.ResolvedWorkspace) ([]workspaceSkillPath, bool) { + paths := make([]workspaceSkillPath, 0, len(resolved.Skills)) + for _, skillPath := range resolved.Skills { + path, include, err := workspaceSkillLoadPath(skillPath) + if err != nil { + return nil, false + } + if include { + paths = append(paths, path) + } + } + return paths, true +} + +func workspaceSkillLoadPath(skillPath workspacepkg.SkillPath) (workspaceSkillPath, bool, error) { + source, include, err := skillSourceFromWorkspacePath(skillPath.Source) + if err != nil { + return workspaceSkillPath{}, false, fmt.Errorf("skills: resolve workspace skill source %q: %w", skillPath.Source, err) + } + if !include { + return workspaceSkillPath{}, false, nil + } + + skillDir := strings.TrimSpace(skillPath.Dir) + if skillDir == "" { + return workspaceSkillPath{}, false, nil + } + + return workspaceSkillPath{ + filePath: filepath.Join(skillDir, skillFileName), + source: source, + }, true, nil +} + +func (r *Registry) evictExpiredWorkspaceLocked(now time.Time) { + cutoff := now.Add(-workspaceCacheTTL) + for workspace, entry := range r.wsCache { + if entry.lastAccess.Before(cutoff) { + delete(r.wsCache, workspace) + } + } +} + +func workspaceCacheKey(resolved workspacepkg.ResolvedWorkspace, paths []workspaceSkillPath) string { + if id := strings.TrimSpace(resolved.ID); id != "" { + return "id:" + id + } + if root := strings.TrimSpace(resolved.RootDir); root != "" { + return "root:" + root + } + if len(paths) == 0 { + return "" + } + + var builder strings.Builder + for _, path := range paths { + if builder.Len() > 0 { + builder.WriteByte('|') + } + builder.WriteString(skillSourceName(path.source)) + 
builder.WriteByte(':') + builder.WriteString(path.filePath) + } + + return builder.String() +} diff --git a/internal/sse/decode.go b/internal/sse/decode.go new file mode 100644 index 000000000..9acefdcaa --- /dev/null +++ b/internal/sse/decode.go @@ -0,0 +1,120 @@ +// Package sse provides shared server-sent event decoding helpers. +package sse + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "reflect" + "strings" +) + +const maxLineBytes = 1024 * 1024 + +// Event is one parsed server-sent event frame. +type Event struct { + ID string + Event string + Data json.RawMessage +} + +// Handler consumes parsed SSE frames. +type Handler func(Event) error + +// ErrStop stops SSE decoding without surfacing an error. +var ErrStop = errors.New("sse: stop stream") + +// Decode reads one SSE stream from body until EOF, context cancellation, or a +// handler error. +func Decode(ctx context.Context, body io.Reader, handler Handler) error { + if ctx == nil { + return fmt.Errorf("sse: context is required") + } + if readerIsNil(body) { + return fmt.Errorf("sse: body is required") + } + if handler == nil { + return fmt.Errorf("sse: handler is required") + } + + scanner := bufio.NewScanner(body) + scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes) + + event := Event{} + dataLines := make([]string, 0, 4) + emit := func() error { + if event.ID == "" && event.Event == "" && len(dataLines) == 0 { + return nil + } + if len(dataLines) > 0 { + event.Data = json.RawMessage(strings.Join(dataLines, "\n")) + } + err := handler(event) + event = Event{} + dataLines = dataLines[:0] + return err + } + + for scanner.Scan() { + if err := ctx.Err(); err != nil { + return err + } + + line := scanner.Text() + if line == "" { + if err := emit(); err != nil { + if errors.Is(err, ErrStop) { + return nil + } + return err + } + continue + } + if strings.HasPrefix(line, ":") { + continue + } + + field, value, found := strings.Cut(line, ":") + if !found { + continue + } + value 
= strings.TrimPrefix(value, " ") + + switch field { + case "id": + event.ID = value + case "event": + event.Event = value + case "data": + dataLines = append(dataLines, value) + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("sse: read stream: %w", err) + } + if err := emit(); err != nil { + if errors.Is(err, ErrStop) { + return nil + } + return err + } + return nil +} + +func readerIsNil(reader io.Reader) bool { + if reader == nil { + return true + } + + value := reflect.ValueOf(reader) + switch value.Kind() { + case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Pointer, reflect.Slice: + return value.IsNil() + default: + return false + } +} diff --git a/internal/sse/decode_test.go b/internal/sse/decode_test.go new file mode 100644 index 000000000..faeb9287d --- /dev/null +++ b/internal/sse/decode_test.go @@ -0,0 +1,54 @@ +package sse + +import ( + "context" + "io" + "strings" + "testing" +) + +func TestDecodeRejectsNilArguments(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + ctx context.Context + body io.Reader + handler Handler + wantErr string + }{ + { + name: "Should reject nil context", + ctx: nil, + body: strings.NewReader("event: ping\n\n"), + handler: func(Event) error { return nil }, + wantErr: "sse: context is required", + }, + { + name: "Should reject nil body", + ctx: context.Background(), + body: nil, + handler: func(Event) error { return nil }, + wantErr: "sse: body is required", + }, + { + name: "Should reject nil handler", + ctx: context.Background(), + body: strings.NewReader("event: ping\n\n"), + handler: nil, + wantErr: "sse: handler is required", + }, + } + + for _, tt := range testCases { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + err := Decode(tt.ctx, tt.body, tt.handler) + if err == nil || err.Error() != tt.wantErr { + t.Fatalf("Decode() error = %v, want %q", err, tt.wantErr) + } + }) + } +} diff --git a/internal/transcript/transcript.go 
b/internal/transcript/transcript.go index 5e0313690..ae9cbdc30 100644 --- a/internal/transcript/transcript.go +++ b/internal/transcript/transcript.go @@ -614,8 +614,7 @@ func firstNonNil(values ...any) any { return nil } -// CanonicalPayload returns the stored canonical event envelope for replay-aware events. -func CanonicalPayload(eventType string, turnID string, timestamp time.Time, text string, toolName string, toolCallID string, toolInput json.RawMessage, toolResult *ToolResult, toolError bool) ([]byte, error) { +func canonicalPayload(eventType string, turnID string, timestamp time.Time, text string, toolName string, toolCallID string, toolInput json.RawMessage, toolResult *ToolResult, toolError bool) ([]byte, error) { payload := canonicalEventPayload{ Schema: CanonicalSchema, Type: strings.TrimSpace(eventType), diff --git a/internal/transcript/transcript_test.go b/internal/transcript/transcript_test.go index a0a8f14d8..d213ba241 100644 --- a/internal/transcript/transcript_test.go +++ b/internal/transcript/transcript_test.go @@ -358,9 +358,9 @@ func TestMarshalAgentEventPreservesRawToolResultShape(t *testing.T) { func mustMarshalCanonical(t *testing.T, eventType string, turnID string, timestamp time.Time, text string, toolName string, toolCallID string, toolInput json.RawMessage, toolResult *ToolResult, toolError bool) string { t.Helper() - payload, err := CanonicalPayload(eventType, turnID, timestamp, text, toolName, toolCallID, toolInput, toolResult, toolError) + payload, err := canonicalPayload(eventType, turnID, timestamp, text, toolName, toolCallID, toolInput, toolResult, toolError) if err != nil { - t.Fatalf("CanonicalPayload() error = %v", err) + t.Fatalf("canonicalPayload() error = %v", err) } return string(payload) } diff --git a/internal/workref/ref.go b/internal/workref/ref.go new file mode 100644 index 000000000..f14818725 --- /dev/null +++ b/internal/workref/ref.go @@ -0,0 +1,33 @@ +// Package workref provides tiny shared workspace reference value 
objects used to +// pass workspace identifiers and paths through transport and runtime helpers. +package workref + +import "strings" + +// PathRef identifies one workspace by id plus transport-facing filesystem path. +type PathRef struct { + WorkspaceID string `json:"workspace_id,omitempty" yaml:"workspace_id,omitempty"` + WorkspacePath string `json:"workspace_path,omitempty" yaml:"workspace_path,omitempty"` +} + +// RootRef identifies one workspace by id plus runtime/root-directory path. +type RootRef struct { + WorkspaceID string `json:"workspace_id,omitempty" yaml:"workspace_id,omitempty"` + Workspace string `json:"workspace,omitempty" yaml:"workspace,omitempty"` +} + +// NewPath constructs one normalized transport-facing workspace reference. +func NewPath(id string, path string) PathRef { + return PathRef{ + WorkspaceID: strings.TrimSpace(id), + WorkspacePath: strings.TrimSpace(path), + } +} + +// NewRoot constructs one normalized runtime-facing workspace reference. +func NewRoot(id string, root string) RootRef { + return RootRef{ + WorkspaceID: strings.TrimSpace(id), + Workspace: strings.TrimSpace(root), + } +} diff --git a/internal/workspace/options.go b/internal/workspace/options.go index 6739728dd..975418ca7 100644 --- a/internal/workspace/options.go +++ b/internal/workspace/options.go @@ -46,8 +46,7 @@ func WithLogger(logger *slog.Logger) Option { } } -// WithNow overrides the clock used for timestamps and cache eviction. 
-func WithNow(now func() time.Time) Option { +func withNow(now func() time.Time) Option { return func(opts *resolverOptions) { opts.now = now } diff --git a/internal/workspace/resolver_test.go b/internal/workspace/resolver_test.go index 3c41b6ec7..327bc1fa0 100644 --- a/internal/workspace/resolver_test.go +++ b/internal/workspace/resolver_test.go @@ -322,7 +322,7 @@ func TestResolveCacheHitInvalidateAndEviction(t *testing.T) { resolver := newTestResolver(t, store, WithHomePaths(homePaths), WithConfigLoader(loader.Load), - WithNow(func() time.Time { return currentTime }), + withNow(func() time.Time { return currentTime }), WithCacheTTL(10*time.Minute), ) @@ -545,7 +545,7 @@ func TestNewResolverValidatesDependenciesAndDefaults(t *testing.T) { resolver, err := NewResolver(store, WithLogger(nil), - WithNow(nil), + withNow(nil), WithCacheTTL(0), WithIDGenerator(nil), ) diff --git a/skills-lock.json b/skills-lock.json index e35e87ad2..28be03152 100644 --- a/skills-lock.json +++ b/skills-lock.json @@ -61,6 +61,11 @@ "sourceType": "github", "computedHash": "8bc7a75020275e4ad1813cb4b616dc9e0c7624a6e71178f98341e69c39bd65f4" }, + "kodebase": { + "source": "pedronauck/kodebase-go", + "sourceType": "github", + "computedHash": "66fe9703df1731b1db4b32093be7dcd67b925ee264e781f462c4ea25e77a30d0" + }, "mermaid-diagrams": { "source": "pedronauck/skills", "sourceType": "github", diff --git a/web/package.json b/web/package.json index 3fa55d5d6..49683d6be 100644 --- a/web/package.json +++ b/web/package.json @@ -29,23 +29,17 @@ "clsx": "^2.1.1", "cmdk": "^1.1.1", "date-fns": "^4.1.0", - "embla-carousel-react": "^8.6.0", - "input-otp": "^1.4.2", "lucide-react": "^1.7.0", "next-themes": "^0.4.6", "react": "^19.2.0", - "react-day-picker": "^9.14.0", "react-dom": "^19.2.0", "react-markdown": "^10.1.0", - "react-resizable-panels": "^4.9.0", "react-syntax-highlighter": "^16.1.1", - "recharts": "3.8.0", "remark-gfm": "^4.0.1", "sonner": "^2.0.7", "tailwind-merge": "^3.5.0", "tailwindcss": 
"^4.2.1", "tw-animate-css": "^1.4.0", - "vaul": "^1.1.2", "zod": "^4.3.0", "zustand": "^5.0.11" }, diff --git a/web/src/components/ui/alert-dialog.tsx b/web/src/components/ui/alert-dialog.tsx deleted file mode 100644 index f5dc22bed..000000000 --- a/web/src/components/ui/alert-dialog.tsx +++ /dev/null @@ -1,160 +0,0 @@ -import * as React from "react"; -import { AlertDialog as AlertDialogPrimitive } from "@base-ui/react/alert-dialog"; - -import { cn } from "@/lib/utils"; -import { Button } from "@/components/ui/button"; - -function AlertDialog({ ...props }: AlertDialogPrimitive.Root.Props) { - return ; -} - -function AlertDialogTrigger({ ...props }: AlertDialogPrimitive.Trigger.Props) { - return ; -} - -function AlertDialogPortal({ ...props }: AlertDialogPrimitive.Portal.Props) { - return ; -} - -function AlertDialogOverlay({ className, ...props }: AlertDialogPrimitive.Backdrop.Props) { - return ( - - ); -} - -function AlertDialogContent({ - className, - size = "default", - ...props -}: AlertDialogPrimitive.Popup.Props & { - size?: "default" | "sm"; -}) { - return ( - - - - - ); -} - -function AlertDialogHeader({ className, ...props }: React.ComponentProps<"div">) { - return ( -
- ); -} - -function AlertDialogFooter({ className, ...props }: React.ComponentProps<"div">) { - return ( -
- ); -} - -function AlertDialogMedia({ className, ...props }: React.ComponentProps<"div">) { - return ( -
- ); -} - -function AlertDialogTitle({ - className, - ...props -}: React.ComponentProps) { - return ( - - ); -} - -function AlertDialogDescription({ - className, - ...props -}: React.ComponentProps) { - return ( - - ); -} - -function AlertDialogAction({ className, ...props }: React.ComponentProps) { - return - ); -} - -function CarouselNext({ - className, - variant = "outline", - size = "icon-sm", - ...props -}: React.ComponentProps) { - const { orientation, scrollNext, canScrollNext } = useCarousel(); - - return ( - - ); -} - -export { - type CarouselApi, - Carousel, - CarouselContent, - CarouselItem, - CarouselPrevious, - CarouselNext, - useCarousel, -}; diff --git a/web/src/components/ui/chart.tsx b/web/src/components/ui/chart.tsx deleted file mode 100644 index fe6abfa18..000000000 --- a/web/src/components/ui/chart.tsx +++ /dev/null @@ -1,339 +0,0 @@ -"use client"; - -import * as React from "react"; -import * as RechartsPrimitive from "recharts"; -import type { TooltipValueType } from "recharts"; - -import { cn } from "@/lib/utils"; - -// Format: { THEME_NAME: CSS_SELECTOR } -const THEMES = { light: "", dark: ".dark" } as const; - -const INITIAL_DIMENSION = { width: 320, height: 200 } as const; -type TooltipNameType = number | string; - -export type ChartConfig = Record< - string, - { - label?: React.ReactNode; - icon?: React.ComponentType; - } & ( - | { color?: string; theme?: never } - | { color?: never; theme: Record } - ) ->; - -type ChartContextProps = { - config: ChartConfig; -}; - -const ChartContext = React.createContext(null); - -function useChart() { - const context = React.useContext(ChartContext); - - if (!context) { - throw new Error("useChart must be used within a "); - } - - return context; -} - -function ChartContainer({ - id, - className, - children, - config, - initialDimension = INITIAL_DIMENSION, - ...props -}: React.ComponentProps<"div"> & { - config: ChartConfig; - children: React.ComponentProps["children"]; - initialDimension?: { - width: 
number; - height: number; - }; -}) { - const uniqueId = React.useId(); - const chartId = `chart-${id ?? uniqueId.replace(/:/g, "")}`; - - return ( - -
- - - {children} - -
-
- ); -} - -const ChartStyle = ({ id, config }: { id: string; config: ChartConfig }) => { - const colorConfig = Object.entries(config).filter(([, config]) => config.theme ?? config.color); - - if (!colorConfig.length) { - return null; - } - - return ( -